55 lines
1.7 KiB
Python
55 lines
1.7 KiB
Python
# Copyright (c) Opendatalab. All rights reserved.
|
|
import re
|
|
|
|
|
|
def is_hyphen_at_line_end(line):
    """Report whether a line finishes with a hyphenated word fragment.

    A line qualifies when its tail is a run of one or more ASCII letters
    immediately followed by a hyphen. Any whitespace after the hyphen
    (spaces, tabs, a trailing newline) is ignored.

    Args:
        line (str): The line of text to inspect.

    Returns:
        bool: True if the line ends with letters plus a hyphen (optionally
            followed by whitespace), False otherwise.
    """
    # Anchor at the end of the string: letters, then '-', then optional whitespace.
    trailing_hyphen = re.search(r'[A-Za-z]+-\s*$', line)
    return trailing_hyphen is not None
|
|
|
|
|
|
def full_to_half_exclude_marks(text: str) -> str:
    """Convert full-width letters and digits to half-width, leaving marks alone.

    Only the full-width Latin capitals (U+FF21-U+FF3A), Latin small letters
    (U+FF41-U+FF5A) and digits (U+FF10-U+FF19) are mapped to their ASCII
    counterparts. Full-width punctuation and every other character are
    copied through unchanged.

    Args:
        text: String that may contain full-width characters

    Returns:
        String with full-width letters and digits converted to half-width
    """

    def _narrow(symbol):
        # Return the ASCII form of a full-width letter/digit, else the input.
        point = ord(symbol)
        is_upper = 0xFF21 <= point <= 0xFF3A
        is_lower = 0xFF41 <= point <= 0xFF5A
        is_digit = 0xFF10 <= point <= 0xFF19
        if not (is_upper or is_lower or is_digit):
            return symbol
        # Full-width forms sit exactly 0xFEE0 above their ASCII equivalents.
        return chr(point - 0xFEE0)

    return ''.join(map(_narrow, text))
|
|
|
|
|
|
def full_to_half(text: str) -> str:
    """Convert full-width letters, digits and punctuation to half-width.

    Every character in the range U+FF01-U+FF5E is shifted down by 0xFEE0 to
    its ASCII counterpart. Characters outside that range (including the
    ideographic space U+3000) are copied through unchanged.

    Args:
        text: String that may contain full-width characters

    Returns:
        String with full-width characters converted to half-width
    """
    return ''.join(
        # Full-width forms sit exactly 0xFEE0 above their ASCII equivalents.
        chr(ord(symbol) - 0xFEE0) if 0xFF01 <= ord(symbol) <= 0xFF5E else symbol
        for symbol in text
    )