Functions | |
def | expand_abbreviations (text) |
def | expand_numbers (text) |
def | lowercase (text) |
def | collapse_whitespace (text) |
def | convert_to_ascii (text) |
def | basic_cleaners (text) |
def | transliteration_cleaners (text) |
def | english_cleaners (text) |
Variables | |
_whitespace_re = re.compile(r'\s+') | |
list | _abbreviations |
def text.cleaners.expand_abbreviations | ( | text | ) |
def text.cleaners.expand_numbers | ( | text | ) |
def text.cleaners.lowercase | ( | text | ) |
def text.cleaners.collapse_whitespace | ( | text | ) |
def text.cleaners.convert_to_ascii | ( | text | ) |
def text.cleaners.basic_cleaners | ( | text | ) |
Basic pipeline that lowercases and collapses whitespace without transliteration.
def text.cleaners.transliteration_cleaners | ( | text | ) |
Pipeline for non-English text that transliterates to ASCII.
def text.cleaners.english_cleaners | ( | text | ) |
Pipeline for English text, including number and abbreviation expansion.
text.cleaners._whitespace_re = re.compile(r'\s+') |
private |
list text.cleaners._abbreviations |
private |