Data preprocessing utilities for some specific tasks.
pip install --upgrade git+'https://github.com/p4zaa/DataPrepKit.git'
from ThaiTextPrepKit import pretextkit as preprocess
from TabularPrepKit import tablekit as tablekit
# Run the ThaiTextPrepKit preprocessing over 'text_column' and store the
# result in a new 'preprocessed_text' column.
# NOTE: assumes `polars` is imported as `pl` and `df` is an existing
# polars DataFrame with a 'text_column' column -- neither is defined here.
df = df.with_columns(
    pl.col('text_column')
    # map_batches hands the column to the callable as a Series, which is
    # forwarded to preprocess_text_polars via its `series` argument.
    .map_batches(
        lambda text: preprocess.preprocess_text_polars(
            series=text,
            keep_stopwords=False,
            keep_format=True,
            # Was misspelled `Fasle`, an undefined name that raised
            # NameError as soon as the lambda was evaluated.
            return_token_list=False,
        )
    )
    .alias('preprocessed_text')
)