Python clean_text示例

编程语言: Python

命名空间/包名称: clean

方法/功能: clean_text

hotexamples.com的示例: 7

Python clean_text - 共找到 7 个示例。以下是从开源项目中提取的、评价最高的 clean.clean_text 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： Reference.py 项目： deadlift1226/wfa-book-of-mormon

def make_references(data):
    """Build one ``Reference`` per entry in *data*.

    Each entry is read with ``.get('reference')`` and ``.get('text')``; the
    text is passed through ``clean_text`` before being handed to
    ``Reference``. The resulting objects are returned as a list, in the
    same order as the input.
    """
    return [
        Reference(entry.get('reference'), clean_text(entry.get('text')))
        for entry in data
    ]

示例#2

显示文件

文件： calcular_tfidf.py 项目： JuanBojorquez/Practica1

def carga_textos(folder,termina):
  """Load and clean every file in *folder* whose name ends with *termina*.

  Each matching file is read with ``lector.leer_archivo`` and then passed
  through ``clean.clean_text``.

  Returns the list of cleaned texts. If an ``IOError`` is raised while
  listing the folder or reading a file, the error is printed and an empty
  list is returned (texts collected before the failure are discarded).
  """
  lista_textos=[]  # cleaned contents, one entry per matching file
  try:
    for archivo in os.listdir(folder):
      if not archivo.endswith(termina):
        continue
      texto=lector.leer_archivo(os.path.join(folder,archivo))
      lista_textos.append(clean.clean_text(texto))
  except IOError as e:
    # Previously ``pirnt(e)``: the typo raised NameError from inside the
    # handler instead of reporting the I/O error and falling back to [].
    print(e)
    lista_textos=[]
  return lista_textos

示例#3

显示文件

文件： spacy_module.py 项目： zachhartzog/nlp-relationship-extraction

 def get_nlp(self, text, display = False):
     """Clean *text*, run it through ``self.nlp`` and return the result.

     When *display* is truthy, the parsed document is handed to
     ``displacy.serve`` with the ``"dep"`` style before being returned.
     """
     doc = self.nlp(clean.clean_text(text))
     if not display:
         return doc
     displacy.serve(doc, style="dep")
     return doc

示例#4

显示文件

文件： data.py 项目： Deepayan137/spell-check

 def _clean_(self, text):
     """Return *text* after passing it through ``clean_text``."""
     cleaned = clean_text(text)
     return cleaned

示例#5

显示文件


def read_datapoints(FILE_PATH: str) -> List[Dict]:
    """Read a headerless tab-separated file into a list of row dicts.

    Args:
        FILE_PATH: Path of the file to read.

    Returns:
        One dict per row, keyed by ``'target'`` and ``'sms'`` (the first and
        second tab-separated fields respectively).
    """
    # newline='' hands raw line endings to the csv module, as its
    # documentation requires; without it, newlines embedded inside quoted
    # fields are translated before the parser sees them.
    with open(FILE_PATH, newline='') as f:
        reader = csv.DictReader(f,
                                delimiter='\t',
                                fieldnames=['target', 'sms'])
        return list(reader)


if __name__ == "__main__":
    # Load the raw rows into a DataFrame.
    args = read_args()
    df = pd.DataFrame(read_datapoints(args.data_path))

    # Clean every message, record the cleaned length, and turn the labels
    # into booleans ('ham' -> True, 'spam' -> False).
    df['sms'] = df['sms'].apply(clean_text)
    df['sms_length'] = df['sms'].apply(len)
    label_map = {'ham': True, 'spam': False}
    df['target'] = df['target'].replace(label_map)

    # -1 marks rows that have not been assigned a fold yet; the rows are
    # then shuffled and re-indexed from 0.
    df["kfold"] = -1
    df = df.sample(frac=1).reset_index(drop=True)

    # Tag each row with the number of the split whose second index array
    # (as yielded by ``split``) contains it.
    splitter = StratifiedKFold(n_splits=5)
    for fold, (_, fold_rows) in enumerate(splitter.split(X=df, y=df['target'])):
        df.loc[fold_rows, 'kfold'] = fold

    # Write the result as a tab-separated file without the index column.
    out_path = os.path.join(args.output_dir, 'cleaned_data.csv')
    df.to_csv(out_path, index=False, sep='\t')

示例#6

显示文件

文件： prompt.py 项目： milesmcc/StoryCleaner

import clean

# Cleaned articles collected during this session, in the order they were entered.
articles = []

def give_prompt():
    """Print a separator line, the number of stored articles, and the entry instructions."""
    stored = len(articles)
    print("----------")
    print("Currently stored: %s\nPlease enter each article below, followed by 'end'. Enter 'stop' when finished.\n" % stored)

recent_inputs = []  # lines of the article currently being entered

give_prompt()
while True:
    # Every entered line keeps a trailing newline so joined articles preserve
    # their line structure.
    text_input = input("> ") + "\n"
    command = text_input.lower()
    # The on-screen instructions tell the user to enter 'stop' when finished,
    # but only 'save' used to be recognised; accept both so the documented
    # command actually ends the session.
    if command in ("save\n", "stop\n"):
        with open("output.txt", "w") as outfile:
            total = len(articles)
            # Number from 1 so headers read "1 of N" .. "N of N" rather than
            # starting at "0 of N".
            for number, article in enumerate(articles, start=1):
                outfile.write("ARTICLE %s of %s -- Ordering: ___ / %s\n\n" % (number, total, total))
                outfile.write(article)
                outfile.write("\n----------\n\n")
        break
    if command == "end\n":
        # 'end' closes the current article: join its lines, clean, echo, store.
        cleaned = clean.clean_text("".join(recent_inputs))
        print(cleaned)
        articles.append(cleaned)
        recent_inputs = []
        give_prompt()
    else:
        recent_inputs.append(text_input)

示例#7

显示文件

def normalize(file_text):
    """Normalize *file_text* and return the result.

    The text is passed, in order, through ``clean_html_tags``,
    ``clean_text`` and ``neologdn.normalize``.
    """
    without_tags = clean_html_tags(file_text)
    cleaned = clean_text(without_tags)
    # The normalize_number pass is currently disabled at this point.
    return neologdn.normalize(cleaned)