def wordlist_docrequest(docpath, request_list):
    """Return the JSON-encoded ``docsimlarity.tf_text`` result for one document.

    The document at ``docpath`` is passed through ``docsimlarity.fileio`` and
    then ``docsimlarity.textread``; the output of that is handed to
    ``docsimlarity.tf_text`` and the result is serialized with ``json.dumps``.

    Args:
        docpath: Location of the document, forwarded as-is to
            ``docsimlarity.fileio``.
        request_list: Accepted for interface compatibility only; the incoming
            value is never read.

    Returns:
        A JSON string of whatever ``docsimlarity.tf_text`` produced
        (presumably a term-frequency structure -- confirm against
        ``docsimlarity``).
    """
    loaded = docsimlarity.fileio(docpath)
    text = docsimlarity.textread(loaded)
    return json.dumps(docsimlarity.tf_text(text))
def wordlist_train(dirpath, listall):
    """Return a JSON object mapping each directory entry's digest to its term data.

    Every name returned by ``os.listdir(dirpath)`` is processed: ``md5`` is
    called on its path to obtain the key, and the path is run through
    ``docsimlarity.fileio`` -> ``docsimlarity.textread`` ->
    ``docsimlarity.tf_text`` to obtain the value.

    Args:
        dirpath: Directory whose entries are processed. A trailing path
            separator is optional.
        listall: Accepted for interface compatibility only; the incoming
            value is never read.

    Returns:
        A JSON string of the ``{md5(path): tf_text(...)}`` dictionary. Entries
        for which ``md5`` returns the same key overwrite one another, so only
        the last one listed is kept.

    Raises:
        OSError: If ``dirpath`` cannot be listed by ``os.listdir``.
    """
    dictall_md5 = {}
    for sample in os.listdir(dirpath):
        # Join instead of concatenating: "dir" + "a.txt" would give "dira.txt"
        # whenever dirpath is passed without a trailing separator.
        samplepath = os.path.join(dirpath, sample)
        filemd5 = md5(samplepath)
        gettext = docsimlarity.fileio(samplepath)
        gettext = docsimlarity.textread(gettext)
        dictall_md5[filemd5] = docsimlarity.tf_text(gettext)
    return json.dumps(dictall_md5)