def classify_text_files(files_root_path, result_path):
    """Classify every JSON file in a directory and file it under its category.

    Each entry of ``files_root_path`` is read and parsed as JSON; the value
    stored under its ``'post_content'`` key is handed to
    ``passage_first_level_classify``.  A file with the same name is then
    created at ``<result_path>/<category>/<name>``, and the category
    directory is created first when it does not exist yet.

    Entries that fail to parse or classify are reported on stdout and
    skipped; the remaining entries are still processed.

    Args:
        files_root_path: Directory whose entries are classified.
        result_path: Directory that receives one sub-directory per category.

    Raises:
        IOError/OSError: If an input entry cannot be read, or the category
            directory or output file cannot be created.
    """
    for index, name in enumerate(os.listdir(files_root_path)):
        # Progress trace: zero-based position and file name.
        print('%s:%s' % (index, name))

        # Read through a context manager so the input handle is closed
        # immediately instead of lingering until garbage collection.
        with codecs.open(os.path.join(files_root_path, name), 'r') as fin:
            ftext = fin.read()

        try:
            json_obj = json.loads(ftext)
            result = passage_first_level_classify(json_obj['post_content'])
        except Exception as e:
            # Deliberately broad (best effort): any parse or classification
            # failure is reported and the file is skipped.
            print(e)
            continue

        # Make sure the category directory exists up front rather than
        # treating every failure to open the output file as "directory
        # missing".  The isdir re-check tolerates the directory already
        # being there while letting real errors propagate.
        target_dir = os.path.join(result_path, result)
        try:
            os.makedirs(target_dir)
        except OSError:
            if not os.path.isdir(target_dir):
                raise

        # Create (or truncate) the output file and close it right away.
        # NOTE(review): nothing is written to the file here, only its
        # presence under the category directory records the result --
        # confirm whether the source text was meant to be copied into it.
        with codecs.open(os.path.join(target_dir, name), 'w'):
            pass
def classify_rawtext_files(files_root_path, result_path, pass_num=-1):
    """Classify every raw-text file in a directory and file it by category.

    Each entry of ``files_root_path`` is read and its full text is handed to
    ``passage_first_level_classify``.  A file with the same name is then
    created at ``<result_path>/<category>/<name>``, and the category
    directory is created first when it does not exist yet.

    Entries that fail to classify are reported on stdout and skipped; the
    remaining entries are still processed.

    Args:
        files_root_path: Directory whose entries are classified.
        result_path: Directory that receives one sub-directory per category.
        pass_num: Entries whose one-based position in the directory listing
            is below this value are skipped (their progress line is still
            printed).  The default of -1 skips nothing.

    Raises:
        IOError/OSError: If an input entry cannot be read, or the category
            directory or output file cannot be created.
    """
    for index, name in enumerate(os.listdir(files_root_path)):
        # Progress trace: zero-based position and file name.
        print('%s:%s' % (index, name))

        # Skip entries that lie before the requested starting position.
        # NOTE(review): this only resumes reliably if os.listdir returns the
        # entries in the same order on every run -- confirm with callers.
        if index + 1 < pass_num:
            continue

        # Read through a context manager so the input handle is closed
        # immediately instead of lingering until garbage collection.
        with codecs.open(os.path.join(files_root_path, name), 'r') as fin:
            ftext = fin.read()

        try:
            # The raw text is classified directly; no JSON parsing here.
            result = passage_first_level_classify(ftext)
        except Exception as e:
            # Deliberately broad (best effort): any classification failure
            # is reported and the file is skipped.
            print(e)
            continue

        # Make sure the category directory exists up front rather than
        # treating every failure to open the output file as "directory
        # missing".  The isdir re-check tolerates the directory already
        # being there while letting real errors propagate.
        target_dir = os.path.join(result_path, result)
        try:
            os.makedirs(target_dir)
        except OSError:
            if not os.path.isdir(target_dir):
                raise

        # Create (or truncate) the output file and close it right away.
        # NOTE(review): nothing is written to the file here, only its
        # presence under the category directory records the result --
        # confirm whether the source text was meant to be copied into it.
        with codecs.open(os.path.join(target_dir, name), 'w'):
            pass