Exemplo n.º 1
0
def classify_text_files(files_root_path, result_path):
    """Classify each JSON file in a directory and open a per-label output file.

    Every entry of ``files_root_path`` is read in full and parsed as JSON; the
    value under its ``'post_content'`` key is passed to
    ``passage_first_level_classify``. The returned label is used as the name
    of a sub-directory of ``result_path`` (created if missing), and a file
    with the same name as the input is opened there for writing.

    Entries that cannot be parsed or classified are reported on stdout and
    skipped; the remaining entries are still processed.

    Args:
        files_root_path: Directory whose entries are the input files.
        result_path: Directory that receives one sub-directory per label.
            It must already exist, because only the label level is created.

    Raises:
        IOError, OSError: If an input file cannot be read, or the label
            directory or output file cannot be created.
    """
    for index, f in enumerate(os.listdir(files_root_path)):
        # Progress line: "<position in listing>:<file name>".
        print('%s:%s' % (index, f))

        # Read inside a context manager so the input handle is always closed.
        with codecs.open('%s/%s' % (files_root_path, f), 'r') as fin:
            ftext = fin.read()

        try:
            json_obj = json.loads(ftext)
            result = passage_first_level_classify(json_obj['post_content'])
        except Exception as e:
            # Deliberately broad (best effort): any parsing or classification
            # failure is printed and the file is skipped.
            print(e)
            continue

        # Create the label directory up front instead of reacting to a failed
        # open(); a genuine open error is then reported as itself rather than
        # being masked by a "directory exists" error from os.mkdir.
        out_dir = '%s/%s' % (result_path, result)
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        # NOTE(review): nothing visible in this function writes to or closes
        # ``fout`` -- confirm whether a write/close step is missing here.
        fout = codecs.open('%s/%s' % (out_dir, f), 'w')
Exemplo n.º 2
0
def classify_rawtext_files(files_root_path, result_path, pass_num=-1):
    """Classify each raw-text file in a directory and open a per-label output.

    Every entry of ``files_root_path`` is read in full and its text is passed
    unchanged to ``passage_first_level_classify``. The returned label is used
    as the name of a sub-directory of ``result_path`` (created if missing),
    and a file with the same name as the input is opened there for writing.

    Entries that cannot be classified are reported on stdout and skipped; the
    remaining entries are still processed.

    Args:
        files_root_path: Directory whose entries are the input files.
        result_path: Directory that receives one sub-directory per label.
            It must already exist, because only the label level is created.
        pass_num: Entries are numbered from 1 in listing order; every entry
            whose number is below ``pass_num`` is listed on stdout but not
            processed. The default of -1 processes everything.

    Raises:
        IOError, OSError: If an input file cannot be read, or the label
            directory or output file cannot be created.
    """
    for index, f in enumerate(os.listdir(files_root_path)):
        # Progress line: "<position in listing>:<file name>".
        print('%s:%s' % (index, f))

        # The skip test uses the 1-based entry number, so the first
        # ``pass_num - 1`` entries are skipped before being read.
        if index + 1 < pass_num:
            continue

        # Read inside a context manager so the input handle is always closed.
        with codecs.open(os.path.join(files_root_path, f), 'r') as fin:
            ftext = fin.read()

        try:
            # The raw file text is classified directly (no JSON decoding).
            result = passage_first_level_classify(ftext)
        except Exception as e:
            # Deliberately broad (best effort): any classification failure is
            # printed and the file is skipped.
            print(e)
            continue

        # Create the label directory up front instead of reacting to a failed
        # open(); a genuine open error is then reported as itself rather than
        # being masked by a "directory exists" error from os.mkdir.
        out_dir = os.path.join(result_path, result)
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)

        # NOTE(review): nothing visible in this function writes to or closes
        # ``fout`` -- confirm whether a write/close step is missing here.
        fout = codecs.open(os.path.join(out_dir, f), 'w')