def json_converter_2(): """ json_converter_2はTMI tree用 """ all_thompson_tree=return_range.load_all_thompson_tree(raw_doc_path_2); for key_index, key_1st in enumerate(all_thompson_tree): json_f_str={}; document=[]; parent_node=key_1st; class_training_stack=construct_class_training_1st(parent_node, all_thompson_tree); for label_description_tuple in class_training_stack: description=label_description_tuple[1]; s=tokenize.wordpunct_tokenize(description); document.append(s); json_f_str['labels']=[parent_node]; json_f_str['doc_str']=document; filename=parent_node+'.json'; with codecs.open(json_doc_path_2+filename,'w','utf-8') as f: json.dump(json_f_str,f,ensure_ascii='False',indent=4); """
for key_1st in all_thompson_tree: for key_2nd in all_thompson_tree[key_1st]: parent_node=key_2nd; sub_thompson_tree=all_thompson_tree[key_1st][key_2nd]; big_document_stack=construct_2nd_level(parent_node, sub_thompson_tree); parent_node=re.sub(ur'([A-Z]_\d+_\d+).+', r'\1', parent_node); filename=u'{}_level_{}'.format(parent_node, 2); tokens_s=cleanup_bigdocument_stack(filename, big_document_stack, stop); with codecs.open('./big_document/'+filename, 'w', 'utf-8') as f: json.dump(tokens_s, f, indent=4, ensure_ascii=False); #TODO 必要に応じて3層目を作成する elif mode=='class': training_map={}; feature_map={}; feature_max=0; num_of_training_instance=0; if level==1: construct_classifier_for_1st_layer(all_thompson_tree, stop, dutch) if __name__=='__main__': parser=argparse.ArgumentParser(description=''); parser.add_argument('-level', '--level', help='level which you want to construct big doc.', default=1) parser.add_argument('-mode', '--mode', help='classification problem(class) or big-document(big)', required=True); parser.add_argument('-stop', help='If added, stop words are eliminated from training file', action='store_true'); parser.add_argument('-dutch', help='If added, document from dutch folktale database is added to training corpus', action='store_true'); args=parser.parse_args(); dir_path='./parsed_json/' all_thompson_tree=return_range.load_all_thompson_tree(dir_path); result_stack=main(args.level, args.mode, all_thompson_tree, args.stop, args.dutch); # NOTE(review): this physical line is a whitespace-mangled multi-line fragment: the 2nd-level big-document loop, the 'elif mode=='class':' branch of an out-of-view function (its matching 'if' and 'def' are not visible here), and the __main__ guard, all collapsed together. The '#TODO' (translation: "create the 3rd level as needed") swallows everything after it when read literally. Python 2 syntax (ur'' literal). Original line breaks/indentation must be restored before this can run; left byte-identical pending that.
num_of_training_instance = 0 if level == 1: construct_classifier_for_1st_layer(all_thompson_tree, stop, dutch) if __name__ == '__main__': parser = argparse.ArgumentParser(description='') parser.add_argument('-level', '--level', help='level which you want to construct big doc.', default=1) parser.add_argument( '-mode', '--mode', help='classification problem(class) or big-document(big)', required=True) parser.add_argument( '-stop', help='If added, stop words are eliminated from training file', action='store_true') parser.add_argument( '-dutch', help= 'If added, document from dutch folktale database is added to training corpus', action='store_true') args = parser.parse_args() dir_path = './parsed_json/' all_thompson_tree = return_range.load_all_thompson_tree(dir_path) result_stack = main(args.level, args.mode, all_thompson_tree, args.stop, args.dutch) # NOTE(review): this physical line looks like an autoformatted duplicate of the tail of the previous line — the end of an out-of-view function's 'class' branch ('level', 'stop', 'dutch' appear to be its locals — confirm) followed by the __main__ guard — collapsed onto one line by whitespace mangling. Restore the original line breaks/indentation and deduplicate against the previous line before running; left byte-identical pending that.