def json_converter_2():
    """Convert the TMI (Thompson Motif Index) tree into per-node JSON files.

    For each first-level key of the Thompson tree, collect the label
    descriptions of its subtree, tokenize each description, and dump the
    result as ``{'labels': [<node>], 'doc_str': [...]}`` into
    ``<json_doc_path_2>/<node>.json``.

    Relies on module-level names defined elsewhere in this file:
    ``return_range``, ``raw_doc_path_2``, ``json_doc_path_2``,
    ``construct_class_training_1st``, ``tokenize``, ``codecs``, ``json``.
    """
    all_thompson_tree = return_range.load_all_thompson_tree(raw_doc_path_2)

    for parent_node in all_thompson_tree:
        # Tokenized descriptions of every label in this node's subtree.
        document = []
        class_training_stack = construct_class_training_1st(parent_node,
                                                            all_thompson_tree)
        for label_description_tuple in class_training_stack:
            description = label_description_tuple[1]
            document.append(tokenize.wordpunct_tokenize(description))

        json_f_str = {
            'labels': [parent_node],
            'doc_str': document,
        }

        filename = parent_node + '.json'
        with codecs.open(json_doc_path_2 + filename, 'w', 'utf-8') as f:
            # BUG FIX: the original passed ensure_ascii='False' — the
            # *string* 'False' is truthy, so non-ASCII text was still
            # escaped despite the utf-8 codecs.open. Pass the bool.
            json.dump(json_f_str, f, ensure_ascii=False, indent=4)

        # NOTE(review): the original contained an unterminated
        # triple-quoted block holding a 2nd-level variant of this loop,
        # plus a dangling ``elif mode == 'class':`` fragment referencing
        # names (mode, level, stop, dutch) undefined in this scope. Both
        # were syntax errors from a bad paste and were removed.
        # TODO: build the 3rd level if needed (original TODO, translated).

if __name__ == '__main__':
    # CLI entry point: build big documents (big) or train classifiers (class).
    parser = argparse.ArgumentParser(description='')
    # BUG FIX: type=int added — without it a CLI-supplied level arrives as
    # a str, so the downstream ``level == 1`` check never matches while
    # the int default 1 does.
    parser.add_argument('-level', '--level', type=int, default=1,
                        help='level which you want to construct big doc.')
    parser.add_argument('-mode', '--mode', required=True,
                        help='classification problem(class) or big-document(big)')
    parser.add_argument('-stop', action='store_true',
                        help='If added, stop words are eliminated from training file')
    parser.add_argument('-dutch', action='store_true',
                        help='If added, document from dutch folktale database is added to training corpus')
    args = parser.parse_args()
    dir_path = './parsed_json/'
    all_thompson_tree = return_range.load_all_thompson_tree(dir_path)
    result_stack = main(args.level, args.mode, all_thompson_tree, args.stop,
                        args.dutch)
# --- scrape artifact: "Exemplo n.º 3" marker from a code-search page.
# A truncated duplicate of the snippet above follows; the fragment's
# enclosing ``def`` line was lost in the paste.
        num_of_training_instance = 0
        if level == 1:
            construct_classifier_for_1st_layer(all_thompson_tree, stop, dutch)


if __name__ == '__main__':
    # CLI entry point (duplicate copy from the scraped paste): build big
    # documents (big) or train classifiers (class).
    parser = argparse.ArgumentParser(description='')
    # BUG FIX: type=int added — without it a CLI-supplied level arrives as
    # a str, so the downstream ``level == 1`` check never matches while
    # the int default 1 does.
    parser.add_argument('-level', '--level', type=int, default=1,
                        help='level which you want to construct big doc.')
    parser.add_argument('-mode', '--mode', required=True,
                        help='classification problem(class) or big-document(big)')
    parser.add_argument('-stop', action='store_true',
                        help='If added, stop words are eliminated from training file')
    parser.add_argument('-dutch', action='store_true',
                        help='If added, document from dutch folktale database is added to training corpus')
    args = parser.parse_args()
    dir_path = './parsed_json/'
    all_thompson_tree = return_range.load_all_thompson_tree(dir_path)
    result_stack = main(args.level, args.mode, all_thompson_tree, args.stop,
                        args.dutch)