Пример #1
0
    # NOTE(review): this fragment starts mid-function — `test_file`, `char_emb`,
    # `bichar_emb`, `gaz_file`, `status`, `save_model_dir`, `model_name`, `gpu`,
    # `conf_dict`, `train_file`, `dev_file`, `Data`, and `data_initialization`
    # are all defined above/outside this excerpt.
    print("Test file:", test_file)
    print("Char emb:", char_emb)
    print("Bichar emb:", bichar_emb)
    print("Gaz file:", gaz_file)
    if status == 'train':
        print("Model saved to:", save_model_dir)
    # Flush the stdout buffer immediately so the configuration echo above is
    # visible right away (useful when output is redirected/buffered).
    sys.stdout.flush()

    if status == 'train':
        # Build the Data container and copy hyper-parameters from the config dict.
        data = Data()
        data.model_name = model_name
        data.HP_gpu = gpu
        data.use_bichar = conf_dict['use_bichar']
        data.HP_batch_size = conf_dict['HP_batch_size']  # 1
        data.HP_iteration = conf_dict['HP_iteration']  # 100
        data.HP_lr = conf_dict['HP_lr']  # 0.015
        data.HP_lr_decay = conf_dict['HP_lr_decay']  # 0.5
        data.HP_hidden_dim = conf_dict['HP_hidden_dim']
        data.MAX_SENTENCE_LENGTH = conf_dict['MAX_SENTENCE_LENGTH']
        data.HP_lstm_layer = conf_dict['HP_lstm_layer']
        # Initialize alphabets/gazetteer from all three data splits before
        # generating training instances.
        data_initialization(data, gaz_file, train_file, dev_file, test_file)

        # Which instance-generation variant to use depends on the model
        # architecture name stored on the Data object.
        if data.model_name in ['CNN_model', 'LSTM_model']:
            data.generate_instance_with_gaz_2(train_file, 'train')
            data.generate_instance_with_gaz_2(dev_file, 'dev')
            data.generate_instance_with_gaz_2(test_file, 'test')
        elif data.model_name in ['WC-LSTM_model']:
            data.generate_instance_with_gaz_3(train_file, 'train')
            data.generate_instance_with_gaz_3(dev_file, 'dev')
            data.generate_instance_with_gaz_3(test_file, 'test')
Пример #2
0
            # NOTE(review): this fragment starts mid-branch of an `if` whose
            # condition is outside the excerpt — `data`, `args`, `gpu`,
            # `gaz_file`, and the train/dev/test file paths are defined above.
            # Copy CLI hyper-parameters onto an already-existing Data object.
            data.HP_batch_size = args.batch_size
            data.HP_iteration = args.num_iter
            data.label_comment = args.labelcomment
            data.result_file = args.resultfile
            data.HP_lr = args.lr
            data.use_bigram = args.use_biword
            data.HP_hidden_dim = args.hidden_dim
            data.HP_use_posi = args.use_posi
            data.HP_rethink_iter = args.rethink_iter

        else:
            # Fresh run: build a new Data object and configure it from the CLI
            # arguments plus fixed defaults for the gazetteer embedding.
            data = Data()
            data.HP_gpu = gpu
            data.HP_batch_size = args.batch_size
            data.HP_num_layer = args.num_layer
            data.HP_iteration = args.num_iter
            data.use_bigram = args.use_biword
            data.gaz_dropout = 0.5  # fixed dropout on gazetteer embeddings
            data.norm_gaz_emb = False  # do not normalize gaz embeddings here
            data.HP_fix_gaz_emb = False  # gaz embeddings remain trainable
            data.label_comment = args.labelcomment
            data.result_file = args.resultfile
            data.HP_lr = args.lr
            data.HP_hidden_dim = args.hidden_dim
            data.HP_use_posi = args.use_posi
            data.HP_rethink_iter = args.rethink_iter
            # Build alphabets/gazetteer from all splits, then generate the
            # gaz-augmented instances for train/dev/test.
            data_initialization(data, gaz_file, train_file, dev_file,
                                test_file)
            data.generate_instance_with_gaz(train_file, 'train')
            data.generate_instance_with_gaz(dev_file, 'dev')
            data.generate_instance_with_gaz(test_file, 'test')
Пример #3
0
        # NOTE(review): this fragment starts mid-branch — `args`, `gaz_file`,
        # the train/dev/test paths, `char_emb`, `bichar_emb`, `save_dir`,
        # `Data`, `data_initialization`, `train`, and `load_data_setting` are
        # defined outside the excerpt.
        # Configure a Data object for a CWS-style run with glyph features;
        # the "ner:"/"cws:" comments record the per-task recommended values.
        data = Data()
        data.HP_use_char = False
        data.use_bigram = True  # ner: False, cws: True
        data.gaz_dropout = args.gaz_dropout
        data.HP_lr = args.HP_lr  # cws
        data.HP_dropout = args.HP_dropout  # cws
        # Glyph (character-image) feature hyper-parameters, all CLI-driven.
        data.HP_use_glyph = args.HP_use_glyph
        data.HP_glyph_ratio = args.HP_glyph_ratio
        data.HP_font_channels = args.HP_font_channels
        data.HP_glyph_highway = args.HP_glyph_highway
        data.HP_glyph_embsize = args.HP_glyph_embsize
        data.HP_glyph_output_size = args.HP_glyph_output_size
        data.HP_glyph_dropout = args.HP_glyph_dropout
        data.HP_glyph_cnn_dropout = args.HP_glyph_cnn_dropout

        data.HP_iteration = 50  # cws
        data.norm_gaz_emb = True  # ner: False, cws: True

        data.HP_fix_gaz_emb = False
        # Build alphabets/gazetteer, generate instances for all splits, then
        # load the pretrained char/bichar/gaz embeddings.
        data_initialization(data, gaz_file, train_file, dev_file, test_file)
        data.generate_instance_with_gaz(train_file, 'train')
        data.generate_instance_with_gaz(dev_file, 'dev')
        data.generate_instance_with_gaz(test_file, 'test')
        data.build_word_pretrain_emb(char_emb)
        data.build_biword_pretrain_emb(bichar_emb)
        data.build_gaz_pretrain_emb(gaz_file)
        # Save the prepared dataset, then immediately reload it.
        # NOTE(review): the save-then-load round trip looks redundant —
        # presumably it verifies the pickle is usable; confirm before removing.
        torch.save(data, save_dir + '/data.set')
        data = torch.load(save_dir + '/data.set')
        train(data, save_dir)
    elif args.status == 'test':
        # Test mode: restore the dataset/settings saved by a prior train run.
        data = load_data_setting(args.loadmodel + '/data.set')
Пример #4
0
    # NOTE(review): this fragment starts mid-function — `train_file`,
    # `seed_num`, `Data`, and `data_initialization` are defined outside the
    # excerpt. Hard-coded setup for the Weibo NER dataset.
    dev_file = "data/Weibo/weiboNER.dev"
    test_file = "data/Weibo/weiboNER.test"

    # Pretrained unigram character embeddings (Gigaword, 50-dim).
    word_emb_file = "data/gigaword_chn.all.a2b.uni.ite50.vec"
    print(train_file)
    data = Data()
    data.HP_gpu = False  # whether to use the GPU
    data.norm_gaz_emb = False  # whether to normalize the word embeddings
    data.HP_fix_gaz_emb = True  # whether the embedding table is frozen (fixed size)
    data.HP_bilstm = True
    data.random_seed = seed_num

    # Global hyper-parameter settings
    data.HP_lr = 0.01
    data.HP_lr_decay = 0.01
    data.HP_iteration = 150
    data.HP_batch_size = 20
    data.gaz_dropout = 0.4
    data.weight_decay = 0.00000005
    data.use_clip = False  # whether to clip gradients
    data.HP_clip = 30  # maximum gradient value
    # LSTM hyper-parameters
    data.HP_hidden_dim = 300
    data.HP_dropout = 0.7
    # Build alphabets from the three splits, load pretrained embeddings,
    # then generate train/dev instances (test generation is below this excerpt).
    data_initialization(data, train_file, dev_file, test_file)
    data.build_word_pretrain_emb(word_emb_file)
    print('finish loading')
    data.generate_instance(train_file, 'train')
    print("train_file done")
    data.generate_instance(dev_file, 'dev')
    print("dev_file done")