Example #1
    print ("Seg: ", seg)
    print ("Train file:", train_file)
    print ("Dev file:", dev_file)
    print ("Test file:", test_file)
    print ("Raw file:", raw_file)
    print ("Char emb:", char_emb)
    print ("Bichar emb:", bichar_emb)
    print ("Gaz file:",gaz_file)
    if status == 'train':
        print ("Model saved to:", save_model_dir)
    sys.stdout.flush()
    
    if status == 'train':
        data = Data()
        data.HP_gpu = gpu
        data.HP_use_char = False
        data.HP_batch_size = 10
        data.use_bigram = False
        data.gaz_dropout = 0.5
        data.norm_gaz_emb = False
        data.HP_fix_gaz_emb = False
        data_initialization(data, gaz_file, train_file, dev_file, test_file)

        data.generate_instance_with_gaz(train_file, 'train')
        data.generate_instance_with_gaz(dev_file, 'dev')
        data.generate_instance_with_gaz(test_file, 'test')

        data.build_word_pretrain_emb(char_emb)
        data.build_biword_pretrain_emb(bichar_emb)
        data.build_gaz_pretrain_emb(gaz_file)
        train(data, save_model_dir, dset_dir, seg)
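
The snippet above consumes variables (status, seg, the data-file paths, save_model_dir) that are defined earlier in the script. A minimal sketch of how such variables are typically produced with argparse; the flag names and default paths here are illustrative assumptions, not the project's exact command-line interface:

# Hypothetical argparse setup feeding the variables used above;
# flag names and defaults are assumptions for illustration only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--status', choices=['train', 'test'], default='train')
parser.add_argument('--train', dest='train_file', default='data/demo.train.char')
parser.add_argument('--dev', dest='dev_file', default='data/demo.dev.char')
parser.add_argument('--test', dest='test_file', default='data/demo.test.char')
parser.add_argument('--savemodel', dest='save_model_dir', default='data/model/demo')
args = parser.parse_args()

status = args.status.lower()
train_file, dev_file, test_file = args.train_file, args.dev_file, args.test_file
save_model_dir = args.save_model_dir
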
Example #2
    print "Seg: ", seg
    print "Train file:", train_file
    print "Dev file:", dev_file
    print "Test file:", test_file
    print "Raw file:", raw_file
    print "Char emb:", char_emb
    print "Bichar emb:", bichar_emb
    print "Gaz file:", gaz_file
    if status == 'train':
        print "Model saved to:", save_model_dir
    sys.stdout.flush()

    if status == 'train':
        data = Data()
        data.HP_gpu = gpu
        data.HP_use_char = False
        data.HP_batch_size = 1
        data.use_bigram = False
        data.gaz_dropout = 0.5
        data.norm_gaz_emb = False
        data.HP_fix_gaz_emb = False
        data_initialization(data, gaz_file, train_file, dev_file, test_file)
        data.generate_instance_with_gaz(train_file, 'train')
        data.generate_instance_with_gaz(dev_file, 'dev')
        data.generate_instance_with_gaz(test_file, 'test')
        data.build_word_pretrain_emb(char_emb)
        data.build_biword_pretrain_emb(bichar_emb)
        data.build_gaz_pretrain_emb(gaz_file)
        train(data, save_model_dir, seg)
    elif status == 'test':
        data = load_data_setting(dset_dir)
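
In the 'test' branch, load_data_setting restores the Data object (alphabets and hyperparameters) that was serialized during training. A minimal sketch of such a save/load pair, assuming the settings are stored with pickle (the pickle import in Example #5 points that way); the project's actual helpers may differ, e.g. by stripping training instances before dumping:

import pickle

def save_data_setting(data, save_file):
    # Persist the configured Data object so testing/decoding can reuse
    # the same alphabets and hyperparameters later.
    with open(save_file, 'wb') as fp:
        pickle.dump(data, fp)

def load_data_setting(save_file):
    # Restore the Data object saved after training.
    with open(save_file, 'rb') as fp:
        return pickle.load(fp)
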
Example #3
        data.HP_batch_size = args.batch_size
        data.HP_iteration = args.num_iter
        data.label_comment = args.labelcomment
        data.result_file = args.resultfile
        data.HP_lr = args.lr
        data.use_bigram = args.use_biword
        data.HP_use_char = args.use_char
        data.HP_hidden_dim = args.hidden_dim
        data.HP_dropout = args.drop
        data.HP_use_count = args.use_count
        data.model_type = args.model_type
        data.use_bert = args.use_bert
    else:
        data = Data()
        data.HP_gpu = gpu
        data.HP_use_char = args.use_char
        data.HP_batch_size = args.batch_size
        data.HP_num_layer = args.num_layer
        data.HP_iteration = args.num_iter
        data.use_bigram = args.use_biword
        data.HP_dropout = args.drop
        data.norm_gaz_emb = False
        data.HP_fix_gaz_emb = False
        data.label_comment = args.labelcomment
        data.result_file = args.resultfile
        data.HP_lr = args.lr
        data.HP_hidden_dim = args.hidden_dim
        data.HP_use_count = args.use_count
        data.model_type = args.model_type
        data.use_bert = args.use_bert
        data_initialization(data, gaz_file, train_file, dev_file, test_file)
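
This block copies command-line options onto the Data object. The corresponding argument definitions are not shown in the excerpt; a plausible sketch follows, where only the attribute names (batch_size, num_iter, lr, and so on) come from the code above and the flag spellings and defaults are assumptions:

# Hypothetical parser for the options referenced above.
import argparse

def str2bool(value):
    # argparse's type=bool treats any non-empty string as True,
    # so a small converter is used for boolean flags instead.
    return value.lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--num_iter', type=int, default=100)
parser.add_argument('--num_layer', type=int, default=4)
parser.add_argument('--lr', type=float, default=0.0015)
parser.add_argument('--drop', type=float, default=0.5)
parser.add_argument('--hidden_dim', type=int, default=300)
parser.add_argument('--use_biword', type=str2bool, default=False)
parser.add_argument('--use_char', type=str2bool, default=False)
parser.add_argument('--use_count', type=str2bool, default=True)
parser.add_argument('--use_bert', type=str2bool, default=False)
parser.add_argument('--model_type', default='lstm')
parser.add_argument('--labelcomment', default='')
parser.add_argument('--resultfile', default='result/result.txt')
args = parser.parse_args()
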
Example #4
File: main.py  Project: catnlp/CatNER
            # emb_file = "../data/gigaword_chn.all.a2b.uni.ite50.vec"
            emb_file = "../data/joint4.all.b10c1.2h.iter17.mchar"   ### catner
        else:
            emb_file = None
        char_emb_file = args.charemb.lower()
        print "Char Embedding:", char_emb_file
        if char_emb_file == "rich":
            char_emb_file = "../data/joint4.all.b10c1.2h.iter17.mchar"  ### catner
        elif char_emb_file == "normal":
            char_emb_file = "../data/gigaword_chn.all.a2b.uni.ite50.vec"  ### catner

        data = Data()
        data.number_normalized = True
        data_initialization(data, train_file, dev_file, test_file)
        data.HP_gpu = gpu
        data.HP_use_char = True
        data.HP_batch_size = 10  ## catner
        data.HP_lr = 0.015
        # data.char_features = "CNN"
        data.generate_instance(train_file, 'train')
        data.generate_instance(dev_file, 'dev')
        data.generate_instance(test_file, 'test')
        if emb_file:
            print "load word emb file... norm:", data.norm_word_emb
            data.build_word_pretrain_emb(emb_file)
        if char_emb_file != "none":
            print "load char emb file... norm:", data.norm_char_emb
            data.build_char_pretrain_emb(char_emb_file)
        train(data, save_model_dir, seg)
    elif status == 'test':      
        data = load_data_setting(dset_dir)
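
Example #4 sets data.number_normalized = True; in NCRF++-style pipelines this conventionally maps every digit to '0' before the alphabets are built, so that different numerals share one token. A small illustrative helper (hypothetical; the project's own implementation may differ):

def normalize_word(word):
    # Replace each digit with '0', e.g. "1998" and "2024" become "0000";
    # applied when number_normalized is enabled.
    return ''.join('0' if ch.isdigit() else ch for ch in word)

print(normalize_word("iter17"))  # -> "iter00"
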
Example #5
# -*- coding: utf-8 -*-
# @Author: Jie
# @Date:   2017-06-15 14:11:08
# @Last Modified by:   Jie Yang,     Contact: [email protected]
# @Last Modified time: 2018-07-06 11:08:27
import time
import sys
import argparse
import random
import copy
import torch
import gc
import pickle
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from utils.metric import get_ner_fmeasure
from model.bilstmcrf import BiLSTM_CRF as SeqModel
from utils.data import Data
seed_num = 100
random.seed(seed_num)
torch.manual_seed(seed_num)
np.random.seed(seed_num)

def data_initialization(data, gaz_file, train_file, dev_file, test_file):
    data.build_alphabet(train_file)
    data.build_alphabet(dev_file)
    data.build_alphabet(test_file)