Example #1
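    # Note: this excerpt assumes earlier `import os, pickle` and `import numpy as np`,
    # plus trainfile, testfile, word2v_file, char2v_file and base_datafile being
    # defined earlier in the original script.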
    dataname = 'cckscner.user.data.SensitiV'

    user_datafile = "./model/model_data/" + dataname + ".pkl"
    batch_size = 8

    data_split = 1

    # run-mode flags (assumed: retrain forces re-training; Test/valid/Label toggle
    # the evaluation and labelling stages later in the script)
    retrain = False
    Test = True
    valid = False
    Label = True
    if not os.path.exists(user_datafile):
        print("Process data....")
        get_data(trainfile=trainfile, testfile=testfile,
                 w2v_file=word2v_file, c2v_file=char2v_file,
                 base_datafile=base_datafile, user_datafile=user_datafile,
                 w2v_k=300, c2v_k=100,
                 data_split=data_split, maxlen=50)

    print("data has extisted: " + user_datafile)
    print('loading base data ...')
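    # base-data pickle: char/target vocabularies, index-to-token maps, the char
    # embedding matrix char_W, its dimension char_k, and the max sentence length max_s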
    char_vob, target_vob, \
    idex_2char, idex_2target, \
    char_W, \
    char_k, \
    max_s = pickle.load(open(base_datafile, 'rb'))
    print('loading user data ...')
    train, train_SensitiV, train_label,\
    test, test_SensitiV, test_label = pickle.load(open(user_datafile, 'rb'))

    # the padded char-index sequences become int32 numpy arrays, the input format
    # expected by Keras embedding layers
    trainx_char = np.asarray(train, dtype="int32")
Example #2
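    # Note: this excerpt assumes an earlier `import os`, plus trainfile and modelname
    # being defined earlier in the original script.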
    testfile = "./data/test.txt"

    char2v_file = "./data/CCKS18CNER_Char2Vec.txt"
    word2v_file = "./data/CCKS18CNER_Word2Vec.txt"
    datafile = "./data/model/data5.pkl"
    modelfile = "./data/model/model_char_word_CRF51.h5"
    resultdir = "./data/result/"

    batch_size = 32
    retrain = True
    Test = True
    valid = False
    Label = True
    if not os.path.exists(datafile):
        print("Precess data....")
        get_data(trainfile=trainfile, testfile=testfile,
                 w2v_file=word2v_file, char2v_file=char2v_file,
                 datafile=datafile,
                 w2v_k=100, char_emd_dim=100, maxlen=50)

    # train from scratch when no saved model exists; otherwise retrain only if the retrain flag is set
    if not os.path.exists(modelfile):
        print("Lstm data already exists: " + datafile)
        print("Training EE model....")
        print(modelfile)
        train_e2e_model(modelname, datafile, modelfile, resultdir,
                        npochos=100, hidden_dim=200, batch_size=batch_size, retrain=False)
    else:
        if retrain:
            print("ReTraining EE model....")
            train_e2e_model(modelname, datafile, modelfile, resultdir,
                            npochos=100, hidden_dim=200, batch_size=batch_size, retrain=retrain)

    if Test:
        print("test EE model....")
Example #3
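    # Note: this excerpt assumes `import os, pickle` and `import numpy as np`, and that
    # the active trainfile, testfile, char2v_file, datafile and batch_size values are
    # set earlier in the original script (the commented block below is an alternative
    # configuration left disabled).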
    # word2v_file = ""
    # dataname = 'MSRA.data_WSUI.2'
    # datafile = "./data/model_data/" + dataname + ".pkl"
    # batch_size = 32
    # # ------------------2

    retrain = False
    Test = True
    valid = False
    Label = True
    if not os.path.exists(datafile):
        print("Process data....")
        get_data(trainfile=trainfile,
                 testfile=testfile,
                 w2v_file='',
                 c2v_file=char2v_file,
                 datafile=datafile,
                 w2v_k=300,
                 c2v_k=100,
                 maxlen=50)

    print("data has extisted: " + datafile)
    print('loading data ...')
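    # data pickle: train/test char sequences and labels, position and word features,
    # the vocabularies and index-to-token maps, the embedding matrices
    # (character_W, posi_W, word_W) with their dimensions, and the max sentence length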
    train, train_label, test, test_label,\
    train_posi, test_posi, train_word, test_word,\
    char_vob, target_vob, posi_vob, word_vob,\
    idex_2char, idex_2target,\
    character_W, posi_W, word_W,\
    character_k, posi_k, word_k, max_s = pickle.load(open(datafile, 'rb'))

    # both the char-index inputs and the label sequences are converted to int32
    # numpy arrays for training
    trainx_char = np.asarray(train, dtype="int32")
    trainy = np.asarray(train_label, dtype="int32")