import os

import numpy

# Project-specific helpers (Conll05Corpora, ChunkProblem, WordLevelNeuralModel)
# are assumed to be importable from the surrounding package.


def test_neural_language_model():
    """Train a window-based neural model on the CoNLL-2005 chunking task."""

    home = os.path.expanduser('~')
    train_file_path = os.path.join(home, 'Data/conll05/training-set.1')
    #train_file_path = os.path.join(home, 'Data/conll05/dev-set.1')
    valid_file_path = os.path.join(home, 'Data/conll05/dev-set.1')

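    # Load the CoNLL-2005 training and dev corpora.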
    train_corpora = Conll05Corpora()
    train_corpora.load(train_file_path, 2)

    valid_corpora = Conll05Corpora()
    valid_corpora.load(valid_file_path, 2)

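    # Each training example is an 11-token context window around the target word.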
    window_size = 11
    train_problem = ChunkProblem(train_corpora, window_size)
    valid_problem = ChunkProblem(valid_corpora, window_size)

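    # Dataset statistics such as the vocabulary size ('word_num') and the number
    # of chunk labels ('CHUNKING_type_num') are read from the training problem.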
    problem_character = train_problem.get_problem_property()


    X_train, y_train = train_problem.get_data_batch()

    X_valid, y_valid = valid_problem.get_data_batch()

    print('train X shape', X_train.shape)
    print('train y shape', y_train.shape)
    print('valid X shape', X_valid.shape)
    print('valid y shape', y_valid.shape)

    rng = numpy.random.RandomState(1234)

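    # Hyperparameters for the window-based neural model.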
    params = dict()
    params['word_num'] = problem_character['word_num']
    params['window_size'] = window_size
    params['feature_num'] = 50
    params['hidden_layer_size'] = 300
    params['n_outs'] = problem_character['CHUNKING_type_num']
    params['L1_reg'] = 0
    params['L2_reg'] = 0.0001

    print(params)

    #model = WordLevelNeuralModel(word_num = corpora.get_word_num(), window_size = 11, feature_num = 100,
    #             hidden_layer_size = 1000, n_outs = problem.get_class_num(), L1_reg = 0.00, L2_reg = 0.0001,
    #             numpy_rng= rng)

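    # load/dump presumably control whether an existing model is restored and
    # whether the trained model is saved; model_folder is the original author's
    # hard-coded path.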
    model_name = 'chunk'
    load = False
    dump = False
    model_folder = '/home/kingsfield/workspace/knowledge.py'
    init_model_name = None
    model = WordLevelNeuralModel(model_name, load, dump, model_folder, init_model_name, rng, **params)

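    # Train on the CoNLL-2005 training set, validating on the dev set.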
    model.fit(X_train, y_train, X_valid, y_valid)
Example #2
import sys

import numpy
import sklearn.model_selection

# Project-specific helpers (Corpora, PosTagProblem, WordLevelNeuralModel)
# are assumed to be importable from the surrounding package.


def test_neural_language_model():
    """Train a window-based neural POS tagger on the CoNLL-2000 corpus loaded via NLTK."""

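    # Load the CoNLL-2000 corpus that ships with NLTK.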
    corpora = Corpora()
    corpora.load_nltk_conll2000()

    problem = PosTagProblem(corpora)

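    # Build windowed examples for POS tagging: an 11-token context per target word.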
    X, y = problem.get_data_set(window_size=11)

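    # Hold out 20% of the data for testing, then carve a validation split
    # out of the remaining training data.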
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=0)

    X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(
        X_train, y_train, test_size=0.2, random_state=0
    )

    rng = numpy.random.RandomState(1234)

    print("Problem Size: ", X.shape, file=sys.stderr)

    print("word num = ", corpora.get_word_num(), file=sys.stderr)

    print("x_train.shape ", X_train.shape, file=sys.stderr)
    print(X_train, file=sys.stderr)

    # print >> sys.stderr, " y_valid shape", y_valid.shape
    # print >> sys.stderr, y_valid
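    # Hyperparameters mirror those of the chunking example above.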
    params = dict()
    params["word_num"] = corpora.get_word_num()
    params["window_size"] = 11
    params["feature_num"] = 50
    params["hidden_layer_size"] = 300
    params["n_outs"] = problem.get_class_num()
    params["L1_reg"] = 0
    params["L2_reg"] = 0.0001

    # model = WordLevelNeuralModel(word_num = corpora.get_word_num(), window_size = 11, feature_num = 100,
    #             hidden_layer_size = 1000, n_outs = problem.get_class_num(), L1_reg = 0.00, L2_reg = 0.0001,
    #             numpy_rng= rng)

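    # dump is True here, presumably so the trained model is written to
    # model_folder (the original author's hard-coded path).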
    model_name = "pos"
    load = False
    dump = True
    model_folder = "/home/kingsfield/workspace/knowledge.py"
    init_model_name = None
    model = WordLevelNeuralModel(model_name, load, dump, model_folder, init_model_name, numpy_rng=rng, **params)

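    # Train on the training split, validating on the held-out validation split.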
    model.fit(X_train, y_train, X_valid, y_valid)
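
# A minimal driver sketch (not part of the original snippets), assuming the
# supporting classes are importable: run the POS-tagging example directly.
if __name__ == '__main__':
    test_neural_language_model()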