# Other parameters
tf.flags.DEFINE_boolean('log_device_placement', False, 'log placement of ops on devices')  # whether to log device placement
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'allow TF soft placement')  # if the specified device does not exist, let TF pick one automatically

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print('all related parameters in :')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{}={}'.format(attr.upper(), value))
print('finished printing parameters.....')

# Load data
train_x, train_y, dev_x, dev_y = data_helper.load_dataset(FLAGS.raw_file)
print('load data finished!')

with tf.Session() as sess:
    han = HAN_model.HAN(FLAGS.vocab_size, FLAGS.num_classes, FLAGS.embedding_size, FLAGS.hidden_size)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=han.input_y, logits=han.out, name='loss'))

    with tf.name_scope('accuracy'):
        predict = tf.argmax(han.out, axis=1, name='predict')
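        # A minimal sketch (not from the original) of completing the accuracy scope,
        # assuming han.input_y holds one-hot labels as implied by the softmax loss above.
        label = tf.argmax(han.input_y, axis=1, name='label')
        acc = tf.reduce_mean(tf.cast(tf.equal(predict, label), tf.float32), name='accuracy')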
tf.flags.DEFINE_integer("max_sent_in_doc", 30, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("max_word_in_sent", 30, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("evaluate_every", 300, "evaluate every this many batches") tf.flags.DEFINE_float("learning_rate", 0.001, "learning rate") tf.flags.DEFINE_float("grad_clip", 5, "grad clip to prevent gradient explode") FLAGS = tf.flags.FLAGS print(FLAGS.max_sent_in_doc) print(FLAGS.max_word_in_sent) train_x, train_y, dev_x, dev_y, vocab = load_dataset(FLAGS.yelp_json_path, FLAGS.labels_json_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent) print("data load finished") #print(train_x) with tf.Session() as sess: han = HAN(vocab_size=FLAGS.vocab_size, num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size, hidden_size=FLAGS.hidden_size) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=han.input_y, logits=han.out,
tf.flags.DEFINE_float("learning_rate", 1e-2, "Starter Learning Rate (default: 1e-3)") tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") tf.flags.DEFINE_integer("num_epochs", 120, "Number of training epochs (default: 200)") tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") tf.flags.DEFINE_boolean("enable_moving_average", False, "Enable usage of Exponential Moving Average (default: False)") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("Parameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr, value)) print("") # Loading database here print("Loading database...") x_train, y_train, x_test, y_test = data_helper.load_dataset(FLAGS.database_path, zca_whitening=FLAGS.zca_whitening) num_batches_per_epoch = int((len(x_train)-1)/FLAGS.batch_size) + 1 print("Shape:",x_train.shape, y_train.shape, x_test.shape, y_test.shape) print("Success!") sess = tf.Session() cnn = CNN() # Optimizer and LR Decay #update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #with tf.control_dependencies(update_ops): global_step = tf.Variable(0, name="global_step", trainable=False) learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True) optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) #lr_decay_fn = lambda lr, global_step : tf.train.exponential_decay(lr, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True) #train_op = tf.contrib.layers.optimize_loss(loss=cnn.loss, global_step=global_step, clip_gradients=4.0,
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'allow TF soft placement')  # if the specified device does not exist, let TF pick one automatically

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print('all related parameters in :')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{}={}'.format(attr.upper(), value))
print('finished printing parameters.....')

# Load data
train_x, train_y = data_helper.load_dataset(fullfile='../text_for_word2vec.txt', trainOrtest='train',
                                            trainOrtestFile='../smp_train.txt', max_sent_in_doc=20,
                                            max_word_in_sent=20, vocab_path='smp_contest_vocab.pk')
test_x, test_y = data_helper.load_dataset(fullfile='../text_for_word2vec.txt', trainOrtest='test',
                                          trainOrtestFile='../smp_test.txt', max_sent_in_doc=20,
                                          max_word_in_sent=20, vocab_path='smp_contest_vocab.pk')
dev_x = test_x
dev_y = test_y
print('load data finished!')
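# A minimal sketch (not from the original) of a shuffled mini-batch generator over
# the arrays loaded above; assumes train_x/train_y are numpy arrays, and the
# batch size of 64 is an illustrative assumption.
import numpy as np  # assumed already imported in the original script

def batch_iter(x, y, batch_size=64):
    indices = np.random.permutation(len(x))
    for start in range(0, len(x), batch_size):
        idx = indices[start:start + batch_size]
        yield x[idx], y[idx]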
tf.flags.DEFINE_integer("max_sent_in_doc", 10, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("max_word_in_sent", 20, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_float("lr", 0.01, "learning rate") tf.flags.DEFINE_float("grad_clip", 5, "grad clip to prevent gradient explode") tf.flags.DEFINE_float("lr_decay", 0.5, "learning rate decay (default: 0.5)") tf.flags.DEFINE_float("nepoch_no_imprv", 3, "early stopping (default: 5)") tf.flags.DEFINE_float("nepoch_lr_decay", 2, "decay of lr if no improvement (default: 3)") tf.flags.DEFINE_string("dir_model", "models", "path to save model files (default: word_char_models)") FLAGS = tf.flags.FLAGS train_x, train_y, dev_x, dev_y, _vocab_size = load_dataset( FLAGS.input_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent) print "training samples: %d" % train_x.shape[0] print "dev samples: %d" % dev_x.shape[0] print "data load finished" with tf.Session() as sess: han = HAN(vocab_size=_vocab_size, num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size, hidden_size=FLAGS.hidden_size) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=han.input_y, logits=han.out, name='loss'))
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn import linear_model
from sklearn import neighbors
import xgboost as xgb
import lightgbm as lgb  # needed for lgb.Dataset below

tasks = ['anger', 'fear', 'joy', 'sadness']
gensim_model = None

for task in tasks:
    print('Running for task', task)

    # Load or create the dataset
    print('Load data...')
    X_train, y_train, train_id, train_raw, gensim_model = data_helper.load_dataset(
        'train', task, gensim_model)
    X_test, y_test, test_id, test_raw, gensim_model = data_helper.load_dataset(
        'test', task, gensim_model)

    # --------------------------------- LightGBM --------------------------------------
    # Create datasets for LightGBM
    #print X_train, y_train
    #print type(X_train), type(y_train)
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

    # Specify your configurations as a dict (the original block is truncated here):
    # params = {
    #     'task': 'train',
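    # A minimal sketch (not from the original) standing in for the truncated params
    # block above; these values are illustrative assumptions, not tuned settings.
    params = {'objective': 'regression', 'metric': 'rmse', 'learning_rate': 0.05}
    gbm = lgb.train(params, lgb_train, num_boost_round=200,
                    valid_sets=lgb_eval, early_stopping_rounds=10)
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
    print('RMSE: %.4f' % (mean_squared_error(y_test, y_pred) ** 0.5))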
max_acc_test = 0
max_Avg_prec_train = 0
max_Avg_prec_test = 0
max_one_err_train = 100
max_one_err_test = 100
count_TaxoRead = 0

t_set = [0.004]
p_set = [0.2]

for t in t_set:
    for p in p_set:
        path_A_matrix = 'A_matrix_%s_%s.npy' % (str(t), str(p))
        A_matrix = np.load(path_A_matrix)

        train_x, dev_x, length, vocab, train_y, dev_y, train_y_8, dev_y_8 = load_dataset(
            FLAGS.yelp_json_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent)
        Y = train_y
        Y1 = dev_y
        Y_8 = train_y_8
        Y1_8 = dev_y_8
        N = FLAGS.train_batch_size
        print("data load finished")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        save_file = './data_A/checkpoint_dir/model_shuffle_%s_%s.ckpt' % (str(t), str(p))
        tf.reset_default_graph()

        count = 0
        count_train = 0
        count_dev = 0
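        # A minimal sketch (not from the original) of the checkpointing pattern the
        # variables above set up: rebuild the model after tf.reset_default_graph(),
        # then run a session with the GPU config and save to save_file.
        # model = build_model(A_matrix)  # hypothetical graph-construction helper
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            # ... training / evaluation loop ...
            saver.save(sess, save_file)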
"Number of training epochs (default: 200)") tf.flags.DEFINE_integer( "evaluate_every", 50, "Evaluate model on dev set after this many steps (default: 50)") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("Parameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr, value)) print("") # Data Preparation # Load data print("Loading data...") train_data, train_label, test_data, test_label = data_helper.load_dataset( FLAGS.database_path) print("Loading data succees...") # ConvNet acc_list = [0] sess = tf.Session() cnn = VDCNN(num_classes=len(train_label[0]), l2_reg_lambda=FLAGS.l2_reg_lambda, sequence_max_length=FLAGS.sequence_max_length, num_quantized_chars=69, embedding_size=16, use_k_max_pooling=FLAGS.use_k_max_pooling) # Optimizer and LR Decay update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops):
tf.flags.DEFINE_integer("sequence_length", 198, "max sentence length") tf.flags.DEFINE_integer("embed_size", 100, "embedding size") tf.flags.DEFINE_boolean("is_training", True, "is traning.true:tranining,false:testing/inference") tf.flags.DEFINE_integer("num_epochs", 60, "number of epochs to run.") tf.flags.DEFINE_integer("evaluation_every", 100, "Validate every validate_every epochs.") #每10轮做一次验证 tf.flags.DEFINE_integer('checkpoint_every', 100, 'save the model after this many steps default:100') tf.flags.DEFINE_integer('num_checkpoints', 5, 'num of model saving') tf.flags.DEFINE_integer('dropout_keep_prob', 0.5, 'the dropout prob') tf.flags.DEFINE_boolean("use_embedding", False, "whether to use embedding or not.") #load data train_x_text, train_y = data_helper.load_dataset(FLAGS.raw_train_file) test_x_text, test_y = data_helper.load_dataset(FLAGS.raw_test_file) all_x_text = train_x_text + test_x_text #build vocabulary max_document_length = max([len(x.split(' ')) for x in all_x_text]) #198有点长 vocab_processor = learn.preprocessing.VocabularyProcessor( max_document_length=max_document_length, min_frequency=3) train_x = np.array(list(vocab_processor.fit_transform(train_x_text))) text_x = np.array(list(vocab_processor.fit_transform(test_x_text))) FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print('all related parameters in RCNN:') for attr, value in sorted(FLAGS.__flags.items()):