# Other parameters
tf.flags.DEFINE_boolean('log_device_placement', False, 'log placement of ops on devices')  # whether to log device placement
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'allow TF soft placement')  # if the specified device does not exist, let TF pick one automatically

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print('all related parameters in :')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{}={}'.format(attr.upper(), value))
print('finished printing parameters.....')

# Load data
train_x, train_y, dev_x, dev_y = data_helper.load_dataset(FLAGS.raw_file)
print('load data finished!')

with tf.Session() as sess:
    han = HAN_model.HAN(FLAGS.vocab_size, FLAGS.num_classes, FLAGS.embedding_size, FLAGS.hidden_size)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=han.input_y, logits=han.out, name='loss'))

    with tf.name_scope('accuracy'):
        predict = tf.argmax(han.out, axis=1, name='predict')
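        # A minimal sketch (not from the original) of completing the accuracy scope,
        # assuming han.input_y holds one-hot labels as implied by the softmax loss above.
        label = tf.argmax(han.input_y, axis=1, name='label')
        acc = tf.reduce_mean(tf.cast(tf.equal(predict, label), tf.float32), name='accuracy')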
tf.flags.DEFINE_integer("max_sent_in_doc", 30, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("max_word_in_sent", 30, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("evaluate_every", 300, "evaluate every this many batches") tf.flags.DEFINE_float("learning_rate", 0.001, "learning rate") tf.flags.DEFINE_float("grad_clip", 5, "grad clip to prevent gradient explode") FLAGS = tf.flags.FLAGS print(FLAGS.max_sent_in_doc) print(FLAGS.max_word_in_sent) train_x, train_y, dev_x, dev_y, vocab = load_dataset(FLAGS.yelp_json_path, FLAGS.labels_json_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent) print("data load finished") #print(train_x) with tf.Session() as sess: han = HAN(vocab_size=FLAGS.vocab_size, num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size, hidden_size=FLAGS.hidden_size) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=han.input_y, logits=han.out,
tf.flags.DEFINE_float("learning_rate", 1e-2, "Starter Learning Rate (default: 1e-3)") tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 64)") tf.flags.DEFINE_integer("num_epochs", 120, "Number of training epochs (default: 200)") tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)") tf.flags.DEFINE_boolean("enable_moving_average", False, "Enable usage of Exponential Moving Average (default: False)") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("Parameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr, value)) print("") # Loading database here print("Loading database...") x_train, y_train, x_test, y_test = data_helper.load_dataset(FLAGS.database_path, zca_whitening=FLAGS.zca_whitening) num_batches_per_epoch = int((len(x_train)-1)/FLAGS.batch_size) + 1 print("Shape:",x_train.shape, y_train.shape, x_test.shape, y_test.shape) print("Success!") sess = tf.Session() cnn = CNN() # Optimizer and LR Decay #update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #with tf.control_dependencies(update_ops): global_step = tf.Variable(0, name="global_step", trainable=False) learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True) optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) #lr_decay_fn = lambda lr, global_step : tf.train.exponential_decay(lr, global_step, FLAGS.num_epochs*num_batches_per_epoch, 0.95, staircase=True) #train_op = tf.contrib.layers.optimize_loss(loss=cnn.loss, global_step=global_step, clip_gradients=4.0,
tf.flags.DEFINE_boolean('allow_soft_placement', True, 'allow TF soft placement')  # if the specified device does not exist, let TF pick one automatically

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print('all related parameters in :')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{}={}'.format(attr.upper(), value))
print('finished printing parameters.....')

# Load data
train_x, train_y = data_helper.load_dataset(fullfile='../text_for_word2vec.txt', trainOrtest='train',
                                            trainOrtestFile='../smp_train.txt', max_sent_in_doc=20,
                                            max_word_in_sent=20, vocab_path='smp_contest_vocab.pk')
test_x, test_y = data_helper.load_dataset(fullfile='../text_for_word2vec.txt', trainOrtest='test',
                                          trainOrtestFile='../smp_test.txt', max_sent_in_doc=20,
                                          max_word_in_sent=20, vocab_path='smp_contest_vocab.pk')
dev_x = test_x
dev_y = test_y
print('load data finished!')
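# A minimal sketch (not from the original) of a shuffled mini-batch generator over
# the arrays loaded above; assumes train_x/train_y are numpy arrays, and the
# batch size of 64 is an illustrative assumption.
import numpy as np  # assumed already imported in the original script

def batch_iter(x, y, batch_size=64):
    indices = np.random.permutation(len(x))
    for start in range(0, len(x), batch_size):
        idx = indices[start:start + batch_size]
        yield x[idx], y[idx]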
tf.flags.DEFINE_integer("max_sent_in_doc", 10, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_integer("max_word_in_sent", 20, "Number of checkpoints to store (default: 5)") tf.flags.DEFINE_float("lr", 0.01, "learning rate") tf.flags.DEFINE_float("grad_clip", 5, "grad clip to prevent gradient explode") tf.flags.DEFINE_float("lr_decay", 0.5, "learning rate decay (default: 0.5)") tf.flags.DEFINE_float("nepoch_no_imprv", 3, "early stopping (default: 5)") tf.flags.DEFINE_float("nepoch_lr_decay", 2, "decay of lr if no improvement (default: 3)") tf.flags.DEFINE_string("dir_model", "models", "path to save model files (default: word_char_models)") FLAGS = tf.flags.FLAGS train_x, train_y, dev_x, dev_y, _vocab_size = load_dataset( FLAGS.input_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent) print "training samples: %d" % train_x.shape[0] print "dev samples: %d" % dev_x.shape[0] print "data load finished" with tf.Session() as sess: han = HAN(vocab_size=_vocab_size, num_classes=FLAGS.num_classes, embedding_size=FLAGS.embedding_size, hidden_size=FLAGS.hidden_size) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=han.input_y, logits=han.out, name='loss'))
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn import linear_model
from sklearn import neighbors
import xgboost as xgb
import lightgbm as lgb  # needed for lgb.Dataset below

tasks = ['anger', 'fear', 'joy', 'sadness']
gensim_model = None

for task in tasks:
    print('Running for task', task)

    # Load or create the dataset
    print('Load data...')
    X_train, y_train, train_id, train_raw, gensim_model = data_helper.load_dataset(
        'train', task, gensim_model)
    X_test, y_test, test_id, test_raw, gensim_model = data_helper.load_dataset(
        'test', task, gensim_model)

    # --------------------------------- LightGBM --------------------------------------
    # Create datasets for LightGBM
    #print X_train, y_train
    #print type(X_train), type(y_train)
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

    # Specify your configurations as a dict (the original block is truncated here):
    # params = {
    #     'task': 'train',
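    # A minimal sketch (not from the original) standing in for the truncated params
    # block above; these values are illustrative assumptions, not tuned settings.
    params = {'objective': 'regression', 'metric': 'rmse', 'learning_rate': 0.05}
    gbm = lgb.train(params, lgb_train, num_boost_round=200,
                    valid_sets=lgb_eval, early_stopping_rounds=10)
    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
    print('RMSE: %.4f' % (mean_squared_error(y_test, y_pred) ** 0.5))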
max_acc_test = 0
max_Avg_prec_train = 0
max_Avg_prec_test = 0
max_one_err_train = 100
max_one_err_test = 100
count_TaxoRead = 0

t_set = [0.004]
p_set = [0.2]

for t in t_set:
    for p in p_set:
        path_A_matrix = 'A_matrix_%s_%s.npy' % (str(t), str(p))
        A_matrix = np.load(path_A_matrix)

        train_x, dev_x, length, vocab, train_y, dev_y, train_y_8, dev_y_8 = load_dataset(
            FLAGS.yelp_json_path, FLAGS.max_sent_in_doc, FLAGS.max_word_in_sent)
        Y = train_y
        Y1 = dev_y
        Y_8 = train_y_8
        Y1_8 = dev_y_8
        N = FLAGS.train_batch_size
        print("data load finished")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        save_file = './data_A/checkpoint_dir/model_shuffle_%s_%s.ckpt' % (str(t), str(p))
        tf.reset_default_graph()

        count = 0
        count_train = 0
        count_dev = 0
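        # A minimal sketch (not from the original) of the checkpointing pattern the
        # variables above set up: rebuild the model after tf.reset_default_graph(),
        # then run a session with the GPU config and save to save_file.
        # model = build_model(A_matrix)  # hypothetical graph-construction helper
        saver = tf.train.Saver()
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            # ... training / evaluation loop ...
            saver.save(sess, save_file)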
"Number of training epochs (default: 200)") tf.flags.DEFINE_integer( "evaluate_every", 50, "Evaluate model on dev set after this many steps (default: 50)") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("Parameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr, value)) print("") # Data Preparation # Load data print("Loading data...") train_data, train_label, test_data, test_label = data_helper.load_dataset( FLAGS.database_path) print("Loading data succees...") # ConvNet acc_list = [0] sess = tf.Session() cnn = VDCNN(num_classes=len(train_label[0]), l2_reg_lambda=FLAGS.l2_reg_lambda, sequence_max_length=FLAGS.sequence_max_length, num_quantized_chars=69, embedding_size=16, use_k_max_pooling=FLAGS.use_k_max_pooling) # Optimizer and LR Decay update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops):
tf.flags.DEFINE_integer("sequence_length", 198, "max sentence length") tf.flags.DEFINE_integer("embed_size", 100, "embedding size") tf.flags.DEFINE_boolean("is_training", True, "is traning.true:tranining,false:testing/inference") tf.flags.DEFINE_integer("num_epochs", 60, "number of epochs to run.") tf.flags.DEFINE_integer("evaluation_every", 100, "Validate every validate_every epochs.") #每10轮做一次验证 tf.flags.DEFINE_integer('checkpoint_every', 100, 'save the model after this many steps default:100') tf.flags.DEFINE_integer('num_checkpoints', 5, 'num of model saving') tf.flags.DEFINE_integer('dropout_keep_prob', 0.5, 'the dropout prob') tf.flags.DEFINE_boolean("use_embedding", False, "whether to use embedding or not.") #load data train_x_text, train_y = data_helper.load_dataset(FLAGS.raw_train_file) test_x_text, test_y = data_helper.load_dataset(FLAGS.raw_test_file) all_x_text = train_x_text + test_x_text #build vocabulary max_document_length = max([len(x.split(' ')) for x in all_x_text]) #198有点长 vocab_processor = learn.preprocessing.VocabularyProcessor( max_document_length=max_document_length, min_frequency=3) train_x = np.array(list(vocab_processor.fit_transform(train_x_text))) text_x = np.array(list(vocab_processor.fit_transform(test_x_text))) FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print('all related parameters in RCNN:') for attr, value in sorted(FLAGS.__flags.items()):