# Dump every flag as NAME=value, ordered by flag name.
flag_items = sorted(FLAGS.__flags.items())
for attr, value in flag_items:
    print("{}={}".format(attr.upper(), value))
print("")


# Data Preparation
# ==================================================

# The cluster layout is read from the CLUSTER_CONFIG environment variable and
# parsed as a Python literal. The master is task 0 of the FLAGS.master_job
# job, reached at the first address listed for that job.
cluster_config = ast.literal_eval(os.environ['CLUSTER_CONFIG'])
master_device = '/job:{}/task:0'.format(FLAGS.master_job)
master_address = 'grpc://{}'.format(cluster_config[FLAGS.master_job][0])

# Load data
print("Loading data...")
timestamp = str(int(time.time()))  # will use this for the run dir
x, y, vocabulary, vocabulary_inv = data_helpers.load_data(
    run=timestamp,
    cat1="./data/subreddit_news",
    cat2="./data/subreddit_aww")

# Randomly shuffle data; the fixed seed makes the permutation reproducible.
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled, y_shuffled = x[shuffle_indices], y[shuffle_indices]

# Split train/test set: the last `dev_size` shuffled rows form the dev set.
# TODO: This is very crude, should use cross-validation
dev_size = 1000
x_train = x_shuffled[:-dev_size]
x_dev = x_shuffled[-dev_size:]
y_train = y_shuffled[:-dev_size]
y_dev = y_shuffled[-dev_size:]

print("(Capped) Vocabulary Size: {:d}".format(len(vocabulary)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
# Report the run configuration: one NAME=value line per flag, sorted by name.
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")


# Data Preparation
# ==================================================

# CLUSTER_CONFIG (environment variable) holds a Python-literal mapping that is
# indexed by job name; the first entry of the master job is the gRPC target.
cluster_config = ast.literal_eval(os.environ['CLUSTER_CONFIG'])
master_device = '/job:{}/task:0'.format(FLAGS.master_job)
master_address = 'grpc://{}'.format(cluster_config[FLAGS.master_job][0])

# Load data
print("Loading data...")
timestamp = str(int(time.time()))  # will use this for the run dir
x, y, vocabulary, vocabulary_inv = data_helpers.load_data(
    run=timestamp, cat1="./data/subreddit_news", cat2="./data/subreddit_aww")

# Randomly shuffle data (seeded so every run sees the same order).
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

# Split train/test set
# TODO: This is very crude, should use cross-validation
dev_size = 1000
train_rows = slice(None, -dev_size)  # everything except the last dev_size rows
dev_rows = slice(-dev_size, None)    # the last dev_size rows
x_train, x_dev = x_shuffled[train_rows], x_shuffled[dev_rows]
y_train, y_dev = y_shuffled[train_rows], y_shuffled[dev_rows]

print("(Capped) Vocabulary Size: {:d}".format(len(vocabulary)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


# Training
# ==================================================
# Misc Parameters tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") tf.flags.DEFINE_boolean("use_pretrain", True, "use pretrained embedding") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") x_train_arg1, x_train_arg2, y_train, x_test_arg1, x_test_arg2, y_test, vocabulary = data_helpers2.load_data( FLAGS.train_data_file, FLAGS.test_data_file, FLAGS.max_sequence_length, FLAGS.label) print("train/test already!") embedding = defaultdict(list) model = gensim.models.KeyedVectors.load_word2vec_format( "GoogleNews-vectors-negative300.bin", unicode_errors="ignore", binary=True) words = [] for word in model.vocab: words.append(word) embedding_dim = len(model[words[0]]) print("embedding_dim:{}".format(embedding_dim)) for key in vocabulary:
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)") tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS print('-----Parameters-----') for attr, value in sorted(FLAGS.__flags.iteritems()): print('{} = {}'.format(attr.upper(), value)) print('\n') # Get Data X, y, vocabulary, vocabulary_inv = data_helpers2.load_data() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2) print 'Vocabulary Size: {}'.format(len(vocabulary)) print 'Train / Test Split: {} / {}'.format(X_train.shape[0], X_test.shape[0]) # Train with tf.Graph().as_default(): session_config = tf.ConfigProto( allow_soft_placement = FLAGS.allow_soft_placement log_device_placement = FLAGS.log_device_placement) sess = tf.Session(config = session_config) with sess.as_default():
"allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean( "log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") # Load data. Load your own data here print("Loading data...") x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data( eval=True, vocab_file=FLAGS.vocab_file, cat1="./data/subreddit_news", cat2="./data/subreddit_aww") y_test = np.argmax(y_test, axis=1) print("Vocabulary size: {:d}".format(len(vocabulary))) print("Test set size {:d}".format(len(y_test))) print("\nEvaluating...\n") # Evaluation # ================================================== checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) print("checkpoint file: {}".format(checkpoint_file)) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement,
# Vocabulary file written by the training run being evaluated.
vocab_file = "./vocab1487512553.json"

# Misc Parameters
allow_soft_placement = True
log_device_placement = False

# Load data. Load your own data here
print("Loading data...")
x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data(
    eval=True,
    vocab_file=vocab_file,
    cat1="./data/sentiment.positive",
    cat2="./data/sentiment.negative")


# Evaluation
# ==================================================

# The checkpoint path is hard-coded here; a previous revision looked it up
# with tf.train.latest_checkpoint(checkpoint_dir) and printed it.
checkpoint_file = "./runs/1487512553/checkpoints/model-18200"

session_conf = tf.ConfigProto(
    allow_soft_placement=allow_soft_placement,
    log_device_placement=log_device_placement)
# The session is created directly, not inside a `with sess.as_default():`
# block (that wrapper was disabled in the original).
sess = tf.Session(config=session_conf)

# Load the saved meta graph that sits next to the checkpoint.
meta_graph_path = "{}.meta".format(checkpoint_file)
saver = tf.train.import_meta_graph(meta_graph_path)
"Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value)) print("") # Load data. Load your own data here print("Loading data...") x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data( eval=True, vocab_file=FLAGS.vocab_file, cat1="./data/subreddit_news", cat2="./data/subreddit_aww") y_test = np.argmax(y_test, axis=1) print("Vocabulary size: {:d}".format(len(vocabulary))) print("Test set size {:d}".format(len(y_test))) print("\nEvaluating...\n") # Evaluation # ================================================== checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) print("checkpoint file: {}".format(checkpoint_file)) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto(