Example #1
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")


# Data Preparation
# ==================================================

cluster_config = ast.literal_eval(os.environ['CLUSTER_CONFIG'])
master_device = '/job:{}/task:0'.format(FLAGS.master_job)
master_address = 'grpc://{}'.format(cluster_config[FLAGS.master_job][0])

# Load data
print("Loading data...")
timestamp = str(int(time.time()))  # will use this for the run dir
x, y, vocabulary, vocabulary_inv = data_helpers.load_data(
    run=timestamp, cat1="./data/subreddit_news", cat2="./data/subreddit_aww")
# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
# Split train/test set
# TODO: This is very crude, should use cross-validation (a stratified alternative is sketched below)
dev_size = 1000
x_train, x_dev = x_shuffled[:-dev_size], x_shuffled[-dev_size:]
y_train, y_dev = y_shuffled[:-dev_size], y_shuffled[-dev_size:]
print("(Capped) Vocabulary Size: {:d}".format(len(vocabulary)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


Example #2
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

cluster_config = ast.literal_eval(os.environ['CLUSTER_CONFIG'])
master_device = '/job:{}/task:0'.format(FLAGS.master_job)
master_address = 'grpc://{}'.format(cluster_config[FLAGS.master_job][0])

# Load data
print("Loading data...")
timestamp = str(int(time.time()))  # will use this for the run dir
x, y, vocabulary, vocabulary_inv = data_helpers.load_data(
    run=timestamp, cat1="./data/subreddit_news", cat2="./data/subreddit_aww")
# Randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
# Split train/test set
# TODO: This is very crude, should use cross-validation
dev_size = 1000
x_train, x_dev = x_shuffled[:-dev_size], x_shuffled[-dev_size:]
y_train, y_dev = y_shuffled[:-dev_size], y_shuffled[-dev_size:]
print("(Capped) Vocabulary Size: {:d}".format(len(vocabulary)))
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

# Training
# ==================================================
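
The example stops at the Training banner. A minimal sketch of how the master_device and master_address values computed above are typically consumed in TF 1.x distributed training; the model-building step is elided:

# Pin graph construction to the master task and connect the session
# to its gRPC server.
with tf.Graph().as_default(), tf.device(master_device):
    # ... build the model here ...
    sess = tf.Session(master_address,
                      config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.global_variables_initializer())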
Example #3
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")
tf.flags.DEFINE_boolean("use_pretrain", True, "use pretrained embedding")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

x_train_arg1, x_train_arg2, y_train, x_test_arg1, x_test_arg2, y_test, vocabulary = data_helpers2.load_data(
    FLAGS.train_data_file, FLAGS.test_data_file, FLAGS.max_sequence_length,
    FLAGS.label)
print("train/test already!")

embedding = defaultdict(list)

model = gensim.models.KeyedVectors.load_word2vec_format(
    "GoogleNews-vectors-negative300.bin", unicode_errors="ignore", binary=True)
words = []
for word in model.vocab:
    words.append(word)
embedding_dim = len(model[words[0]])

print("embedding_dim:{}".format(embedding_dim))

for key in vocabulary:
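    # The snippet is cut off here. A plausible body, following the common
    # pretrained-embedding pattern (the 0.25 init range is an assumption,
    # not taken from this code):
    if key in model.vocab:
        embedding[key] = model[key]  # reuse the word2vec vector
    else:
        # random init for words missing from the pretrained model
        embedding[key] = np.random.uniform(-0.25, 0.25, embedding_dim)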
Example #4
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")

tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")


FLAGS = tf.flags.FLAGS

print('-----Parameters-----')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{} = {}'.format(attr.upper(), value))
print('\n')


# Get Data
X, y, vocabulary, vocabulary_inv = data_helpers2.load_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

print('Vocabulary Size: {}'.format(len(vocabulary)))
print('Train / Test Split: {} / {}'.format(X_train.shape[0], X_test.shape[0]))

# Train

with tf.Graph().as_default():
    session_config = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)

    sess = tf.Session(config=session_config)

    with sess.as_default():
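        # The example ends here. A hypothetical continuation: initialize
        # variables, then run a plain mini-batch loop. train_op and the
        # input placeholders are assumptions, not part of this snippet.
        sess.run(tf.global_variables_initializer())
        batch_size = 64
        for epoch in range(10):
            for start in range(0, X_train.shape[0], batch_size):
                x_batch = X_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]
                # sess.run(train_op, feed_dict={input_x: x_batch,
                #                               input_y: y_batch})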
Example #5
    "allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean(
    "log_device_placement", False, "Log placement of ops on devices")


FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data. Load your own data here
print("Loading data...")
x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data(
    eval=True, vocab_file=FLAGS.vocab_file,
    cat1="./data/subreddit_news", cat2="./data/subreddit_aww")
y_test = np.argmax(y_test, axis=1)
print("Vocabulary size: {:d}".format(len(vocabulary)))
print("Test set size {:d}".format(len(y_test)))

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
print("checkpoint file: {}".format(checkpoint_file))
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
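        # The example is cut off mid-call; the usual continuation creates
        # the session and restores the checkpoint (Example #6 shows the
        # same restore step):
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        saver = tf.train.import_meta_graph(
            "{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)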
Example #6
vocab_file = "./vocab1487512553.json"

# Misc Parameters
allow_soft_placement = True
log_device_placement = False



# Load data. Load your own data here
print("Loading data...")
#x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data(
#    eval=True, vocab_file=vocab_file,
#    cat1="./data/sentiment.positive", cat2="./data/sentiment.negative")

x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data(
    eval=True, vocab_file=vocab_file,
    cat1="./data/sentiment.positive", cat2="./data/sentiment.negative")

# Evaluation
# ==================================================
#checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
checkpoint_file = "./runs/1487512553/checkpoints/model-18200"
#print("checkpoint file: {}".format(checkpoint_file))

session_conf = tf.ConfigProto(
    allow_soft_placement=allow_soft_placement,
    log_device_placement=log_device_placement)
sess = tf.Session(config=session_conf)
#    with sess.as_default():

# Load the saved meta graph and restore variables
saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
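
Example #6 stops after loading the meta graph. A minimal sketch of the remaining restore-and-predict steps, assuming numpy is imported as np; the tensor names are assumptions carried over from the companion training code, not taken from this snippet:

saver.restore(sess, checkpoint_file)
graph = tf.get_default_graph()
input_x = graph.get_tensor_by_name("input_x:0")
dropout_keep_prob = graph.get_tensor_by_name("dropout_keep_prob:0")
predictions = graph.get_tensor_by_name("output/predictions:0")

# Feed the whole test set at once; dropout is disabled at eval time.
all_predictions = sess.run(
    predictions, {input_x: x_test, dropout_keep_prob: 1.0})
correct = float(np.sum(all_predictions == np.argmax(y_test, axis=1)))
print("Test accuracy: {:g}".format(correct / len(y_test)))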
Example #7
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load data. Load your own data here
print("Loading data...")
x_test, y_test, vocabulary, vocabulary_inv = data_helpers.load_data(
    eval=True,
    vocab_file=FLAGS.vocab_file,
    cat1="./data/subreddit_news",
    cat2="./data/subreddit_aww")
y_test = np.argmax(y_test, axis=1)
print("Vocabulary size: {:d}".format(len(vocabulary)))
print("Test set size {:d}".format(len(y_test)))

print("\nEvaluating...\n")

# Evaluation
# ==================================================
checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
print("checkpoint file: {}".format(checkpoint_file))
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(