Exemplo n.º 1
0
# Prepare output directory for models and summaries
# =======================================================

timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(out_dir, exist_ok=True)

# Data preprocess
# =======================================================
# Load data
print("Loading data...")
# cut=False: no word segmentation is performed (sentences are used as-is).
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file,
    FLAGS.negative_data_file,
    cut=False,
    stop_words_list_file=None,
)
# Word-segmented variant, kept for reference:
# x_text, y = data_helpers.load_positive_negative_data_files(
#     FLAGS.positive_data_file, FLAGS.negative_data_file,
#     cut=True, stop_words_list_file=FLAGS.stop_word_file)

# Get embedding vector: pad every sentence to a fixed length of 20 tokens,
# then embed with word2vec; the trained model is saved inside the run dir.
sentences = data_helpers.padding_sentences(x_text,
                                           '<PADDING>',
                                           padding_sentence_length=20)
x = np.array(
    word2vec_helpers.embedding_sentences(sentences,
                                         embedding_size=FLAGS.embedding_dim,
                                         file_to_save=os.path.join(
                                             out_dir,
                                             'trained_word2vec.model')))
Exemplo n.º 2
0
# Prepare output directory for models and summaries
# =======================================================

timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(out_dir, exist_ok=True)

# Data preprocess
# =======================================================

# Load data
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file, FLAGS.negative_data_file)

# Get embedding vector: pad all sentences to the length of the longest one,
# then embed with word2vec; the trained model is saved inside the run dir.
sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
print('max_document_length:' + str(max_document_length))
x = np.array(
    word2vec_helpers.embedding_sentences(sentences,
                                         embedding_size=FLAGS.embedding_dim,
                                         file_to_save=os.path.join(
                                             out_dir,
                                             'trained_word2vec.model')))
print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))  # original result (translated comment)

# Save params
Exemplo n.º 3
0
# Prepare output directory for models and summaries
# =======================================================

timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(out_dir, exist_ok=True)

# Data preprocess
# =======================================================

# Load data
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.positive_data_file, FLAGS.negative_data_file)

# Get embedding vector: pad all sentences to the length of the longest one,
# then embed with word2vec; the trained model is saved inside the run dir.
sentences, max_document_length = data_helpers.padding_sentences(x_text, '<PADDING>')
x = np.array(
    word2vec_helpers.embedding_sentences(
        sentences,
        embedding_size=FLAGS.embedding_dim,
        file_to_save=os.path.join(out_dir, 'trained_word2vec.model')))
print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))

# Save params that evaluation needs to reconstruct the preprocessing.
training_params_file = os.path.join(out_dir, 'training_params.pickle')
params = {'num_labels': FLAGS.num_labels, 'max_document_length': max_document_length}
data_helpers.saveDict(params, training_params_file)

# Shuffle data randomly (fixed seed keeps the split reproducible across runs)
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
Exemplo n.º 4
0
print("Using word2vec model file : {}".format(trained_word2vec_model_file))

# Validate training params file saved at training time; without it we cannot
# recover num_labels / max_document_length.
training_params_file = os.path.join(FLAGS.checkpoint_dir, "..", "training_params.pickle")
if not os.path.exists(training_params_file):
    # Fail fast: the original only printed a warning and then crashed later
    # inside data_helpers.loadDict on the missing file.
    raise FileNotFoundError(
        "Training params file '{}' is missing!".format(training_params_file))
print("Using training params file : {}".format(training_params_file))

# Load params
params = data_helpers.loadDict(training_params_file)
num_labels = int(params['num_labels'])
max_document_length = int(params['max_document_length'])

# Load data
if FLAGS.eval_train:
    # NOTE(review): passes the whole FLAGS object, while the training scripts
    # pass individual file paths — verify the helper's signature.
    x_raw, y_test = data_helpers.load_positive_negative_data_files(FLAGS)
else:
    x_raw = ["a masterpiece four years in the making", "everything is off."]
    y_test = [1, 0]

# Get Embedding vector x_test
# Bug fix: was the Python 2 statement `print max_document_length`, a
# SyntaxError under Python 3 (the rest of the file uses print()).
print(max_document_length)
x_test, max_document_length = data_helpers.padding_sentences(
    x_raw, '<PADDING>', padding_sentence_length=max_document_length)
_, w2vModel = word2vec_helpers.embedding_sentences(
    file_to_load=trained_word2vec_model_file)
x_test = np.array(x_test)

print("x_test.shape = {}".format(x_test.shape))


# Evaluation
# ==================================================
Exemplo n.º 5
0
# Prepare output directory for models and summaries
# =======================================================

timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(out_dir, exist_ok=True)

# Data preprocess
# =======================================================

# Load data
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.cooking_data_file, FLAGS.music_data_file, FLAGS.video_data_file)

# Test set
x_test, y_test = data_helpers.load_positive_negative_data_files(
    FLAGS.cooking_test, FLAGS.music_test, FLAGS.video_test)
print('=============', len(x_test), len(x_test[0]))

# Get embedding vector: pad all sentences to the length of the longest one,
# then embed with word2vec; the trained model is saved inside the run dir.
sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
x = np.array(
    word2vec_helpers.embedding_sentences(sentences,
                                         embedding_size=FLAGS.embedding_dim,
                                         file_to_save=os.path.join(
                                             out_dir,
                                             'trained_word2vec.model')))
Exemplo n.º 6
0
# Prepare output directory for models and summaries
# =======================================================

timestamp = str(int(time.time()))
out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
print("Writing to {}\n".format(out_dir))
# exist_ok=True avoids the check-then-create race of os.path.exists + makedirs
os.makedirs(out_dir, exist_ok=True)

# Data preprocess
# =======================================================

# Load data (four-class variant: cause / diagnosis / treatment / symptom files)
print("Loading data...")
x_text, y = data_helpers.load_positive_negative_data_files(
    FLAGS.bingyin_data_file, FLAGS.zhenduan_data_file, FLAGS.zhiliao_data_file,
    FLAGS.zhengzhuang_data_file)

# Get embedding vector: pad all sentences to the length of the longest one,
# then embed with word2vec; the trained model is saved inside the run dir.
sentences, max_document_length = data_helpers.padding_sentences(
    x_text, '<PADDING>')
x = np.array(
    word2vec_helpers.embedding_sentences(sentences,
                                         embedding_size=FLAGS.embedding_dim,
                                         file_to_save=os.path.join(
                                             out_dir,
                                             'trained_word2vec.model')))
print("x.shape = {}".format(x.shape))
print("y.shape = {}".format(y.shape))

# Save params