# Incrementally train the Keras model on Spark with Elephas, evaluating and
# logging train/test accuracy after every training step and checkpointing the
# model every 10 steps.
# NOTE(review): source was whitespace-mangled; loop structure reconstructed.
stat_lines = []
adagrad = elephas_optimizers.Adagrad()
for i in range(0, 200):
    # Train Spark model
    # Initialize SparkModel from Keras model and Spark context
    spark_model = SparkModel(sc, model, mode='asynchronous', frequency='epoch',
                             num_workers=1, optimizer=adagrad)
    spark_model.train(rdd, nb_epoch=num_epoch_in_one_step,
                      batch_size=batch_size, verbose=0, validation_split=0.1)
    score1 = model.evaluate(x_train, y_train, verbose=0)
    score2 = model.evaluate(x_test, y_test, verbose=0)
    epochs_done = (i + 1) * num_epoch_in_one_step
    print('#############################')
    print('Finished epochs', epochs_done)
    print('Train accuracy:', score1[1])
    print('Test accuracy:', score2[1])
    print('#############################')
    # Fix: the stat line and the checkpoint filename previously hard-coded a
    # factor of 10 instead of num_epoch_in_one_step, so they disagreed with
    # the printed epoch count whenever num_epoch_in_one_step != 10.
    stat_lines.append(
        str(epochs_done) + ', ' + str(score1[1]) + ', ' + str(score2[1]))
    FileIO.write_lines_to_file('./cnn_1.log', stat_lines)
    # Checkpoint every 10 steps. Fix: "and i != 0" was redundant because
    # (i + 1) % 10 == 0 already implies i >= 9.
    if (i + 1) % 10 == 0:
        model.save('./models/cnn_1_' + str(epochs_done) + 'ep.h5')
# sc.stop()
## END OF SPARK ##
# Compile the model, then run 20 training steps of `epoch_step` epochs each.
# After every step: report elapsed time and test metrics, append the step's
# per-epoch histories to the running logs, rewrite the log file, and save a
# checkpoint named by the cumulative epoch count.
model.compile(loss='categorical_crossentropy', optimizer=Adam(),
              metrics=['accuracy'])
## END OF MODEL ##
loss = []
acc = []
val_acc = []
start_time = datetime.datetime.now()
for step in range(20):
    history = model.fit(x_train, y_train, 128, epoch_step, verbose=1,
                        validation_data=(x_test, y_test))
    end_time = datetime.datetime.now()
    print(str(end_time - start_time))

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    ## SAVE
    # Accumulate this step's per-epoch history onto the running series.
    loss.extend(history.history['loss'])
    acc.extend(history.history['acc'])
    val_acc.extend(history.history['val_acc'])
    print(len(loss))

    lines = [
        str(end_time - start_time),
        ','.join(str(a) for a in loss),
        ','.join(str(a) for a in acc),
        ','.join(str(a) for a in val_acc),
    ]
    FileIO.write_lines_to_file(
        './gpu_cnn_' + str(num_conv_block) + '_convB_6_layers.log', lines)
    model.save('./models/gpu_cnn_epoch_' + str((step + 1) * epoch_step) +
               'ep_' + str(num_conv_block) + '_convB_6_layers.h5')
# Scrape up to 1000 post messages (>= 50 characters each) from each Facebook
# page's feed via the Graph API and write them to ./fb_posts/<page_id>.txt.
base_path = "https://graph.facebook.com"
# SECURITY NOTE(review): a live access token is hard-coded in source control;
# it should be revoked and loaded from an environment variable instead.
token = "1825924290976649|f4a421b77888587f351418a5aa84762c"
# page_ids = ['healthzone.tips']
page_ids = ['hillaryclinton']
for page_id in page_ids:
    texts = []
    feedRequestUrl = base_path + "/" + page_id + "/feed?access_token=" + token

    def do(url):
        """Collect post messages into `texts`, following feed pagination."""
        # Fix: the original recursed with the SAME url whenever 'paging' was
        # present, re-fetching page 1 forever (and eventually hitting the
        # recursion limit). Follow paging['next'] iteratively instead.
        while url:
            r = requests.get(url)
            json_dict = r.json()
            for data in json_dict.get('data', []):
                print(data.keys())
                # Fix: use a separate name instead of clobbering the loop
                # variable `data` with the message text.
                message = data.get('message')
                if message is None:
                    continue
                message = message.replace('\n', ' ')
                if len(message) >= 50:
                    texts.append(message)
                print(len(texts))
                if len(texts) >= 1000:
                    return
            url = json_dict.get('paging', {}).get('next')

    do(feedRequestUrl)
    # write text
    FileIO.write_lines_to_file('./fb_posts/' + page_id + '.txt', texts)
# Compile the model, then run 100 training steps of `epoch_step` epochs each,
# recording train/test accuracy after every step, rewriting the log file, and
# overwriting a single model checkpoint.
model.compile(loss='categorical_crossentropy', optimizer=Adam(),
              metrics=['accuracy'])
## END OF MODEL ##
# Fix: removed the unused `loss = []` accumulator — nothing was ever appended
# to it or written out in this script.
acc = []
val_acc = []
start_time = datetime.datetime.now()
for i in range(0, 100):
    model.fit(x_train, y_train, 128, epoch_step, verbose=1,
              validation_data=(x_test, y_test))
    end_time = datetime.datetime.now()
    print(str(end_time - start_time))
    score1 = model.evaluate(x_train, y_train, verbose=0)
    score2 = model.evaluate(x_test, y_test, verbose=0)
    print('Train accuracy:', score1[1])
    print('Test accuracy:', score2[1])
    ## SAVE
    acc.append(score1[1])
    val_acc.append(score2[1])
    lines = []
    lines.append(str(end_time - start_time))
    lines.append(','.join([str(a) for a in acc]))
    lines.append(','.join([str(a) for a in val_acc]))
    # NOTE(review): the log is named "am_review_dataset" but the checkpoint is
    # named "bbc_dataset" — confirm which dataset this run actually targets.
    FileIO.write_lines_to_file('./am_review_dataset_7blkup.log', lines)
    model.save('./models/bbc_dataset_7blkup.h5')
# NOTE(review): this chunk begins inside a branch whose opening `if`/`elif`
# (the 'ag1'/'ag2' training arms) lies before the visible source; the
# indentation below is reconstructed from a whitespace-mangled original.
    # 'ag2' arm: evaluate on the ag2 split and record both accuracies.
    score1 = training_model.evaluate(x_train['ag2'], y_train['ag2'], verbose=0)
    score2 = training_model.evaluate(x_test['ag2'], y_test['ag2'], verbose=0)
    print('Train accuracy:', score1[1])
    print('Test accuracy:', score2[1])
    acc['ag2'].append(score1[1])
    val_acc['ag2'].append(score2[1])
else:
    # train on bbc
    # Replace the fully-connected head with one sized for the bbc label set,
    # then fine-tune and evaluate on the bbc split.
    renew_fc_layers(training_model, num_classes['bbc'])
    print('bbc:', training_model.output_shape)
    training_model.fit(x_train['bbc'], y_train['bbc'], 128, epoch_step,
                       verbose=1,
                       validation_data=(x_test['bbc'], y_test['bbc']))
    score1 = training_model.evaluate(x_train['bbc'], y_train['bbc'], verbose=0)
    score2 = training_model.evaluate(x_test['bbc'], y_test['bbc'], verbose=0)
    print('Train accuracy:', score1[1])
    print('Test accuracy:', score2[1])
    acc['bbc'].append(score1[1])
    val_acc['bbc'].append(score2[1])
# Dump the per-dataset train/test accuracy histories (one CSV line each,
# preceded by the dataset tag) and save the final model.
lines = []
lines.append('ag1')
lines.append(','.join([str(a) for a in acc['ag1']]))
lines.append(','.join([str(a) for a in val_acc['ag1']]))
lines.append('ag2')
lines.append(','.join([str(a) for a in acc['ag2']]))
lines.append(','.join([str(a) for a in val_acc['ag2']]))
lines.append('bbc')
lines.append(','.join([str(a) for a in acc['bbc']]))
lines.append(','.join([str(a) for a in val_acc['bbc']]))
FileIO.write_lines_to_file('./switch_learning_ag12bbc.log', lines)
training_model.save('./models/switch_learning_ag12bbc.h5')
# shuffle x, y = shuffle(x, y, random_state=0) f.close() return x, y, len(classes) model_path = './models/switch_learning_ag12bbc.h5' model = load_model(model_path) model.summary() print('num of layers', len(model.layers)) intermediate_layer_model = Model(input=model.input, output=model.layers[12].output) intermediate_layer_model.summary() x, y, num_classes = get_data() x = x.reshape(x.shape[0], x.shape[1], x.shape[2], 1) print(x.shape) lines = [] while len(x) != 0 and len(lines) < 100000: batch_x = x[:128] batch_y = y[:128] x = x[128:] y = y[128:] print(x.shape) intermediate_output = intermediate_layer_model.predict(batch_x) for i in range(len(intermediate_output)): output = ["%.4f" % item for item in intermediate_output[i].tolist()] f = ','.join(output) lines.append(batch_y[i] + '|sep|' + f) FileIO.write_lines_to_file('./datasets/switch_ag12bbc.txt', lines)
# NOTE(review): this chunk begins inside a per-document loop whose `for`
# header lies before the visible source; indentation below is reconstructed
# from a whitespace-mangled original.
    # Skip documents too short to embed meaningfully.
    if len(words) <= 3:
        continue
    vectors = []
    for word in words:
        try:
            vectors.append(word_vectors[word])
        except Exception as e:
            # Out-of-vocabulary word: skip it.
            # NOTE(review): this broad except also hides non-lookup failures;
            # catching KeyError alone would be safer.
            # print(e)
            pass
    # Sum the word vectors for this document.
    # NOTE(review): if every word is OOV, `vectors` is empty and reduce()
    # raises TypeError — confirm upstream filtering prevents that.
    average_vector = functools.reduce(np.add, vectors)
    # NOTE(review): divides by 300 (presumably the embedding dimensionality),
    # not by len(vectors), so this is a scaled sum rather than a true mean of
    # the word vectors — confirm this is intended.
    average_vector = average_vector / 300
    labels.append(label)
    doc_vectors.append(average_vector)
# Shuffle the assembled dataset deterministically before writing it out.
print('shuffle vectors')
labels = np.array(labels)
doc_vectors = np.array(doc_vectors)
print(labels.shape)
print(doc_vectors.shape)
doc_vectors, labels = shuffle(doc_vectors, labels, random_state=0)
print('write output')
# Emit the first 100000 vectors as '<label>|sep|<csv floats>' lines.
# NOTE(review): assumes at least 100000 documents survived filtering —
# range(len(labels)) would avoid an IndexError; TODO confirm dataset size.
lines = []
for i in range(100000):
    vector = doc_vectors[i]
    vector = ["%.4f" % item for item in vector.tolist()]
    label = labels[i]
    lines.append(label + '|sep|' + ','.join(vector))
FileIO.write_lines_to_file('./datasets/word2vec_ag12bbc.txt', lines)