def load_rlm_lstm_model():
    rlm_lstm_model_path = f'{rlm_base_models_path}/lstm_all_data.h5'
    if path.exists(rlm_lstm_model_path):
        with open(rlm_lstm_model_path, 'rb') as rlm_lstm_pkl_file:
            rlm_lstm_model = load_model(
                rlm_lstm_pkl_file,
                custom_objects=SeqSelfAttention.get_custom_objects())
    else:
        rlm_lstm_model_url = f'{base_url}/lstm_all_data.h5'
        rlm_lstm_model_request = requests.get(rlm_lstm_model_url)
        with tqdm.wrapattr(open(os.devnull, "wb"), "write",
                           miniters=1,
                           desc=rlm_lstm_model_url.split('/')[-1],
                           total=int(rlm_lstm_model_request.headers.get(
                               'content-length', 0))) as fout:
            for chunk in rlm_lstm_model_request.iter_content(chunk_size=4096):
                fout.write(chunk)
        with open(rlm_lstm_model_path, 'wb') as rlm_lstm_pkl_file:
            rlm_lstm_pkl_file.write(rlm_lstm_model_request.content)
        with open(rlm_lstm_model_path, 'rb') as rlm_lstm_pkl_file:
            rlm_lstm_model = load_model(
                rlm_lstm_pkl_file,
                custom_objects=SeqSelfAttention.get_custom_objects())
    rlm_lstm_model._make_predict_function()
    return rlm_lstm_model
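# Hedged sketch, not from the repo above: the same download could stream the
# response straight into the target file while tqdm tracks progress, instead of
# buffering the whole payload and writing chunks to os.devnull. The helper name
# and chunk size below are illustrative assumptions.
import requests
from tqdm import tqdm


def download_with_progress(url, dest_path, chunk_size=4096):
    response = requests.get(url, stream=True)
    total = int(response.headers.get('content-length', 0))
    with open(dest_path, 'wb') as fout, tqdm(
            total=total, unit='B', unit_scale=True,
            desc=url.split('/')[-1]) as progress:
        for chunk in response.iter_content(chunk_size=chunk_size):
            fout.write(chunk)
            progress.update(len(chunk))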
def load_model_attention(model_path):
    """
    Load a pretrained recurrent network with an attention mechanism.

    Parameters
    ----------
    model_path : str
        Path of the pretrained attention model (without file extension).

    Returns
    -------
    Pretrained model with attention mechanism.
    """
    from keras_self_attention import SeqSelfAttention
    import keras

    json_file = open(model_path + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    trained_model = keras.models.model_from_json(
        loaded_model_json,
        custom_objects=SeqSelfAttention.get_custom_objects())
    # load weights into the new model
    trained_model.load_weights(model_path + '.h5')
    print("trained model loaded")
    model = trained_model
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
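# Hedged sketch of the save side that load_model_attention() expects: the
# architecture goes to <model_path>.json and the weights to <model_path>.h5.
# The helper name is an illustrative assumption, not part of the original code.
def save_model_attention(trained_model, model_path):
    with open(model_path + '.json', 'w') as json_file:
        json_file.write(trained_model.to_json())
    trained_model.save_weights(model_path + '.h5')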
def load_and_evaluate_rnn(args):
    complete_path = constants.SAVED_RNN_MODELS_DIR + args.model_file
    print("EVALUATING MODEL %s..." % args.model_file)
    rnn = load_model(complete_path,
                     custom_objects=SeqSelfAttention.get_custom_objects())
    _, _, x_val, _, y_val, _ = get_rnn_data(args)
    predict_validation(rnn, x_val, y_val)
def load_lstm_model(path, model_h5_name):
    # Load a previously trained LSTM model from the given path; models that use
    # the self-attention layer need their custom objects registered at load time.
    if model_h5_name in model_load_custom_object_attention:
        model = load_model(path,
                           custom_objects=SeqSelfAttention.get_custom_objects())
    else:
        model = load_model(path)
    return model
def test_save_load_with_loss(self):
    attention = SeqSelfAttention(return_attention=True,
                                 attention_width=7,
                                 attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
                                 kernel_regularizer=keras.regularizers.l2(1e-4),
                                 bias_regularizer=keras.regularizers.l1(1e-4),
                                 attention_regularizer_weight=1e-3,
                                 name='Attention')
    _, _, token_dict = self.get_input_data()
    model = self.get_model(attention, token_dict)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_self_att_test_sl_with_loss_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=SeqSelfAttention.get_custom_objects())
    model.summary()
    self.assertTrue(model is not None)
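# Minimal, self-contained sketch (assumed layer sizes, not from any repo above)
# of the pattern these snippets rely on: a model containing a SeqSelfAttention
# layer serializes normally, but load_model() must receive the layer's custom
# objects or it fails with "Unknown layer: SeqSelfAttention".
import os
import tempfile

import numpy as np
import keras
from keras_self_attention import SeqSelfAttention

inputs = keras.layers.Input(shape=(None, 16))
hidden = keras.layers.Bidirectional(
    keras.layers.LSTM(units=32, return_sequences=True))(inputs)
hidden = SeqSelfAttention(attention_activation='sigmoid')(hidden)
outputs = keras.layers.TimeDistributed(
    keras.layers.Dense(units=4, activation='softmax'))(hidden)
model = keras.models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')

model_path = os.path.join(tempfile.gettempdir(), 'seq_self_attention_demo.h5')
model.save(model_path)
restored = keras.models.load_model(
    model_path, custom_objects=SeqSelfAttention.get_custom_objects())
restored.predict(np.zeros((1, 10, 16)))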
def evaluate(xml_file, nlp, glove_handler, model_file='junk_remover_model.h5',
             threshold=0.01):
    max_length = 100
    gv_dim = 100
    # model = build_LSTM_model(max_length=max_length)
    # model = build_attention_model(max_length=max_length)
    # model.load_weights(model_file)
    model = keras.models.load_model(
        model_file, custom_objects=SeqSelfAttention.get_custom_objects())  # orig
    # model = keras.models.load_model(model_file)
    page_sections = extract_page_sections(xml_file)
    preds_all = []
    labels_all = []
    y_preds_all = []
    for page_idx, sections in page_sections.items():
        data, labels = prepare_section_tr_data(sections, nlp, max_length)
        pred_X = prep_data(data, max_length, glove_handler, gv_dim=gv_dim)
        pred_X = pred_X.reshape(len(labels), max_length, gv_dim)
        y_preds = model.predict(pred_X)
        # import pdb; pdb.set_trace()
        labels_all.extend(labels)
        preds_all.extend([1 if x >= threshold else 0 for x in y_preds])
        y_preds_all.extend(y_preds)
    precs, recalls, thresholds = precision_recall_curve(labels_all, y_preds_all)
    pickle.dump({'precs': precs, 'recalls': recalls, 'thresholds': thresholds},
                open('good_prc.p', 'wb'))
    bad_labels = [1 - x for x in labels_all]
    by_preds = [1.0 - y for y in y_preds_all]
    precs, recalls, thresholds = precision_recall_curve(bad_labels, by_preds)
    pickle.dump({'precs': precs, 'recalls': recalls, 'thresholds': thresholds},
                open('bad_prc.p', 'wb'))
    r = utils.get_perf_results(labels_all, preds_all)
    print(f"Good P:{r['p_good']:.2f} R:{r['r_good']:.2f} F1:{r['f1_good']:.2f}")
    print(f"Bad P:{r['p_bad']:.2f} R:{r['r_bad']:.2f} F1:{r['f1_bad']:.2f}")
    return r
def load_model(self):
    """
    Load the pretrained Keras models, either from AWS S3 or from local disk.
    """
    if self.model_loaded:
        logger.info('Using cached model')
    else:
        # Load the saved models
        logger.info("Loading models...")
        try:
            if self.from_aws:
                import baking.main.util.aws_utils as aws
                logger.debug("Loading Decoder Model from AWS...")
                self.decoder_model = aws.read_s3_h5_as_tmpfile(
                    self.decoder_model_path,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading Generator Model Context from AWS...")
                self.generator_model_context = aws.read_s3_h5_as_tmpfile(
                    self.generator_model_context_path,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading Verse Embedding Model Context from AWS...")
                self.model_verse_emb_context = aws.read_s3_h5_as_tmpfile(
                    self.model_verse_emb_context_path,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading STV Encoder from AWS...")
                self.model_stv_encoder = aws.read_s3_h5_as_tmpfile(
                    self.model_stv_encoder_path)
            else:
                logger.debug("Loading Decoder Model from disk...")
                self.decoder_model = load_model(
                    self.decoder_model_path, compile=False,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading Generator Model Context from disk...")
                self.generator_model_context = load_model(
                    self.generator_model_context_path, compile=False,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading Verse Embedding Model Context from disk...")
                self.model_verse_emb_context = load_model(
                    self.model_verse_emb_context_path, compile=False,
                    custom_objects=SeqSelfAttention.get_custom_objects())
                logger.debug("Loading STV Encoder from disk...")
                self.model_stv_encoder = load_model(
                    self.model_stv_encoder_path, compile=False)
        except IOError as e:
            logger.error("Unable to load the Keras models")
            raise e
        except ImportError as e:
            logger.error("Missing dependency")
            raise e
        self.model_loaded = True
        logger.info("Models loaded successfully")
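# Hedged side note on the compile=False calls above (the path and loss below
# are assumptions): skipping compilation avoids restoring the training-time
# optimizer and loss, which these models only need for inference; a model
# loaded this way can still be compiled afterwards if training is to resume.
from keras.models import load_model
from keras_self_attention import SeqSelfAttention

decoder = load_model('models/decoder.h5', compile=False,
                     custom_objects=SeqSelfAttention.get_custom_objects())
decoder.compile(optimizer='adam', loss='categorical_crossentropy')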
    return tmp


if __name__ == '__main__':
    logger.info('PWD = {}'.format(os.getcwd()))

    print('Reading CSV')
    df = read_s3_csv_as_tmpfile('resources/songdata_4_tests.csv')
    print(df.head())

    from keras_self_attention import SeqSelfAttention

    print('Reading Model')
    model = read_s3_h5_as_tmpfile(
        'resources/models/lyrics_skth_v0_20_40_300_5000_100.model.generator_word.h5',
        custom_objects=SeqSelfAttention.get_custom_objects())
    print(model)

    print('Reading Artist/Genre Tokenizer')
    agt = read_s3_pickle_as_tmpfile(
        'resources/models/lyrics_skth_v0_20_40_300_5000_100.artist_genre_tokenizer.npz',
        allow_pickle=True)
    print(agt)

    print('Reading Embedding Matrix')
    em = read_s3_pickle_as_tmpfile(
        'resources/models/lyrics_skth_v0_20_40_300_5000_100.embmat.npz')
    print(em)

    print('Reading Tokenizer')
    _tokenizer = read_s3_pickle_tmp(
        'resources/models/lyrics_skth_v0_20_40_300_5000_100.tokenizer.pickle')
    print(_tokenizer)
def get_model(data_in, data_out, dropout_rate, nb_cnn2d_filt, pool_size,
              rnn_size, fnn_size, classification_mode, weights, loader, loader2):
    # model definition
    spec_start = Input(shape=(data_in[-3], data_in[-2], data_in[-1]))
    spec_cnn = spec_start
    for i, convCnt in enumerate(pool_size):
        spec_cnn = Conv2D(filters=nb_cnn2d_filt, kernel_size=(3, 3),
                          padding='same')(spec_cnn)
        spec_cnn = BatchNormalization()(spec_cnn)
        spec_cnn = Activation('relu')(spec_cnn)
        spec_cnn = MaxPooling2D(pool_size=(1, pool_size[i]))(spec_cnn)
        spec_cnn = Dropout(dropout_rate)(spec_cnn)
    spec_cnn = Permute((2, 1, 3))(spec_cnn)

    spec_rnn = Reshape((data_in[-2], -1))(spec_cnn)
    for nb_rnn_filt in rnn_size:
        spec_rnn = Bidirectional(
            GRU(nb_rnn_filt, activation='tanh', dropout=dropout_rate,
                recurrent_dropout=dropout_rate, return_sequences=True),
            merge_mode='mul')(spec_rnn)

    # Attention
    # spec_rnn = SeqSelfAttention(attention_activation='tanh')(spec_rnn)

    # DOA
    doa = spec_rnn
    # doa = SeqSelfAttention(attention_activation='tanh')(spec_rnn)
    for nb_fnn_filt in fnn_size:
        doa = TimeDistributed(Dense(nb_fnn_filt))(doa)
        doa = Dropout(dropout_rate)(doa)
    doa = TimeDistributed(Dense(data_out[1][-1]))(doa)
    doa = Activation('tanh', name='doa_out')(doa)

    # SED
    sed = spec_rnn
    # sed = SeqSelfAttention(attention_activation='tanh')(spec_rnn)
    for nb_fnn_filt in fnn_size:
        sed = TimeDistributed(Dense(nb_fnn_filt))(sed)
        sed = Dropout(dropout_rate)(sed)
    sed = TimeDistributed(Dense(data_out[0][-1]))(sed)
    sed = Activation('sigmoid', name='sed_out')(sed)

    model = Model(inputs=spec_start, outputs=[sed, doa])

    if loader:
        model = load_model(
            'C:/Users/shalea2/PycharmProjects/Drones/models/attention_3_ansim_ov1_split2_regr0_3d0_1_model.h5',
            custom_objects=SeqSelfAttention.get_custom_objects())
    if loader2:
        temp_weights = [layer.get_weights() for layer in model.layers]
        model.layers.pop()
        model.layers.pop()
        model.layers.pop()
        model.layers.pop()
        doa = TimeDistributed(Dense(data_out[1][-1]))(model.layers[-1].output)
        doa = Activation('tanh', name='doa_out')(doa)
        sed = TimeDistributed(Dense(data_out[0][-1]))(model.layers[-2].output)
        sed = Activation('sigmoid', name='sed_out')(sed)
        model = Model(inputs=model.get_input_at(0), outputs=[sed, doa])
        for i in range(len(temp_weights) - 4):
            model.layers[i].set_weights(temp_weights[i])

    model.compile(optimizer=Adam(), loss=['binary_crossentropy', 'mse'],
                  loss_weights=weights)
    model.summary()
    return model
def load_model(self, model_path):
    self.model = load_model(model_path,
                            custom_objects=SeqSelfAttention.get_custom_objects())
def run(args):
    # LOAD DATASET
    if args.use_full_dataset:
        id_dataset = pickle.load(open(constants.ID_DATASET_FULL_PATH, "rb"))
        data_index = pickle.load(open(constants.DATA_INDEX_FULL_PATH, "rb"))
    else:
        id_dataset = pickle.load(open(constants.ID_DATASET_SMALL_PATH, "rb"))
        data_index = pickle.load(open(constants.DATA_INDEX_SMALL_PATH, "rb"))
    glove_embeddings = pickle.load(
        open("../embeddings/stanford_20k.WordEmbedding", "rb"))
    external_embeddings = glove_embeddings.vectors

    # Split dataset 90%/10%, and shuffle
    x_val = id_dataset["train_tweets"][data_index["test_index"]]
    y_val = id_dataset["train_labels"][data_index["test_index"]]
    x_test = id_dataset["test_tweets"]

    val_prediction_matrix_path = (constants.SAVED_RNN_MODELS_DIR +
                                  "val_prediction_matrix.obj")

    # Specify models
    model_files = [
        "rnn_pooling_256.hdf5",
        "rnn_attention_256.hdf5",
        "rnn_conv_256.hdf5",
        "rnn_pooled_attention_512.hdf5",
        "rnn_simple_256.hdf5",
    ]

    # Load specified models
    loaded_models = [
        load_model(os.path.join(constants.SAVED_RNN_MODELS_DIR, f),
                   custom_objects=SeqSelfAttention.get_custom_objects())
        for f in tqdm(model_files)
    ]

    if os.path.isfile(val_prediction_matrix_path):
        print("Found prediction matrix at %s, loading it..." %
              val_prediction_matrix_path)
        val_prediction_matrix = pickle.load(
            open(val_prediction_matrix_path, "rb"))
    else:
        print("Did not find prediction matrix at %s, creating it from scratch..." %
              val_prediction_matrix_path)
        # Predict on x_val
        val_prediction_matrix = np.array(
            [perform_prediction(m, x_val) for m in tqdm(loaded_models)]).T
        pickle.dump(val_prediction_matrix,
                    open(val_prediction_matrix_path, "wb"))

    # Calc majority votes
    val_majority_votes = maj_vote(val_prediction_matrix)

    # Calculate validation accuracy
    calc_val_acc(val_majority_votes, y_val)

    # Predict on test set
    print("Predicting on the test set...")
    test_prediction_matrix = np.array(
        [perform_prediction(m, x_test) for m in tqdm(loaded_models)]).T
    test_majority_votes = maj_vote(test_prediction_matrix)
    submit.generate_submission_file(test_majority_votes, "majority")
# adam optimizer
# note: `opt` is defined with learning_rate=1e-5 but the string "adam" below is
# what is actually passed to compile(), so the default learning rate is used
opt = keras.optimizers.Adam(learning_rate=1e-5)
model.compile(optimizer="adam", loss="categorical_crossentropy",
              metrics=['accuracy'])

# callbacks
mcp_save = keras.callbacks.ModelCheckpoint(
    'saved_models/cln_protbert_' + str(fold) + '.h5',
    save_best_only=True, monitor='val_accuracy', verbose=1)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.1, patience=20, verbose=1, mode='auto',
    min_delta=0.0001, cooldown=0, min_lr=0)
callbacks_list = [reduce_lr, mcp_save]

# test and train generators
X_train, X_val = X[train_index], X[val_index]
y_train, y_val = y[train_index], y[val_index]
train_gen = bm_generator(X_train, y_train, bs)
val_gen = bm_generator(X_val, y_val, bs)
history = model.fit_generator(train_gen, epochs=num_epochs,
                              steps_per_epoch=math.ceil(len(X_train) / bs),
                              verbose=1, validation_data=val_gen,
                              validation_steps=len(X_val) / bs,
                              workers=0, shuffle=True,
                              callbacks=callbacks_list)

# reload the best checkpoint saved by ModelCheckpoint
model = load_model('saved_models/cln_protbert_' + str(fold) + '.h5',
                   custom_objects=SeqSelfAttention.get_custom_objects())

# print("Validation")
# y_pred_val = model.predict(X_val)
# f1_score_val = f1_score(y_val, y_pred_val.argmax(axis=1), average='weighted')
# acc_score_val = accuracy_score(y_val, y_pred_val.argmax(axis=1))
# val_f1score.append(f1_score_val)
# val_acc.append(acc_score_val)
# print("F1 Score: ", val_f1score)
# print("Acc Score", val_acc)

print("Testing")
y_pred_test = model.predict(X_test)
f1_score_test = f1_score(y_test, y_pred_test.argmax(axis=1), average='weighted')
acc_score_test = accuracy_score(y_test, y_pred_test.argmax(axis=1))
test_f1score.append(f1_score_test)
def activityPredict(brandFileName, fileName, histNum=3):
    brandFile = open(brandFileName, 'r')
    brandList = []
    for line in brandFile:
        brandList.append(line.strip())
    brandFile.close()

    print('Loading model...')
    modelFile = open(fileName + '_model.json', 'r')
    model_load = modelFile.read()
    modelFile.close()
    model = model_from_json(model_load,
                            custom_objects=SeqSelfAttention.get_custom_objects())
    model.load_weights(fileName + '_model.h5')
    tkTweet = pickle.load(open(fileName + '_tweet.tk', 'rb'))
    tkPOS = pickle.load(open(fileName + '_pos.tk', 'rb'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    # print(model.summary())

    resultFile = open('result/accountDist.result', 'w')
    for brand in brandList:
        idMapper = {}
        print('Processing ' + brand + '...')
        posFile = open('data/userTweets2/clean2/' + brand + '.pos', 'r')
        for line in posFile:
            data = json.loads(line.strip())
            contentList, posList = extractPOS(data.values()[0], breakEmoji=True)
            if len(contentList) > 3:
                idMapper[int(data.keys()[0])] = (contentList, posList)
        posFile.close()

        tweetData = {}
        tweetFile = open('data/userTweets2/clean2/' + brand + '.json', 'r')
        for line in tweetFile:
            data = json.loads(line.strip())
            tweetData[data['user_id']] = data['statuses']
        tweetFile.close()

        contents = []
        days = []
        hours = []
        poss = []
        histContents = {}
        histDayVectors = {}
        histHourVectors = {}
        histPOSLists = {}
        for i in range(histNum):
            histContents[i] = []
            histDayVectors[i] = []
            histHourVectors[i] = []
            histPOSLists[i] = []
        totalIndex = 0
        indexUserMapper = {}
        for userID, statuses in tweetData.items():
            # print('Tweet #: ' + str(len(statuses)))
            for index, tweet in enumerate(statuses):
                if index < (len(statuses) - histNum - 3):
                    if tweet['id'] in idMapper:
                        histTweets = constructHist(statuses, index + 1, histNum, idMapper)
                        # print(histTweets)
                        if histTweets is None:
                            continue
                        contentList, posList = idMapper[tweet['id']]
                        contents.append(list2str(contentList).encode('utf-8'))
                        poss.append(list2str(posList).encode('utf-8'))
                        dateTemp = tweet['created_at'].split()
                        day = dayMapper[dateTemp[0]]
                        hour = hourMapper(dateTemp[3].split(':')[0])
                        days.append(np.full((tweetLength), day, dtype='int'))
                        hours.append(np.full((tweetLength), hour, dtype='int'))
                        indexUserMapper[totalIndex] = userID
                        totalIndex += 1
                        for i in range(histNum):
                            histContents[i].append(histTweets[i]['content'].encode('utf-8'))
                            histPOSLists[i].append(histTweets[i]['pos'].encode('utf-8'))
                            histDayVectors[i].append(np.full((tweetLength), histTweets[i]['day'], dtype='int'))
                            histHourVectors[i].append(np.full((tweetLength), histTweets[i]['hour'], dtype='int'))
        print('Data size: ' + str(len(contents)))
        # print('Valid data#: ' + str(len(contents)))

        for i in range(histNum):
            histDayVectors[i] = np.array(histDayVectors[i])
            histHourVectors[i] = np.array(histHourVectors[i])
        days = np.array(days)
        hours = np.array(hours)

        tweetSequences = tkTweet.texts_to_sequences(contents)
        tweetVector = sequence.pad_sequences(tweetSequences, maxlen=tweetLength,
                                             truncating='post', padding='post')
        posSequences = tkPOS.texts_to_sequences(poss)
        posVector = sequence.pad_sequences(posSequences, maxlen=tweetLength,
                                           truncating='post', padding='post')

        histTweetVectors = []
        histPOSVectors = []
        for i in range(histNum):
            histDayVectors[i] = np.array(histDayVectors[i])
            histHourVectors[i] = np.array(histHourVectors[i])
            histSequence = tkTweet.texts_to_sequences(histContents[i])
            tempVector = sequence.pad_sequences(histSequence, maxlen=tweetLength,
                                                truncating='post', padding='post')
            histTweetVectors.append(tempVector)
            histPOSSequences = tkPOS.texts_to_sequences(histPOSLists[i])
            histPOSVector = sequence.pad_sequences(histPOSSequences, maxlen=tweetLength,
                                                   truncating='post', padding='post')
            histPOSVectors.append(histPOSVector)
        # print(tweetVector.shape)

        if len(tweetVector) % batch_size != 0:
            tweetVector = tweetVector[:-(len(tweetVector) % batch_size)]
            days = days[:-(len(days) % batch_size)]
            hours = hours[:-(len(hours) % batch_size)]
            posVector = posVector[:-(len(posVector) % batch_size)]
            for i in range(histNum):
                histTweetVectors[i] = histTweetVectors[i][:-(len(histTweetVectors[i]) % batch_size)]
                histDayVectors[i] = histDayVectors[i][:-(len(histDayVectors[i]) % batch_size)]
                histHourVectors[i] = histHourVectors[i][:-(len(histHourVectors[i]) % batch_size)]
                histPOSVectors[i] = histPOSVectors[i][:-(len(histPOSVectors[i]) % batch_size)]
        # print(posVector.shape)

        featureList = [tweetVector, days, hours, posVector]
        for i in range(histNum):
            featureList += [histTweetVectors[i], histDayVectors[i],
                            histHourVectors[i], histPOSVectors[i]]
        # print(len(featureList))

        try:
            predictions = model.predict(featureList, batch_size=batch_size)
            userTweetDist = {}
            for index, tweetDist in enumerate(predictions):
                user = indexUserMapper[index]
                if user not in userTweetDist:
                    userTweetDist[user] = np.zeros([1, 6])
                userTweetDist[user] = np.concatenate((userTweetDist[user], [tweetDist]), axis=0)
            userAvgDist = {}
            for user, tweetDist in userTweetDist.items():
                userAvgDist[user] = np.divide(np.sum(tweetDist, axis=0), len(tweetDist) - 1)
            accountDist = np.divide(np.sum(userAvgDist.values(), axis=0), len(userAvgDist))
            out = npDist2Str(accountDist)
        except:
            print('Error in processing: ' + brand)
            out = ''
        finally:
            resultFile.write(brand + '\t' + out + '\n')
    resultFile.close()