def createArchitecture(parameters):
    """Build and compile the deepMirCut Keras network described by *parameters*.

    Topology: (optional) embedding + dropout -> up to three stacked
    bidirectional CuDNNLSTM layers -> either a CRF output layer or a
    time-distributed softmax, selected by parameters["use_crf_layer"].

    Args:
        parameters: hyper-parameter dict; keys read here include
            "optimizer", "learning_rate", "epsilon", "use_embedding_layer",
            "max_seq_len", "one_hot_vector_len", "embedding_layer_output",
            "embedding_dropout", "bi_lstm1_units", "bi_lstm2_units",
            "bi_lstm3_units", "use_crf_layer", "num_tags".

    Returns:
        A compiled Keras Model (its summary is printed as a side effect).

    Raises:
        ValueError: if parameters["optimizer"] is not a supported name.
    """
    # Legacy lowercase optimizer factories and the lr= keyword match the
    # old Keras API used throughout this file.
    opt_name = parameters["optimizer"]
    lr = parameters["learning_rate"]
    if opt_name == 'rmsprop':
        optimizer = optimizers.rmsprop(lr=lr, epsilon=parameters["epsilon"])
    elif opt_name == 'adam':
        optimizer = optimizers.adam(lr=lr, epsilon=parameters["epsilon"])
    elif opt_name == 'nadam':
        optimizer = optimizers.nadam(lr=lr, epsilon=parameters["epsilon"])
    elif opt_name == 'sgd':
        optimizer = optimizers.sgd(lr=lr)
    else:
        # Previously this fell through with optimizer = 0 and only failed
        # later, obscurely, inside model.compile; fail fast instead.
        raise ValueError("unsupported optimizer: %s" % opt_name)

    if parameters["use_embedding_layer"]:
        # Integer-encoded input fed through a learned embedding.
        inputs = Input(shape=(parameters["max_seq_len"], ))
        model = Embedding(input_dim=parameters["one_hot_vector_len"],
                          output_dim=parameters["embedding_layer_output"],
                          input_length=parameters["max_seq_len"])(inputs)
        if parameters["embedding_dropout"] > 0:
            model = Dropout(rate=parameters["embedding_dropout"])(model)
    else:
        # One-hot input used directly, no embedding.
        inputs = Input(shape=(parameters["max_seq_len"],
                              parameters["one_hot_vector_len"]))
        model = inputs

    # Stack up to three BiLSTM layers; a unit count of 0 disables a layer.
    for units_key in ("bi_lstm1_units", "bi_lstm2_units", "bi_lstm3_units"):
        if parameters[units_key] > 0:
            model = Bidirectional(
                CuDNNLSTM(units=parameters[units_key],
                          return_sequences=True))(model)

    if parameters["use_crf_layer"]:
        # CRF output head with matching CRF loss/accuracy metrics.
        crf = CRF(parameters["num_tags"], learn_mode="marginal")
        out = crf(model)  # output
        model = Model(inputs, out)
        model.compile(optimizer=optimizer,
                      loss=losses.crf_loss,
                      metrics=[metrics.crf_accuracy, avg_proximity_metric()])
    else:
        # Per-position softmax head.
        out = TimeDistributed(
            Dense(parameters["num_tags"], activation="softmax"))(model)
        model = Model(inputs, out)
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=["accuracy", avg_proximity_metric()])
    model.summary()
    return model
# --- Ensemble selection (script-level) -----------------------------------
# Ranks the candidate models of parameters["ensemble"] by macro F1 on the
# validation set and writes the paths of the best --ensemble_size models
# to an "_ensemble_list.txt" file.
if "--ensemble_size" not in opts:
    print("Error: must enter an --ensemble_size")
    exit()

# Output list file: "<prefix>_ensemble_list.txt" when a prefix was given.
output_file = "new_ensemble_list.txt"
if parameters["output_prefix"]:
    output_file = parameters["output_prefix"] + "_ensemble_list.txt"
num = int(opts["--ensemble_size"])

# Load the validation data once; over-long sequences are dropped up front.
dataSet = deepMirCut.readDataset(parameters["validation_file"], parameters)
new_dataSet = deepMirCut.dropLongSequences(dataSet, parameters)
X_test, y_test = deepMirCut.prepareData(new_dataSet, parameters)

# Load every candidate model of the ensemble.
members = []
for i, model_path in enumerate(parameters["ensemble"]):
    print("loading model %d: %s\n" % (i, model_path))
    members.append(load_model(model_path,
                              custom_objects={'prox': avg_proximity_metric()}))

# Score each member: macro F1 over the four cut-site classes.
test_pred = []
macro_f1_scores = []
for i, model in enumerate(members):
    test_pred.append(model.predict(X_test, verbose=parameters["verbose"]))
    perf = get_classification_metrics(y_test, test_pred[i], parameters)
    macro_f1_scores.append(
        sum(perf[c]["fscore"] for c in ["DR5", "DC5", "DC3", "DR3"]) / 4)
    print(i, macro_f1_scores[i])

# Keep the `num` best members: give them uniform weights and record their
# paths.  `with` guarantees the handle is closed even on error (the
# original used a bare open()/close() pair).
# NOTE(review): `w` is not consumed within this chunk — presumably used
# further down the script; kept for that reason.
ranked = sorted(zip(macro_f1_scores, range(len(macro_f1_scores))),
                reverse=True)
w = [[0 for _ in range(5)] for _ in range(len(test_pred))]
with open(output_file, "w") as f:
    for _, j in ranked[:num]:
        w[j] = [1 / num for _ in range(5)]
        f.write(parameters["ensemble"][j] + "\n")
def hyperopt_train_test(params):
    """Train one candidate model for a hyperopt trial and score it.

    Builds a network from the sampled hyper-parameters in *params*
    (module-level dmc_parameters supplies the fixed settings; X_tr/y_tr
    and X_vl/y_vl the training and validation data), fits it with early
    stopping, and returns the validation (loss, accuracy, proximity,
    macro F1) for the trial objective.
    """
    # Sampled optimizer settings; epsilon is searched on a log10 scale.
    epsilon = 10**params['epsilon_exp']
    optimizer = optimizers.adam(lr=params['learning_rate'], epsilon=epsilon)

    # Input stage: learned embedding over token ids, or raw one-hot rows.
    if dmc_parameters["use_embedding_layer"]:
        net_in = Input(shape=(dmc_parameters["max_seq_len"], ))
        net = Embedding(input_dim=dmc_parameters["one_hot_vector_len"],
                        output_dim=params['embedding_layer_output'],
                        input_length=dmc_parameters["max_seq_len"])(net_in)
        net = Dropout(rate=params['embedding_dropout'])(net)
    else:
        net_in = Input(shape=(dmc_parameters["max_seq_len"],
                              dmc_parameters["one_hot_vector_len"]))
        net = net_in

    # Up to two stacked BiLSTM layers; a unit count of 0 disables one.
    if params['bi_lstm1_units'] > 0:
        net = Bidirectional(CuDNNLSTM(units=params['bi_lstm1_units'],
                                      return_sequences=True))(net)
    if params['bi_lstm2_units'] > 0:
        net = Bidirectional(CuDNNLSTM(units=params['bi_lstm2_units'],
                                      return_sequences=True))(net)

    # Output stage: CRF head or per-position softmax, with matching loss.
    if dmc_parameters["use_crf_layer"]:
        crf = CRF(dmc_parameters["num_tags"])  # CRF layer
        out = crf(net)  # output
        model = Model(net_in, out)
        model.compile(optimizer=optimizer, loss=losses.crf_loss,
                      metrics=[metrics.crf_accuracy, avg_proximity_metric()])
    else:
        out = TimeDistributed(Dense(dmc_parameters["num_tags"],
                                    activation="softmax"))(net)
        model = Model(net_in, out)
        model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                      metrics=["accuracy", avg_proximity_metric()])
    model.summary()

    # Early stopping on validation loss, restoring the best weights seen.
    es = EarlyStopping(monitor='val_loss', min_delta=0,
                       patience=dmc_parameters["patience"], verbose=False,
                       mode='min', restore_best_weights=True)
    model.fit(X_tr, np.array(y_tr),
              batch_size=dmc_parameters['batch_size'],
              epochs=dmc_parameters["epochs"],
              validation_data=(X_vl, np.array(y_vl)),
              verbose=False, shuffle=True, callbacks=[es])

    # Validation metrics for the objective.
    loss, acc, prox = model.evaluate(x=X_vl, y=np.array(y_vl),
                                     batch_size=dmc_parameters['batch_size'],
                                     verbose=False)
    validation_labels = deepMirCut.pred2label(y_vl, dmc_parameters)
    pred_labels = deepMirCut.pred2label(model.predict(X_vl, verbose=False),
                                        dmc_parameters)
    fScore = f1_score(validation_labels, pred_labels)
    return loss, acc, prox, fScore
# --- Prediction driver (script-level) ------------------------------------
# Loads parameters and the input set, then predicts cut sites either with
# a weighted ensemble of models or (else branch) a single model.
# NOTE(review): this chunk appears to end mid-way through the single-model
# else branch; the remainder presumably follows outside this view.
parameters = deepMirCut.load_parameters(opts)
parameters = load_input_output_file_parameters(parameters, opts)
predictions_outputFile = parameters[
    "output_prefix"] + "_predicted_cutsites.txt"
inputSet = read_predict_set(input_file, parameters)
# Sequences longer than the model's max length are dropped before encoding.
new_inputSet = deepMirCut.dropLongSequences(inputSet, parameters)
X_vl, _ = deepMirCut.prepareData(new_inputSet, parameters)
if "ensemble" in parameters:
    # Ensemble path: run every member model, then combine the per-model
    # outputs with the configured ensemble weights.
    predictions = []
    for i in range(0, len(parameters["ensemble"])):
        print("loading model %d: %s\n" % (i, parameters["ensemble"][i]))
        model = load_model(parameters["ensemble"][i],
                           custom_objects={'prox': avg_proximity_metric()})
        predictions.append(
            model.predict(X_vl, verbose=parameters["verbose"]))
    predictions_avg = apply_weights(predictions,
                                    w=parameters["ensemble_weights"])
    print_predictions_output_file(new_inputSet, X_vl, predictions_avg,
                                  predictions_outputFile, parameters)
    if parameters["print_dvs"]:
        # Optionally dump the raw classification decision values as well.
        deepMirCut.print_classification_values_file(
            new_inputSet,
            predictions_avg,
            parameters,
            output_file=parameters["classification_DVs"])
else:
    # Single-model path.
    model = load_model(parameters["model"],
                       custom_objects={'prox': avg_proximity_metric()})
# --- Evaluation driver (script-level) ------------------------------------
# NOTE(review): the matching `try:` for the `except` below (and the getopt
# call that produces opts_array / longopts_map) precedes this chunk.
    opts.update({o: a for o, a in opts_array if o not in longopts_map})
    opts["--validation_file"] = validation_file
except getopt.GetoptError as err:
    print(err)
    print_usage()
    exit()
if '--help' in opts:
    print_usage()
    exit()
parameters = deepMirCut.load_parameters(opts)
parameters = load_input_output_file_parameters(parameters, opts)
# The CRF custom objects are required to deserialize models saved with
# the keras-contrib CRF layer.
model = load_model(parameters["model"], custom_objects={
    'CRF': CRF,
    'crf_loss': losses.crf_loss,
    'crf_accuracy': metrics.crf_accuracy,
    'prox': avg_proximity_metric()
})
model.summary()
validationSet = deepMirCut.readDataset(parameters["validation_file"],
                                       parameters)
# Over-long sequences are dropped before encoding.
new_validationSet = deepMirCut.dropLongSequences(validationSet, parameters)
X_vl, y_vl = deepMirCut.prepareData(new_validationSet, parameters)
validation_labels = deepMirCut.pred2label(y_vl, parameters)
validation_pred = model.predict(X_vl, verbose=parameters["verbose"])
# NOTE(review): validation_labels/validation_pred are aligned with
# new_validationSet (after dropping long sequences), but validationSet is
# passed here — looks like a row-misalignment bug whenever any sequence
# was dropped; confirm against print_cutsite_scores.
print_cutsite_scores(validationSet, validation_labels, validation_pred,
                     parameters)