Пример #1
0
def createArchitecture(parameters):
    """Build, compile and return the Keras tagging model described by ``parameters``.

    Keys read from ``parameters``:
        optimizer: one of ``'rmsprop'``, ``'adam'``, ``'nadam'`` or ``'sgd'``.
        learning_rate: passed to the optimizer as ``lr``.
        epsilon: passed to every optimizer except ``'sgd'``.
        use_embedding_layer: when truthy, the input is a sequence of length
            ``max_seq_len`` fed through an ``Embedding`` layer; otherwise the
            input has shape ``(max_seq_len, one_hot_vector_len)`` and is used
            directly.
        embedding_layer_output, embedding_dropout: embedding size and the
            dropout rate applied after it (dropout is skipped when the rate
            is not > 0).
        bi_lstm1_units, bi_lstm2_units, bi_lstm3_units: unit counts of up to
            three stacked bidirectional ``CuDNNLSTM`` layers; a layer is
            skipped when its count is not > 0.
        use_crf_layer: when truthy, the output layer is a ``CRF`` compiled
            with ``losses.crf_loss``; otherwise a time-distributed softmax
            compiled with categorical cross-entropy.
        num_tags: size of the output layer.

    Returns:
        The compiled model. Its summary is printed as a side effect.

    Raises:
        ValueError: if ``parameters["optimizer"]`` is not a supported name.
    """
    optimizer_name = parameters["optimizer"]
    supported_optimizers = ("rmsprop", "adam", "nadam", "sgd")
    # Fail fast with a clear message. Previously an unknown name left the
    # optimizer set to 0, which only blew up inside model.compile() after the
    # whole graph had been built.
    if optimizer_name not in supported_optimizers:
        raise ValueError("Unsupported optimizer %r; expected one of: %s" %
                         (optimizer_name, ", ".join(supported_optimizers)))

    optimizer_kwargs = {"lr": parameters["learning_rate"]}
    if optimizer_name != "sgd":
        # SGD is the only supported optimizer constructed without epsilon.
        optimizer_kwargs["epsilon"] = parameters["epsilon"]
    optimizer = getattr(optimizers, optimizer_name)(**optimizer_kwargs)

    if parameters["use_embedding_layer"]:
        inputs = Input(shape=(parameters["max_seq_len"], ))
        model = Embedding(input_dim=parameters["one_hot_vector_len"],
                          output_dim=parameters["embedding_layer_output"],
                          input_length=parameters["max_seq_len"])(inputs)
        if parameters["embedding_dropout"] > 0:
            model = Dropout(rate=parameters["embedding_dropout"])(model)
    else:
        inputs = Input(shape=(parameters["max_seq_len"],
                              parameters["one_hot_vector_len"]))
        model = inputs

    # Stack the optional recurrent layers in order; each returns the full
    # sequence so the next layer (or the per-position output) can consume it.
    for units_key in ("bi_lstm1_units", "bi_lstm2_units", "bi_lstm3_units"):
        units = parameters[units_key]
        if units > 0:
            model = Bidirectional(
                CuDNNLSTM(units=units, return_sequences=True))(model)

    if parameters["use_crf_layer"]:
        crf = CRF(parameters["num_tags"], learn_mode="marginal")
        out = crf(model)
        model = Model(inputs, out)
        model.compile(optimizer=optimizer,
                      loss=losses.crf_loss,
                      metrics=[metrics.crf_accuracy,
                               avg_proximity_metric()])
    else:
        out = TimeDistributed(
            Dense(parameters["num_tags"], activation="softmax"))(model)
        model = Model(inputs, out)
        model.compile(optimizer=optimizer,
                      loss="categorical_crossentropy",
                      metrics=["accuracy", avg_proximity_metric()])
    model.summary()
    return model
Пример #2
0
    # --ensemble_size is mandatory: it is the number of best-scoring models
    # that get written to the ensemble list.
    if "--ensemble_size" not in opts:
        print("Error: must enter an --ensemble_size")
        exit()
    output_file = "new_ensemble_list.txt"
    if parameters["output_prefix"]:
        output_file = parameters["output_prefix"] + "_ensemble_list.txt"
    num = int(opts["--ensemble_size"])

    dataSet = deepMirCut.readDataset(parameters["validation_file"],parameters)
    new_dataSet = deepMirCut.dropLongSequences(dataSet,parameters)
    X_test,y_test = deepMirCut.prepareData(new_dataSet,parameters)

    # Load every candidate model listed in parameters["ensemble"].
    members = []
    for i, model_path in enumerate(parameters["ensemble"]):
        print("loading model %d: %s\n"%(i, model_path))
        members.append(load_model(model_path, custom_objects={'prox':avg_proximity_metric()}))

    # Score each candidate: the mean of the per-class "fscore" values over
    # the four classes DR5, DC5, DC3 and DR3.
    test_pred = []
    macro_f1_scores = []
    for i, model in enumerate(members):
        test_pred.append(model.predict(X_test, verbose=parameters["verbose"]))
        perf = get_classification_metrics(y_test,test_pred[i],parameters)
        macro_f1_scores.append(sum([perf[c]["fscore"] for c in ["DR5","DC5","DC3","DR3"]]) / 4)
        print(i,macro_f1_scores[i])

    # One row of five weights per model, zero unless the model is selected.
    # NOTE(review): `w` is not read again in the visible code; kept in case
    # later code relies on it - confirm.
    w = [[0 for _ in range(0,5)] for _ in range(0,len(test_pred))]
    # Highest score first; ties are broken by the larger model index because
    # the (score, index) tuples are sorted in reverse.
    top_ranked = sorted(zip(macro_f1_scores,range(0,len(macro_f1_scores))), reverse=True)[:num]
    # The with-block guarantees the list file is closed even if a write fails.
    with open(output_file,"w") as f:
        for _,j in top_ranked:
            w[j] = [1/num for _ in range(0,5)]
            f.write(parameters["ensemble"][j] + "\n")
Пример #3
0
def hyperopt_train_test(params):
    """Train one model for a hyperparameter trial and score it on validation data.

    Trial-specific values come from ``params`` (``epsilon_exp``,
    ``learning_rate``, ``embedding_layer_output``, ``embedding_dropout``,
    ``bi_lstm1_units``, ``bi_lstm2_units``). Everything else is read from the
    module-level ``dmc_parameters`` dict, and the data from the module-level
    ``X_tr``, ``y_tr``, ``X_vl`` and ``y_vl``.

    Returns:
        A ``(loss, acc, prox, fScore)`` tuple: the three values reported by
        ``model.evaluate`` on the validation data, followed by the
        ``f1_score`` of the predicted labels against the validation labels.
    """
    # The optimizer epsilon is searched on a log scale: the trial supplies
    # the exponent.
    eps = 10**params['epsilon_exp']
    adam = optimizers.adam(lr=params['learning_rate'], epsilon=eps)

    if not dmc_parameters["use_embedding_layer"]:
        net_in = Input(shape=(dmc_parameters["max_seq_len"],
                              dmc_parameters["one_hot_vector_len"]))
        net = net_in
    else:
        net_in = Input(shape=(dmc_parameters["max_seq_len"], ))
        embedding = Embedding(input_dim=dmc_parameters["one_hot_vector_len"],
                              output_dim=params['embedding_layer_output'],
                              input_length=dmc_parameters["max_seq_len"])
        net = embedding(net_in)
        net = Dropout(rate=params['embedding_dropout'])(net)

    # Up to two stacked bidirectional LSTM layers; a layer is skipped when
    # its unit count is not > 0.
    for units_key in ('bi_lstm1_units', 'bi_lstm2_units'):
        if params[units_key] > 0:
            recurrent = CuDNNLSTM(units=params[units_key],
                                  return_sequences=True)
            net = Bidirectional(recurrent)(net)

    if dmc_parameters["use_crf_layer"]:
        tagger = CRF(dmc_parameters["num_tags"])
        net = Model(net_in, tagger(net))
        net.compile(optimizer=adam,
                    loss=losses.crf_loss,
                    metrics=[metrics.crf_accuracy, avg_proximity_metric()])
    else:
        tagger = TimeDistributed(
            Dense(dmc_parameters["num_tags"], activation="softmax"))
        net = Model(net_in, tagger(net))
        net.compile(optimizer=adam,
                    loss="categorical_crossentropy",
                    metrics=["accuracy", avg_proximity_metric()])
    net.summary()

    # Stop once validation loss stops improving and roll back to the best
    # weights seen.
    early_stop = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=dmc_parameters["patience"],
                               verbose=False,
                               mode='min',
                               restore_best_weights=True)
    y_val = np.array(y_vl)
    net.fit(X_tr,
            np.array(y_tr),
            batch_size=dmc_parameters['batch_size'],
            epochs=dmc_parameters["epochs"],
            validation_data=(X_vl, y_val),
            verbose=False,
            shuffle=True,
            callbacks=[early_stop])
    loss, acc, prox = net.evaluate(x=X_vl,
                                   y=y_val,
                                   batch_size=dmc_parameters['batch_size'],
                                   verbose=False)

    true_labels = deepMirCut.pred2label(y_vl, dmc_parameters)
    predicted = net.predict(X_vl, verbose=False)
    fScore = f1_score(true_labels,
                      deepMirCut.pred2label(predicted, dmc_parameters))
    return loss, acc, prox, fScore
Пример #4
0
    # Build the run parameters from the parsed options; both loaders are
    # defined outside this excerpt.
    parameters = deepMirCut.load_parameters(opts)
    parameters = load_input_output_file_parameters(parameters, opts)

    # Predictions are written to "<output_prefix>_predicted_cutsites.txt".
    predictions_outputFile = parameters[
        "output_prefix"] + "_predicted_cutsites.txt"

    # Read the input set, pass it through dropLongSequences, and encode what
    # remains. The second value returned by prepareData is discarded here.
    inputSet = read_predict_set(input_file, parameters)
    new_inputSet = deepMirCut.dropLongSequences(inputSet, parameters)
    X_vl, _ = deepMirCut.prepareData(new_inputSet, parameters)
    if "ensemble" in parameters:
        # Ensemble mode: run every listed model on the same encoded input and
        # collect the raw predictions.
        # NOTE(review): only 'prox' is registered as a custom object here,
        # while other loaders in this project also register CRF, crf_loss and
        # crf_accuracy - confirm that CRF-based models load on this path.
        predictions = []
        for i in range(0, len(parameters["ensemble"])):
            print("loading model %d: %s\n" % (i, parameters["ensemble"][i]))
            model = load_model(parameters["ensemble"][i],
                               custom_objects={'prox': avg_proximity_metric()})
            predictions.append(
                model.predict(X_vl, verbose=parameters["verbose"]))
        # Combine the per-model predictions using the configured
        # parameters["ensemble_weights"].
        predictions_avg = apply_weights(predictions,
                                        w=parameters["ensemble_weights"])
        print_predictions_output_file(new_inputSet, X_vl, predictions_avg,
                                      predictions_outputFile, parameters)
        # Optionally also write the combined values to the file named by
        # parameters["classification_DVs"].
        if parameters["print_dvs"]:
            deepMirCut.print_classification_values_file(
                new_inputSet,
                predictions_avg,
                parameters,
                output_file=parameters["classification_DVs"])
    else:
        # Single-model mode: load the one model named by parameters["model"].
        model = load_model(parameters["model"],
                           custom_objects={'prox': avg_proximity_metric()})
Пример #5
0
        # Copy across every parsed option that is not a key of longopts_map,
        # then record the validation file under its long option name.
        opts.update({o: a for o, a in opts_array if o not in longopts_map})
        opts["--validation_file"] = validation_file
    except getopt.GetoptError as err:
        # Invalid command line: report the error, show usage and stop.
        print(err)
        print_usage()
        exit()
    if '--help' in opts:
        print_usage()
        exit()

    # Build the run parameters from the parsed options; both loaders are
    # defined outside this excerpt.
    parameters = deepMirCut.load_parameters(opts)
    parameters = load_input_output_file_parameters(parameters, opts)

    # The saved model may reference the CRF layer, its loss and accuracy, and
    # the custom 'prox' metric, so all of them are registered for loading.
    model = load_model(parameters["model"],
                       custom_objects={
                           'CRF': CRF,
                           'crf_loss': losses.crf_loss,
                           'crf_accuracy': metrics.crf_accuracy,
                           'prox': avg_proximity_metric()
                       })

    model.summary()
    # Read the validation set, pass it through dropLongSequences, encode it,
    # and convert the encoded targets to labels.
    validationSet = deepMirCut.readDataset(parameters["validation_file"],
                                           parameters)
    new_validationSet = deepMirCut.dropLongSequences(validationSet, parameters)
    X_vl, y_vl = deepMirCut.prepareData(new_validationSet, parameters)
    validation_labels = deepMirCut.pred2label(y_vl, parameters)
    validation_pred = model.predict(X_vl, verbose=parameters["verbose"])
    # NOTE(review): the unfiltered validationSet is passed here, while the
    # labels and predictions were derived from new_validationSet. If
    # dropLongSequences removes entries the two may no longer line up -
    # confirm whether new_validationSet was intended.
    print_cutsite_scores(validationSet, validation_labels, validation_pred,
                         parameters)