def cv(db, csv_target, csv_descriptors, n_splits_, n_repeats_, num_epochs,
       n_rot_train, train_steps_per_epoch_, n_rot_test, test_steps_per_epoch_,
       ndense_layers, nunits, nfilters, random_state, cvout=None,
       fcvgroup=None, featimp_out=None, y_recalc=False, mout=None):
    # Load the dataset
    ai = AIModel(csv_target, db, csv_descriptors)
    available_keys = ai.GetAvailableKeys()
    print("N. instances: %d" % (len(ai.target)))

    predictions = dict()
    valpredictions = dict()
    for key in ai.target.keys():
        predictions[key] = []
        valpredictions[key] = []

    feat_imp = None
    feat_imp_iterations = 20
    if featimp_out is not None:
        # Feature importance list for the csv descriptors
        if ai.other_descriptors is not None:
            feat_imp = [[] for p in range(ai.nfeatures)]
            # Charge voxel descriptor
            feat_imp.append([])
    else:
        print("Feature Importance calculation: DISABLED")

    # Create a directory to store all the models
    mout_path = None
    if mout is not None:
        # Utilised to store the out path
        mout_path = Path("%s_%s" % (time.strftime("%Y%m%d%H%M%S"), mout))
        mout_path.mkdir(exist_ok=True, parents=True)
        if ai.other_descriptors is not None:
            # Save the descriptor order
            f = open("%s/odesc_header.csv" % (str(mout_path.absolute())), "w")
            for item in ai.header:
                f.write("%s\n" % (item))
            f.close()

    # Choose between a static manual cross-validation group or
    # repeated k-fold cross-validation
    cvmethod = None
    cvgroups = None
    if fcvgroup is not None:
        cvgroups = CVGroupRead(fcvgroup)
        cvmethod = StaticGroupCV(cvgroups)
        # cvmethod = RepeatedStratifiedCV(cvgroups, n_repeats_, 2)
    else:
        cvmethod = RepeatedKFold(available_keys,
                                 n_splits_,
                                 n_repeats_,
                                 random_state,
                                 test_size=0.2)

    cv_ = 0
    for train_keys, val_keys, test_keys in cvmethod:
        print("Train set size: %d Val set size: %d Test set size: %d" % (
            len(train_keys), len(val_keys), len(test_keys)))
        # Some memory clean-up
        K.clear_session()
        # print(global_test_intexes)
        model = None
        model_ = GetKerasModel()
        if ai.other_descriptors is None:
            if model_ is None:
                model = build_model(ai.conv3d_chtype,
                                    ai.input_shape,
                                    ndense_layers,
                                    nunits,
                                    nfilters)
            else:
                model = model_(ai.conv3d_chtype,
                               ai.input_shape,
                               ndense_layers,
                               nunits,
                               nfilters)
            # model = model_scirep(ai.conv3d_chtype, ai.input_shape, ndense_layers, nunits, nfilters)
            # model = ResNetModel(ai.input_shape)
            print(model.summary())
        else:
            if model_ is None:
                model = build_2DData_model(ai.conv3d_chtype,
                                           ai.input_shape,
                                           ai.nfeatures,
                                           ndense_layers,
                                           nunits,
                                           nfilters)
            else:
                model = model_(ai.conv3d_chtype,
                               ai.input_shape,
                               ai.nfeatures,
                               ndense_layers,
                               nunits,
                               nfilters)
            """
            for l in model.layers[0].layers:
                print(l.summary())
            """
            print("Total Summary")
            print(model.summary())

        dname = os.path.basename(csv_target).replace(".csv", "")
        log_dir_ = ("./logs/cv%d_%s_%d_#rot%d_#f%d_#dl%d_#u%d_" % (
            cv_, dname, num_epochs, train_steps_per_epoch_, nfilters,
            ndense_layers, nunits))
        log_dir_ += time.strftime("%Y%m%d%H%M%S")
        model_outfile = "%s/%d.h5" % (str(mout_path.absolute()), cv_)
        callbacks_ = [
            TensorBoard(log_dir=log_dir_,
                        histogram_freq=0,
                        write_graph=False,
                        write_images=False),
            ModelCheckpoint(model_outfile,
                            monitor='val_loss',
                            verbose=0,
                            save_best_only=True)
        ]

        train_generator = ai.VoxelTrainGenerator(train_keys, n_rot_train)
        x_train_, y_train_ = ai.VoxelTestSetGenerator(train_keys, n_rot_train)
        x_test_, y_test_ = ai.VoxelTestSetGenerator(test_keys, n_rot_test)
        x_val_, y_val_ = ai.VoxelTestSetGenerator(val_keys, n_rot_test)
        val_generator = ai.VoxelTrainGenerator(val_keys, n_rot_test)

        model.fit_generator(
            train_generator,
            epochs=num_epochs,
            steps_per_epoch=train_steps_per_epoch_,
            verbose=1,
            # validation_data=(x_test_, y_test_),
            validation_data=val_generator,
            validation_steps=test_steps_per_epoch_,
            callbacks=callbacks_,
            use_multiprocessing=True)

        """
        if y_recalc is True:
            # Recalculate y: it takes a lot of time
            x_dataset_, y_dataset_ = ai.VoxelTestSetGenerator(train_keys,
                                                              n_rotation_test)
            yrecalc = model.predict(x_dataset_)
            # Store the recalculated y
            k = 0
            c = 0
            for i in range(len(yrecalc)):
                recalc[train_keys[k]].extend(list(yrecalc[i]))
                if c == n_rotation_test-1:
                    k += 1
                    c = 0
                else:
                    c += 1
        """
        """
        test_scores = model.evaluate(x_test_, y_test_)
        print("Test Scores: {}".format(test_scores))
        """

        # Reload the best checkpointed model
        model = GetLoadModelFnc()(model_outfile)
        y_recalc = model.predict(x_train_)
        ypred_test = model.predict(x_test_)
        ypred_val = model.predict(x_val_)
        # exp_pred_plot(y_test_, ypred_test[:,0])
        r2 = RSQ(y_train_, y_recalc)
        q2 = RSQ(y_test_, ypred_test)
        vr2 = RSQ(y_val_, ypred_val)
        print("Train R2: %.4f Test Q2: %.4f Val R2: %.4f\n" % (r2, q2, vr2))

        # Store the validation prediction result
        k = 0
        c = 0
        for i in range(len(ypred_val)):
            valpredictions[val_keys[k]].append(list(ypred_val[i]))
            if c == n_rot_test - 1:
                k += 1
                c = 0
            else:
                c += 1

        # Store the cross validation result
        k = 0
        c = 0
        for i in range(len(ypred_test)):
            predictions[test_keys[k]].append(list(ypred_test[i]))
            if c == n_rot_test - 1:
                k += 1
                c = 0
            else:
                c += 1

        """
        Compute the feature importance according to the
        Breiman-Fisher-Rudin-Dominici algorithm:

        Train a model f with a feature matrix X and a target vector y,
        and measure the error L(y, y_pred) = e_original.

        Input: trained model f, feature matrix X, target vector y,
               error measure L(y, y_pred)

        1) Estimate the original model error.
        2) For each feature:
           - generate a feature matrix with feature j permuted N times,
             to break the association between Xj and y;
           - estimate the error using the permuted feature matrix;
           - calculate the feature importance
             FI = e_perm/e_original or FI = e_perm - e_original.
        3) Sort the variables by descending FI.

        The error measure used here is the mean absolute error (the mean
        squared error variant, mse = (np.square(A - B)).mean(axis=0), is
        left commented out). See the standalone sketch after this function.
        """
        if feat_imp is not None:
            # e_orig = MSE(list(y_test_), list(ypred))
            e_orig = MAE(list(y_test_), list(ypred_test))
            # Calculate the feature importance for the descriptors
            for fid_ in range(ai.nfeatures):
                for it in range(feat_imp_iterations):
                    x_test_perm = ai.FeaturePermutation(x_test_, fid=fid_)
                    ypred_perm = model.predict(x_test_perm)
                    # e_perm = MSE(list(y_test_), list(ypred_perm))
                    e_perm = MAE(list(y_test_), list(ypred_perm))
                    feat_imp[fid_].append(e_perm / e_orig)
            # Calculate the feature importance for the voxel information
            for it in range(feat_imp_iterations):
                x_test_perm = ai.FeaturePermutation(x_test_, fid=9999)
                ypred_perm = model.predict(x_test_perm)
                e_perm = MAE(list(y_test_), list(ypred_perm))
                feat_imp[-1].append(e_perm / e_orig)

        if mout_path is not None:
            model.save("%s/%d.h5" % (str(mout_path.absolute()), cv_))

        # Update the cross validation id
        cv_ += 1

    if cvout is not None:
        WriteCrossValidationOutput(cvout, ai.target, predictions, None)

    if feat_imp is not None:
        fo = open("%s" % (featimp_out), "w")
        for i in range(ai.nfeatures):
            """
            fo.write("%s," % (ai.header[i]))
            for j in range(len(feat_imp[i])-1):
                fo.write("%.4f," % (feat_imp[i][j]))
            fo.write("%.4f\n" % (feat_imp[i][-1]))
            """
            a = np.array(feat_imp[i])
            min_a = a.min()
            q1 = np.percentile(a, 25)
            med_a = np.percentile(a, 50)
            q3 = np.percentile(a, 75)
            max_a = a.max()
            fo.write("%s,%.4f,%.4f,%.4f,%.4f,%.4f\n" % (
                ai.header[i], min_a, q1, med_a, q3, max_a))
        a = np.array(feat_imp[-1])
        min_a = a.min()
        q1 = np.percentile(a, 25)
        med_a = np.percentile(a, 50)
        q3 = np.percentile(a, 75)
        max_a = a.max()
        fo.write("%s,%.4f,%.4f,%.4f,%.4f,%.4f\n" % (
            "qm_voxel_charge", min_a, q1, med_a, q3, max_a))
        """
        fo.write("%s,\n" % ("qm_voxel_charge"))
        for j in range(len(feat_imp[-1])-1):
            fo.write("%.4f," % (feat_imp[-1][j]))
        fo.write("%.4f\n" % (feat_imp[-1][-1]))
        """
        fo.close()

    ycvp = {}
    for key in predictions.keys():
        if len(predictions[key]) > 0:
            ycvp[key] = np.mean(predictions[key])
        else:
            continue
    return ycvp
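
# The docstring in cv() above describes permutation feature importance
# (FI = e_perm / e_original with an MAE error measure). The helper below is
# a minimal, self-contained sketch of that scheme, NOT part of the pipeline:
# `predict` stands for any fitted model's predict callable, X is a
# (n_samples, n_features) matrix and y the target vector; all names here
# are hypothetical.
import numpy as np


def permutation_importance_sketch(predict, X, y, n_iter=20, seed=None):
    rng = np.random.default_rng(seed)
    y = np.asarray(y, dtype=float)
    # 1) Estimate the original model error (baseline MAE)
    e_orig = np.mean(np.abs(y - np.ravel(predict(X))))
    importances = []
    for fid in range(X.shape[1]):
        ratios = []
        for _ in range(n_iter):
            X_perm = np.array(X, copy=True)
            # 2) Shuffle one column to break the association between Xj and y
            rng.shuffle(X_perm[:, fid])
            e_perm = np.mean(np.abs(y - np.ravel(predict(X_perm))))
            ratios.append(e_perm / e_orig)
        importances.append(np.mean(ratios))
    # 3) np.argsort(importances)[::-1] gives the descending-FI ranking
    return np.array(importances)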
def runcv(self, batch_size_, batch_mode_, num_epochs, ndense_layers, nunits,
          cvout, n_splits=5, n_repeats=10, random_state=None, mout=None,
          fimpfile=None):
    print("N. instances: %d" % (len(self.target)))
    mout_path = None
    if mout is not None:
        # Utilised to store the out path
        # mout_path = Path("%s_%s" % (time.strftime("%Y%m%d%H%M%S"), mout))
        mout_path = Path(mout)
    else:
        # Utilised to store the out path
        mout_path = Path("%s_model" % (time.strftime("%Y%m%d%H%M%S")))

    last_model = None
    if mout_path.exists():
        # Find the last model and restart the calculation from it.
        p = Path(mout_path).glob('**/*.h5')
        # Get only the file numbers
        mids = [int(x.stem) for x in p if x.is_file()]
        if len(mids) > 0:
            # Restart from here...
            last_model = max(mids)
        else:
            last_model = None
    else:
        mout_path.mkdir()
        # Save the descriptor order
        f = open("%s/odesc_header.csv" % (str(mout_path.absolute())), "w")
        for dname in self.xheader:
            f.write("%s\n" % (dname))
        f.close()

    feat_imp = {}
    if fimpfile is not None:
        for feat_name in self.xheader:
            feat_imp[feat_name] = {'mae': [], 'mse': []}

    cv_ = 0
    predictions = {}
    recalc = {}
    for key in self.target.keys():
        predictions[key] = []
        recalc[key] = []

    valfn = GetValidationFnc()
    if valfn is None:
        # Yields (train_keys, val_keys, test_keys) triplets;
        # see the interface sketch after this method.
        valfn = RepeatedKFold(list(self.target.keys()),
                              n_splits,
                              n_repeats,
                              random_state=random_state,
                              test_size=0.2)
    else:
        print("Using custom validation split function")
        valfn = valfn(list(self.target.keys()))

    for train_keys, val_keys, test_keys in valfn:
        # Some memory clean-up
        K.clear_session()
        train_steps_per_epoch = ceil(len(train_keys) / float(batch_size_))
        train_generator = self.DataGenerator(train_keys,
                                             batch_size_,
                                             batch_mode_)
        # x_train, y_train = self.GenData(train_keys)
        # test_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
        # test_generator = self.DataGenerator(test_keys, batch_size_)
        x_test, y_test = self.GenData(test_keys)
        x_val, y_val = self.GenData(val_keys)
        print("Train set size: %d Val set size: %d Test set size: %d" % (
            len(train_keys), len(val_keys), len(test_keys)))
        model_output = "%s/%d.h5" % (str(mout_path.absolute()), cv_)
        if last_model is None:
            model = None
            model_ = GetKerasModel()
            if model_ is None:
                model = example_build_model(self.nfeatures,
                                            nunits,
                                            ndense_layers,
                                            self.ntargets)
            else:
                model = model_(self.nfeatures, nunits, ndense_layers)
            print(model.summary())
            dname = cvout.replace(".csv", "")
            b = batch_size_
            log_dir_ = ("./logs/cv%d_%s_#b%d_#e%d_#u%d_#dl%d_" % (
                cv_, dname, b, num_epochs, nunits, ndense_layers))
            log_dir_ += time.strftime("%Y%m%d%H%M%S")
            callbacks_ = [
                TensorBoard(log_dir=log_dir_,
                            histogram_freq=0,
                            write_graph=False,
                            write_images=False),
                ModelCheckpoint(model_output,
                                monitor='val_loss',
                                verbose=0,
                                save_best_only=True)
            ]
            model.fit_generator(
                train_generator,
                steps_per_epoch=train_steps_per_epoch,
                epochs=num_epochs,
                verbose=self.verbose,
                validation_data=(x_val, y_val),
                # validation_data=test_generator,
                # validation_steps=test_steps_per_epoch,
                callbacks=callbacks_)
        else:
            if last_model - 1 == cv_:
                last_model = None

        # Reload the best checkpointed model for this fold
        model_ = GetLoadModelFnc()(model_output)
        y_recalc_train = self.makePrediction(model_, train_keys)
        y_pred_val = self.makePrediction(model_, val_keys)
        y_pred_test = self.makePrediction(model_, test_keys)

        y_recalc = []
        y_true_recalc = []
        for key in train_keys:
            y_recalc.append(y_recalc_train[key])
            y_true_recalc.append(self.target[key])
            recalc[key].append(y_recalc_train[key])

        ypred_val = []
        ytrue_val = []
        for key in val_keys:
            ypred_val.append(y_pred_val[key])
            ytrue_val.append(self.target[key])

        ypred_test = []
        ytrue_test = []
        for key in test_keys:
            ypred_test.append(y_pred_test[key])
            ytrue_test.append(self.target[key])
            # Store the test set prediction
            predictions[key].append(y_pred_test[key])

        r2 = RSQ(y_true_recalc, y_recalc)
        q2 = RSQ(ytrue_test, ypred_test)
        tr2 = RSQ(ytrue_val, ypred_val)
        print("Train R2: %.4f Test Q2: %.4f Val R2: %.4f\n" % (r2, q2, tr2))

        # Store the cross validation model
        # if mout_path is not None:
        #     model.save("%s/%d.h5" % (str(mout_path.absolute()), cv_))

        if fimpfile is not None:
            fimp = FeatureImportance(model_, x_test, y_test, self.xheader)
            fires = fimp.Calculate(verbose=1)
            for key in fires.keys():
                feat_imp[key]['mae'].extend(fires[key]['mae'])
                feat_imp[key]['mse'].extend(fires[key]['mse'])
        cv_ += 1

    WriteCrossValidationOutput(cvout, self.target, predictions, recalc)
    if fimpfile is not None:
        WriteFeatureImportance(feat_imp, fimpfile)
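
# Both cv() and the runcv() methods above iterate over
# (train_keys, val_keys, test_keys) triplets yielded by RepeatedKFold or a
# custom validation function. The generator below is a minimal sketch of
# that assumed interface only, built on scikit-learn's KFold and
# train_test_split; it is not the project's RepeatedKFold implementation.
from sklearn.model_selection import KFold, train_test_split


def repeated_kfold_sketch(keys, n_splits, n_repeats,
                          random_state=None, test_size=0.2):
    keys = list(keys)
    for rep in range(n_repeats):
        seed = None if random_state is None else random_state + rep
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
        for train_idx, test_idx in kf.split(keys):
            trainval = [keys[i] for i in train_idx]
            test_keys = [keys[i] for i in test_idx]
            # Carve a validation subset out of the training keys
            train_keys, val_keys = train_test_split(trainval,
                                                    test_size=test_size,
                                                    random_state=seed)
            yield train_keys, val_keys, test_keys

# Usage:
# for train_keys, val_keys, test_keys in repeated_kfold_sketch(keys, 5, 10, 0):
#     ...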
def simplerun(db, csv_target, csv_descriptors, num_epochs, n_rot_train,
              train_steps_per_epoch_, n_rotation_test, test_steps_per_epoch_,
              ndense_layers, nunits, nfilters, random_state, outmodel=None,
              fcvgroup=None, tid=None):
    # Load the dataset
    ai = AIModel(csv_target, db, csv_descriptors)
    available_keys = ai.GetAvailableKeys()
    train_keys = None
    test_keys = None
    if fcvgroup is not None:
        cvgroups = CVGroupRead(fcvgroup)
        tkey = None
        if tid is not None:
            tkey = int(tid)
            print(tkey)
        else:
            tkey = random.choice(list(cvgroups.keys()))
        print(cvgroups[tkey])
        test_keys = cvgroups[tkey]
        train_keys = []
        for key in cvgroups.keys():
            if key == tkey:
                continue
            else:
                train_keys.extend(cvgroups[key])
    else:
        ttfn = GetTrainTestFnc()
        if ttfn is None:
            ttfn = TrainTestSplit
        else:
            print("Using custom train/test split function")
        train_keys, test_keys = ttfn(available_keys,
                                     test_size=0.20,
                                     random_state=random_state)

    print("Training set size: %d Test set size: %d" % (
        len(train_keys), len(test_keys)))
    train_generator = ai.VoxelTrainGenerator(train_keys, n_rot_train)
    print(train_keys)
    print(test_keys)

    model = None
    model_ = GetKerasModel()
    if ai.other_descriptors is None:
        if model_ is None:
            model = build_model(ai.conv3d_chtype,
                                ai.input_shape,
                                ndense_layers,
                                nunits,
                                nfilters)
        else:
            model = model_(ai.conv3d_chtype,
                           ai.input_shape,
                           ndense_layers,
                           nunits,
                           nfilters)
        # model = build_fcn_model(ai.conv3d_chtype, ai.input_shape, ndense_layers, nunits, nfilters)
        # model = model_scirep(ai.conv3d_chtype, ai.input_shape, ndense_layers, nunits, nfilters)
        # model = ResNetModel(ai.input_shape)
        print(model.summary())
    else:
        if model_ is None:
            model = build_2DData_model(ai.conv3d_chtype,
                                       ai.input_shape,
                                       ai.nfeatures,
                                       ndense_layers,
                                       nunits,
                                       nfilters)
        else:
            model = model_(ai.conv3d_chtype,
                           ai.input_shape,
                           ai.nfeatures,
                           ndense_layers,
                           nunits,
                           nfilters)
        """
        for l in model.layers[0].layers:
            print(l.summary())
        """
        print("Total Summary")
        print(model.summary())

    plot_model(model, to_file="model.png", show_shapes=True)

    dname = os.path.basename(csv_target).replace(".csv", "")
    dname += os.path.basename(db)
    log_dir_ = ("./logs/%s_%d_#rot%d_#f%d_#dl%d_#u%d_" % (
        dname, num_epochs, train_steps_per_epoch_, nfilters,
        ndense_layers, nunits))
    log_dir_ += time.strftime("%Y%m%d%H%M%S")
    callbacks_ = [
        TensorBoard(log_dir=log_dir_,
                    histogram_freq=0,
                    write_graph=False,
                    write_images=False)
    ]
    """
    , EarlyStopping(monitor='val_loss',
                    min_delta=0,
                    patience=3,
                    verbose=0,
                    mode='auto')
    """
    test_generator = ai.VoxelTrainGenerator(test_keys, n_rot_train)
    model.fit_generator(
        train_generator,
        epochs=num_epochs,
        steps_per_epoch=train_steps_per_epoch_,
        verbose=1,
        # max_queue_size=2,
        # workers=0,
        # validation_data=(x_test_, y_test_),
        validation_data=test_generator,
        validation_steps=test_steps_per_epoch_,
        # nb_val_samples=x_test.shape[0],
        callbacks=callbacks_,
        use_multiprocessing=True)

    x_test_, y_test_ = ai.VoxelTestSetGenerator(test_keys, n_rotation_test)
    y_pred_ = model.predict(x_test_)
    print("Test R2: %.4f" % (r2_score(y_test_, y_pred_)))

    fo = open("statconf.csv", "w")
    for key in ai.statvoxconf.keys():
        fo.write("%s," % (key))
        for i in range(len(ai.statvoxconf[key])):
            for j in range(len(ai.statvoxconf[key][i])):
                fo.write("%d," % (ai.statvoxconf[key][i][j]))
        fo.write("\n")
    fo.close()
    # score = model.evaluate(x_test_, y_test_, verbose=0)
    # print(score)
    if outmodel is not None:
        model.save(outmodel)
def simplerun(self, batch_size_, batch_mode_, num_epochs, ndense_layers,
              nunits, random_state, model_output=None):
    """
    Run a simple model...
    """
    # train_keys, test_keys = MDCTrainTestSplit(self.target, 0)
    # train_keys, test_keys = DISCTrainTestSplit(self.target)
    ttfn = GetTrainTestFnc()
    if ttfn is None:
        ttfn = TrainTestSplit
    else:
        print("Using custom train/test split function")
    train_keys, test_keys = ttfn(list(self.target.keys()),
                                 test_size=0.20,
                                 random_state=random_state)
    print("Train set size: %d Test set size: %d" % (
        len(train_keys), len(test_keys)))

    model = None
    if model_output is not None and Path(model_output).is_file():
        model = GetLoadModelFnc()(model_output)
    else:
        model_ = GetKerasModel()
        if model_ is None:
            model = example_build_model(self.nfeatures,
                                        nunits,
                                        ndense_layers,
                                        self.ntargets)
        else:
            model = model_(self.nfeatures, nunits, ndense_layers)
    print(model.summary())

    train_steps_per_epoch = ceil(len(train_keys) / float(batch_size_))
    train_generator = self.DataGenerator(train_keys, batch_size_, batch_mode_)
    # x_train, y_train = self.GenData(train_keys)  # This is unstable
    # test_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
    # test_generator = self.DataGenerator(test_keys, batch_size_)
    # This is more stable
    x_test, y_test = self.GenData(test_keys)

    b = batch_size_
    log_dir_ = ("./logs/#b%d_#e%d_#u%d_#dl%d_" % (
        b, num_epochs, nunits, ndense_layers))
    log_dir_ += time.strftime("%Y%m%d%H%M%S")
    callbacks_ = [
        TensorBoard(log_dir=log_dir_,
                    histogram_freq=0,
                    write_graph=False,
                    write_images=False)
    ]
    """
    model.fit(x_train, y_train,
              epochs=num_epochs,
              batch_size=b,
              verbose=self.verbose,
              validation_data=(x_test, y_test),
              callbacks=callbacks_)
    yrecalc_train = model.predict(x_train)
    """
    model.fit_generator(
        train_generator,
        steps_per_epoch=train_steps_per_epoch,
        epochs=num_epochs,
        verbose=1,
        validation_data=(x_test, y_test),
        # validation_data=test_generator,
        # validation_steps=test_steps_per_epoch,
        callbacks=callbacks_)

    y_recalc_train = self.makePrediction(model, train_keys)
    y_pred_test = self.makePrediction(model, test_keys)

    ytrain_recalc = []
    ytrain_true = []
    for key in train_keys:
        ytrain_recalc.append(y_recalc_train[key])
        ytrain_true.append(self.target[key])

    ytest_pred = []
    ytest_true = []
    for key in test_keys:
        ytest_pred.append(y_pred_test[key])
        ytest_true.append(self.target[key])

    print("R2: %.4f Q2: %.4f MSE: %.4f" % (RSQ(ytrain_true, ytrain_recalc),
                                           RSQ(ytest_true, ytest_pred),
                                           MSE(ytest_true, ytest_pred)))

    fo = open("%s_pred.csv" % time.strftime("%Y%m%d%H%M%S"), "w")
    for i in range(len(ytest_true)):
        fo.write("%s" % (test_keys[i]))
        for j in range(len(ytest_true[i])):
            fo.write(",%f,%f" % (ytest_true[i][j], ytest_pred[i][j]))
        fo.write("\n")
    fo.close()

    if model_output is not None:
        model.save(model_output)
def GridSearch(self, batch_size_, steps_per_epoch_, num_epochs, random_state,
               gmout="GridSearchResult"):
    train_keys, test_keys = TrainTestSplit(list(self.target.keys()),
                                           test_size=0.20,
                                           random_state=random_state)
    print("Train set size: %d Test set size: %d" % (
        len(train_keys), len(test_keys)))
    # train_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
    # train_generator = self.DataGenerator(train_keys, batch_size_)
    x_train, y_train, rtrain_keys = self.GenData(train_keys)  # This is unstable
    # test_steps_per_epoch = ceil(len(train_keys)/float(batch_size_))
    # test_generator = self.DataGenerator(test_keys, batch_size_)
    # This is more stable
    x_test, y_test, rtest_keys = self.GenData(test_keys)

    # PARAMETER DEFINITIONS
    # simple architecture
    """
    param = {}
    param["nunits"] = [100, 200, 400]
    param["ndense_layers"] = [2, 4, 6]
    param["dropout"] = ["on", "off"]
    # param["activation"] = ["relu", "leakyrelu"]
    param["activation"] = ["relu"]
    """
    # resnet architecture
    param = {}
    param["nunits"] = [200, 400, 600, 800]
    param["ndense_layers"] = [2, 4, 6, 8]
    all_combo = list(ParameterGrid(param))
    print("Evaluating %d combinations of parameters" % (len(all_combo)))

    already_computed_combo = []
    if Path(gmout).is_file():
        fi = open(gmout, "r")
        for line in fi:
            v = str.split(line.strip(), " ")
            """
            # simple architecture
            units = v[0]
            layers = v[1]
            act = v[2]
            drop = v[3]
            s = ("%s-%s-%s-%s" % (units, layers, act, drop))
            """
            # resnet architecture
            units = v[0]
            layers = v[1]
            s = ("%s-%s" % (units, layers))
            already_computed_combo.append(s)
        fi.close()

    model_ = GetKerasModel()
    for c in all_combo:
        """
        # simple architecture
        s = ("%s-%s-%s-%s" % (c["nunits"],
                              c["ndense_layers"],
                              c["activation"],
                              c["dropout"]))
        """
        # resnet architecture
        s = ("%s-%s" % (c["nunits"], c["ndense_layers"]))
        if s in already_computed_combo:
            print("%s already computed... skip..." % (s))
        else:
            """
            model = build_gridsearch_model(self.nfeatures,
                                           c["ndense_layers"],
                                           c["nunits"],
                                           c["activation"],
                                           c["dropout"])
            """
            if model_ is None:
                model = example_build_model(self.nfeatures,
                                            c["nunits"],
                                            c["ndense_layers"])
            else:
                model = model_(self.nfeatures,
                               c["nunits"],
                               c["ndense_layers"])
            """
            model = build_dnn_resnet_model(self.nfeatures,
                                           c["nunits"],
                                           c["ndense_layers"])
            """
            print(model.summary())
            b = batch_size_
            """
            model_name = ("#b%d_#e%d_#u%d_#dl%d_act-%s_dp-%s" % (
                b, num_epochs, c["nunits"], c["ndense_layers"],
                c["activation"], c["dropout"]))
            """
            model_name = ("#b%d_#e%d_#u%d_#dl%d" % (
                b, num_epochs, c["nunits"], c["ndense_layers"]))
            log_dir_ = ("./logs/%s" % (model_name))
            log_dir_ += time.strftime("%Y%m%d%H%M%S")
            model_output = "%s.h5" % (model_name)
            callbacks_ = [TensorBoard(log_dir=log_dir_,
                                      histogram_freq=0,
                                      write_graph=False,
                                      write_images=False),
                          ModelCheckpoint(model_output,
                                          monitor='val_loss',
                                          verbose=0,
                                          save_best_only=True)]
            """
            callbacks_ = [TensorBoard(log_dir=log_dir_,
                                      histogram_freq=0,
                                      write_graph=False,
                                      write_images=False),
                          EarlyStopping(monitor='val_loss',
                                        min_delta=0,
                                        patience=50,
                                        verbose=0,
                                        mode='auto')]
            """
            model.fit(x_train, y_train,
                      epochs=num_epochs,
                      batch_size=b,
                      steps_per_epoch=steps_per_epoch_,
                      verbose=self.verbose,
                      validation_data=(x_test, y_test),
                      callbacks=callbacks_)
            bestmodel = load_model(model_output,
                                   custom_objects={"score": score})
            yrecalc_train = bestmodel.predict(x_train)
            """
            model.fit_generator(train_generator,
                                steps_per_epoch=train_steps_per_epoch,
                                epochs=num_epochs,
                                verbose=1,
                                validation_data=(x_test, y_test),
                                # validation_data=test_generator,
                                # validation_steps=test_steps_per_epoch,
                                callbacks=callbacks_)
            yrecalc_train = []
            y_train = []
            for key in train_keys:
                a = np.array([self.X_raw[key]])
                yrecalc_train.extend(model.predict(a))
                y_train.append(self.target[key])
            """
            ypred_test = bestmodel.predict(x_test)
            r2 = r2_score(y_train, yrecalc_train)
            mse_train = mse(y_train, yrecalc_train)
            mae_train = mae(y_train, yrecalc_train)
            q2 = r2_score(y_test, ypred_test)
            mse_test = mse(y_test, ypred_test)
            mae_test = mae(y_test, ypred_test)
            train_score = LOGMAE(y_train, yrecalc_train)
            test_score = LOGMAE(y_test, ypred_test)
            print("R2: %.4f Train Score: %f Q2: %.4f Test Score: %f" % (
                r2, train_score, q2, test_score))
            fo = open("%s" % (gmout), "a")
            """
            # simple architecture
            fo.write("%d %d %s %s %f %f %f %f %f %f %f %f\n" % (
                c["nunits"], c["ndense_layers"], c["activation"],
                c["dropout"], mse_train, mae_train, r2, train_score,
                mse_test, mae_test, q2, test_score))
            """
            # resnet architecture
            fo.write("%d %d %f %f %f %f %f %f %f %f\n" % (
                c["nunits"], c["ndense_layers"], mse_train, mae_train,
                r2, train_score, mse_test, mae_test, q2, test_score))
            fo.close()
def runloo(self, batch_size_, num_epochs, ndense_layers, nunits, cvout):
    print("N. instances: %d" % (len(self.target)))
    predictions = dict()
    for val_key in self.target.keys():
        sub_target = {}
        for key in self.target.keys():
            if val_key == key:
                continue
            else:
                sub_target[key] = self.target[key]
                # train_keys.append(key)
        print("Validating %s" % (val_key))
        # train_keys, test_keys = MDCTrainTestSplit(sub_target, 0)
        train_keys, test_keys = TrainTestSplit(sub_target, test_size=0.20)
        x_train, y_train, rtrain_keys = self.GenData(train_keys)
        x_test, y_test, rtest_keys = self.GenData(test_keys)

        model = None
        model_ = GetKerasModel()
        if model_ is None:
            model = example_build_model(self.nfeatures, nunits, ndense_layers)
        else:
            model = model_(self.nfeatures, nunits, ndense_layers)
        print(model.summary())

        b = 0
        if batch_size_ is None:
            b = len(x_test)
        else:
            b = batch_size_
        log_dir_ = ("./logs/%s_#b%d_#e%d_#u%d_#dl%d_" % (
            val_key, b, num_epochs, nunits, ndense_layers))
        log_dir_ += time.strftime("%Y%m%d%H%M%S")
        callbacks_ = [TensorBoard(log_dir=log_dir_,
                                  histogram_freq=0,
                                  write_graph=False,
                                  write_images=False)]
        """
        callbacks_ = [TensorBoard(log_dir=log_dir_,
                                  histogram_freq=0,
                                  write_graph=False,
                                  write_images=False),
                      EarlyStopping(monitor='val_loss',
                                    min_delta=0,
                                    patience=3,
                                    verbose=0,
                                    mode='auto')]
        """
        model.fit(x_train, y_train,
                  epochs=num_epochs,
                  batch_size=b,
                  verbose=1,
                  validation_data=(x_test, y_test),
                  callbacks=callbacks_)
        # Predict the left-out instance
        x_val, y_val, rval_keys = self.GenData([val_key])
        predictions[val_key] = model.predict(x_val)[0]

    fo = open(cvout, "w")
    for key in predictions.keys():
        fo.write("%s,%.4f,%.4f\n" % (key, self.target[key], predictions[key]))
    fo.close()
def runcv(self, batch_size_, num_epochs, steps_per_epoch_, nfilters, nunits,
          random_state, cvout, n_splits=5, n_repeats=10, mout=None):
    print("N. instances: %d" % (len(self.target)))
    mout_path = None
    if mout is not None:
        # Utilised to store the out path
        mout_path = Path("%s_%s" % (time.strftime("%Y%m%d%H%M%S"), mout))
        mout_path.mkdir()
        # Save the descriptor order
        """
        f = open("%s/odesc_header.csv" % (str(mout_path.absolute())), "w")
        for name in self.xheader:
            f.write("%s\n" % (name))
        f.close()
        """

    cv_ = 0
    predictions = {}
    recalc = {}
    for key in self.target.keys():
        # N.B.: each molecule can have multiple outputs.
        predictions[key] = []
        recalc[key] = []

    for train_keys, val_keys, test_keys in RepeatedKFold(list(self.target.keys()),
                                                         n_splits,
                                                         n_repeats,
                                                         random_state=random_state,
                                                         test_size=0.2):
        print("Train set size: %d Val set size: %d Test set size: %d" % (
            len(train_keys), len(val_keys), len(test_keys)))
        x_train, y_train, rtrain_keys = self.GenData(train_keys)
        x_val, y_val, rval_keys = self.GenData(val_keys)
        x_test, y_test, rtest_keys = self.GenData(test_keys)

        # Some memory clean-up
        K.clear_session()

        model = None
        model_ = GetKerasModel()
        if self.dx is not None:
            print("Number of descriptors: %d" % (self.n_descs))
            if model_ is None:
                model = example_build_2DData_model(self.db.input_shape,
                                                   self.n_descs,
                                                   nfilters,
                                                   nunits)
            else:
                model = model_(self.db.input_shape,
                               self.n_descs,
                               nfilters,
                               nunits)
        else:
            if model_ is None:
                model = example_build_model(self.db.input_shape,
                                            nfilters,
                                            nunits)
            else:
                model = model_(self.db.input_shape, nfilters, nunits)
        print(model.summary())

        dname = cvout.replace(".csv", "")
        b = 0
        if batch_size_ is None:
            b = len(x_val)
        else:
            b = batch_size_
        name = "cv%d_%s_#b%d_#e%d_#u%d_#f%d_" % (
            cv_, dname, b, num_epochs, nunits, nfilters)
        name += time.strftime("%Y%m%d%H%M%S")
        log_dir_ = ("./logs/%s" % (name))

        model_output = None
        if mout_path is not None:
            model_output = "%s/%d.h5" % (str(mout_path.absolute()), cv_)

        if model_output is None:
            callbacks_ = [TensorBoard(log_dir=log_dir_,
                                      histogram_freq=0,
                                      write_graph=False,
                                      write_images=False)]
        else:
            callbacks_ = [TensorBoard(log_dir=log_dir_,
                                      histogram_freq=0,
                                      write_graph=False,
                                      write_images=False),
                          ModelCheckpoint(model_output,
                                          monitor='val_loss',
                                          verbose=0,
                                          save_best_only=True)]

        train_generator = self.DataGenerator(train_keys, batch_size_)
        model.fit_generator(train_generator,
                            steps_per_epoch=steps_per_epoch_,
                            epochs=num_epochs,
                            verbose=1,
                            validation_data=(x_val, y_val),
                            # validation_data=test_generator,
                            # validation_steps=test_steps_per_epoch,
                            callbacks=callbacks_,
                            use_multiprocessing=True)
        """
        model.fit(x_train, y_train,
                  epochs=num_epochs,
                  batch_size=b,
                  steps_per_epoch=steps_per_epoch_,
                  verbose=1,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks_)
        """
        # WARNING: implement cross validation results for multiple outputs
        if model_output is not None:
            bestmodel = load_model(model_output,
                                   custom_objects={"score": score})
        else:
            bestmodel = model

        yrecalc = bestmodel.predict(x_train)
        for i in range(len(yrecalc)):
            recalc[rtrain_keys[i]].append(list(yrecalc[i]))

        ypred_val = bestmodel.predict(x_val)
        print("Validation R2: %.4f" % (r2_score(y_val, ypred_val)))

        ypred_test = bestmodel.predict(x_test)
        # exp_pred_plot(y_val_, ypred[:,0])
        print("Test R2: %.4f" % (r2_score(y_test, ypred_test)))
        for i in range(len(ypred_test)):
            predictions[rtest_keys[i]].append(list(ypred_test[i]))

        """
        if fimpfile is not None:
            fimp = FeatureImportance(model, x_val, y_val, self.xheader)
            fires = fimp.Calculate(verbose=1)
            for key in fires.keys():
                feat_imp[key]['mae'].extend(fires[key]['mae'])
                feat_imp[key]['mse'].extend(fires[key]['mse'])
        """
        cv_ += 1

    WriteCrossValidationOutput(cvout, self.target, predictions, recalc)
def simplerun(self, batch_size_, num_epochs, steps_per_epoch_, nfilters,
              nunits, random_state, mout=None):
    print("N. instances: %d" % (len(self.target)))
    ttfn = GetTrainTestFnc()
    if ttfn is None:
        ttfn = TrainTestSplit
    else:
        print("Using custom train/test split function")
    train_keys, test_keys = ttfn(list(self.target.keys()),
                                 test_size=0.20,
                                 random_state=random_state)
    print("Train set size: %d Test set size: %d" % (
        len(train_keys), len(test_keys)))

    model = None
    model_ = GetKerasModel()
    if self.dx is not None:
        print("Number of descriptors: %d" % (self.n_descs))
        if model_ is None:
            model = example_build_2DData_model(self.db.input_shape,
                                               self.n_descs,
                                               nfilters,
                                               nunits)
        else:
            model = model_(self.db.input_shape,
                           self.n_descs,
                           nfilters,
                           nunits)
    else:
        if model_ is None:
            model = example_build_model(self.db.input_shape, nfilters, nunits)
        else:
            model = model_(self.db.input_shape, nfilters, nunits)
    print(model.summary())

    x_train, y_train, rtrain_keys = self.GenData(train_keys)
    x_test, y_test, rtest_keys = self.GenData(test_keys)
    if self.dx is not None:
        print("Branch 1 size:", np.array(x_train[0]).shape)
        print("Branch 2 size:", np.array(x_train[1]).shape)
    else:
        print(x_train.shape)
        print(y_train.shape)

    b = 0
    if batch_size_ is None:
        b = len(x_test)
    else:
        b = batch_size_
    name = "#b%d_#e%d_#u%d_#f%d_" % (b, num_epochs, nunits, nfilters)
    name += time.strftime("%Y%m%d%H%M%S")
    log_dir_ = ("./logs/%s" % (name))
    callbacks_ = [TensorBoard(log_dir=log_dir_,
                              histogram_freq=0,
                              write_graph=False,
                              write_images=False)]
    """
    callbacks_ = [TensorBoard(log_dir=log_dir_,
                              histogram_freq=0,
                              write_graph=False,
                              write_images=False),
                  EarlyStopping(monitor='val_loss',
                                min_delta=0,
                                patience=50,
                                verbose=0,
                                mode='auto')]
    """
    # train_steps_per_epoch = int(np.ceil(len(train_keys)/float(batch_size_)))
    train_generator = self.DataGenerator(train_keys, batch_size_)
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch_,
                        epochs=num_epochs,
                        verbose=1,
                        validation_data=(x_test, y_test),
                        # validation_data=test_generator,
                        # validation_steps=test_steps_per_epoch,
                        callbacks=callbacks_,
                        use_multiprocessing=True)
    """
    model.fit(x_train, y_train,
              epochs=num_epochs,
              batch_size=b,
              verbose=self.verbose,
              validation_data=(x_test, y_test),
              callbacks=callbacks_)
    """
    yrecalc = model.predict(x_train)
    ypred_test = model.predict(x_test)

    fo = open("%s_pred.csv" % (name), "w")
    if ypred_test.shape[1] > 1:
        for i in range(len(rtest_keys)):
            fo.write("%s," % (rtest_keys[i]))
            for j in range(len(y_test[i]) - 1):
                fo.write("%f,%f," % (y_test[i][j], ypred_test[i][j]))
            fo.write("%f,%f\n" % (y_test[i][-1], ypred_test[i][-1]))
        fo.close()
        # Then calculate R2 and Q2 for each output...
        for j in range(ypred_test.shape[1]):
            y_train_ = [row[j] for row in y_train]
            yrecalc_ = [row[j] for row in yrecalc]
            y_test_ = [row[j] for row in y_test]
            ypred_test_ = [row[j] for row in ypred_test]
            print("Output %d R2: %.4f Q2: %.4f" % (
                j, r2_score(y_train_, yrecalc_),
                r2_score(y_test_, ypred_test_)))
    else:
        for i in range(len(rtest_keys)):
            fo.write("%s," % (rtest_keys[i]))
            for j in range(len(y_test[i])):
                fo.write("%f,%f" % (y_test[i][j], ypred_test[i][j]))
            fo.write("\n")
        fo.close()
        print("R2: %.4f Q2: %.4f" % (r2_score(y_train, yrecalc),
                                     r2_score(y_test, ypred_test)))