Example #1
def main(_):
    config = flags.FLAGS
    if config.mode == "train":
        train(config)
    elif config.mode == "data":
        train_test_split(config)
    elif config.mode == "sn":
        test_spectral_norm()
    elif config.mode == "trace":
        test_trace_approximation()
Example #2
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    #mlp
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Multilayer Perceptron",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
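Many of these snippets (Examples #2, #5, #11, #13, ...) call a project-local train_test_split(X, y, test_size=..., seed=..., shuffle=...) rather than scikit-learn's. A minimal shuffle-and-slice helper with that assumed signature is sketched below; the actual implementations in the source repositories may differ.

import numpy as np

def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None):
    # Shuffle X and y together, then slice off the last test_size fraction as the test set.
    X, y = np.asarray(X), np.asarray(y)
    if shuffle:
        rng = np.random.default_rng(seed)
        idx = rng.permutation(len(y))
        X, y = X[idx], y[idx]
    split_i = len(y) - int(len(y) * test_size)
    return X[:split_i], X[split_i:], y[:split_i], y[split_i:]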
Example #3
def main():
    @run_time
    def batch():
        print("Tesing the accuracy of LogisticRegression(batch)...")
        # Train model
        clf = LogisticRegression()
        clf.fit(X=X_train, y=y_train, lr=0.008, epochs=5000)
        # Model accuracy
        get_acc(clf, X_test, y_test)

    @run_time
    def stochastic():
        print("Tesing the accuracy of LogisticRegression(stochastic)...")
        # Train model
        clf = LogisticRegression()
        clf.fit(X=X_train, y=y_train, lr=0.01, epochs=200,
                method="stochastic", sample_rate=0.5)
        # Model accuracy
        get_acc(clf, X_test, y_test)

    # Load data
    X, y = load_breast_cancer()
    X = min_max_scale(X)
    # Split data randomly, train set rate 70%
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    batch()
    stochastic()
Example #4
    def __init__(self):
        '''
        Initialize and load dataset
        '''
        # Get all stop words
        self.stop_words = set(stopwords.words('english'))

        # Lemmatizer
        self.lemmatizer = WordNetLemmatizer()

        # stemmer
        self.stemmer = PorterStemmer()

        # Tokenizer
        self.tokenizer = RegexpTokenizer(r'\w+')

        # Load data
        self.load_data()

        X_train, y_train, X_test, y_test = train_test_split(self.X, self.y)
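        # Note: this project's helper is unpacked as (X_train, y_train, X_test, y_test),
        # not scikit-learn's (X_train, X_test, y_train, y_test) order.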

        # Train model
        self.train(X_train, y_train)

        # Evaluate
        accuracy, f1measure = self.evaulate(X_test, y_test)

        print('Accuracy: {:.3f}, F1-score: {:.3f}'.format(accuracy, f1measure))
Example #5
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=8)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)

    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Perceptron",
                      accuracy=accuracy,
                      legend_labels=np.unique(y))
Example #6
    def fit(self, X, z, split_size):
        """Searches for the optimal hyperparameter combination."""
        # model and params are now lists --> pass the name instead.
        # Setup
        self.results = {self.name: []}
        self.train_scores_mse, self.test_scores_mse = [], []
        self.train_scores_r2, self.test_scores_r2 = [], []

        # Splitting our original dataset into test and train.
        X_train, X_test, z_train, z_test = train_test_split(
            X, z, split_size=split_size, random_state=105)

        " Returning these dictionaries to plot mse vs model"
        self.mse_test = []
        self.mse_train = []
        self.r2_test = []
        self.r2_train = []
        self.z_pred = []
        self.coef_ = []
        # For a given model we test all the parameters and return the result.
        for param in self.params:
            estimator = self.model(lmd=param)
            # Train a model for this pair of lambda and random state
            estimator.fit(X_train, z_train)
            temp = estimator.predict(X_test)
            temp2 = estimator.predict(X_train)
            self.mse_test.append(mean_squared_error(z_test, temp))
            self.mse_train.append(mean_squared_error(z_train, temp2))
            self.r2_test.append(r2_score(z_test, temp))
            self.r2_train.append(r2_score(z_train, temp2))
            self.z_pred.append(temp)
            self.coef_.append(estimator.coef_)
        return self
Example #7
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)
    model = LinearRegression(n_iterations=100)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="training error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel("Mean Squared Error")
    plt.xlabel("Iterations")
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % mse)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap("viridis")

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color="black", linewidth=2, label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel("Day")
    plt.ylabel("Temperature in Celsius")
    plt.legend((m1, m2), ("Training data", "Test data"), loc="lower right")
    plt.show()
Example #8
def load_data(feature_dict_path, df, test_fold, n_folds):
    def _comb_features(base_f, other_f):
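        # Pairwise feature vector: both raw feature sets, their element-wise
        # squared difference, and their Spearman correlation.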
        return np.concatenate([
            base_f,
            other_f,
            np.square(base_f - other_f),
            [spearmanr(base_f, other_f)[0]],
            # [np.square(base_f - other_f).sum()],
            # [pearsonr(base_f, other_f)[0]],
        ])

    def _get_features(_df, feature_dict):
        features = []
        y = []
        for _, row in _df.iterrows():
            datasetId, baseSf, baseAdduct, otherSf, otherAdduct, rank = row
            base_ion = '.'.join((baseSf, baseAdduct.replace('+', 'p').replace('-', 'm')))
            other_ion = '.'.join((otherSf, otherAdduct.replace('+', 'p').replace('-', 'm')))
            base_img = '.'.join((datasetId, base_ion))
            other_img = '.'.join((datasetId, other_ion))

            base_features = feature_dict[next(key for key in feature_dict.keys() if base_img in key)]
            other_features = feature_dict[next(key for key in feature_dict.keys() if other_img in key)]
            features.append(_comb_features(base_features, other_features))
            y.append(rank / 10.)
        return np.array(features), np.array(y)

    train_df, test_df = train_test_split(df, test_fold=test_fold, n_folds=n_folds)
    with open(feature_dict_path, 'rb') as f:
        feature_dict = pickle.load(f)

    return _get_features(train_df, feature_dict), _get_features(test_df, feature_dict), test_df.index
Example #9
def run_random_forest(data, target_column):
    st.sidebar.title('Choose parameters for Random Forest')
    ts = st.sidebar.slider('Training size', min_value=0.0, max_value=1.0, step=0.01, value=0.7)
    n_estimators = st.sidebar.number_input('n_estimators', min_value=1, max_value=1000, step=1)
    n_features = st.sidebar.number_input('n_features', min_value=1, max_value=len(data.columns)-1, step=1, value=len(data.columns)-1)
    bootstrap_size = st.sidebar.number_input('bootstrap_size', min_value=1, max_value=int(len(data)*ts), step=1, value=int(len(data)*ts))
    if st.sidebar.checkbox('Specify Depth'):
        max_depth = st.sidebar.number_input('max_depth', min_value=1, max_value=int(len(data)*ts), step=1)
    else:
        max_depth = None
    run_status = st.sidebar.button('Run Algorithm')
    if run_status:
        with st.spinner('Running...'):
            x_train, x_test, y_train, y_test = train_test_split(data.drop([target_column], axis=1),
                                                                data[target_column],
                                                                test_size=1 - ts)
            clf = RandomForest(n_estimators=n_estimators,
                               n_features=n_features,
                               max_depth=max_depth,
                               bootstrap_size=bootstrap_size)
            clf.fit(x_train, y_train)
            """
            ## :dart: Accuracy
            """
            st.subheader(accuracy_score(y_test, clf.predict(x_test)))
Example #10
def main():
    print('-- Gradient Boosting Regression --')

    data = pd.read_csv('TempLinkoping2016.txt', sep='\t')

    time = np.atleast_2d(data['time'].values).T
    temp = np.atleast_2d(data['temp'].values).T

    X = time.reshape((-1, 1))
    X = np.insert(X, 0, values=1, axis=1)
    y = temp[:, 0]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = GBDTRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)

    print('Mean Squared Error:', mse)
    
    # Plot the results
    m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right')
    plt.show()
Example #11
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)
    print("X_train.shape:", X_train.shape)
    print("Y_train.shape:", y_train.shape)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Random Forest",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
Example #12
def main():
    @run_time
    def batch():
        print("Tesing the accuracy of LinearRegression(batch)...")
        # Train model
        reg = LinearRegression()
        reg.fit(X=X_train, y=y_train, lr=0.02, epochs=5000)
        # Model accuracy
        get_r2(reg, X_test, y_test)

    @run_time
    def stochastic():
        print("Tesing the accuracy of LinearRegression(stochastic)...")
        # Train model
        reg = LinearRegression()
        reg.fit(X=X_train, y=y_train, lr=0.001, epochs=1000,
                method="stochastic", sample_rate=0.5)
        # Model accuracy
        get_r2(reg, X_test, y_test)

    # Load data
    X, y = load_boston_house_prices()
    X = min_max_scale(X)
    # Split data randomly, train set rate 70%
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    batch()
    stochastic()
Example #13
def main():
    df = pd.read_csv('fishiris.csv')
    df['target'] = df.apply(create_target, axis=1)
    y = df['target'].to_numpy()
    df = df.drop(['Name', 'target'], axis=1)
    feature_names = df.columns.tolist()
    X = df.to_numpy()
    target_names = ['setosa', 'versicolor', 'virginica']

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        shuffle=True)
    print('X_train\n', X_train)
    print('y_train\n', y_train)
    print('X_test\n', X_test)
    print('y_test\n', y_test)
    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print('-' * 40, 'print_tree', '-' * 40)
    clf.print_tree(feature_names=feature_names)
    print('-' * 40, 'print_tree', '-' * 40)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Decision Tree",
                      accuracy=accuracy,
                      legend_labels=target_names)
    Plot().plot_in_3d(X_test, y_pred)
Example #14
def model(labels, data, parent_id, go_id):

    # Training
    batch_size = 64
    nb_epoch = 64

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    if len(train_data) < 100:
        raise Exception("No training data for " + go_id)

    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(input_dim=20,
                            input_length=MAXLEN,
                            nb_filter=320,
                            filter_length=20,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=32,
                            filter_length=32,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=8))
    model.add(LSTM(128))
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(
        loss='binary_crossentropy', optimizer='rmsprop', class_mode='binary')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=7, verbose=1)

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print('Loading weights')
    model.load_weights(model_path)
    pred_data = model.predict_classes(
        test_data, batch_size=batch_size)
    return classification_report(list(test_label_rep), pred_data)
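Several of the Keras examples (#14, #39, #41, #42, #43, ...) rely on yet another local train_test_split that takes (labels, data, batch_size) and is unpacked as ((train_labels, train_data), (test_labels, test_data)). A rough sketch under those assumptions, trimming each part to whole batches, is shown below; the real helper in those repositories may behave differently.

import numpy as np

def train_test_split(labels, data, batch_size=1, split=0.8):
    # Hypothetical reconstruction: split (labels, data) into train/test pairs,
    # keeping only whole batches in each part.
    labels, data = np.asarray(labels), np.asarray(data)
    n_train = int(len(labels) * split) // batch_size * batch_size
    n_test = (len(labels) - n_train) // batch_size * batch_size
    train = (labels[:n_train], data[:n_train])
    test = (labels[n_train:n_train + n_test], data[n_train:n_train + n_test])
    return train, test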
Example #15
def validation_curve():

    # Test decision tree using cross validation

    # Preprocess data
    data = pd.read_csv('./arrhythmia.data', header = None, na_values = '?')
    data = fill_na(data = data)

    features = data.columns.tolist()[:-1]
    target = data.columns.tolist()[-1]

    feature_types = implicit_feature_type_inferrence(data = data[features], num_unique_values = 3)

    train_set, test_set = train_test_split(data = data, train_fraction = 0.8, reindex = False, random_seed = 0)

    max_depth_cv = list()
    training_error_cv = list()
    test_error_cv = list()

    # Start cross-validation
    for i in range(2,21,2):
        tree_max_depth = i
        print("Tree Max Depth: %d" %tree_max_depth)
        max_depth_cv.append(tree_max_depth)
        tree = DecisionTree(tree_max_depth)

        training_error, test_error = cross_validation(data = data, features = features, target = target, feature_types = feature_types, model = tree, fold = 3, random_seed = 0)
        training_error_cv.append(training_error)
        test_error_cv.append(test_error)
        print("Training Error: %f" %training_error)
        print("Test Error: %f" %test_error)

    plot_curve(max_depth = max_depth_cv, training_error = training_error_cv, test_error = test_error_cv)
Example #16
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test,
                      y_pred,
                      title="Logistic Regression",
                      accuracy=accuracy,
                      legend_labels=data.target_names)
Example #17
def main():

    args = argument_parser()
    try:
        ratings, movies_data, status = loading_data(args.data_path)
        if status == False:
            return "Path doesn't exist"

        user_rating = ratings.pivot(index="userId",
                                    columns="movieId",
                                    values="rating")
        user_rating = user_rating.fillna(0)
        user_rating = user_rating.values

        train = np.zeros(user_rating.shape)
        test = np.zeros(user_rating.shape)

        show_rating(ratings)

        analyis(ratings, movies_data)
        train_test_split(user_rating, train, test)
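        # The project's train_test_split appears to fill the preallocated
        # train/test rating matrices in place.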

        Itrain = indicator_matrix(train)
        Itest = indicator_matrix(test)

        print("#" * 100)
        print("\n\nNon Negative Matrix Factorization  : \n")
        worker(user_rating, train, test, Itrain, Itest, movies_data, 10000,
               "GD")

        print("#" * 100)
        print("\n\nNon Negative Matrix Factorization With Regularization : \n")
        worker(user_rating, train, test, Itrain, Itest, movies_data, 5000,
               "R_GD")

        print("#" * 100)
        print("\n\n!!!!!!!!!!!!! Different type of Optimizer !!!!!!!!!!!!")
        print("\n\nSliding Window protocol for optimizer : ")
        optimizer_function(user_rating, train, test, Itrain, Itest,
                           movies_data)

        return "Successfully build"

    except Exception as e:
        print("Caught an Exception : ", e)
        print("Build Failed !!!!!!!!!!!!!!")
def main():
    x = y = {"start": -5, "end": 5, "steps": 0.5}
    data = generate_gauss_data(x, y)
    inputs, targets = data["inputs"], data['targets']
    x_train, x_val, y_train, y_val = train_test_split(inputs, targets, 0.20)

    #################### NETWORK SIZE ANALYSIS #####################
    # losses, batch_losses = [], []
    # for layer_size in range(1, 25):
    #     network = nn.NueralNet(x_train, y_train, hidden_layer_size = layer_size, output_layer_size = 1,
    #                            is_binary = False)
    #     nnTrainResults = network.train_network(epochs = 400)
    #
    #     results = network.fowardPass(inputs, targets, include_bias = True)
    #     losses.append(results['loss'])
    #
    #     batch_out = np.reshape(results["Yp"], (data['size'], data['size']))
    #     # plot_gaussian(data, batch_out, f"Gaussian Out - hidden_layer_size {layer_size}",
    #     #               gif = {"epoch": 1000, "seq": 0})
    #     batch_losses.append(nnTrainResults['batch_losses'])
    #
    # for i in [2, 4, 5, 7, 10, 15, 18, 23]:
    #     # Plot results.
    #     plt.plot(batch_losses[i], label = f" N. Hidden Layer {i}")
    #     plt.xlabel("Epochs")
    #     plt.ylabel("Mean Squared Error loss")
    #     plt.legend(loc = 'best')
    # plt.show()

    #################### SPLIT ANALYSIS #########################
    split_ratios = [0.8]
    hidden_layer_shape = 15

    for split in split_ratios:
        x_train, x_val, y_train, y_val = train_test_split(inputs, targets, split)
        network = nn.NueralNet(x_train, y_train, hidden_layer_size = hidden_layer_shape, output_layer_size = 1,
                               is_binary = False)
        losses = network.train_network(1000, inputs, targets)

        plt.plot(losses["val_losses"], label = "Validation loss")
        plt.plot(losses["epoch_losses"], label = "Train loss")
        plt.xlabel("Epochs")
        plt.ylabel("Mean Squared Error loss")
        plt.legend()
        plt.title(f"Data Split - Training: {round((1 - split) * 100)}%")
        plt.show()
Example #19
    def nested_crossvalidation(self):
        if self.logging:
            print('Nested crossvalidation started.')
            print('Number of train_valid-test splits: ' + str(self.args['num_of_test_splits']))
            print('Number of train-valid splits:' + str(self.args['num_of_valid_splits']))
        
        test_split_groups = np.array(list(self.df['CLUSTER']))
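        # Outer loop: hold out each test fold once; inner loop: train/valid folds used to
        # pick hyperparameters, which are then retrained on train+valid and scored on the held-out fold.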
        train_valid_test = train_test_split(test_split_groups, num_splits = self.args['num_of_test_splits'])
        outer = torch.zeros(self.args['num_of_test_splits'])
        for i, (train_valid_data, test_data) in enumerate(train_valid_test):
            # print('Fold ' + str(i+1))
            # print('Train dataset: ' + str(len(train_valid_data)))
            # print('Test dataset: ' + str(len(test_data)))
            # train model
            
            df_train_valid = self.df.iloc[sorted(train_valid_data)]
            #print(df_train_valid.index.tolist())
            df_train_valid = df_train_valid.set_index(pd.Index(list(range(len(df_train_valid)))))
            #print(df_train_valid.index.tolist())
            #break
            valid_split_groups = np.array(list(df_train_valid['CLUSTER']))
            train_valid = train_test_split(valid_split_groups, num_splits = self.args['num_of_valid_splits'])
            inner = torch.zeros(len(self.hyperparameters), self.args['num_of_valid_splits'])
            for j, (train_data, valid_data) in enumerate(train_valid):
                # print(j)
                # print(len(train_data))
                # print(len(valid_data))
                
                df_train = df_train_valid.iloc[sorted(train_data)]
                df_train = df_train.set_index(pd.Index(list(range(len(df_train)))))
                train_loader = self.prepare_data_loader(df_train, self.args['train_batch_size'], str(i+1) + '_' + str(j+1)+ '_' +'train')
                df_valid = df_train_valid.iloc[sorted(valid_data)]
                df_valid = df_valid.set_index(pd.Index(list(range(len(df_valid)))))
                valid_loader = self.prepare_data_loader(df_valid, self.args['valid_batch_size'], str(i+1) + '_' + str(j+1)+ '_' +'valid')
                
                for k, hp in enumerate(self.hyperparameters):
                    
                    inner[k][j] = self.train(train_loader, hp, save_model = False, valid_loader=valid_loader)

            hp_best = self.hyperparameters[self.tune_hparam(inner)]
            df_test = self.df.iloc[sorted(test_data)]
            trainvalid_loader = self.prepare_data_loader(df_train_valid, self.args['train_batch_size'], str(i+1) + '_' + 'trainvalid')
            test_loader = self.prepare_data_loader(df_test, self.args['test_batch_size'], str(i+1) + '_' +'test')
            outer[i] = self.train(trainvalid_loader, hp_best, save_model=True, valid_loader=test_loader)
            
        return {'mean_objective_loss': torch.mean(outer), 'std_objective_loss': torch.std(outer)}
Example #20
def test_split():
    n = int(len(x)*0.8)
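    # Patch numpy.random.choice so the split indices are deterministic;
    # train and test should then reassemble exactly into X and z.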
    with mock.patch("numpy.random.choice", return_value=np.arange(n)):
        X_train, X_test, z_train, z_test = train_test_split(X, z, split_size=0.2, random_state=1)
        print(np.shape(X))
        print("--------------")
        print(np.shape(X_train.tolist()+X_test.tolist()))
        assert (np.allclose(X_train.tolist()+X_test.tolist(), X) and np.allclose(z_train.tolist()+ z_test.tolist(), z  ))
Example #21
def main(args):
    torch.multiprocessing.set_start_method('spawn')
    torch.distributed.init_process_group(backend="nccl")

    with open(args.config_path, 'r') as file:
        config = AttrDict(json.load(file))

    set_seed(config.seed + torch.distributed.get_rank())

    train_data_csv, test_data_csv = train_test_split(
        config.train_data_csv_path, config.n_test_experiments)

    train_image_ids, train_labels = get_data(train_data_csv, is_train=True)
    train_transform = TrainTransform(config.crop_size)
    train_dataset = CellsDataset(config.train_images_dir, train_image_ids,
                                 train_labels, train_transform)

    test_image_ids, test_labels = get_data(test_data_csv, is_train=True)
    test_dataset = CellsDataset(config.train_images_dir, test_image_ids,
                                test_labels)

    if torch.distributed.get_rank() == 0:
        print(
            f'Train size: {len(train_dataset)}, test_size: {len(test_dataset)}'
        )

    encoder = Encoder(config.n_image_channels, config.n_emedding_channels,
                      config.n_classes, config.encoder_model,
                      config.encoder_pretrained, config.encoder_dropout,
                      config.encoder_scale)

    if config.restore_checkpoint_path is not None:
        state_dict = torch.load(config.restore_checkpoint_path,
                                map_location='cpu')
        encoder.load_state_dict(state_dict, strict=False)

    decoder = Decoder(config.n_emedding_channels, config.n_image_channels,
                      config.n_classes, config.decoder_n_channels)

    trainer = Trainer(encoder=encoder,
                      decoder=decoder,
                      optimizer_params={
                          'lr': config.lr,
                          'weight_decay': config.weight_decay,
                          'warmap': config.warmap,
                          'amsgrad': config.amsgrad
                      },
                      amp_params={
                          'opt_level': config.opt_level,
                          'loss_scale': config.loss_scale
                      },
                      rank=args.local_rank,
                      n_jobs=config.n_jobs)
    trainer.train(train_data=train_dataset,
                  n_epochs=config.n_epochs,
                  batch_size=config.batch_size,
                  test_data=test_dataset,
                  best_checkpoint_path=config.best_checkpoint_path)
Example #22
def main(df_path, dates, params_path, suffix):

    df = pd.read_csv(df_path)
    X_train, X_test, y_train, y_test, _, ids_test = train_test_split(df, dates)
    b_params = best_params(params_path)
    preds = run_model(X_train, y_train, X_test, y_test, b_params)
    save_preds(X_test, y_test, preds, ids_test, suffix)

    print('predictions_saved')
Example #23
def main():

    # Load temperature data
    data = pd.read_csv(
        'https://raw.githubusercontent.com/eriklindernoren/ML-From-Scratch/master/mlfromscratch/data/TempLinkoping2016.txt',
        sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 13

    model = LassoRegression(degree=15,
                            reg_factor=0.05,
                            learning_rate=0.001,
                            n_iterations=4000)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n),
                         model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (mse, 0.05))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X,
             y_pred_line,
             color='black',
             linewidth=2,
             label="Prediction")
    plt.suptitle("Lasso Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
Example #24
    def load_data(self, filename):
        df = pd.read_csv(filename, header=None)
        dfx = df.iloc[:, :-1]
        dfx = (dfx - dfx.mean()) / (dfx.max() - dfx.min())
        X = dfx.values
        y = df.iloc[:, -1].values
        self.d = X.shape[1]
        self.out = self.d
        self.X_train, self.y_train, self.X_test, self.y_test = train_test_split(X, y)
        self.y_test = self.y_test.reshape(-1, 1)
Example #25
def _training(Model):
    features, targets = engine.get_features(Model, train=True)
    X_train, X_test, y_train, y_test = utils.train_test_split(features,
                                                              targets,
                                                              test_size=0.3)
    classifier = engine.train_fn(X_train, y_train)
    utils.save_model(classifier, config.MODEL_PATH)
    predictions = engine.eval_fn(classifier, X_test)
    accuracy = utils.accuracy_score(predictions, y_test)
    print("Accuracy Score:", accuracy)
Example #26
def test_unit():
    print('\n===================================================================')
    print('Unit test: Sparse Representation-based Classification (SRC)')
    dataset = 'myYaleB'
    N_train = 15
    dataset, Y_train, Y_test, label_train, label_test = \
           utils.train_test_split(dataset, N_train)
    clf = SRC(lamb = 0.01)
    clf.fit(Y_train, label_train)
    clf.evaluate(Y_test, label_test)
Example #27
def get_train_valid_dataset(data_dir):
	training_filenames, trainY = utils.load_train_filename_and_labels(data_dir)
	training_filenames, valid_filenames, trainY, validY = utils.train_test_split(training_filenames, trainY, split_ratio=0.1)
	
	trsfms = transforms.Compose([
				transforms.RandomCrop(256, pad_if_needed=True, padding_mode='symmetric'),
				transforms.RandomHorizontalFlip(),
				transforms.RandomRotation(15),
				transforms.ToTensor(),
				])
	return MyDataset(os.path.join(data_dir, 'training'), transforms=trsfms), MyDataset(os.path.join(data_dir, 'validation'), transforms=trsfms)
Example #28
def main():
    print("Tesing the accuracy of NaiveBayes...")
    # Load data
    X, y = load_breast_cancer()
    # Split data randomly, train set rate 70%
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    # Train model
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    # Model accuracy
    get_acc(clf, X_test, y_test)
Example #29
def main():
    parser = argparse.ArgumentParser("Preprocess the data")
    parser.add_argument('task', nargs='?', type=str)
    parser.add_argument('--path', '-p', dest='path', action='store', type=str)
    parser.add_argument('--patch-size',
                        dest='patch_size',
                        action='store',
                        type=int,
                        default=[],
                        nargs='+')
    parser.add_argument('--canny-sigma',
                        dest='canny_sigma',
                        action='store',
                        type=float)
    parser.add_argument('--threshold', type=int)
    parser.add_argument('--color', dest='color', action='store_true')
    parser.add_argument('--binarized', dest='color', action='store_false')
    parser.add_argument('--val-size', dest='val_size', type=float)
    parser.set_defaults(color=True)
    parser.add_argument('--patch-stride',
                        dest='patch_stride',
                        type=int,
                        default=256)
    parser.add_argument('--padding', type=int, default=0)

    args = parser.parse_args()

    if args.task == 'patchify':
        # split images into patches
        utils.dataset_to_patches(args.path,
                                 args.patch_size,
                                 stride=args.patch_stride,
                                 canny_sigma=args.canny_sigma,
                                 threshold=args.threshold,
                                 color=args.color,
                                 padding=args.padding)
    if args.task == 'split-writer-dirs':
        utils.prepare_files_of_trainingset(args.path)

    if args.task == 'train-val-split':
        utils.train_test_split(args.path, args.val_size)
Example #30
def test_unit():
    print(
        '\n==================================================================='
    )
    print('Mini Unit test: COPAR')
    dataset = 'myYaleB'
    N_train = 15
    dataset, Y_train, Y_test, label_train, label_test = \
           utils.train_test_split(dataset, N_train)
    clf = COPAR(k=10, k0=5, lambd=0.001, eta=0.01)
    clf.fit(Y_train, label_train, iterations=100, verbose=True)
    clf.evaluate(Y_test, label_test)
Example #31
def train_model():
    Model = model.BertForFakeNewsDetection()
    features, labels = Model.get_features(train=True)
    X_train, X_test, y_train, y_test = utils.train_test_split(features,
                                                              labels,
                                                              test_size=0.3)
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("Validation Accuracy:",
          round(clf.score(X_test, y_test), 4) * 100, "%")
    utils.save_model(clf, config.MODEL_PATH)
Example #32
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)
    print("X_train.shape:", X_train.shape)
    print("Y_train.shape:", y_train.shape)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    print("X_train",X_train.shape)
    clf = NaiveBayes()
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main():

    print ("-- Regression Tree --")

    # Load temperature data
    data = pd.read_csv('../TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].as_matrix()).T
    temp = np.atleast_2d(data["temp"].as_matrix()).T

    X = standardize(time)        # Time. Fraction of the year [0, 1]
    y = temp[:, 0]  # Temperature. Reduce to one-dim

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = RegressionTree()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    mse = mean_squared_error(y_test, y_pred)

    print ("Mean Squared Error:", mse)

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10)
    plt.suptitle("Regression Tree")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right')
    plt.show()
Example #36
def main():

    print ("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred,
        title="Decision Tree",
        accuracy=accuracy,
        legend_labels=data.target_names)
Example #37
def model(labels, data):
    # set parameters:
  
    # Training
    batch_size = 100
    nb_epoch = 100

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    test_label, test_data = test
    test_label_rep = test_label
    shap=numpy.shape(train_data)

    print('X_train shape: ',shap)
    print('X_test shape: ',test_data.shape)
    model = Sequential()
    model.add(Dense(shap[1], activation='relu', input_dim=shap[1]))
    model.add(Highway())
    model.add(Dense(1,activation='sigmoid'))
    print('compiling model')
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print('running at most 60 epochs')
    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(train_data, train_label, batch_size=batch_size,nb_epoch=nb_epoch,shuffle=True, show_accuracy=True, 
               validation_split=0.3,callbacks=[checkpointer,earlystopper])

    # # Loading saved weights
    print('Loading weights')
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    print(tresults)
    return classification_report(list(test_label_rep), pred_data)
Example #38
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    print(y_train)

    clf = GBDTClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)


    Plot().plot_in_2d(X_test, y_pred,
        title="Gradient Boosting",
        accuracy=accuracy,
        legend_labels=data.target_names)
Example #39
def model(labels, data, go_id):
    # set parameters:
    batch_size = 64
    nb_epoch = 10
    lstm_size = 128

    data1, data2 = data

    train1, test1 = train_test_split(
        labels, data1, batch_size=batch_size)
    train2, test2 = train_test_split(
        labels, data2, batch_size=batch_size)
    train_label, train1_data = train1
    train_label, train2_data = train2

    test_label, test1_data = test1
    test_label, test2_data = test2

    test_label_rep = test_label
    # 256 0.5 256
    model = Graph()
    model.add_input(name='input1', batch_input_shape=(batch_size, 20))
    model.add_input(name='input2', batch_input_shape=(batch_size, 3))
    model.add_node(Convolution1D(
        nb_filter=32,
        filter_length=20,
        border_mode='valid',
        activation='relu',
        subsample_length=1), name='conv1', input='input1')
    model.add_node(MaxPooling1D(
        pool_length=10, stride=10), name='pool1', input='conv1')
    model.add_node(
        LSTM(lstm_size), name='lstm1', input='pool1')
    model.add_node(Convolution1D(
        nb_filter=32,
        filter_length=3,
        border_mode='valid',
        activation='relu',
        subsample_length=1), name='conv2', input='input2')
    model.add_node(MaxPooling1D(
        pool_length=2), name='pool2', input='conv2')
    model.add_node(
        LSTM(lstm_size), name='lstm2', input='pool2')
    model.add_node(
        Dense(1024),
        name='dense1', inputs=['lstm1', 'lstm2'])
    model.add_node(Dropout(0.25), name='dropout', input='dense1')
    model.add_node(Activation('relu'), name='relu', input='dropout')
    model.add_node(
        Dense(1, activation='sigmoid'), name='dense2', input='relu')
    model.add_output(name='output', input='dense2')

    # try using different optimizers and different optimizer configs
    model.compile('adadelta', {'output': 'binary_crossentropy'})
    model_path = DATA_ROOT + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)
    model.fit(
        {'input1': train1_data, 'input2': train2_data, 'output': train_label},
        batch_size=batch_size,
        nb_epoch=nb_epoch,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    print('Loading weights')
    model.load_weights(model_path)

    pred_data = model.predict(
        {'input1': test1_data, 'input2': test2_data}, batch_size=batch_size)
    pred_data = numpy.round(numpy.array(pred_data['output']))
    # Loading saved weights
    # Saving the model
    # print 'Saving the model for ' + go_id
    # model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
Example #40
reload(utils)
reload(algo_param)
reload(param)

# TODO Add unlabeled subset functionality
# TODO Add parallelization

#####################   PERFORM GRID SEARCH    ########################
if param.optimize_params:

    # parse data
    all_X, all_Y = utils.parse(param.data_file, param.feature_file,
                               param.response_var, debug_limit=param.debug_limit)
    X, Y = utils.labeled_subset(all_X, all_Y)
    X, Y = utils.subsample((X, Y), param.labeled_subsample)
    (X_train, X_test, Y_train, Y_test) = utils.train_test_split(X, Y, test_size=param.test_size)

    # pickle data for use in other files
    saved_data = (X_train, X_test, Y_train, Y_test)
    utils.pickler(saved_data, param.optimization_data_pickle)

    # make meta pipeline for grid searching
    pipeline, parameter_space = make_meta_pipeline([
        ('imputer', param.imputer_params),
        ('scaler', param.scaler_params),
        ('dim_reducer', param.dim_reducer_params),
        ('regressor', param.regressor_params)
    ], all_X, all_Y)

    print("Opening logfiles")
    sys.stdout.flush()
Example #41
def model(labels, data, parent_id, go_id):
    # Convolution
    filter_length = 20
    nb_filter = 32
    pool_length = 10
    global nb_classes

    # LSTM
    lstm_output_size = 128

    # Training
    batch_size = 64
    nb_epoch = 64

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    # sample_weight = [1.0 if y == 1 else 1.0 for y in train_label]
    # sample_wseight = numpy.array(sample_weight, dtype='float32')

    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(input_dim=20,
                            input_length=MAXLEN,
                            nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=10))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=10))
    model.add(LSTM(lstm_output_size))
    # model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    # parent_model_path = DATA_ROOT + 'data/' + parent_id + '.hdf5'
    # if os.path.exists(parent_model_path):
    #     print 'Loading parent model weights'
    #     model.load_weights(parent_model_path)
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print('Loading weights')
    model.load_weights(DATA_ROOT + parent_id + '/' + go_id + '.hdf5')
    score = model.evaluate(
        test_data, test_label, show_accuracy=True, verbose=1)
    print('Score: ', score[0])
    print('Accuracy: ', score[1])
Example #42
def model(labels, data, parent_id, go_id):
    # set parameters:
    # Convolution
    nb_filter = 64
    nb_row = 5
    nb_col = 1

    pool_length = 3

    # Training
    batch_size = 64
    nb_epoch = 24

    lstm_size = 70

    data1, data2 = data

    train1, test1 = train_test_split(
        labels, data1, batch_size=batch_size, split=0.8)
    train_label, train1_data = train1

    train2, test2 = train_test_split(
        labels, data2, batch_size=batch_size, split=0.8)
    train_label, train2_data = train2

    if len(train1_data) < 100:
        raise Exception("No training data for " + go_id)

    test_label, test1_data = test1
    test_label, test2_data = test2
    test_label_rep = test_label

    first = Sequential()
    first.add(Convolution2D(
        nb_filter, nb_row, nb_col,
        border_mode='valid',
        input_shape=(1, MAXLEN, 20)))
    first.add(Activation('relu'))
    first.add(Convolution2D(2 * nb_filter, nb_row, nb_col))
    first.add(Activation('relu'))
    # first.add(Convolution2D(nb_filter, nb_row, nb_col))
    # first.add(Activation('relu'))
    first.add(MaxPooling2D(pool_size=(pool_length, 1)))
    first.add(Dropout(0.5))
    first.add(Flatten())

    second = Sequential()
    second.add(
        LSTM(lstm_size, return_sequences=True, input_shape=(MAXLEN, 20)))
    second.add(Dropout(0.25))
    # second.add(LSTM(lstm_size, return_sequences=True))
    # second.add(Dropout(0.25))
    second.add(LSTM(lstm_size, return_sequences=False))
    second.add(Dropout(0.25))
    second.add(Flatten())

    model = Sequential()
    model.add(Merge([first, second], mode='concat'))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    adam = Adam(lr=0.00001)
    model.compile(
        loss='binary_crossentropy', optimizer=adam, class_mode='binary')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(
        X=[train1_data, train2_data], y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.3,
        callbacks=[checkpointer, earlystopper])

    model.load_weights(model_path)
    pred_data = model.predict_classes(
        [test1_data, test2_data],
        batch_size=batch_size)
    return classification_report(list(test_label_rep), pred_data)
Example #43
def model(labels, data, go_id):
    # set parameters:

    # Convolution
    filter_length = 7
    nb_filter = 64
    pool_length = 2
    k=7
    # LSTM
    lstm_output_size = 70

    # Training
    batch_size = 32
    nb_epoch = 12

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(
        input_dim=20,
        input_length=500,
        nb_filter=320,
        filter_length=20,
        border_mode="valid",
        activation="relu",
        subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.2))
    model.add(Convolution1D(
        nb_filter=320,
        filter_length=20,
        border_mode="valid",
        activation="relu",
        subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Highway())
    model.add(Dropout(0.5))
    model.add(Dense(output_dim=1000))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=1))
    model.add(Activation('sigmoid'))
    print('compiling model')
    model.compile(
        loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print('running at most 60 epochs')
    model_path = DATA_ROOT + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(
        train_data, train_label, batch_size=batch_size,
        nb_epoch=60, shuffle=True, show_accuracy=True,
        validation_split=0.3,
        callbacks=[checkpointer, earlystopper])

    # # Loading saved weights
    print('Loading weights')
    model.load_weights(model_path)
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    # tresults = model.evaluate(test_data, test_label,show_accuracy=True)
    # print tresults
    return classification_report(list(test_label_rep), pred_data)
Example #44
def model(labels, data, go_id):
    # set parameters:
   
    # Convolution
    filter_length = 7
    nb_filter = 64
    pool_length = 2
    k=7
    # LSTM
    lstm_output_size = 70

    # Training
    batch_size = 30
    nb_epoch = 12

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    test_label, test_data = test
    test_label_rep = test_label


    nb_filters = 100
    filter_lenghts = [7,10,12]
  
    first = Sequential()
    first.add(Convolution1D(input_dim=20,
                        input_length=500,
                        nb_filter=nb_filters,
                        filter_length=7,
                        border_mode="valid",
                        activation="relu",
                        subsample_length=1))
    first.add(MaxPooling1D(pool_length=3, stride=3))
    first.add(LSTM(input_dim=100, output_dim=100))
    second = Sequential()
    second.add(Convolution1D(input_dim=20,
                        input_length=500,
                        nb_filter=nb_filters,
                        filter_length=10,
                        border_mode="valid",
                        activation="relu",
                        subsample_length=1))
    second.add(Activation('relu'))
    second.add(MaxPooling1D(pool_length=5, stride=5))
    second.add(LSTM(input_dim=100, output_dim=100))


    third = Sequential()
    third.add(Convolution1D(input_dim=20,
                        input_length=500,
                        nb_filter=nb_filters,
                        filter_length=12,
                        border_mode="valid",
                        activation="relu",
                        subsample_length=1))
    third.add(Activation('relu'))
    third.add(MaxPooling1D(pool_length=6, stride=6))
    third.add(LSTM(input_dim=100, output_dim=100))


    model = Sequential()
    model.add(Merge([first, second, third], mode='concat'))
    model.add(Dense(1000))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")


    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(X=[train_data, train_data, train_data], y=train_label, batch_size=100, nb_epoch=60, shuffle=True, show_accuracy=True, 
        validation_split=0.3, callbacks=[checkpointer,earlystopper])




    # Now concatenate all the results and do whatever you want with the new sentence embedding
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(
        [test_data, test_data, test_data], batch_size=batch_size)
    # Saving the model
    #tresults = model.evaluate(test_data, test_label,show_accuracy=True)
    #print tresults
    return classification_report(list(test_label_rep), pred_data)
Example #45
print ("MLP")
print ("nb_epoch:{}".format(nb_epoch))
print ("Layer: {}/{}/{}".format(layer1,layer2,layer3))

Loop, Tap = utils.load_data_envelope()
Xloop, Xtap, target = utils.makeTrainingDataRegression(Loop, Tap)

# Xloop, Xtap, target = utils.makeTrainingDataRank(Loop, Tap)
# target = np_utils.to_categorical(target)
X = np.hstack((Xloop, Xtap))
X = np.float64(X)
if normFlag:
    # X = X - np.mean(X, axis=1)[:, np.newaxis]
    X = normalize(X, axis=1, norm='l2')
#X_train, X_test, target_train, target_test = cross_validation.train_test_split(X, target, test_size=test_split, random_state=0)
X_train, X_test, target_train, target_test = utils.train_test_split(X, target, test_split)
mlp = nn.MLP_regression(layer1=layer1, layer2=layer2, layer3=layer3, input_shape=X_train[0].shape)
checkpointer = ModelCheckpoint(filepath="./tmp/mlp_weights_l1{}l2{}l3{}.hdf5".format(layer1,layer2,layer3),\
               verbose=1, save_best_only=True, monitor='val_loss')
mlp.fit(X_train, target_train, batch_size=batch_size, nb_epoch=nb_epoch,\
       show_accuracy=True, verbose=2, shuffle=True, validation_data=(X_test, target_test), callbacks = [checkpointer])
# mlp.evaluate(X_test, target_test, show_accuracy=True, verbose=1)
#mlp.load_weights("./tmp/mlp_weights_l1{}l2{}l3{}.hdf5".format(layer1,layer2,layer3))
propagation = nn.Propagation(mlp)
utils.calculate_MRR(Loop, Tap, method='nn', model=mlp, prop=propagation)

# print("====test data====")
# for i in np.arange(len(target_test)):
#     error = np.abs(y[i][0]-target_test[i])
#     print("{:0.3f}, {:0.2f}, error= {:.2f}, {}").format(y[i][0], target_test[i], error, 'correct' if error < 0.5 else 'wrong')
Example #46
nb_epoch = 200
test_split = 0.2
layer1 = 10
layer2 = None
layer3 = None

print ("GRU")
print ("nb_epoch:{}".format(nb_epoch))
print ("Layer: {}/{}/{}".format(layer1,layer2,layer3))

target = np.load('./target.npy')
Loop = pd.read_pickle('./Loop.pkl')
Tap = pd.read_pickle('./Tap.pkl')  
X = np.load('./X.npy')
X_train, X_test, target_train, target_test =\
   utils.train_test_split(X, target, test_split)
if normFlag:
    Xloop = Xloop - np.mean(Xloop, axis=1)[:, np.newaxis]
    Xtap = Xtap - np.mean(Xtap, axis=1)[:, np.newaxis]
    Xloop = normalize(Xloop, axis=1, norm='l2')
    Xtap = normalize(Xtap, axis=1, norm='l2')
rnn = nn.RNN_regression(\
    layer1=layer1, layer2=layer2,\
    layer3=layer3, input_length=(len(Xloop[0]),))
#X_train, X_test, target_train, target_test =\
#  utils.train_test_split(X, target, test_split)
print("Train...")
checkpointer = ModelCheckpoint(\
    filepath="./tmp/rnn_gru_weights_l1{}l2{}l3{}.hdf5".\
    format(layer1,layer2,layer3),\
    verbose=1, save_best_only=True, monitor='val_loss')
Example #47
def model(go_id):
    # set parameters:
   
    # Convolution
    filter_length = 20
    nb_filter = 32
    pool_length = 10
    stride = 10

    # LSTM
    lstm_output_size = 96

    # Training
    batch_size = 100
    nb_epoch = 60
    patience = 5

    #Encoding
    maxlen = 500
    dictn = 20
    gram = 2

    labels, data = load_data(go_id,maxlen,dictn,gram)

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    test_label, test_data = test
    test_label_rep = test_label

    

    shap=numpy.shape(train_data)
    print('X_train shape: ',shap)
    print('X_test shape: ',test_data.shape)
    model = Sequential()
    model.add(Convolution1D(input_dim=shap[2],
                        input_length=shap[1],
                        nb_filter=nb_filter,
                        filter_length=filter_length,
                        border_mode="valid",
                        activation="relu",
                        subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=stride))
    model.add(Dropout(0.75))
    model.add(LSTM(lstm_output_size, return_sequences=True))
    model.add(LSTM(lstm_output_size))
    model.add(Dropout(0.75))
    model.add(Dense(1000))
    model.add(Dense(1,activation='sigmoid'))
    print('compiling model')
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print('running at most 60 epochs')
    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

    model.fit(train_data, train_label, batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True, show_accuracy=True, 
               validation_split=0.3,callbacks=[checkpointer,earlystopper])

    # # Loading saved weights
    print('Loading weights')
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    print(tresults)
    return classification_report(list(test_label_rep), pred_data)