def main(_):
    config = flags.FLAGS
    if config.mode == "train":
        train(config)
    elif config.mode == "data":
        train_test_split(config)
    elif config.mode == "sn":
        test_spectral_norm()
    elif config.mode == "trace":
        test_trace_approximation()
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # Convert the nominal y values to binary
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=16,
                               n_iterations=1000,
                               learning_rate=0.01)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
def main(): @run_time def batch(): print("Tesing the accuracy of LogisticRegression(batch)...") # Train model clf = LogisticRegression() clf.fit(X=X_train, y=y_train, lr=0.008, epochs=5000) # Model accuracy get_acc(clf, X_test, y_test) @run_time def stochastic(): print("Tesing the accuracy of LogisticRegression(stochastic)...") # Train model clf = LogisticRegression() clf.fit(X=X_train, y=y_train, lr=0.01, epochs=200, method="stochastic", sample_rate=0.5) # Model accuracy get_acc(clf, X_test, y_test) # Load data X, y = load_breast_cancer() X = min_max_scale(X) # Split data randomly, train set rate 70% X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10) batch() stochastic()
def __init__(self):
    '''
    Initialize and load dataset
    '''
    # Get all stop words
    self.stop_words = set(stopwords.words('english'))
    # Lemmatizer
    self.lemmatizer = WordNetLemmatizer()
    # Stemmer
    self.stemmer = PorterStemmer()
    # Tokenizer
    self.tokenizer = RegexpTokenizer(r'\w+')
    # Load data
    self.load_data()
    X_train, y_train, X_test, y_test = train_test_split(self.X, self.y)
    # Train model
    self.train(X_train, y_train)
    # Evaluate
    accuracy, f1measure = self.evaulate(X_test, y_test)
    print('Accuracy: {:.3f}, F1-score: {:.3f}'.format(accuracy, f1measure))
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    # One-hot encoding of nominal y-values
    y = to_categorical(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=8)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     learning_rate=0.001,
                     loss=CrossEntropy,
                     activation_function=Sigmoid)
    clf.fit(X_train, y_train)
    y_pred = np.argmax(clf.predict(X_test), axis=1)
    y_test = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
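# Several of the snippets in this collection call a from-scratch train_test_split(X, y,
# test_size=..., seed=..., shuffle=...) utility rather than scikit-learn's. The original
# helper is not shown here; the function below is a minimal, hypothetical shuffle-and-slice
# sketch that merely matches that calling convention (the exact seeding and rounding
# behaviour are assumptions, not the project's actual code).
import numpy as np

def train_test_split(X, y, test_size=0.5, shuffle=True, seed=None):
    """Split X and y into random train and test subsets (illustrative sketch)."""
    X, y = np.asarray(X), np.asarray(y)
    if shuffle:
        if seed is not None:
            np.random.seed(seed)
        idx = np.random.permutation(len(y))
        X, y = X[idx], y[idx]
    # Index where the test portion starts
    split_i = len(y) - int(len(y) * test_size)
    return X[:split_i], X[split_i:], y[:split_i], y[split_i:]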
def fit(self, X, z, split_size):
    """Searches for the optimal hyperparameter combination."""
    # model and params are now lists --> pass the name instead.
    # Setup
    self.results = {self.name: []}
    self.train_scores_mse, self.test_scores_mse = [], []
    self.train_scores_r2, self.test_scores_r2 = [], []

    # Splitting our original dataset into test and train.
    X_train, X_test, z_train, z_test = train_test_split(
        X, z, split_size=split_size, random_state=105)

    # Returning these dictionaries to plot MSE vs. model
    self.mse_test = []
    self.mse_train = []
    self.r2_test = []
    self.r2_train = []
    self.z_pred = []
    self.coef_ = []

    # For one model we test all the parameters and return it.
    for param in self.params:
        estimator = self.model(lmd=param)
        # Train a model for this pair of lambda and random state
        estimator.fit(X_train, z_train)
        temp = estimator.predict(X_test)
        temp2 = estimator.predict(X_train)
        self.mse_test.append(mean_squared_error(z_test, temp))
        self.mse_train.append(mean_squared_error(z_train, temp2))
        self.r2_test.append(r2_score(z_test, temp))
        self.r2_train.append(r2_score(z_train, temp2))
        self.z_pred.append(temp)
        self.coef_.append(estimator.coef_)

    return self
def main():
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)

    model = LinearRegression(n_iterations=100)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="training error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel("Mean Squared Error")
    plt.xlabel("Iterations")
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % mse)

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap("viridis")

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color="black", linewidth=2, label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel("Day")
    plt.ylabel("Temperature in Celsius")
    plt.legend((m1, m2), ("Training data", "Test data"), loc="lower right")
    plt.show()
def load_data(feature_dict_path, df, test_fold, n_folds):
    def _comb_features(base_f, other_f):
        return np.concatenate([
            base_f, other_f,
            np.square(base_f - other_f),
            [spearmanr(base_f, other_f)[0]],
            # [np.square(base_f - other_f).sum()],
            # [pearsonr(base_f, other_f)[0]],
        ])

    def _get_features(_df, feature_dict):
        features = []
        y = []
        for _, row in _df.iterrows():
            datasetId, baseSf, baseAdduct, otherSf, otherAdduct, rank = row
            base_ion = '.'.join((baseSf, baseAdduct.replace('+', 'p').replace('-', 'm')))
            other_ion = '.'.join((otherSf, otherAdduct.replace('+', 'p').replace('-', 'm')))
            base_img = '.'.join((datasetId, base_ion))
            other_img = '.'.join((datasetId, other_ion))
            base_features = feature_dict[next(key for key in feature_dict.keys() if base_img in key)]
            other_features = feature_dict[next(key for key in feature_dict.keys() if other_img in key)]
            features.append(_comb_features(base_features, other_features))
            y.append(rank / 10.)
        return np.array(features), np.array(y)

    train_df, test_df = train_test_split(df, test_fold=test_fold, n_folds=n_folds)
    with open(feature_dict_path, 'rb') as f:
        feature_dict = pickle.load(f)
    return _get_features(train_df, feature_dict), _get_features(test_df, feature_dict), test_df.index
def run_random_forest(data, target_column):
    st.sidebar.title('Choose parameters for Random Forest')
    ts = st.sidebar.slider('Training size', min_value=0.0, max_value=1.0, step=0.01, value=0.7)
    n_estimators = st.sidebar.number_input('n_estimators', min_value=1, max_value=1000, step=1)
    n_features = st.sidebar.number_input('n_features', min_value=1, max_value=len(data.columns) - 1,
                                         step=1, value=len(data.columns) - 1)
    bootstrap_size = st.sidebar.number_input('bootstrap_size', min_value=1, max_value=int(len(data) * ts),
                                             step=1, value=int(len(data) * ts))
    if st.sidebar.checkbox('Specify Depth'):
        max_depth = st.sidebar.number_input('max_depth', min_value=1, max_value=int(len(data) * ts), step=1)
    else:
        max_depth = None

    run_status = st.sidebar.button('Run Algorithm')
    if run_status:
        with st.spinner('Running...'):
            x_train, x_test, y_train, y_test = train_test_split(data.drop([target_column], axis=1),
                                                                data[target_column],
                                                                test_size=1 - ts)
            clf = RandomForest(n_estimators=n_estimators,
                               n_features=n_features,
                               max_depth=max_depth,
                               bootstrap_size=bootstrap_size)
            clf.fit(x_train, y_train)
            """
            ## :dart: Accuracy
            """
            st.subheader(accuracy_score(y_test, clf.predict(x_test)))
def main():
    print '-- Gradient Boosting Regression --'

    data = pd.read_csv('TempLinkoping2016.txt', sep='\t')

    time = np.atleast_2d(data['time'].as_matrix()).T
    temp = np.atleast_2d(data['temp'].as_matrix()).T

    X = time.reshape((-1, 1))
    X = np.insert(X, 0, values=1, axis=1)
    y = temp[:, 0]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    model = GBDTRegressor()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    cmap = plt.get_cmap('viridis')
    mse = mean_squared_error(y_test, y_pred)
    print 'Mean Squared Error:', mse

    # Plot the results
    m1 = plt.scatter(366 * X_train[:, 1], y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test[:, 1], y_test, color=cmap(0.5), s=10)
    m3 = plt.scatter(366 * X_test[:, 1], y_pred, color='black', s=10)
    plt.suptitle("Gradient Boosting Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right')
    plt.show()
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)
    print("X_train.shape:", X_train.shape)
    print("Y_train.shape:", y_train.shape)

    clf = RandomForest(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Random Forest", accuracy=accuracy, legend_labels=data.target_names)
def main(): @run_time def batch(): print("Tesing the accuracy of LinearRegression(batch)...") # Train model reg = LinearRegression() reg.fit(X=X_train, y=y_train, lr=0.02, epochs=5000) # Model accuracy get_r2(reg, X_test, y_test) @run_time def stochastic(): print("Tesing the accuracy of LinearRegression(stochastic)...") # Train model reg = LinearRegression() reg.fit(X=X_train, y=y_train, lr=0.001, epochs=1000, method="stochastic", sample_rate=0.5) # Model accuracy get_r2(reg, X_test, y_test) # Load data X, y = load_boston_house_prices() X = min_max_scale(X) # Split data randomly, train set rate 70% X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10) batch() stochastic()
def main():
    df = pd.read_csv('fishiris.csv')
    df['target'] = df.apply(create_target, axis=1)
    y = df['target'].to_numpy()
    df = df.drop(['Name', 'target'], axis=1)
    feature_names = df.columns.tolist()
    X = df.to_numpy()
    target_names = ['setosa', 'versicolor', 'virginica']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, shuffle=True)
    print('X_train\n', X_train)
    print('y_train\n', y_train)
    print('X_test\n', X_test)
    print('y_test\n', y_test)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print('-' * 40, 'print_tree', '-' * 40)
    clf.print_tree(feature_names=feature_names)
    print('-' * 40, 'print_tree', '-' * 40)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    Plot().plot_in_2d(X_test, y_pred, title="Decision Tree", accuracy=accuracy, legend_labels=target_names)
    Plot().plot_in_3d(X_test, y_pred)
def model(labels, data, parent_id, go_id):
    # Training
    batch_size = 64
    nb_epoch = 64

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    if len(train_data) < 100:
        raise Exception("No training data for " + go_id)
    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(input_dim=20,
                            input_length=MAXLEN,
                            nb_filter=320,
                            filter_length=20,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=32,
                            filter_length=32,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=8))
    model.add(LSTM(128))
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(
        loss='binary_crossentropy', optimizer='rmsprop', class_mode='binary')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=7, verbose=1)

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights(model_path)
    pred_data = model.predict_classes(
        test_data, batch_size=batch_size)
    return classification_report(list(test_label_rep), pred_data)
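# The old-Keras snippets here use yet another train_test_split contract: it takes (labels, data),
# returns ((train_label, train_data), (test_label, test_data)), and the result is consumed by
# models with fixed batch_input_shape, which suggests each partition is trimmed to a whole
# number of batches. The original helper is not shown; the sketch below is an assumed
# implementation of that contract (the default 0.8 split and the truncation rule are guesses
# inferred from the split=0.8 argument used elsewhere in this collection).
def train_test_split(labels, data, batch_size=64, split=0.8):
    """Split labels/data into batch-size-aligned train and test partitions (illustrative sketch)."""
    n = len(labels)
    train_n = int(n * split)
    # Truncate each partition to a whole number of batches
    train_n -= train_n % batch_size
    test_n = (n - train_n) - (n - train_n) % batch_size
    train = (labels[:train_n], data[:train_n])
    test = (labels[train_n:train_n + test_n], data[train_n:train_n + test_n])
    return train, test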
def validation_curve():
    # Test decision tree using cross validation
    # Preprocess data
    data = pd.read_csv('./arrhythmia.data', header=None, na_values='?')
    data = fill_na(data=data)
    features = data.columns.tolist()[:-1]
    target = data.columns.tolist()[-1]
    feature_types = implicit_feature_type_inferrence(data=data[features], num_unique_values=3)
    train_set, test_set = train_test_split(data=data, train_fraction=0.8, reindex=False, random_seed=0)

    max_depth_cv = list()
    training_error_cv = list()
    test_error_cv = list()

    # Start cross-validation
    for i in range(2, 21, 2):
        tree_max_depth = i
        print("Tree Max Depth: %d" % tree_max_depth)
        max_depth_cv.append(tree_max_depth)
        tree = DecisionTree(tree_max_depth)
        training_error, test_error = cross_validation(data=data, features=features, target=target,
                                                      feature_types=feature_types, model=tree,
                                                      fold=3, random_seed=0)
        training_error_cv.append(training_error)
        test_error_cv.append(test_error)
        print("Training Error: %f" % training_error)
        print("Test Error: %f" % test_error)

    plot_curve(max_depth=max_depth_cv, training_error=training_error_cv, test_error=test_error_cv)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy, legend_labels=data.target_names)
def main():
    args = argument_parser()
    try:
        ratings, movies_data, status = loading_data(args.data_path)
        if status == False:
            return "Path doesn't exist"
        user_rating = ratings.pivot(index="userId", columns="movieId", values="rating")
        user_rating = user_rating.fillna(0)
        user_rating = user_rating.values
        train = np.zeros(user_rating.shape)
        test = np.zeros(user_rating.shape)

        show_rating(ratings)
        analyis(ratings, movies_data)
        train_test_split(user_rating, train, test)
        Itrain = indicator_matrix(train)
        Itest = indicator_matrix(test)

        print("#" * 100)
        print("\n\nNon Negative Matrix Factorization : \n")
        worker(user_rating, train, test, Itrain, Itest, movies_data, 10000, "GD")

        print("#" * 100)
        print("\n\nNon Negative Matrix Factorization With Regularization : \n")
        worker(user_rating, train, test, Itrain, Itest, movies_data, 5000, "R_GD")

        print("#" * 100)
        print("\n\n!!!!!!!!!!!!! Different type of Optimizer !!!!!!!!!!!!")
        print("\n\nSliding Window protocol for optimizer : ")
        optimizer_function(user_rating, train, test, Itrain, Itest, movies_data)

        return "Successfully built"
    except Exception as e:
        print("Caught an Exception : ", e)
        print("Build Failed !!!!!!!!!!!!!!")
def main():
    x = y = {"start": -5, "end": 5, "steps": 0.5}
    data = generate_gauss_data(x, y)
    inputs, targets = data["inputs"], data['targets']
    x_train, x_val, y_train, y_val = train_test_split(inputs, targets, 0.20)

    #################### NETWORK SIZE ANALYSIS #####################
    # losses, batch_losses = [], []
    # for layer_size in range(1, 25):
    #     network = nn.NueralNet(x_train, y_train, hidden_layer_size = layer_size, output_layer_size = 1,
    #                            is_binary = False)
    #     nnTrainResults = network.train_network(epochs = 400)
    #
    #     results = network.fowardPass(inputs, targets, include_bias = True)
    #     losses.append(results['loss'])
    #
    #     batch_out = np.reshape(results["Yp"], (data['size'], data['size']))
    #     # plot_gaussian(data, batch_out, f"Gaussian Out - hidden_layer_size {layer_size}",
    #     #               gif = {"epoch": 1000, "seq": 0})
    #     batch_losses.append(nnTrainResults['batch_losses'])
    #
    # for i in [2, 4, 5, 7, 10, 15, 18, 23]:
    #     # Plot results.
    #     plt.plot(batch_losses[i], label = f" N. Hidden Layer {i}")
    #     plt.xlabel("Epochs")
    #     plt.ylabel("Mean Squared Error loss")
    #     plt.legend(loc = 'best')
    # plt.show()

    #################### SPLIT ANALYSIS #########################
    split_ratios = [0.8]
    hidden_layer_shape = 15
    for split in split_ratios:
        x_train, x_val, y_train, y_val = train_test_split(inputs, targets, split)
        network = nn.NueralNet(x_train, y_train, hidden_layer_size = hidden_layer_shape,
                               output_layer_size = 1, is_binary = False)
        losses = network.train_network(1000, inputs, targets)

        plt.plot(losses["val_losses"], label = "Validation loss")
        plt.plot(losses["epoch_losses"], label = "Train loss")
        plt.xlabel("Epochs")
        plt.ylabel("Mean Squared Error loss")
        plt.legend()
        plt.title(f"Data Split - Training: {round((1 - split) * 100)}%")
        plt.show()
def nested_crossvalidation(self):
    if self.logging:
        print('Nested crossvalidation started.')
        print('Number of train_valid-test splits: ' + str(self.args['num_of_test_splits']))
        print('Number of train-valid splits: ' + str(self.args['num_of_valid_splits']))

    test_split_groups = np.array(list(self.df['CLUSTER']))
    train_valid_test = train_test_split(test_split_groups, num_splits=self.args['num_of_test_splits'])
    outer = torch.zeros(self.args['num_of_test_splits'])
    for i, (train_valid_data, test_data) in enumerate(train_valid_test):
        # print('Fold ' + str(i+1))
        # print('Train dataset: ' + str(len(train_valid_data)))
        # print('Test dataset: ' + str(len(test_data)))

        # train model
        df_train_valid = self.df.iloc[sorted(train_valid_data)]
        # print(df_train_valid.index.tolist())
        df_train_valid = df_train_valid.set_index(pd.Index(list(range(len(df_train_valid)))))
        # print(df_train_valid.index.tolist())
        # break

        valid_split_groups = np.array(list(df_train_valid['CLUSTER']))
        train_valid = train_test_split(valid_split_groups, num_splits=self.args['num_of_valid_splits'])
        inner = torch.zeros(len(self.hyperparameters), self.args['num_of_valid_splits'])
        for j, (train_data, valid_data) in enumerate(train_valid):
            # print(j)
            # print(len(train_data))
            # print(len(valid_data))
            df_train = df_train_valid.iloc[sorted(train_data)]
            df_train = df_train.set_index(pd.Index(list(range(len(df_train)))))
            train_loader = self.prepare_data_loader(df_train, self.args['train_batch_size'],
                                                    str(i+1) + '_' + str(j+1) + '_' + 'train')
            df_valid = df_train_valid.iloc[sorted(valid_data)]
            df_valid = df_valid.set_index(pd.Index(list(range(len(df_valid)))))
            valid_loader = self.prepare_data_loader(df_valid, self.args['valid_batch_size'],
                                                    str(i+1) + '_' + str(j+1) + '_' + 'valid')
            for k, hp in enumerate(self.hyperparameters):
                inner[k][j] = self.train(train_loader, hp, save_model=False, valid_loader=valid_loader)

        hp_best = self.hyperparameters[self.tune_hparam(inner)]
        df_test = self.df.iloc[sorted(test_data)]
        trainvalid_loader = self.prepare_data_loader(train_valid, self.args['train_batch_size'],
                                                     str(i+1) + '_' + 'trainvalid')
        test_loader = self.prepare_data_loader(df_test, self.args['test_batch_size'],
                                               str(i+1) + '_' + 'test')
        outer[i] = self.train(trainvalid_loader, hp_best, save_model=True, valid_loader=test_loader)

    return {'mean_objective_loss': torch.mean(outer), 'std_objective_loss': torch.std(outer)}
def test_split():
    n = int(len(x) * 0.8)
    with mock.patch("numpy.random.choice", return_value=np.arange(n)):
        X_train, X_test, z_train, z_test = train_test_split(X, z, split_size=0.2, random_state=1)

    print(np.shape(X))
    print("--------------")
    print(np.shape(X_train.tolist() + X_test.tolist()))

    assert (np.allclose(X_train.tolist() + X_test.tolist(), X)
            and np.allclose(z_train.tolist() + z_test.tolist(), z))
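# test_split mocks numpy.random.choice, which implies that this project's train_test_split draws
# the training indices with np.random.choice and places the remaining rows in the test set. The
# function below is one possible implementation consistent with that test (the signature is taken
# from the calls in this collection; the seeding detail is an assumption, not the original code).
import numpy as np

def train_test_split(X, z, split_size=0.2, random_state=None):
    """Randomly split X and z, reserving a split_size fraction as the test set (illustrative sketch)."""
    if random_state is not None:
        np.random.seed(random_state)
    n = len(X)
    n_train = int(n * (1 - split_size))
    # Training rows are sampled without replacement; everything else becomes the test set
    train_idx = np.random.choice(n, size=n_train, replace=False)
    test_idx = np.setdiff1d(np.arange(n), train_idx)
    return X[train_idx], X[test_idx], z[train_idx], z[test_idx]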
def main(args):
    torch.multiprocessing.set_start_method('spawn')
    torch.distributed.init_process_group(backend="nccl")

    with open(args.config_path, 'r') as file:
        config = AttrDict(json.load(file))
    set_seed(config.seed + torch.distributed.get_rank())

    train_data_csv, test_data_csv = train_test_split(
        config.train_data_csv_path, config.n_test_experiments)

    train_image_ids, train_labels = get_data(train_data_csv, is_train=True)
    train_transform = TrainTransform(config.crop_size)
    train_dataset = CellsDataset(config.train_images_dir, train_image_ids, train_labels, train_transform)

    test_image_ids, test_labels = get_data(test_data_csv, is_train=True)
    test_dataset = CellsDataset(config.train_images_dir, test_image_ids, test_labels)

    if torch.distributed.get_rank() == 0:
        print(
            f'Train size: {len(train_dataset)}, test_size: {len(test_dataset)}'
        )

    encoder = Encoder(config.n_image_channels, config.n_emedding_channels, config.n_classes,
                      config.encoder_model, config.encoder_pretrained,
                      config.encoder_dropout, config.encoder_scale)
    if config.restore_checkpoint_path is not None:
        state_dict = torch.load(config.restore_checkpoint_path, map_location='cpu')
        encoder.load_state_dict(state_dict, strict=False)

    decoder = Decoder(config.n_emedding_channels, config.n_image_channels,
                      config.n_classes, config.decoder_n_channels)

    trainer = Trainer(encoder=encoder,
                      decoder=decoder,
                      optimizer_params={
                          'lr': config.lr,
                          'weight_decay': config.weight_decay,
                          'warmap': config.warmap,
                          'amsgrad': config.amsgrad
                      },
                      amp_params={
                          'opt_level': config.opt_level,
                          'loss_scale': config.loss_scale
                      },
                      rank=args.local_rank,
                      n_jobs=config.n_jobs)

    trainer.train(train_data=train_dataset,
                  n_epochs=config.n_epochs,
                  batch_size=config.batch_size,
                  test_data=test_dataset,
                  best_checkpoint_path=config.best_checkpoint_path)
def main(df_path, dates, params_path, suffix):
    df = pd.read_csv(df_path)
    X_train, X_test, y_train, y_test, _, ids_test = train_test_split(df, dates)
    b_params = best_params(params_path)
    preds = run_model(X_train, y_train, X_test, y_test, b_params)
    save_preds(X_test, y_test, preds, ids_test, suffix)
    print('predictions_saved')
def main():
    # Load temperature data
    data = pd.read_csv(
        'https://raw.githubusercontent.com/eriklindernoren/ML-From-Scratch/master/mlfromscratch/data/TempLinkoping2016.txt',
        sep="\t")

    time = np.atleast_2d(data["time"].values).T
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 13

    model = LassoRegression(degree=15,
                            reg_factor=0.05,
                            learning_rate=0.001,
                            n_iterations=4000)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors, label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s (given by reg. factor: %s)" % (mse, 0.05))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Lasso Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def load_data(self, filename):
    df = pd.read_csv(filename, header=None)
    dfx = df.iloc[:, :-1]
    dfx = (dfx - dfx.mean()) / (dfx.max() - dfx.min())
    X = dfx.values
    y = df.iloc[:, -1].values
    self.d = X.shape[1]
    self.out = self.d
    self.X_train, self.y_train, self.X_test, self.y_test = train_test_split(X, y)
    self.y_test = self.y_test.reshape(-1, 1)
def _training(Model):
    features, targets = engine.get_features(Model, train=True)
    X_train, X_test, y_train, y_test = utils.train_test_split(features, targets, test_size=0.3)
    classifier = engine.train_fn(X_train, y_train)
    utils.save_model(classifier, config.MODEL_PATH)
    predictions = engine.eval_fn(classifier, X_test)
    accuracy = utils.accuracy_score(predictions, y_test)
    print("Accuracy Score:", accuracy)
def test_unit():
    print('\n===================================================================')
    print('Unit test: Sparse Representation-based Classification (SRC)')
    dataset = 'myYaleB'
    N_train = 15
    dataset, Y_train, Y_test, label_train, label_test = \
        utils.train_test_split(dataset, N_train)
    clf = SRC(lamb=0.01)
    clf.fit(Y_train, label_train)
    clf.evaluate(Y_test, label_test)
def get_train_valid_dataset(data_dir):
    training_filenames, trainY = utils.load_train_filename_and_labels(data_dir)
    training_filenames, valid_filenames, trainY, validY = utils.train_test_split(
        training_filenames, trainY, split_ratio=0.1)
    trsfms = transforms.Compose([
        transforms.RandomCrop(256, pad_if_needed=True, padding_mode='symmetric'),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
    ])
    return (MyDataset(os.path.join(data_dir, 'training'), transforms=trsfms),
            MyDataset(os.path.join(data_dir, 'validation'), transforms=trsfms))
def main(): print("Tesing the accuracy of NaiveBayes...") # Load data X, y = load_breast_cancer() # Split data randomly, train set rate 70% X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10) # Train model clf = GaussianNB() clf.fit(X_train, y_train) # Model accuracy get_acc(clf, X_test, y_test)
def main():
    parser = argparse.ArgumentParser("Preprocess the data")
    parser.add_argument('task', nargs='?', type=str)
    parser.add_argument('--path', '-p', dest='path', action='store', type=str)
    parser.add_argument('--patch-size', dest='patch_size', action='store', type=int, default=[], nargs='+')
    parser.add_argument('--canny-sigma', dest='canny_sigma', action='store', type=float)
    parser.add_argument('--threshold', type=int)
    parser.add_argument('--color', dest='color', action='store_true')
    parser.add_argument('--binarized', dest='color', action='store_false')
    parser.add_argument('--val-size', dest='val_size', type=float)
    parser.set_defaults(color=True)
    parser.add_argument('--patch-stride', dest='patch_stride', type=int, default=256)
    parser.add_argument('--padding', type=int, default=0)
    args = parser.parse_args()

    if args.task == 'patchify':
        # split images into patches
        utils.dataset_to_patches(args.path, args.patch_size, stride=args.patch_stride,
                                 canny_sigma=args.canny_sigma, threshold=args.threshold,
                                 color=args.color, padding=args.padding)
    if args.task == 'split-writer-dirs':
        utils.prepare_files_of_trainingset(args.path)
    if args.task == 'train-val-split':
        utils.train_test_split(args.path, args.val_size)
def test_unit():
    print(
        '\n==================================================================='
    )
    print('Mini Unit test: COPAR')
    dataset = 'myYaleB'
    N_train = 15
    dataset, Y_train, Y_test, label_train, label_test = \
        utils.train_test_split(dataset, N_train)
    clf = COPAR(k=10, k0=5, lambd=0.001, eta=0.01)
    clf.fit(Y_train, label_train, iterations=100, verbose=True)
    clf.evaluate(Y_test, label_test)
def train_model():
    Model = model.BertForFakeNewsDetection()
    features, labels = Model.get_features(train=True)
    X_train, X_test, y_train, y_test = utils.train_test_split(features, labels, test_size=0.3)
    clf = svm.SVC()
    # Fit on the training split only, so the held-out score below is a true validation accuracy
    clf.fit(X_train, y_train)
    print("Validation Accuracy:", round(clf.score(X_test, y_test), 4) * 100, "%")
    utils.save_model(clf, config.MODEL_PATH)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    print("X_train", X_train.shape)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    Plot().plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main(): print ("-- Regression Tree --") # Load temperature data data = pd.read_csv('../TempLinkoping2016.txt', sep="\t") time = np.atleast_2d(data["time"].as_matrix()).T temp = np.atleast_2d(data["temp"].as_matrix()).T X = standardize(time) # Time. Fraction of the year [0, 1] y = temp[:, 0] # Temperature. Reduce to one-dim X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) model = RegressionTree() model.fit(X_train, y_train) y_pred = model.predict(X_test) y_pred_line = model.predict(X) # Color map cmap = plt.get_cmap('viridis') mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results # Plot the results m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10) m3 = plt.scatter(366 * X_test, y_pred, color='black', s=10) plt.suptitle("Regression Tree") plt.title("MSE: %.2f" % mse, fontsize=10) plt.xlabel('Day') plt.ylabel('Temperature in Celcius') plt.legend((m1, m2, m3), ("Training data", "Test data", "Prediction"), loc='lower right') plt.show()
def main(): print ("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Decision Tree", accuracy=accuracy, legend_labels=data.target_names)
def model(labels, data):
    # set parameters:
    # Training
    batch_size = 100
    nb_epoch = 100

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    test_label, test_data = test
    test_label_rep = test_label

    shap = numpy.shape(train_data)
    print('X_train shape: ', shap)
    print('X_test shape: ', test_data.shape)

    model = Sequential()
    model.add(Dense(shap[1], activation='relu', input_dim=shap[1]))
    model.add(Highway())
    model.add(Dense(1, activation='sigmoid'))

    print 'compiling model'
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print 'running at most 60 epochs'

    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(train_data, train_label, batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True,
              show_accuracy=True, validation_split=0.3, callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)

    # Evaluating the model
    tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    print tresults

    return classification_report(list(test_label_rep), pred_data)
def main(): print ("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) print(y_train) clf = GBDTClassifier() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) Plot().plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names)
def model(labels, data, go_id):
    # set parameters:
    batch_size = 64
    nb_epoch = 10
    lstm_size = 128

    data1, data2 = data
    train1, test1 = train_test_split(
        labels, data1, batch_size=batch_size)
    train2, test2 = train_test_split(
        labels, data2, batch_size=batch_size)
    train_label, train1_data = train1
    train_label, train2_data = train2
    test_label, test1_data = test1
    test_label, test2_data = test2
    test_label_rep = test_label

    # 256 0.5 256
    model = Graph()
    model.add_input(name='input1', batch_input_shape=(batch_size, 20))
    model.add_input(name='input2', batch_input_shape=(batch_size, 3))
    model.add_node(Convolution1D(
        nb_filter=32, filter_length=20, border_mode='valid',
        activation='relu', subsample_length=1),
        name='conv1', input='input1')
    model.add_node(MaxPooling1D(
        pool_length=10, stride=10), name='pool1', input='conv1')
    model.add_node(
        LSTM(lstm_size), name='lstm1', input='pool1')
    model.add_node(Convolution1D(
        nb_filter=32, filter_length=3, border_mode='valid',
        activation='relu', subsample_length=1),
        name='conv2', input='input2')
    model.add_node(MaxPooling1D(
        pool_length=2), name='pool2', input='conv2')
    model.add_node(
        LSTM(lstm_size), name='lstm2', input='pool2')
    model.add_node(
        Dense(1024), name='dense1', inputs=['lstm1', 'lstm2'])
    model.add_node(Dropout(0.25), name='dropout', input='dense1')
    model.add_node(Activation('relu'), name='relu', input='dropout')
    model.add_node(
        Dense(1, activation='sigmoid'), name='dense2', input='relu')
    model.add_output(name='output', input='dense2')

    # try using different optimizers and different optimizer configs
    model.compile('adadelta', {'output': 'binary_crossentropy'})

    model_path = DATA_ROOT + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(
        {'input1': train1_data, 'input2': train2_data, 'output': train_label},
        batch_size=batch_size, nb_epoch=nb_epoch,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights(model_path)
    pred_data = model.predict(
        {'input1': test1_data, 'input2': test2_data}, batch_size=batch_size)
    pred_data = numpy.round(numpy.array(pred_data['output']))

    # Saving the model
    # print 'Saving the model for ' + go_id
    # model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
reload(utils)
reload(algo_param)
reload(param)

# TODO Add unlabeled subset functionality
# TODO Add parallelization

##################### PERFORM GRID SEARCH ########################
if param.optimize_params:
    # parse data
    all_X, all_Y = utils.parse(param.data_file, param.feature_file,
                               param.response_var, debug_limit=param.debug_limit)
    X, Y = utils.labeled_subset(all_X, all_Y)
    X, Y = utils.subsample((X, Y), param.labeled_subsample)
    (X_train, X_test, Y_train, Y_test) = utils.train_test_split(X, Y, test_size=param.test_size)

    # pickle data for use in other files
    saved_data = (X_train, X_test, Y_train, Y_test)
    utils.pickler(saved_data, param.optimization_data_pickle)

    # make meta pipeline for grid searching
    pipeline, parameter_space = make_meta_pipeline([
        ('imputer', param.imputer_params),
        ('scaler', param.scaler_params),
        ('dim_reducer', param.dim_reducer_params),
        ('regressor', param.regressor_params)
    ], all_X, all_Y)

    print("Opening logfiles")
    sys.stdout.flush()
def model(labels, data, parent_id, go_id):
    # Convolution
    filter_length = 20
    nb_filter = 32
    pool_length = 10
    global nb_classes
    # LSTM
    lstm_output_size = 128
    # Training
    batch_size = 64
    nb_epoch = 64

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    # sample_weight = [1.0 if y == 1 else 1.0 for y in train_label]
    # sample_weight = numpy.array(sample_weight, dtype='float32')
    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(input_dim=20,
                            input_length=MAXLEN,
                            nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=10))
    model.add(Dropout(0.25))
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=10))
    model.add(LSTM(lstm_output_size))
    # model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(
        loss='categorical_crossentropy', optimizer='rmsprop')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    # parent_model_path = DATA_ROOT + 'data/' + parent_id + '.hdf5'
    # if os.path.exists(parent_model_path):
    #     print 'Loading parent model weights'
    #     model.load_weights(parent_model_path)
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.2,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights(DATA_ROOT + parent_id + '/' + go_id + '.hdf5')
    score = model.evaluate(
        test_data, test_label, show_accuracy=True, verbose=1)
    print 'Score: ', score[0]
    print 'Accuracy: ', score[1]
def model(labels, data, parent_id, go_id):
    # set parameters:
    # Convolution
    nb_filter = 64
    nb_row = 5
    nb_col = 1
    pool_length = 3
    # Training
    batch_size = 64
    nb_epoch = 24
    lstm_size = 70

    data1, data2 = data
    train1, test1 = train_test_split(
        labels, data1, batch_size=batch_size, split=0.8)
    train_label, train1_data = train1
    train2, test2 = train_test_split(
        labels, data2, batch_size=batch_size, split=0.8)
    train_label, train2_data = train2
    if len(train1_data) < 100:
        raise Exception("No training data for " + go_id)
    test_label, test1_data = test1
    test_label, test2_data = test2
    test_label_rep = test_label

    first = Sequential()
    first.add(Convolution2D(
        nb_filter, nb_row, nb_col,
        border_mode='valid',
        input_shape=(1, MAXLEN, 20)))
    first.add(Activation('relu'))
    first.add(Convolution2D(2 * nb_filter, nb_row, nb_col))
    first.add(Activation('relu'))
    # first.add(Convolution2D(nb_filter, nb_row, nb_col))
    # first.add(Activation('relu'))
    first.add(MaxPooling2D(pool_size=(pool_length, 1)))
    first.add(Dropout(0.5))
    first.add(Flatten())

    second = Sequential()
    second.add(
        LSTM(lstm_size, return_sequences=True, input_shape=(MAXLEN, 20)))
    second.add(Dropout(0.25))
    # second.add(LSTM(lstm_size, return_sequences=True))
    # second.add(Dropout(0.25))
    second.add(LSTM(lstm_size, return_sequences=False))
    second.add(Dropout(0.25))
    second.add(Flatten())

    model = Sequential()
    model.add(Merge([first, second], mode='concat'))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    adam = Adam(lr=0.00001)
    model.compile(
        loss='binary_crossentropy', optimizer=adam, class_mode='binary')

    model_path = DATA_ROOT + parent_id + '/' + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(
        X=[train1_data, train2_data], y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_split=0.3,
        callbacks=[checkpointer, earlystopper])

    model.load_weights(model_path)
    pred_data = model.predict_classes(
        [test1_data, test2_data], batch_size=batch_size)
    return classification_report(list(test_label_rep), pred_data)
def model(labels, data, go_id):
    # set parameters:
    # Convolution
    filter_length = 7
    nb_filter = 64
    pool_length = 2
    k = 7
    # LSTM
    lstm_output_size = 70
    # Training
    batch_size = 32
    nb_epoch = 12

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    test_label, test_data = test
    test_label_rep = test_label

    model = Sequential()
    model.add(Convolution1D(
        input_dim=20,
        input_length=500,
        nb_filter=320,
        filter_length=20,
        border_mode="valid",
        activation="relu",
        subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.2))
    model.add(Convolution1D(
        nb_filter=320,
        filter_length=20,
        border_mode="valid",
        activation="relu",
        subsample_length=1))
    model.add(MaxPooling1D(pool_length=10, stride=10))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Highway())
    model.add(Dropout(0.5))
    model.add(Dense(output_dim=1000))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=1))
    model.add(Activation('sigmoid'))

    print 'compiling model'
    model.compile(
        loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print 'running at most 60 epochs'

    model_path = DATA_ROOT + go_id + '.hdf5'
    checkpointer = ModelCheckpoint(
        filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(
        train_data, train_label,
        batch_size=batch_size, nb_epoch=60,
        shuffle=True, show_accuracy=True,
        validation_split=0.3,
        callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights(model_path)
    pred_data = model.predict_classes(test_data, batch_size=batch_size)

    # Saving the model
    # tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    # print tresults
    return classification_report(list(test_label_rep), pred_data)
def model(labels, data, go_id):
    # set parameters:
    # Convolution
    filter_length = 7
    nb_filter = 64
    pool_length = 2
    k = 7
    # LSTM
    lstm_output_size = 70
    # Training
    batch_size = 30
    nb_epoch = 12

    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    test_label, test_data = test
    test_label_rep = test_label

    nb_filters = 100
    filter_lenghts = [7, 10, 12]

    first = Sequential()
    first.add(Convolution1D(input_dim=20, input_length=500,
                            nb_filter=nb_filters, filter_length=7,
                            border_mode="valid", activation="relu",
                            subsample_length=1))
    first.add(MaxPooling1D(pool_length=3, stride=3))
    first.add(LSTM(input_dim=100, output_dim=100))

    second = Sequential()
    second.add(Convolution1D(input_dim=20, input_length=500,
                             nb_filter=nb_filters, filter_length=10,
                             border_mode="valid", activation="relu",
                             subsample_length=1))
    second.add(Activation('relu'))
    second.add(MaxPooling1D(pool_length=5, stride=5))
    second.add(LSTM(input_dim=100, output_dim=100))

    third = Sequential()
    third.add(Convolution1D(input_dim=20, input_length=500,
                            nb_filter=nb_filters, filter_length=12,
                            border_mode="valid", activation="relu",
                            subsample_length=1))
    third.add(Activation('relu'))
    third.add(MaxPooling1D(pool_length=6, stride=6))
    third.add(LSTM(input_dim=100, output_dim=100))

    model = Sequential()
    model.add(Merge([first, second, third], mode='concat'))
    model.add(Dense(1000))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")

    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(X=[train_data, train_data, train_data], y=train_label,
              batch_size=100, nb_epoch=60, shuffle=True, show_accuracy=True,
              validation_split=0.3, callbacks=[checkpointer, earlystopper])
    # now concat all the results and do whatever you want with the new sentence_embedding :D

    # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(
        [test_data, test_data, test_data], batch_size=batch_size)

    # Saving the model
    # tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    # print tresults
    return classification_report(list(test_label_rep), pred_data)
print ("MLP") print ("nb_epoch:{}".format(nb_epoch)) print ("Layer: {}/{}/{}".format(layer1,layer2,layer3)) Loop, Tap = utils.load_data_envelope() Xloop, Xtap, target = utils.makeTrainingDataRegression(Loop, Tap) # Xloop, Xtap, target = utils.makeTrainingDataRank(Loop, Tap) # target = np_utils.to_categorical(target) X = np.hstack((Xloop, Xtap)) X = np.float64(X) if normFlag: # X = X - np.mean(X, axis=1)[:, np.newaxis] X = normalize(X, axis=1, norm='l2') #X_train, X_test, target_train, target_test = cross_validation.train_test_split(X, target, test_size=test_split, random_state=0) X_train, X_test, target_train, target_test = utils.train_test_split(X, target, test_split) mlp = nn.MLP_regression(layer1=layer1, layer2=layer2, layer3=layer3, input_shape=X_train[0].shape) checkpointer = ModelCheckpoint(filepath="./tmp/mlp_weights_l1{}l2{}l3{}.hdf5".format(layer1,layer2,layer3),\ verbose=1, save_best_only=True, monitor='val_loss') mlp.fit(X_train, target_train, batch_size=batch_size, nb_epoch=nb_epoch,\ show_accuracy=True, verbose=2, shuffle=True, validation_data=(X_test, target_test), callbacks = [checkpointer]) # mlp.evaluate(X_test, target_test, show_accuracy=True, verbose=1) #mlp.load_weights("./tmp/mlp_weights_l1{}l2{}l3{}.hdf5".format(layer1,layer2,layer3)) propagation = nn.Propagation(mlp) utils.calculate_MRR(Loop, Tap, method='nn', model=mlp, prop=propagation) # print("====test data====") # for i in np.arange(len(target_test)): # error = np.abs(y[i][0]-target_test[i]) # print("{:0.3f}, {:0.2f}, error= {:.2f}, {}").format(y[i][0], target_test[i], error, 'correct' if error < 0.5 else 'wrong')
nb_epoch = 200
test_split = 0.2
layer1 = 10
layer2 = None
layer3 = None

print("GRU")
print("nb_epoch:{}".format(nb_epoch))
print("Layer: {}/{}/{}".format(layer1, layer2, layer3))

target = np.load('./target.npy')
Loop = pd.read_pickle('./Loop.pkl')
Tap = pd.read_pickle('./Tap.pkl')
X = np.load('./X.npy')

X_train, X_test, target_train, target_test = \
    utils.train_test_split(X, target, test_split)

if normFlag:
    Xloop = Xloop - np.mean(Xloop, axis=1)[:, np.newaxis]
    Xtap = Xtap - np.mean(Xtap, axis=1)[:, np.newaxis]
    Xloop = normalize(Xloop, axis=1, norm='l2')
    Xtap = normalize(Xtap, axis=1, norm='l2')

rnn = nn.RNN_regression(
    layer1=layer1, layer2=layer2,
    layer3=layer3, input_length=(len(Xloop[0]),))

# X_train, X_test, target_train, target_test = \
#     utils.train_test_split(X, target, test_split)

print("Train...")
checkpointer = ModelCheckpoint(
    filepath="./tmp/rnn_gru_weights_l1{}l2{}l3{}.hdf5".format(layer1, layer2, layer3),
    verbose=1, save_best_only=True, monitor='val_loss')
def model(go_id):
    # set parameters:
    # Convolution
    filter_length = 20
    nb_filter = 32
    pool_length = 10
    stride = 10
    # LSTM
    lstm_output_size = 96
    # Training
    batch_size = 100
    nb_epoch = 60
    patience = 5
    # Encoding
    maxlen = 500
    dictn = 20
    gram = 2

    labels, data = load_data(go_id, maxlen, dictn, gram)
    train, test = train_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train
    test_label, test_data = test
    test_label_rep = test_label

    shap = numpy.shape(train_data)
    print('X_train shape: ', shap)
    print('X_test shape: ', test_data.shape)

    model = Sequential()
    model.add(Convolution1D(input_dim=shap[2],
                            input_length=shap[1],
                            nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode="valid",
                            activation="relu",
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length, stride=stride))
    model.add(Dropout(0.75))
    model.add(LSTM(lstm_output_size, return_sequences=True))
    model.add(LSTM(lstm_output_size))
    model.add(Dropout(0.75))
    model.add(Dense(1000))
    model.add(Dense(1, activation='sigmoid'))

    print 'compiling model'
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', class_mode="binary")
    print 'running at most 60 epochs'

    checkpointer = ModelCheckpoint(filepath="bestmodel.hdf5", verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

    model.fit(train_data, train_label, batch_size=batch_size, nb_epoch=nb_epoch, shuffle=True,
              show_accuracy=True, validation_split=0.3, callbacks=[checkpointer, earlystopper])

    # Loading saved weights
    print 'Loading weights'
    model.load_weights('bestmodel.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)

    # Evaluating the model
    tresults = model.evaluate(test_data, test_label, show_accuracy=True)
    print tresults

    return classification_report(list(test_label_rep), pred_data)