def run():
    """Train the price model and load the saved best weights into it.

    Steps, in order: fetch the dataset via ``util.get_dataset``, scale the
    ``Weighted_Price`` column with a ``MinMaxScaler`` set to the (0, 1)
    range, split the scaled series with ``train_percentage=0.85``, build
    windowed samples with ``util.create_labels``, train the model returned
    by ``util.build_model``, plot the training history and finally load the
    weights stored at ``saved_models/weights.best.lstm.hdf5``.
    """
    # Load the data and keep the weighted price as a float32 column vector
    # (one row per observation, a single feature column).
    data = util.get_dataset()
    weighted_price = data.Weighted_Price.values.astype('float32')
    weighted_price = weighted_price.reshape(len(weighted_price), 1)

    # Scale data.
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(weighted_price)

    # Single source of truth for the window length: the train and test
    # windows must be built with the same look-back.
    look_back = 5
    train_set, test_set = util.split_data(data_scaled, train_percentage=0.85)
    x_train, y_train = util.create_labels(train_set, look_back=look_back)
    # NOTE(review): the test windows are built but not used in the code
    # visible here.
    x_test, y_test = util.create_labels(test_set, look_back=look_back)

    model = util.build_model()
    history = util.train_model(model, x_train, y_train)
    util.plot_training_history(history)

    # NOTE(review): presumably this checkpoint is written during training by
    # util.train_model -- confirm against util.
    model.load_weights('saved_models/weights.best.lstm.hdf5')
def post(self):
    """Train a pipeline from the uploaded request data and return its stats.

    Parses the request with ``self.parser``, reads the ``params``,
    ``raw_data`` and ``labels`` uploads, builds features, trains a model,
    registers it in ``self.model_factory`` and cross-validates it.

    Returns:
        A JSON string with the training statistics. The same statistics
        dict is also stored under ``'stats'`` in the model factory entry
        for ``params['pipeline_id']``.
    """
    started_at = time.time()
    request_args = self.parser.parse_args()

    # Read the uploaded inputs.
    params = read_params(request_args['params'].stream)
    df = read_file(request_args['raw_data'].stream.read())
    y_train = read_file(request_args['labels'].stream.read())

    # Build features, then align the labels with the feature rows.
    X_train = build_features(df, params)
    y_train = y_train.set_index('example_id').loc[X_train.index]

    # Train the model and register it with the factory.
    cl = train_model(X_train, y_train.label, params)
    self.model_factory.add_pipeline(cl, params)

    # A TPOT search exposes the winning pipeline and the candidates it
    # evaluated; any other estimator is used as-is.
    is_tpot = isinstance(cl, tpot.TPOTClassifier)
    final_classifier = cl.fitted_pipeline_ if is_tpot else cl
    evaluated_indivs = cl.evaluated_individuals_ if is_tpot else None

    model_type = str(final_classifier)
    mean_accuracy, mean_roc_auc = cross_validate(
        final_classifier, X_train, y_train.label)

    # Render every feature-engineering parameter as a string.
    feat_eng_params = params['extract_features'].copy()
    for name, value in feat_eng_params.items():
        if name == 'default_fc_parameters':
            # Only the names of the calculations (min, mean, etc.).
            rendered = str(value.keys())
        elif name == 'impute_function':
            rendered = str(value.__name__)
        else:
            rendered = str(value)
        feat_eng_params[name] = rendered

    elapsed = time.time() - started_at
    result = {
        'trainTime': elapsed,
        'trainShape': X_train.shape,
        'modelType': model_type,
        'featureEngParams': feat_eng_params,
        'modelId': params['pipeline_id'],
        'mean_cv_accuracy': mean_accuracy,
        'mean_cv_roc_auc': mean_roc_auc,
        'evaluated_models': evaluated_indivs
    }
    self.model_factory[params['pipeline_id']]['stats'] = result
    return json.dumps(result)
readFilesFromSources(text,sources)


# Creating the train test split and transforming data
def create_train_test_set():
    """Split the module-level ``text``/``labels`` and vectorise both parts.

    Holds out 10% of the samples for validation (shuffled,
    ``random_state=0``), label-encodes the targets and transforms the texts
    with ``ngram_transform(..., n=2)``.

    Returns:
        Tuple ``(xtrain_tfidf_ngram, xvalid_tfidf_ngram, train_y, valid_y)``.

    Raises:
        ValueError: raised by ``LabelEncoder.transform`` if the validation
            split contains a label that is absent from the training split.
    """
    # split the dataset into training and validation datasets
    train_x, valid_x, train_y, valid_y = model_selection.train_test_split(
        text, labels, test_size=0.10, random_state=0, shuffle=True)

    # label encode the target variable; the encoder is fitted on the
    # training labels only and then re-used, so both splits share one
    # label -> integer mapping (re-fitting on the validation labels could
    # silently produce a different mapping).
    encoder = preprocessing.LabelEncoder()
    train_y = encoder.fit_transform(train_y)
    valid_y = encoder.transform(valid_y)

    # ngram level tf-idf
    xtrain_tfidf_ngram, xvalid_tfidf_ngram = ngram_transform(train_x, valid_x, n=2)

    return xtrain_tfidf_ngram, xvalid_tfidf_ngram, train_y, valid_y


# SVM on Ngram Level TF IDF Vectors
read_file()
# Four consecutive runs of 400 labels each: ones, zeros, ones, zeros.
labels = np.concatenate((
    np.ones((400), dtype=int),
    np.zeros((400), dtype=int),
    np.ones((400), dtype=int),
    np.zeros((400), dtype=int),
))
xtrain_tfidf_ngram, xvalid_tfidf_ngram, train_y, valid_y = create_train_test_set()

accuracy_SVM = train_model(svm.SVC(kernel='linear'),
                           xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram, valid_y)
# 'sqrt' is what the classifier's former 'auto' setting meant; 'auto' is no
# longer accepted by recent scikit-learn releases.
accuracy_RF = train_model(RandomForestClassifier(n_estimators=2, random_state=0,
                                                 max_features='sqrt',
                                                 min_samples_split=2),
                          xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram, valid_y)
accuracy_NB = train_model(naive_bayes.MultinomialNB(alpha=0, class_prior=None,
                                                    fit_prior=False),
                          xtrain_tfidf_ngram, train_y, xvalid_tfidf_ngram, valid_y)

print('\n')
print('The statistics for the classifiers SVM, Naïve Bayes, Random Forest are: ')
print("1. SVM, N-Gram Vectors: ", accuracy_SVM)
print("2. Random Forest, N-Gram Vectors: ", accuracy_RF)
print("3. Naive Bayes, N-Gram Vectors: ", accuracy_NB)
from sklearn import tree
from utilities import (load_magic04, load_wine, scale_data, train_model,
                       tune_hyperparameters, model_complexity, learning_curve)

# Decision-tree experiment: fit an unconstrained tree, tune its size limits,
# then refit and evaluate the constrained tree.

# Load the dataset and split/scale it.
df, factors, response = load_wine()
# df, factors, response = load_magic04()
df_train, df_test = scale_data(df, response)

# Baseline tree with default settings (no test frame is passed here).
classifier = tree.DecisionTreeClassifier()
train_model(classifier, df_train, None, factors, response)
tree.export_graphviz(classifier, out_file="tree_initial.dot")

# Hyperparameter grid handed to the tuner.
search_space = {
    "max_depth": range(1, 20),
    "max_leaf_nodes": range(50, 150, 10)
}
best_params = tune_hyperparameters(classifier, df_train, factors, response,
                                   search_space)
# Other grid entries that were tried:
#   "criterion": ["entropy","gini"]
#   "max_leaf_nodes": range(50, 150, 10)
#   "max_depth": range(1, 20)
#   "min_samples_leaf": range(1, 20)
#   "min_samples_split": range(2, 20)

best_depth = best_params["max_depth"]
best_leaf_nodes = best_params["max_leaf_nodes"]

# Model-complexity analysis over max_depth with the leaf-node limit fixed.
model_complexity(
    tree.DecisionTreeClassifier(max_leaf_nodes=best_leaf_nodes),
    df_train, factors, response,
    {"max_depth": range(1, 20)}, "max_depth")

# Final tree with both tuned limits, this time also given the test frame.
classifier = tree.DecisionTreeClassifier(max_depth=best_depth,
                                         max_leaf_nodes=best_leaf_nodes)
train_model(classifier, df_train, df_test, factors, response, "Final ")
tree.export_graphviz(classifier, out_file="tree_pruned.dot")

learning_curve(classifier, df_train, factors, response)
def _train_and_report(model, history, label, n_run, train_kwargs,
                      eval_input, eval_target, companions=()):
    """Train one model, record its history and print its test accuracy.

    :param model: network passed to ``train_model`` and ``get_accuracy``
    :param history: list that receives the value returned by ``train_model``
    :param label: architecture name inserted into the printed accuracy line
    :param n_run: index of the current run, shown in the printed line
    :param train_kwargs: keyword arguments forwarded to ``train_model``
    :param eval_input: input passed to ``get_accuracy``
    :param eval_target: target passed to ``get_accuracy``
    :param companions: additional modules whose train/eval flag is switched
                       just before ``model``'s own flag
    """
    modules = tuple(companions) + (model,)

    # Set train flag on (for dropouts)
    for module in modules:
        module.train()

    # Train the model and append the history
    history.append(train_model(model, **train_kwargs))

    # Set train flag to False for getting accuracies on validation data
    for module in modules:
        module.eval()

    acc = get_accuracy(model, eval_input, eval_target) * 100.0
    print("Run: {}, {} Test Accuracy: {:.3f} %".format(n_run, label, acc))


def perform_experiments(n_runs=10, n_points=1000, n_epochs=200, run_best=False, verbose=False):
    """
    Perform experiments for 5 different neural network architectures and losses.

    To run all experiments call this function with default params.
    Fresh data is generated with ``generate_pair_sets`` for every run and the
    inputs are divided by 255.0 before use.

    :param n_runs: number of runs for which experiment should be repeated
    :param n_points: number of training and testing data points used in the experiments
    :param n_epochs: number of epochs every architecture should be trained on
    :param run_best: If True only the best architecture (Siamese Network with auxiliary loss) is trained
    :param verbose: If True, print training and validation loss every epoch

    :returns: dictionary containing history of training (training, validation loss and accuracy);
              one list per architecture with one entry per run (lists of skipped
              architectures stay empty)
    """
    max_val = 255.0  # divisor applied to the raw inputs

    history_mlp_net = []
    history_conv_net = []
    history_conv_net_aux = []
    history_siamese = []
    history_siamese_aux = []

    for n_run in range(n_runs):
        data_set = generate_pair_sets(n_points)

        train_input = Variable(data_set[0]) / max_val
        train_target = Variable(data_set[1])
        train_classes = Variable(data_set[2])

        test_input = Variable(data_set[3]) / max_val
        test_target = Variable(data_set[4])
        test_classes = Variable(data_set[5])

        # Keyword arguments shared by every train_model call of this run.
        shared_kwargs = dict(train_target=train_target,
                             val_target=test_target,
                             n_epochs=n_epochs,
                             verbose=verbose)
        # ... plus the unflattened inputs used by all image-based networks.
        image_kwargs = dict(shared_kwargs,
                            train_input=train_input,
                            val_input=test_input)
        # ... plus the class labels needed by the auxiliary losses.
        aux_kwargs = dict(image_kwargs,
                          train_classes=train_classes,
                          val_classes=test_classes)

        if not run_best:
            ##############################################################################
            # Multilayer Perceptron Network with ReLU activations; it
            # consumes the inputs flattened to one row per sample.
            flat_test_input = test_input.view((n_points, -1))
            mlp_net = MLPNet(in_features=392, out_features=2, n_layers=3, n_hidden=16)
            _train_and_report(
                mlp_net, history_mlp_net, "Mlp_net", n_run,
                dict(shared_kwargs,
                     train_input=train_input.view((n_points, -1)),
                     val_input=flat_test_input),
                flat_test_input, test_target)

            ##############################################################################
            # ConvNet without auxiliary outputs
            conv_net = ConvNet(n_classes=2, n_layers=3, n_features=16)
            _train_and_report(conv_net, history_conv_net, "ConvNet", n_run,
                              image_kwargs, test_input, test_target)

            ##############################################################################
            # ConvNet with auxiliary outputs
            conv_net_aux = ConvNet(n_classes=22, n_layers=3, n_features=16)
            _train_and_report(conv_net_aux, history_conv_net_aux,
                              "ConvNet Auxilary", n_run,
                              dict(aux_kwargs, aux_param=1.0),
                              test_input, test_target)

            ##############################################################################
            # Siamese Network without auxiliary outputs; the wrapped block
            # network gets its train/eval flag switched explicitly as well.
            conv_net = BlockConvNet()
            conv_net_siamese = DeepSiameseNet(conv_net)
            _train_and_report(conv_net_siamese, history_siamese, "Siamese",
                              n_run, image_kwargs, test_input, test_target,
                              companions=(conv_net,))

        ##############################################################################
        # Siamese Network with auxiliary outputs (always trained)
        conv_net = BlockConvNet()
        conv_net_siamese_aux = DeepSiameseNet(conv_net)
        _train_and_report(conv_net_siamese_aux, history_siamese_aux,
                          "Siamese Auxilary", n_run,
                          dict(aux_kwargs, aux_param=3.0),
                          test_input, test_target,
                          companions=(conv_net,))
        ##############################################################################

    return {
        'history_mlp_net': history_mlp_net,
        'history_conv_net': history_conv_net,
        'history_conv_net_aux': history_conv_net_aux,
        'history_siamese': history_siamese,
        'history_siamese_aux': history_siamese_aux
    }
len(model.wv.vocab.keys()))) else: print("Model file not found. {0}".format(model_path)) elif function == "6" \ or function == "tm": new_model_name = input("New model name: ") new_vocabulary_name = input( "New vocabulary name (leave empty if you don't want to save the vocabulary): " ) # see first lines of file for explanation of reload call importlib.reload(utilities) # see function inside utilities.py for parameters (model.train call) utilities.train_model(model=model, corpus_path=corpus_path, new_model_name=new_model_name, new_vocabulary_name=new_vocabulary_name) elif function == "7" \ or function == "wv": word = input("Insert the word of which you want the vector: ").lower() if word in model.wv.vocab.keys(): print("This is the vector of {0}. ".format(word)) print(model.wv.word_vec(word=word)) else: print("Word not in the vocabulary of this model.") elif function == "8" \ or function == "ms": print( "WORDS MUST BE SEPARATED BY WHITE SPACES, IF YOU HAVE 'italian food' MAKE IT 'italian_food'." ) print(
# Build a text-classification dataset, vectorise it and compare a Naive Bayes
# and a linear SVM classifier on word-level TF-IDF features.
trainDF['text'] = text
trainDF['label'] = labels

# split the dataset into training and validation datasets
train_x, valid_x, train_y, valid_y = model_selection.train_test_split(
    trainDF['text'], trainDF['label'], test_size=0.15, random_state=0)

# label encode the target variable; fit on the training labels only and
# re-use the fitted encoder for validation so both splits share one
# label -> integer mapping (re-fitting on the validation labels could
# silently produce a different mapping). transform() raises ValueError for a
# validation label that never occurs in the training split.
encoder = preprocessing.LabelEncoder()
train_y = encoder.fit_transform(train_y)
valid_y = encoder.transform(valid_y)

# create a count vectorizer object
# NOTE(review): both vectorizers are fitted on the full text column, i.e.
# including the validation rows -- confirm this is intended.
count_vect = CountVectorizer(analyzer='word', token_pattern=r'\w{1,}')
count_vect.fit(trainDF['text'])

# transform the training and validation data using count vectorizer object
xtrain_count = count_vect.transform(train_x)
xvalid_count = count_vect.transform(valid_x)

# word level tf-idf
tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\w{1,}',
                             max_features=5000)
tfidf_vect.fit(trainDF['text'])
xtrain_tfidf = tfidf_vect.transform(train_x)
xvalid_tfidf = tfidf_vect.transform(valid_x)

# Naive Bayes on Word Level TF IDF Vectors
result = train_model(naive_bayes.MultinomialNB(),
                     xtrain_tfidf, train_y, xvalid_tfidf, valid_y)
print("NB, WordLevel TF-IDF: Accuracy=%.3f\tF1=%.3f"%(result['accuracy'],result['f1']))

# Linear SVM on Word Level TF IDF Vectors
result1 = train_model(svm.SVC(kernel="linear"),
                      xtrain_tfidf, train_y, xvalid_tfidf, valid_y)
print("SVM, WordLevel TF-IDF: Accuracy=%.3f\tF1=%.3f"%(result1['accuracy'],result1['f1']))
# Attach a new classifier to the model, train only that classifier, then
# save a checkpoint.
dnn_model.classifier = DNNModelClassifier(num_input, num_output, num_hid)

# Move the model to the target device before the optimizer is created.
dnn_model.to(device)

# The classifier outputs log-softmax values, hence the negative
# log-likelihood loss.
criterion = nn.NLLLoss()

# Only the classifier's parameters are handed to the optimizer; the feature
# parameters are frozen.
classifier_params = dnn_model.classifier.parameters()
optimizer = optim.Adam(classifier_params, lr=FLAGS.learning_rate)

# Train the classifier of the model.
utilities.train_model(
    dnn_model,
    optimizer,
    criterion,
    dataloaders,
    device,
    num_epochs=FLAGS.epochs,
    print_every=2,
)

# Keep the training set's class-to-index dictionary on the model.
train_dataset = image_datasets['train']
dnn_model.class_to_idx = train_dataset.class_to_idx

# Save a checkpoint of the model.
utilities.save_checkpoint(
    dnn_model,
    model_arch,
    optimizer,
    num_input,
    num_hid,
    num_output,
    save_dir=FLAGS.save_dir,
)
from utilities import (create_input_corpus, train_model,
                       table_of_contents_builder, novel_generator,
                       text_formatter, novel_length, novel_version_maker)

# Train a text model on the input corpus and write out one generated novel.
input_corpus = create_input_corpus('festival_input_texts')
textmodel = train_model(input_corpus)

# novel_generator returns a tuple: (generated text as list, number of chapters)
generated_text = novel_generator(textmodel, 2000)
raw_chapters = generated_text[0]
chapter_count = generated_text[1]

# Build the body of the novel and measure it.
novel_body = text_formatter(raw_chapters)
length = novel_length(novel_body)

# Create the table of contents, then format it like the body.
table_of_contents = text_formatter(table_of_contents_builder(chapter_count))

novel_version_maker("generated_novel", 1, length, table_of_contents, novel_body)
min_samples_split=2) #K-Fold cross validation on training set k = 5 kf = KFold(n_splits=k, shuffle=True, random_state=0) print("K-Fold cross validation (K=%d)" % k) i = 1 for train_index, valid_index in kf.split(x_train): print("\nFold ", i) i += 1 training_data, valid_data = x_train.iloc[train_index], x_train.iloc[ valid_index] expected_labels = y_train.iloc[valid_index] result1 = train_model(classifier1, training_data, y_train.iloc[train_index], valid_data, expected_labels) print("NB result : ", result1) result2 = train_model(classifier2, training_data, y_train.iloc[train_index], valid_data, expected_labels) print("SVM result : ", result2) result3 = train_model(classifier3, training_data, y_train.iloc[train_index], valid_data, expected_labels) print("Random Forest result : ", result3) #Final classification print("Train-test classification...\n")