def _validate_model(self, x: np.ndarray, y: np.ndarray, validation_file_name: str = "validation.json") -> dict:
    # sklearn.metrics.classification is a private module that was removed in
    # newer scikit-learn releases; import from sklearn.metrics instead
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

    logging.info("Creating predictions ...")
    y_predicted_categories = self._model.predict(x, batch_size=self._batch_size)
    gc.collect()
    y_expected_1dim = self._label_enc.max_category(y)
    y_predicted_1dim = self._label_enc.max_category(y_predicted_categories)
    logging.info("Results:")
    logging.info("{}".format(precision_recall_fscore_support(y_true=y_expected_1dim, y_pred=y_predicted_1dim)))
    accuracy = accuracy_score(y_true=y_expected_1dim, y_pred=y_predicted_1dim)
    logging.info("{}".format(accuracy))
    logging.info("\n{}".format(classification_report(y_true=y_expected_1dim,
                                                     y_pred=y_predicted_1dim,
                                                     target_names=["neg", "pos"])))
    results = classification_report(y_true=y_expected_1dim,
                                    y_pred=y_predicted_1dim,
                                    target_names=["neg", "pos"],
                                    output_dict=True)
    results["accuracy"] = accuracy
    write_text_file(file_path=self._experiment_folder / validation_file_name,
                    text=json.dumps(results))
    return results
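# A standalone sketch (toy labels, not the model above) of the structure that
# classification_report(output_dict=True) returns and how it serializes to
# JSON, as done in _validate_model:
from sklearn.metrics import classification_report
import json

y_true = [0, 0, 1, 1]
y_pred = [0, 1, 1, 1]
results = classification_report(y_true, y_pred, target_names=["neg", "pos"], output_dict=True)
# results holds one dict per class plus "macro avg" and "weighted avg", each
# with "precision", "recall", "f1-score", and "support"
print(json.dumps(results, indent=2))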
def ensemble_test():
    file_src_dict = {'embedding_file': './data/word_embedding.pkl'}
    with open(file_src_dict['embedding_file'], 'rb') as f:
        # embedding is a 2d-list with size vocab_size * dim
        embeddings = pickle.load(f)
    with open('./data/val.pkl', 'rb') as f:
        val_q, val_r, val_label = pickle.load(f)
    all_pred_score = []
    all_pred_label = []
    models = ['./base/model.1']
    model_name = ['base']
    graphs = [tf.Graph() for _ in range(len(models))]
    for i in range(len(graphs)):
        with graphs[i].as_default():
            arnn = ARNN(embedding=embeddings)
            if model_name[i] == 'base':
                arnn.build_base_model(training=False)
            pred_label, pred_score = arnn.predict(model_path=models[i], data_q=val_q, data_r=val_r)
            all_pred_score.append(pred_score)
            all_pred_label.append(pred_label)
            del arnn
    # average the score matrices over all models; the original indexed
    # all_pred_score[1] unconditionally, which crashes with a single model
    final_score = sum(all_pred_score) / len(all_pred_score)
    final_label = [int(s[1] > s[0]) for s in final_score]
    if val_label is not None:
        for pred_l in all_pred_label:
            print(classification_report(val_label, pred_l))
        print(classification_report(val_label, final_label))
def eval(model, cfg, mode='val', cuda=True):
    data_info = cfg.dataset[mode]
    data_reader = DataReader(
        ann_files=[data_info['ann_file']],
        img_dirs=[data_info['img_prefix']],
        transform=None,
        mode='val',
        img_scale=data_info['img_scale'],
        keep_ratio=data_info['keep_ratio'],
    )
    data_loader = DataLoader(data_reader, collate_fn=collate_fn, **cfg.val_data_loader)
    y_true, y_pred = [], []
    model.eval()
    for step, (inputs, targets) in tqdm(enumerate(data_loader)):
        if cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
        with torch.no_grad():
            outputs = model(inputs)
        outs = nn.functional.softmax(outputs, dim=1)
        pred = torch.argmax(outs, dim=1)
        y_true.extend(list(targets.cpu().detach().numpy()))
        y_pred.extend(list(pred.cpu().detach().numpy()))
    model.train()
    return classification_report(y_true, y_pred, output_dict=True), \
           classification_report(y_true, y_pred, output_dict=False)
def evalulate_on_cache(self):
    grapheme_logits_all = np.vstack(self.grapheme_logits_cache)
    vowel_logits_all = np.vstack(self.vowel_logits_cache)
    consonant_logits_all = np.vstack(self.consonant_logits_cache)
    labels_all = np.vstack(self.labels_cache)

    grapheme_preds = np.argmax(grapheme_logits_all, axis=1)
    vowels_preds = np.argmax(vowel_logits_all, axis=1)
    consonant_preds = np.argmax(consonant_logits_all, axis=1)

    grapheme_clf_result = classification_report(labels_all[:, 0], grapheme_preds, output_dict=True)
    vowels_clf_result = classification_report(labels_all[:, 1], vowels_preds, output_dict=True)
    consonant_clf_result = classification_report(labels_all[:, 2], consonant_preds, output_dict=True)

    # competition metric: macro recall, with the grapheme component double-weighted
    kaggle_score = (grapheme_clf_result['macro avg']['recall'] * 2
                    + vowels_clf_result['macro avg']['recall']
                    + consonant_clf_result['macro avg']['recall']) / 4
    acc = np.mean(self.acc_cache)
    loss = np.mean(self.loss_cache)
    result = {
        'grapheme_clf_result': grapheme_clf_result,
        'vowels_clf_result': vowels_clf_result,
        'consonant_clf_result': consonant_clf_result,
        'kaggle_score': kaggle_score,
        'acc': acc,
        'loss': loss,
    }
    return result
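# A sketch of the same weighted score computed directly with sklearn's macro
# recall, without building full report dicts (labels_all and the *_preds
# arrays are assumed to be shaped as in the method above):
def hierarchical_macro_recall(labels_all, grapheme_preds, vowels_preds, consonant_preds):
    from sklearn.metrics import recall_score
    scores = [
        recall_score(labels_all[:, 0], grapheme_preds, average='macro'),
        recall_score(labels_all[:, 1], vowels_preds, average='macro'),
        recall_score(labels_all[:, 2], consonant_preds, average='macro'),
    ]
    # the grapheme component is double-weighted, matching the (2 + 1 + 1) / 4 average
    return np.average(scores, weights=[2, 1, 1])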
def evaluate(self, y_predict, y_true, target_names=None):
    # return the metric appropriate to the task; the original computed the
    # metrics but discarded their return values
    if self.config.task_type == 'classification':
        return classification_report(y_true=y_true, y_pred=y_predict, target_names=target_names)
    elif self.config.task_type == 'ranking':
        return roc_auc_score(y_true=y_true, y_score=y_predict)
    elif self.config.task_type == 'regression':
        return r2_score(y_true=y_true, y_pred=y_predict)
def eval(test_y, sys_y, pipeline_name, data_dir):
    print(classification_report(test_y, sys_y))
    report = classification_report(test_y, sys_y, output_dict=True)
    df = pd.DataFrame(report).transpose()
    dataset_dir = data_dir.split('/')[-2]
    # keep the index: after the transpose it holds the class labels
    df.to_csv(f'../results/{dataset_dir}/{pipeline_name}.csv', index=True)
    return report
def get_f1_scores(inp_train, out_train):
    inp_test, out_test = init_test_dataframes()
    models = [
        AdaBoostClassifier(n_estimators=300, learning_rate=0.19, algorithm='SAMME.R'),
        LogisticRegression(random_state=0, class_weight='balanced', solver='lbfgs',
                           multi_class='ovr', penalty='l2'),
    ]
    for model in models:
        model.fit(inp_train, out_train)
        print(str(model).split('(')[0])
        print(classification_report(out_test, model.predict(inp_test)))
def testforest(self, test, testlabel, forest):
    outputtest = forest.predict(test)
    accuracytest = accuracy_score(testlabel, outputtest)
    print("The size of the test set is")
    print(np.shape(test))
    print("The accuracy for the test set is %r" % accuracytest, "and the classification report is")
    # print(confusion_matrix(testlabel, outputtest))
    print(classification_report(testlabel, outputtest))
    # class-membership probabilities for each test sample
    outputproba = forest.predict_proba(test)
    outperfor = {'prob0': outputproba[:, 0], 'prob1': outputproba[:, 1],
                 'output': outputtest, 'target': testlabel}
    outframe = DataFrame(outperfor)
    print(outframe)
    # outframe.to_csv(r'D:\allprob.csv', header=0)
    return accuracytest, outframe
def evaluate_val_for_train(self, sess, data):
    val_q, val_r, val_labels = data
    all_pred_label = []
    low = 0
    batch_size_for_val = 300
    while True:
        n_sample = min(low + batch_size_for_val, len(val_labels)) - low
        batch_q_len = self.get_sequences_length(val_q[low:low + n_sample], maxlen=self.max_sentence_len)
        batch_q = pad_sequences(val_q[low:low + n_sample], padding='post')
        batch_r_len = self.get_sequences_length(val_r[low:low + n_sample], maxlen=self.max_sentence_len)
        batch_r = pad_sequences(val_r[low:low + n_sample], padding='post')
        feed_dict = {
            self.input_q: np.array(batch_q),
            self.q_sequence_len: np.array(batch_q_len),
            self.input_r: np.array(batch_r),
            self.r_sequence_len: np.array(batch_r_len),
            self.input_y: np.array(val_labels[low:low + n_sample]),
            self.keep_prob: 1.0,
        }
        pred_label, loss = sess.run([self.class_label_pred, self.total_loss], feed_dict=feed_dict)
        all_pred_label.append(pred_label)
        low += batch_size_for_val
        if low >= len(val_labels):
            break
    all_pred_label = np.concatenate(all_pred_label, axis=0)
    # note: `loss` is the loss of the last validation batch only
    return loss, classification_report(val_labels, all_pred_label)
def print_cummulative_clf_report(self):
    """
    Prints the cumulative classification report for all added runs
    :return:
    """
    log.info('Classification report:\n\n' + classification_report(
        self.y_true, self.y_pred, target_names=['Benign', 'Malicious']))
def test(self, input_xy, target_names=None, **kwargs):
    # requires `from functools import reduce` and `import operator` at module level
    xs, trues = zip(*input_xy)
    # flatten each sample: (n, d1, d2, ...) -> (n, d1*d2*...)
    xs = np.stack(xs).reshape(-1, reduce(operator.mul, xs[0].shape))
    trues = np.stack(trues)
    results = self.predict(xs)
    print(classification_report(trues, results, target_names=target_names))
    return f1_score(trues, results, average="micro")
def test_model(classifier, X, y):
    y_pred = classifier.predict(X)
    conf_matrix = confusion_matrix(y, y_pred)
    accuracy = accuracy_score(y, y_pred)
    report = classification_report(y, y_pred)
    print(conf_matrix)
    print(report)
    print(accuracy)
def _mod_report(self, mod):
    report = classification_report(self.y_test, mod.predict(self.x_test))
    train_auc = roc_auc_score(self.y_train, mod.predict_proba(self.x_train)[:, 1])
    test_auc = roc_auc_score(self.y_test, mod.predict_proba(self.x_test)[:, 1])
    return report, train_auc, test_auc
def predict(self, X, trees, y_test):
    reports = []
    for tree in trees:
        y_predict = tree.predict(X)
        # classification_report expects (y_true, y_pred); the original passed
        # the predictions first, which swaps precision and recall per class
        reports.append(classification_report(y_test, y_predict))
    return reports
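# A minimal usage sketch (illustrative names; assumes this method lives on an
# ensemble-style wrapper instantiated as `ensemble`, with X_train/y_train and
# X_test/y_test at hand):
from sklearn.tree import DecisionTreeClassifier

trees = [DecisionTreeClassifier(max_depth=d).fit(X_train, y_train) for d in (3, 5, None)]
for report in ensemble.predict(X_test, trees, y_test):
    print(report)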
def run(self):
    train_X, train_y, dev_X, dev_y = self.extract_train_dev_data()
    if not self.dev_stage:
        test_X, test_y = self.extract_test_data()
    for c in self.classifiers:
        logger.info(" Classifier: {}".format(c.name))
        c.train(train_X, train_y)
        if self.dev_stage:
            logger.info(" Running classifier on dev data")
            pred_y = c.predict(dev_X)
            # note: this prints the label set seen in training, not the dev labels
            print('dev labels')
            print(set(train_y))
            logger.info(classification_report(dev_y, pred_y))
        else:
            logger.info(" Running classifier on test data")
            pred_y = c.predict(test_X)
            # note: this prints the label set seen in training, not the test labels
            print('test labels')
            print(set(train_y))
            logger.info(classification_report(test_y, pred_y))
def test(self, input_xy, target_names=None, **kwargs):
    input_x = []
    trues = []
    for x, y in input_xy:
        input_x.append(x)
        trues.append(y.argmax())
    results = self.predict(input_x, prob=False)
    trues = np.array(trues)
    print(classification_report(trues, results, target_names=target_names))
    return f1_score(trues, results, average="micro")
def compute(self, k=10):
    self.datasets = (X, y) = self.transformer(self.origin_datasets)
    # use the k argument rather than a hard-coded fold count
    kf = KFold(n_splits=k, shuffle=True)
    self.clfs = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf = self.clf_type()
        clf.fit(X_train, y_train)
        # keep the per-fold classifier (the original created the list but never filled it)
        self.clfs.append(clf)
        y_pred = clf.predict(X_test)
        print(classification_report(y_test, y_pred))
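# Per-fold reports on small folds are noisy; a common alternative (a sketch,
# not part of the class above) pools predictions across all folds and prints
# one report at the end:
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

def pooled_cv_report(clf_type, X, y, k=10):
    y_true_all, y_pred_all = [], []
    for train_index, test_index in KFold(n_splits=k, shuffle=True).split(X):
        clf = clf_type()
        clf.fit(X[train_index], y[train_index])
        y_true_all.extend(y[test_index])
        y_pred_all.extend(clf.predict(X[test_index]))
    print(classification_report(y_true_all, y_pred_all))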
def test_classification(self, test, testlabel, bestmodel):
    outputtest = bestmodel.predict(test)
    accuracytest = accuracy_score(testlabel, outputtest)
    print("The accuracy for the test set is %r" % accuracytest, "and the confusion matrix is")
    # confusion_matrix expects (y_true, y_pred); the original passed them reversed
    print(confusion_matrix(testlabel, outputtest))
    print(classification_report(testlabel, outputtest))
    # probaout = bestmodel.predict_proba(test)
    # probaout = DataFrame(probaout)
    # print(probaout)
    return outputtest
def calc_metrics(self, pred_labels, true_labels):
    # sklearn.metrics.classification is a private module that was removed in
    # newer scikit-learn releases; import from sklearn.metrics instead
    from sklearn.metrics import classification_report
    print(classification_report(true_labels, pred_labels))
    acc = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels, average='macro')
    recall = recall_score(true_labels, pred_labels, average='macro')
    f1 = f1_score(true_labels, pred_labels, average='macro')
    return {'acc': acc, 'p': precision, 'r': recall, 'f1': f1}
def random_forrest(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...RandomForest')
    clf = RandomForestClassifier(verbose=False, warm_start=True)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_random_forrest_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def evaluate(self, inputs: list, labels: list):
    predictions = []
    labels_max = []
    for j in range(len(inputs)):
        input_example = inputs[j]
        data_label = labels[j]
        nn_output = self._nn.predict(input_example)
        # both network outputs and labels are one-hot / score vectors,
        # so take the argmax to recover class indices
        predicted_index = np.argmax(nn_output)
        predictions.append(predicted_index)
        label_index = np.argmax(data_label)
        labels_max.append(label_index)
    return classification_report(labels_max, predictions, output_dict=False)
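# A toy sketch of the argmax-over-one-hot pattern used above (no network involved):
import numpy as np
from sklearn.metrics import classification_report

one_hot_labels = np.array([[1, 0], [0, 1], [0, 1]])
score_vectors = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
y_true = one_hot_labels.argmax(axis=1)  # [0, 1, 1]
y_pred = score_vectors.argmax(axis=1)   # [0, 1, 0]
print(classification_report(y_true, y_pred))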
def compare():
    with open('./output.txt', 'r', encoding='utf-8') as f:
        pred_label = []
        for line in f:
            arr = line.strip('\n').split('\t')
            pred_label.append(int(arr[1]))
    with open('./data/test.csv', 'r', encoding='utf-8') as f:
        label = []
        for line in f:
            lineno, sen1, sen2, tmp = line.strip().split('\t')
            label.append(int(tmp))
    print(classification_report(label, pred_label))
def evaluate(self, sess, vocab, token_indices, character_indices_padded, token_lengths,
             pattern, label_indices, datatype='train'):
    all_predictions = []
    all_y_true = []
    for i in range(len(token_indices)):
        feed_dict = {
            self.input_token_indices: token_indices[i],
            self.input_token_character_indices: character_indices_padded[i],
            self.input_token_lengths: token_lengths[i],
            self.input_token_patterns: pattern[i],
            self.dropout_keep_prob: 1.,
        }
        unary_scores, transition_params_trained = sess.run(
            [self.unary_scores, self.transition_parameters], feed_dict)
        predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
        # strip the padded begin/end positions
        predictions = predictions[1:-1]
        assert len(predictions) == len(token_indices[i])
        all_predictions.extend(predictions)
        all_y_true.extend(label_indices[i])
    label_predict = [vocab.labels[i] for i in all_predictions]
    label_true = [vocab.labels[i] for i in all_y_true]
    label_predict = utils_nlp.bioes_to_bio(label_predict)
    label_true = utils_nlp.bioes_to_bio(label_true)
    # keep only positions where either side carries an entity tag, and drop
    # the BIO prefixes so that evaluation is per entity type
    new_pre = []
    new_true = []
    for i in range(len(label_predict)):
        if label_true[i] != 'O' or label_predict[i] != 'O':
            new_pre.append(utils_nlp.remove_bio_from_label_name(label_predict[i]))
            new_true.append(label_true[i] if label_true[i] == 'O' else label_true[i][2:])
    labels = [label if label == 'O' else label[2:] for label in vocab.labels]
    labels = list(set(labels))
    report = classification_report(new_true, new_pre)
    print('matrix')
    matrix = confusion_matrix(new_true, new_pre, labels=labels)
    out_file = codecs.open(datatype + '_evaluate.txt', 'w', 'utf-8')
    out_file.writelines(' '.join(labels) + '\n')
    for row in matrix:
        out_file.writelines(' '.join([str(v) for v in row]) + '\n')
    out_file.close()
    print(matrix)
    print(report)
    return report
def decision_tree(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...decision tree')
    clf = DecisionTreeClassifier()
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_decission_tree_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def metrics(y_true, y_pred):
    # pick the higher-scoring of the two class columns as the predicted label
    y_final = []
    for i in range(y_pred.shape[0]):
        if y_pred[i][0] > y_pred[i][1]:
            y_final.append(0)
        else:
            y_final.append(1)
    print(y_final)
    # the sklearn.metrics.classification module is private and was removed in
    # newer scikit-learn releases; call sklearn.metrics.classification_report
    classify_report = classification_report(y_true, y_final)
    print('classify_report : \n', classify_report)
    return 0
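# The column comparison above is a row-wise argmax; an equivalent one-liner:
import numpy as np
scores = np.array([[0.7, 0.3], [0.1, 0.9]])
y_final = np.argmax(scores, axis=1).tolist()  # [0, 1]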
def voting_random_forrest(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...voting RandomForest')
    estimators = [(str(idd), RandomForestClassifier()) for idd in range(100)]
    clf = VotingClassifier(estimators=estimators)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_voting_rand_forest_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def voting_decision_tree(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...Voting decision tree')
    estimators = [(str(idd), DecisionTreeClassifier()) for idd in range(100)]
    clf = VotingClassifier(estimators=estimators, voting='soft')
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_voting_dectree_' + str(model_id) + '.pkl')
    return accuracy_score(y_test, predicted)
def bernoulli_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...BernoulliNB')
    # note: binarize expects a float threshold (or None); True is interpreted
    # numerically as a threshold of 1.0
    clf = BernoulliNB(binarize=True)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_bernoulli_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def gradient_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...GBC')
    clf = GradientBoostingClassifier(n_estimators=1000)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_gbc_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def adaboost_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...Ada')
    clf = AdaBoostClassifier()
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_adaboost_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def mlp_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...MLP')
    clf = MLPClassifier(early_stopping=True)
    print('training...')
    clf.fit(x, y)
    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))
    model_id = len(os.listdir(dir_models))
    joblib.dump(clf, dir_models + ticket + '_mlp_' + str(model_id) + '.pkl')
    return clf.score(x_test, y_test)
def cv_test_module(self, module, module_kwargs, fit_kwargs):
    # note: fit_kwargs is accepted but currently unused
    logging.info('Cross-validate testing module %s...' % module.__name__)
    xs, ys = self.dg.get_all()
    module_object = module(**module_kwargs)
    predict = cross_val_predict(
        module_object,
        xs,
        ys,
        cv=10,
        n_jobs=-1,
    )
    print(classification_report(ys, predict))
    logging.info('Cross-validate testing module %s finished' % module.__name__)
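# A minimal usage sketch (hypothetical harness instance `runner`;
# LogisticRegression is just an example estimator class):
from sklearn.linear_model import LogisticRegression

runner.cv_test_module(LogisticRegression, module_kwargs={'max_iter': 1000}, fit_kwargs={})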
def main(args):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "base_dir",
        nargs="?",
        default=os.path.join(os.getcwd(), "mseg_workspace"),
        help="this is the working directory, all sub dirs live under it",
    )
    parser.add_argument(
        "pm_dir",
        nargs="?",
        default="pm_default",
        help="this is the directory in which to store the prosodic model file",
    )
    parser.add_argument(
        "training_file",
        nargs="?",
        default=TRAIN_FILE_DEFAULT,
        help="name of CSV file that contains correctly annotated training examples",
    )
    parser.add_argument(
        "test_file",
        nargs="?",
        default=TEST_FILE_DEFAULT,
        help="name of CSV file that contains mysterious cases that must be tested",
    )
    parser.add_argument(
        "-lr",
        "--logistic_regression",
        default=False,
        action="store_true",
        help="use logistic regression classifier (default is RBF-SVM)",
    )
    args = parser.parse_args()

    base_dir = args.base_dir
    pm_dir = args.pm_dir
    tr_file = args.training_file
    test_fname = args.test_file
    use_lr = args.logistic_regression

    n_samples = -1
    tr_data = read_file(os.path.join(base_dir, tr_file), ",", skip_header=True)

    if not use_lr:
        # SVM training does not scale well, so subsample the training set
        n_samples = 6000
        report_fname = test_fname + "-report.txt"
    else:
        report_fname = test_fname + "-report-LR.txt"
    out_fname = test_fname + "-probabilities.dat"

    out_file = os.path.join(base_dir, pm_dir, out_fname)
    report_fname = os.path.join(base_dir, pm_dir, report_fname)

    # clear extant predictions file
    if os.path.exists(out_file):
        os.remove(out_file)
        print("removed", out_file)

    print(base_dir + "/" + tr_file + " -SVM-> ", out_file)
    test_data = read_file(os.path.join(base_dir, test_fname), ",", skip_header=True)

    # feature column selection
    sel = range(7, 30)
    (_, _, tr_samples, tr_classes) = dissect(tr_data, sel)
    (_, te_words, te_samples, te_classes) = dissect(test_data, sel)

    if n_samples > 0:
        tr_samples, _, tr_classes, _ = train_test_split(
            tr_samples, tr_classes, train_size=n_samples, stratify=tr_classes
        )

    p = sum(c == 1.0 for c in tr_classes)  # count the positive instances
    n = len(tr_classes) - p                # derive the negative instances
    print("n=", n, " p=", p)
    wgt = float(n) / float(p)              # cast and divide
    print("wgt=", wgt)

    scaler = preprocessing.StandardScaler().fit(np.array(tr_samples))
    tr_samples = scaler.transform(tr_samples)

    clf = None
    best_params = None

    # load a pickled model if one exists (overwrite_pkl is assumed to be a
    # module-level flag), otherwise run a randomized hyper-parameter search
    # and pickle the best estimator
    pkl_dir = os.path.join(base_dir, pm_dir, "pkl")
    pickled_model = os.path.join(pkl_dir, "svm_classifier.pkl")
    if os.path.exists(pickled_model) and not overwrite_pkl:
        clf = joblib.load(pickled_model)
        clf.set_params(verbose=True)
        print("loaded pickled model...", pickled_model)
    else:
        if not os.path.exists(pkl_dir):
            # output dir doesn't exist so make it
            os.makedirs(pkl_dir)
            print("made dir for pickled model:", pkl_dir)

        c_dist = scipy.stats.expon(scale=100)
        gamma_dist = scipy.stats.expon(scale=0.01)
        if use_lr:
            estr = LogisticRegression(class_weight="balanced")
            param_dist = {"C": c_dist}
        else:
            estr = svm.SVC(kernel="rbf", cache_size=800, probability=True, class_weight="balanced")
            param_dist = {"C": c_dist, "gamma": gamma_dist}
        searcher = RandomizedSearchCV(
            estr, param_distributions=param_dist, n_iter=100, n_jobs=-1, verbose=True, scoring="recall"
        )
        searcher.fit(tr_samples, tr_classes)
        # note: grid_scores_ was removed in newer scikit-learn; use cv_results_ there
        report(searcher.grid_scores_)
        clf = searcher.best_estimator_
        print("COMPARING CLF PARAMS WITH BEST PARAMS (shd be same)")
        print(clf.get_params())
        print(best_params)
        joblib.dump(clf, pickled_model)
        print(clf)

    # NOW TO TEST AGAINST HELD-OUT/TEST DATA
    te_samples = scaler.transform(te_samples)
    print("no test cases", len(te_samples))
    # predict_log_proba returns log-probability pairs [log(1-p), log(p)];
    # negating yields positive scores
    predictions = -1.0 * clf.predict_log_proba(te_samples)
    print(predictions)
    predicted_classes = clf.predict(te_samples)
    print(
        "TEST: Number of mislabelled points out of a total %d points : %d"
        % (len(te_samples), (te_classes != predicted_classes).sum())
    )
    print(classification_report(te_classes, predicted_classes))
    rpt = open(report_fname, "w")
    rpt.write(classification_report(te_classes, predicted_classes))
    rpt.write("\n")
    rpt.close()
    print("wrote report file", report_fname)

    pred_file = open(out_file, "w")
    pred_file.write("labels 0 1\n")  # this emulates an earlier file format for compatibility
    for word, prob_tuple, guessed_class in zip(te_words, predictions, predicted_classes):
        pred_file.write("%d %f %f %s\n" % (guessed_class, prob_tuple[0], prob_tuple[1], word))
    pred_file.close()
    print("wrote predictions file:", out_file)
    # stacked recurrent layers must return full sequences for the next RNN to consume
    model.add(SimpleRNN(X_train.shape[1], input_dim=X_train.shape[1], return_sequences=True))
    model.add(Activation('relu'))
    model.add(SimpleRNN(20000, return_sequences=True))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(SimpleRNN(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss=loss, optimizer=optim, metrics=['accuracy'])
    return model

# note: newer Keras uses `epochs` instead of the legacy `nb_epoch` argument
classifier = KerasClassifier(build_fn=create_model, nb_epoch=nb_epoch, batch_size=batch_size)
history = classifier.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)
Y_pred = classifier.predict(X_test, batch_size=batch_size)
# note: if Y_test is one-hot encoded, convert it with Y_test.argmax(axis=1)
# before comparing against the class indices returned by predict
print(classification_report(y_true=Y_test, y_pred=Y_pred))

plt.figure()
plt.plot(history.history['acc'])
plt.title('Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Training'], loc='upper left')
plt.savefig("data/acc.png")

# summarize history for loss
plt.figure()
plt.plot(history.history['loss'])
plt.title('Loss values')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# class_weight={0: 3, 1: 1, 2: 1, 3: 1, 4: 1, 5: 2, 6: 1})
# model = LogisticRegression(C=subsample, verbose=0, penalty='l1', max_iter=100)
# model = KNeighborsClassifier(n_neighbors=learning_rate)
# model = xgb.XGBRegressor(max_depth=depth, n_estimators=n_estimators, learning_rate=learning_rate,
#                          nthread=1, subsample=subsample, silent=True, colsample_bytree=0.8)
# model = LinearSVC(C=0.9, penalty='l2', dual=False, verbose=1, max_iter=100000)

model.fit(trtrfe, trtrtrue)
# floor the (possibly fractional) regressor outputs down to integer class labels
predicted = [math.floor(x) for x in model.predict(trtefe)]
# mean accuracy on the given test data and labels
score = model.score(trtefe, trtetrue)
print("score =", score)
print(classification_report(trtetrue, predicted))
print(confusion_matrix(trtetrue, predicted))

# `or True` forces every model to be kept as the current best; drop it to
# keep only genuinely better-scoring models
if score > best_score or True:
    best_model = model
    best_score = score
    best_model.fit(train_features, train_true)
    predicted = [math.floor(x) for x in best_model.predict(test_features)]
    fname = "data/net_result/sol_" + str(score) + "_" + str(time.time()) + ".csv"
    write_sol(predicted, fname)
print("this model", depth, "\t", subsample, "\t", score)
print("best model", best_score)
best_model.fit(trtefe, trtetrue)
predicted = [math.floor(x) for x in best_model.predict(test_features)]