def evaluate(self, dataset, mode="test"):
    # We use the test dataset because SemEval doesn't have a dev dataset
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3],
                "e1_mask": batch[4],
                "e2_mask": batch[5],
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  {:15}: {:.4f}".format(key, results[key]))

    return results
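The `evaluate` variants in this section all hand their predictions to a `write_prediction(args, output_file, preds)` helper that is not shown. A minimal sketch, assuming the SemEval-2010 Task 8 answer-key format (official test sentence ids start at 8001) and a hypothetical `get_label(args)` helper that returns the relation-label list:

import os

def write_prediction(args, output_file, preds):
    # Write one "<sentence_id>\t<relation_label>" line per prediction, in the
    # format expected by the official SemEval-2010 Task 8 scorer.
    relation_labels = get_label(args)  # hypothetical helper: list of label strings
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, pred in enumerate(preds):
            f.write("{}\t{}\n".format(8001 + idx, relation_labels[pred]))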
def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(preds, axis=1)
    write_prediction(label_list, os.path.join(training_args.eval_dir, "proposed_answers.txt"), preds)
    return {
        "accuracy": (preds == p.label_ids).astype(np.float32).mean().item(),
        "f1": official_f1(),
    }
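For context, a sketch of how a `compute_metrics(p: EvalPrediction)` callback like the one above is typically wired into a Hugging Face `Trainer`; `model`, `training_args`, and the two datasets are assumptions standing in for whatever the surrounding script defines:

from transformers import Trainer

trainer = Trainer(
    model=model,                      # assumed: the relation-classification model
    args=training_args,               # assumed: a TrainingArguments instance
    train_dataset=train_dataset,      # assumed: tokenized datasets
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # invoked on each evaluation pass
)
metrics = trainer.evaluate()          # returned metric keys are prefixed with "eval_"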
def evaluate(self):
    # self.load_model()  # Load model
    eval_sampler = SequentialSampler(self.test_dataset)
    eval_dataloader = DataLoader(self.test_dataset, sampler=eval_sampler, batch_size=self.config.batch_size)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(self.test_dataset))
    logger.info("  Batch size = %d", self.config.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    results = {}

    self.model.eval()  # set eval mode once, outside the loop

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3],
                      'e1_mask': batch[4],
                      'e2_mask': batch[5]}
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=1)

    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(result.keys()):
        logger.info("  %s = %s", key, str(result[key]))

    write_prediction(os.path.join(self.config.eval_dir, "proposed_answers.txt"), preds)

    return results
def main():
    # Python 2 script: note the print statements and cPickle module.
    operation = sys.argv[1]
    if operation == 'train':
        loss = sys.argv[2]
        output_file = sys.argv[5]
        X, y = utils.load_data(feature_file=sys.argv[3], label_file=sys.argv[4], dtype=np.float64)
        scaler = StandardScaler(with_mean=False)
        # scaler.fit(X)
        X, y = utils.to_single_output(X, y)
        # X = scaler.transform(X)
        with gzip.open(output_file + '.scaler', 'wb') as f:
            cPickle.dump(scaler, f, cPickle.HIGHEST_PROTOCOL)
        print 'training...'
        model = train(X, y, loss)
        with gzip.open(output_file, 'wb') as f:
            cPickle.dump(model, f, cPickle.HIGHEST_PROTOCOL)
        print 'model saved to %s.' % output_file
    elif operation == 'test':
        model_file = sys.argv[2]
        feature_file = sys.argv[3]
        output_file = sys.argv[4]
        X = utils.load_data(feature_file, dtype=np.float64)
        with gzip.open(model_file + '.scaler', 'rb') as f:
            scaler = cPickle.load(f)
        with gzip.open(model_file, 'rb') as f:
            print 'loading model...'
            model = cPickle.load(f)
        print 'testing...'
        with open(output_file, 'wb') as f:
            # X = scaler.transform(X)
            y = model.predict_log_proba(X)
            utils.write_prediction(f, y, model.classes_)
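A hedged usage sketch for the CLI above (the script name and file names are placeholders):

# Train: argv = [script, 'train', <loss>, <feature_file>, <label_file>, <output_file>]
#   $ python model.py train log features.gz labels.gz model.pkl.gz
# The fitted scaler is pickled alongside the model as <output_file>.scaler.
#
# Test: argv = [script, 'test', <model_file>, <feature_file>, <output_file>]
#   $ python model.py test model.pkl.gz test_features.gz predictions.txt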
logistic = linear_model.LogisticRegression(max_iter=1000000)
logistic.fit(features, target)
print(logistic.score(features, target))

scores = model_selection.cross_val_score(logistic, features, target, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())

test_features = test[["Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"]].values
utils.write_prediction(logistic.predict(test_features), "logistic_regression.csv")

print("\nUse polynomial features")
poly = preprocessing.PolynomialFeatures(degree=2)
features_ = poly.fit_transform(features)

clf = linear_model.LogisticRegression(C=10, max_iter=1000000)
clf.fit(features_, target)
print(clf.score(features_, target))

scores = model_selection.cross_val_score(clf, features_, target, scoring='accuracy', cv=10)
print(scores)
print(grid_search.grid_scores_, grid_search.best_params_, grid_search.best_score_)
'''  # closes a commented-out grid-search block that begins before this excerpt

# Now decrease the learning rate and apply more boosting iterations (n_estimators=1500).
gbm = ensemble.GradientBoostingClassifier(learning_rate=0.005,
                                          min_samples_split=40,
                                          min_samples_leaf=1,
                                          max_features=2,
                                          max_depth=12,
                                          n_estimators=1500,
                                          subsample=0.75,
                                          random_state=1)
gbm = gbm.fit(features, target)
print(gbm.feature_importances_)
print(gbm.score(features, target))

# This cross-validation takes quite a long time.
scores = model_selection.cross_val_score(gbm, features, target, scoring='accuracy', cv=20)
print(scores)
print(scores.mean())

test_features = test[["Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"]].values
prediction_gbm = gbm.predict(test_features)
utils.write_prediction(prediction_gbm, "resultsgbm.csv")
import pandas as pd
import numpy as np
from sklearn import tree, model_selection

import utils

test = pd.read_csv("test.csv")
utils.clean_data(test)

train = pd.read_csv("train.csv")
utils.clean_data(train)

target = train["Survived"].values
feature_names = ["Pclass", "Age", "Fare", "Embarked", "Sex", "SibSp", "Parch"]
features = train[feature_names].values

generalized_tree = tree.DecisionTreeClassifier(random_state=1, max_depth=7, min_samples_split=2)
generalized_tree = generalized_tree.fit(features, target)

features_test = test[feature_names].values
predictions = generalized_tree.predict(features_test)
utils.write_prediction(predictions, "naive_decision_Tree_prediction.csv")
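The Titanic snippets in this section share a `utils.write_prediction(prediction, name)` helper that is not shown. One plausible implementation, assuming Kaggle's two-column submission format (the official test set's PassengerIds run 892-1309):

import pandas as pd

def write_prediction(prediction, name):
    # Kaggle Titanic submissions are a CSV with PassengerId and Survived columns;
    # 892 is the first PassengerId in the official test split.
    passenger_ids = range(892, 892 + len(prediction))
    submission = pd.DataFrame({"PassengerId": passenger_ids, "Survived": prediction})
    submission.to_csv(name, index=False)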
def evaluate(self, mode):
    # We use the test dataset because SemEval doesn't have a dev dataset
    if mode == "test":
        dataset = self.test_dataset
    elif mode == "dev":
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size, drop_last=True)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": batch[3],
                "e1_mask": batch[4],
                "e2_mask": batch[5],
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    # result = compute_metrics(preds, out_label_ids)
    # results.update(result)

    # Added 03/27: compute macro-averaged precision/recall/F1 and accuracy directly
    precision = precision_score(out_label_ids.tolist(), preds.tolist(), average='macro', zero_division=0)
    recall = recall_score(out_label_ids.tolist(), preds.tolist(), average='macro', zero_division=0)
    f1 = f1_score(out_label_ids.tolist(), preds.tolist(), average='macro', zero_division=0)
    acc = accuracy_score(out_label_ids.tolist(), preds.tolist())
    results["precision"] = precision
    results["recall"] = recall
    results["f1"] = f1
    results["acc"] = acc

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  {} = {:.4f}".format(key, results[key]))

    # Also log per-label precision/recall/F1
    logger.info(classification_report(out_label_ids.tolist(), preds.tolist(), target_names=self.label_lst))

    return results
def evaluate(self, mode):
    # We use the test dataset because SemEval doesn't have a dev dataset
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3],
                'e1_mask': batch[4],
                'e2_mask': batch[5]
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    return results
target = train["Survived"].values features_forest = train[[ "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked" ]].values print "\nUse Random Forest classifier" forest = ensemble.RandomForestClassifier(max_depth=7, min_samples_split=4, n_estimators=1000, random_state=1, n_jobs=-1) forest = forest.fit(features_forest, target) print(forest.feature_importances_) print(forest.score(features_forest, target)) scores = model_selection.cross_val_score(forest, features_forest, target, scoring='accuracy', cv=10) print scores print scores.mean() test_features_forest = test[[ "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked" ]].values prediction_forest = forest.predict(test_features_forest) utils.write_prediction(prediction_forest, "results/random_forest.csv")
print(train.shape)

target = train["Survived"].values
features = train[["Pclass", "Sex", "Age", "Fare"]].values

decision_tree = tree.DecisionTreeClassifier(random_state=1)
decision_tree = decision_tree.fit(features, target)
print(decision_tree.feature_importances_)
print(decision_tree.score(features, target))

print("\nTry on test set")
test_features = test[["Pclass", "Sex", "Age", "Fare"]].values
prediction = decision_tree.predict(test_features)
utils.write_prediction(prediction, "results/decision_tree.csv")

print("\nCorrect overfitting")
feature_names = ["Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"]
features_two = train[feature_names].values
decision_tree_two = tree.DecisionTreeClassifier(max_depth=7, min_samples_split=2, random_state=1)
decision_tree_two = decision_tree_two.fit(features_two, target)
print(decision_tree_two.feature_importances_)
print(decision_tree_two.score(features_two, target))

tree.export_graphviz(decision_tree_two, feature_names=feature_names, out_file="./graphs/decision_tree_two.dot")
def evaluate(self, mode):
    # We use the test dataset because SemEval doesn't have a dev dataset
    if mode == "test":
        dataset = self.test_dataset
    elif mode == "dev":
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    # Only the focal and label-smoothing criteria are used below;
    # criterion1/criterion2 are created but unused.
    criterion1 = create_criterion('cross_entropy')
    criterion2 = create_criterion('f1')
    criterion3 = create_criterion('focal')
    criterion4 = create_criterion('label_smoothing')

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            # Unlike the other variants, this model returns logits only, so the
            # loss is computed externally instead of unpacked from outputs.
            logits = self.model(input_ids=batch[0],
                                attention_mask=batch[1],
                                e1_mask=batch[4],
                                e2_mask=batch[5])
            loss1 = criterion3(logits, batch[3])
            loss2 = criterion4(logits, batch[3])
            tmp_eval_loss = loss1 + loss2

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = batch[3].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, batch[3].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    result = compute_metrics(preds, out_label_ids)
    print(f'evaluate acc: {result}')
    results.update(result)

    logger.info("***** Eval results *****")
    for key in sorted(results.keys()):
        logger.info("  {} = {:.4f}".format(key, results[key]))

    return results
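The `create_criterion` factory used above is not shown. A minimal sketch under the assumption that each name maps to a PyTorch loss module; the focal-loss definition and the 0.1 smoothing value are illustrative choices, and the 'f1' variant is omitted:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Cross-entropy down-weighted for well-classified examples."""
    def __init__(self, gamma=2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        ce = F.cross_entropy(logits, targets, reduction="none")
        pt = torch.exp(-ce)  # estimated probability of the true class
        return ((1.0 - pt) ** self.gamma * ce).mean()

def create_criterion(name):
    if name == "cross_entropy":
        return nn.CrossEntropyLoss()
    if name == "focal":
        return FocalLoss()
    if name == "label_smoothing":
        return nn.CrossEntropyLoss(label_smoothing=0.1)  # requires PyTorch >= 1.10
    raise ValueError("unknown criterion: {}".format(name))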
def evaluate(self, mode):
    """
    Eval process.
    :param mode: "dev" or "test"
    """
    # We use the test dataset because SemEval doesn't have a dev dataset
    if mode == 'test':
        dataset = self.test_dataset
    elif mode == 'dev':
        dataset = self.dev_dataset
    else:
        raise Exception("Only dev and test dataset available")

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.batch_size)

    # Eval!
    logger.info("***** Running evaluation on %s dataset *****", mode)
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", self.args.batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None

    self.model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(self.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3],
                'e1_mask': batch[4],
                'e2_mask': batch[5],
                'e1_ids': batch[6],
                'e2_ids': batch[7],
                'graph': self.graph,
                'edge_feature': self.edge_feature,
                'entity_feature': self.entity_feature
            }
            outputs = self.model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    results = {"loss": eval_loss}
    preds = np.argmax(preds, axis=1)

    write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

    result = compute_metrics(self.args.task, preds, out_label_ids)
    results.update(result)

    # logger.info("***** Eval results *****")
    # for key in sorted(results.keys()):
    #     logger.info("  {} = {:.4f}".format(key, results[key]))

    output_eval_file = os.path.join("eval", "eval_results.txt")
    with open(output_eval_file, "a") as writer:
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
        # Blank lines separate successive eval runs in the appended log file.
        for i in range(0, 10):
            writer.write("\n")

    return results
def main():
    # -------------- Stage 1: get user dict --------------
    # Read all data
    X, y, users_train = read_train_txt(os.path.join(DATA_DIR, TRAIN_TXT))
    X_dev, y_dev, users_dev = read_train_txt(os.path.join(DATA_DIR, DEV_TXT))
    X_test, ids, users_test = read_test_txt(os.path.join(DATA_DIR, TEST_TXT))
    X_train, y_train = X, y

    # Merge training data according to user id
    X_train_merged, y_train_merged = get_user_dict(X_train, y_train, users_train)
    # Merge dev data according to user id
    X_dev, y_dev = get_user_dict(X_dev, y_dev, users_dev)

    new_X_train = X_train + X_train_merged + X_dev
    new_y_train = y_train + y_train_merged + y_dev
    users_test, X_test, user_ids_dict = get_user_dict_test(X_test, ids, users_test)

    # -------------- Stage 2: Tf-idf --------------
    # Compute tf-idf features. Note: the vectorizer is fitted on the test
    # texts and only applied to train/dev, the reverse of the usual
    # fit-on-train convention.
    vectorizer = TfidfVectorizer(sublinear_tf=True, ngram_range=(1, 1))
    X_test = vectorizer.fit_transform(X_test)
    X_train = vectorizer.transform(new_X_train)
    X_dev = vectorizer.transform(X_dev)

    # -------------- Stage 3: Training --------------
    print("--- Start training ---")
    svm = OneVsRestClassifier(LinearSVC(C=1.5), n_jobs=-1)
    svm.fit(X_train, new_y_train)
    # nb = MultinomialNB()
    # nb.fit(X_train, y_train)
    # knn = KNeighborsClassifier()
    # knn.fit(X_train, y_train)
    print("--- Finished training ---")

    # -------------- Stage 4: Predictions --------------
    # print(svm.score(X_dev, y_dev))
    # print(nb.score(X_dev, y_dev))
    predictions = svm.predict(X_test)
    write_prediction(OUT_CSV, predictions, users_test, user_ids_dict)
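For contrast, the conventional ordering fits the vectorizer on the training text and only transforms the held-out split; a minimal sketch reusing the variable names above:

vectorizer = TfidfVectorizer(sublinear_tf=True, ngram_range=(1, 1))
X_train = vectorizer.fit_transform(new_X_train)  # learn vocabulary and idf weights on train
X_test = vectorizer.transform(X_test)            # reuse them unchanged on test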
features = train[['Pclass', 'Age', 'Fare', 'Embarked', 'Sex', 'SibSp', 'Parch']].values

# Logistic regression
classifier = linear_model.LogisticRegression(C=10)
classifier = classifier.fit(features, target)
print(classifier.score(features, target))

scores = model_selection.cross_val_score(classifier, features, target, scoring='accuracy', cv=50)
# NOTE: the original passed `scores` (a 1-D array of CV accuracies) to
# predict(), which is not a valid feature matrix; predicting on the training
# features here -- the intended input was presumably a test feature matrix.
lin_predict = classifier.predict(features)
utils.write_prediction(lin_predict, 'resultlogistic_regression.csv')

# Logistic regression on degree-2 polynomial features, which fits the
# training data much better than the plain linear features.
poly = preprocessing.PolynomialFeatures(degree=2)
poly_features = poly.fit_transform(features)

classifier_ = classifier.fit(poly_features, target)
print(classifier_.score(poly_features, target))

# Cross-validate on the polynomial features (the original used the plain features here).
scores = model_selection.cross_val_score(classifier, poly_features, target, scoring='accuracy', cv=10)
print(scores)
print(scores.mean())