def predict(in_file, out_file):
    xvals, yvals = utils.load_hot(in_file)
    network = build_network()
    predictions = utils.predict_hot(xvals, network, 'complicated1.tflearn')
    print('Accuracy: {}%'.format(utils.get_accuracy_hot(yvals, predictions)))
    utils.write_predictions(xvals, predictions, out_file)
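# NOTE: the `utils.write_predictions` helper used by the tflearn scripts in this
# listing is not shown. A minimal, hypothetical sketch of such a helper is given
# here purely for illustration; the CSV layout is an assumption, not the project's
# actual implementation.
import csv

def write_predictions(xvals, predictions, out_file):
    """Hypothetical: write one CSV row per example (features followed by the predicted label)."""
    with open(out_file, 'w', newline='') as f:
        writer = csv.writer(f)
        for x, y in zip(xvals, predictions):
            writer.writerow(list(x) + [y])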
def predict(dataset):
    """Generate predictions for audio tagging and sound event detection.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
    """
    import capsnet

    # Load (standardized) input data and associated file names
    test_x, _, names = _load_data(dataset)

    # Predict class probabilities for each model (epoch)
    at_preds, sed_preds = [], []
    for epoch in _determine_epochs(cfg.prediction_epochs):
        model = _load_model(epoch)
        at_pred, sed_pred = utils.timeit(
            lambda: capsnet.gccaps_predict(test_x, model),
            '[Epoch %d] Predicted class probabilities' % epoch)
        at_preds.append(at_pred)
        sed_preds.append(sed_pred)

    # Average predictions to give an overall output
    total_at_pred = np.mean(at_preds, axis=0)
    total_sed_pred = np.mean(sed_preds, axis=0)

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset.name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    utils.write_predictions(names, total_at_pred, predictions_path % 'at')
    utils.write_predictions(names, total_sed_pred, predictions_path % 'sed')
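# For reference, the ensemble averaging above is an element-wise arithmetic mean over
# the per-epoch prediction arrays. A small self-contained illustration with made-up
# shapes (3 models, 5 clips, 17 classes):
import numpy as np

at_preds = [np.random.rand(5, 17) for _ in range(3)]  # one probability matrix per model
total_at_pred = np.mean(at_preds, axis=0)             # element-wise mean over the ensemble
print(total_at_pred.shape)                            # -> (5, 17)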
def evaluate(args, model, tokenizer_a, tokenizer_b, is_double=True):
    dataset, examples, features = load_and_cached_examples(
        args, tokenizer_a, tokenizer_b, evaluate=True,
        output_examples=True, is_double=is_double)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    all_results = []
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_a_id': batch[0],
                'input_b_id': batch[1],
                'input_a_mask': batch[2],
                'input_b_mask': batch[3],
                'input_a_length': batch[4],
                'input_b_length': batch[5],
                'label': batch[6]
            }
            outputs = model(**inputs)
            loss = outputs[0]
            predict = torch.argmax(outputs[1], -1)
            all_results.extend(to_list(predict.view(-1)))

    output_prediction_file = os.path.join(args.output_dir, "predictions.json")
    all_labels = write_predictions(examples, all_results, output_prediction_file)
    accuracy, confusion = eval_cross(all_labels, all_results)
    return accuracy, confusion
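# The `to_list` helper used above is not defined in this snippet; the usual
# definition in this kind of evaluation code (assumed here) simply moves a
# tensor to the CPU and converts it to a plain Python list:
def to_list(tensor):
    return tensor.detach().cpu().tolist()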
#utils.plot_tune_results('Perceptron', 'T', Ts, *pct_tune_results)
#utils.plot_tune_results('Avg Perceptron', 'T', Ts, *avg_pct_tune_results)
#utils.plot_tune_results('Avg Passive-Aggressive', 'T', Ts, *avg_pa_tune_results_T)
#utils.plot_tune_results('Avg Passive-Aggressive', 'L', Ls, *avg_pa_tune_results_L)

print("(train accuracy, test accuracy) after modification")
print(p1.average_passive_aggressive_accuracy(train_final_features, test_final_features,
                                             train_labels, test_labels, best_T, best_L))

#-------------------------------------------------------------------------------
#
#-------------------------------------------------------------------------------
#
#
#-------------------------------------------------------------------------------
submit_texts = [sample['text'] for sample in utils.load_data('reviews_submit.tsv')]

# 1. Extract the preferred features from the train and submit data
dictionary = p1.modified_bag_of_words(submit_texts)
train_final_features = p1.extract_final_features(train_texts, dictionary)
submit_final_features = p1.extract_final_features(submit_texts, dictionary)

# 2. Train the most accurate classifier
final_thetas = p1.average_passive_aggressive(train_final_features, train_labels, 2, 50)

# 3. Classify and write out the submit predictions.
submit_predictions = p1.classify(submit_final_features, *final_thetas)
utils.write_predictions('reviews_submit.tsv', submit_predictions)
#-------------------------------------------------------------------------------
def main(): parser = argparse.ArgumentParser() # # 必要参数 parser.add_argument('--task', default='multi', type=str, help='Task affecting load data and vectorize feature') parser.add_argument( '--loss_type', default='double', type=str, help='Select loss double or single, only for multi task' ) # 针对multi任务才有效 parser.add_argument( "--bert_model", default="bert-base-uncased", type=str, help= "Bert pre-trained model selected in the list: bert-base-uncased,bert-large-uncased, " "bert-base-cased, bert-large-cased, bert-base-multilingual-uncased,bert-base-chinese," "bert-base-multilingual-cased.") # 选择预训练模型参数 parser.add_argument("--debug", default=False, help="Whether run on small dataset") # 正常情况下都应该选择false parser.add_argument( "--output_dir", default="./SQuAD/output/", type=str, help= "The output directory where the model checkpoints and predictions will be written." ) # # 其他参数 parser.add_argument("--train_file", default="./SQuAD/version/train.json", type=str, help="SQuAD json for training. E.g., train-v1.1.json") parser.add_argument( "--predict_file", default="./SQuAD/version/prediction.json", type=str, help= "SQuAD json for predictio ns. E.g., dev-v1.1.json or test-v1.1.json") parser.add_argument( "--max_seq_length", default=384, type=int, help= "The maximum total input sequence length after WordPiece tokenization. Sequences " "longer than this will be truncated, and sequences shorter than this will be padded." ) parser.add_argument( "--doc_stride", default=128, type=int, help= "When splitting up a long document into chunks, how much stride to take between chunks." ) parser.add_argument( "--max_query_length", default=64, type=int, help= "The maximum number of tokens for the question. Questions longer than this will be " "truncated to this length.") # # 控制参数 parser.add_argument("--do_train", default=True, help="Whether to run training.") parser.add_argument("--do_predict", default=True, help="Whether to run eval on the dev set.") parser.add_argument("--train_batch_size", default=18, type=int, help="Total batch size for training.") parser.add_argument("--predict_batch_size", default=18, type=int, help="Total batch size for predictions.") parser.add_argument("--learning_rate", default=3e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") parser.add_argument( "--warmup_proportion", default=0.1, type=float, help= "Proportion of training to perform linear learning rate warmup for.") parser.add_argument( "--n_best_size", default=20, type=int, help= "The total number of n-best predictions to generate in the nbest_predictions.json file." ) parser.add_argument( "--max_answer_length", default=30, type=int, help= "The maximum length of an answer that can be generated.This is needed because the start " "and end predictions are not conditioned on one another.") parser.add_argument( "--verbose_logging", default=False, help= "If true, all of the warnings related to data processing will be printed.A number of " "warnings are expected for a normal SQuAD evaluation.") parser.add_argument("--no_cuda", default=False, help="Whether not to use CUDA when available") parser.add_argument('--seed', type=int, default=42, help="random seed for initialization") parser.add_argument( '--gradient_accumulation_steps', type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass." 
)
    parser.add_argument(
        "--do_lower_case",
        default=True,
        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument(
        '--fp16',
        default=False,
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling. Positive power of 2: static loss scaling value.\n")
    parser.add_argument(
        '--version_2_with_negative',
        default=False,
        help='If true, the SQuAD examples contain some that do not have an answer.')
    parser.add_argument(
        '--null_score_diff_threshold',
        type=float,
        default=0.0,
        help="If null_score - best_non_null is greater than the threshold predict null.")
    args = parser.parse_args()

    # The `if` branch is the single-machine case, the `else` branch the distributed one;
    # since no distributed system is available, training uses a single machine with multiple GPUs. 10.24
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    # The next few lines are routine: they only configure how log messages are formatted. 10.24
    logging.basicConfig(
        format='%(asctime)s-%(levelname)s-%(name)s-%(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
    logger.info(
        "device:{}, n_gpu:{}, distributed training:{}, 16-bits training:{}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    # The following lines set up the run parameters. 10.24
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps
    random.seed(args.seed)  # set the random seed
    np.random.seed(args.seed)  # set the random seed
    torch.manual_seed(args.seed)  # seed the CPU RNG so that results are deterministic
    if n_gpu > 0:
        # When using multiple GPUs, seed all of them with torch.cuda.manual_seed_all()
        torch.cuda.manual_seed_all(args.seed)

    # The next checks are again routine sanity checks. 10.24
    if not args.do_train and not args.do_predict:
        raise ValueError(
            "At least one of `do_train` or `do_predict` must be True.")
    if args.do_train:
        if not args.train_file:
            raise ValueError(
                "If `do_train` is True, then `train_file` must be specified.")
    if args.do_predict:
        if not args.predict_file:
            raise ValueError(
                "If `do_predict` is True, then `predict_file` must be specified."
) # 以下2句是用来判断output_dir是否存在,若不存在,则创建即可(感觉有这个东西反而不太好,因为需要空文件夹)10.24 # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train: # raise ValueError("Output directory () already exists and is not empty.") if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) # 这个东西是用来干啥的(从tokenization中读取,对Tokenizer进行初始化操作)10.24 tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) # 从data中读取数据的方式,一种是单队列的读取方式,另一种是多通道读取方式10.24 if args.task == 'squad': read_examples = read_squad_examples elif args.task == 'multi': read_examples = read_multi_examples # 用来加载训练样例以及优化的步骤10.24 train_examples = None num_train_optimization_steps = None if args.do_train: train_examples = read_examples( input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative) if args.debug: train_examples = train_examples[:100] num_train_optimization_steps = \ int(len(train_examples)/args.train_batch_size/args.gradient_accumulation_steps) * args.num_train_epochs if args.local_rank != -1: num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size( ) # 模型准备中ing10.24 model = BertForQuestionAnswering.from_pretrained( args.bert_model, cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank))) # model = torch.nn.DataParallel(model).cuda() # 判断是否使用float16编码10.24 if args.fp16: # model.half().cuda() model.half() # 将模型加载到相应的CPU或者GPU中10.24 model.to(device) # 配置优化器等函数10.24 if args.do_train: param_optimizer = list(model.named_parameters()) # hack to remove pooler, which is not used # thus it produce None grad that break apex param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]] no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in param_optimizer if not any(nd in n for nd in no_decay) ], 'weight_decay': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] if args.fp16: try: # from apex.optimizers import FP16_Optimizer from apex.fp16_utils import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." 
) optimizer = FusedAdam(optimizer_grouped_parameters, lr=args.learning_rate, bias_correction=True) if args.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) warmup_linear = WarmupLinearSchedule( warmup=args.warmup_proportion, t_total=num_train_optimization_steps) else: optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=num_train_optimization_steps) # 进行模型的拟合训练10.24 global_step = 0 if args.do_train: # 训练语料的特征提取 train_features = convert_examples_to_features( examples=train_examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, is_training=True) logger.info("***** Running training *****") logger.info(" Num orig examples = %d", len(train_examples)) logger.info(" Num split examples = %d", len(train_features)) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_start_positions = torch.tensor( [f.start_position for f in train_features], dtype=torch.long) all_end_positions = torch.tensor( [f.end_position for f in train_features], dtype=torch.long) all_start_vector = torch.tensor( [f.start_vector for f in train_features], dtype=torch.float) all_end_vector = torch.tensor([f.end_vector for f in train_features], dtype=torch.float) all_content_vector = torch.tensor( [f.content_vector for f in train_features], dtype=torch.float) # # 替换的内容all_start_positions以及all_end_positions # all1_start_positions = [] # for i in range(len(train_features)): # for j in range(len(train_features[i].start_position)): # all1_start_positions.append(train_features[i].start_position[j]) # all_start_positions = torch.tensor([k for k in all1_start_positions], dtype=torch.long) # all1_end_positions = [] # for i in range(len(train_features)): # for j in range(len(train_features[i].end_position)): # all1_end_positions.append(train_features[i].end_position[j]) # all_end_positions = torch.tensor([k for k in all1_end_positions], dtype=torch.long) # #################################################################### train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions, all_start_vector, all_end_vector, all_content_vector) if args.local_rank == -1: train_sampler = RandomSampler(train_data) # 随机采样器 else: train_sampler = DistributedSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) model.train() for ep in trange(int(args.num_train_epochs), desc="Epoch"): # 每次都叫他进行分发,这样的话,就可以进行多GPU训练 model = torch.nn.DataParallel(model).cuda() for step, batch in enumerate( tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])): if n_gpu == 1: batch = tuple( t.to(device) for t in batch) # multi-gpu does scattering it-self input_ids, input_mask, segment_ids, start_positions, end_positions, start_vector, end_vector, content_vector = batch loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions, start_vector, end_vector, content_vector, args.loss_type) if n_gpu > 1: loss = loss.mean() # mean() to average on 
multi-gpu. print("loss率为:{}".format(loss)) if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: optimizer.backward(loss) else: loss.backward() if (step + 1) % args.gradient_accumulation_steps == 0: if args.fp16: # modify learning rate with special warm up BERT uses # if args.fp16 is False, BertAdam is used and handles this automatically lr_this_step = args.learning_rate * warmup_linear.get_lr( global_step, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() global_step += 1 print("\n") print(ep) output_model_file = os.path.join(args.output_dir, str(ep) + WEIGHTS_NAME) output_config_file = os.path.join(args.output_dir, str(ep) + CONFIG_NAME) torch.save(model.state_dict(), output_model_file) if isinstance(model, torch.nn.DataParallel): model = model.module model.config.to_json_file(output_config_file) tokenizer.save_vocabulary(args.output_dir) # 这个是用来加载进行微调调好后的代码以方便进行预测10.25 if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0): # Save a trained model, configuration and tokenizer model_to_save = model.module if hasattr( model, 'module') else model # Only save the model it-self # If we save using the predefined names, we can load using `from_pretrained` output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME) output_config_file = os.path.join(args.output_dir, CONFIG_NAME) torch.save(model_to_save.state_dict(), output_model_file) model_to_save.config.to_json_file(output_config_file) tokenizer.save_vocabulary(args.output_dir) # Load a trained model and vocabulary that you have fine-tuned model = BertForQuestionAnswering.from_pretrained(args.output_dir) tokenizer = BertTokenizer.from_pretrained( args.output_dir, do_lower_case=args.do_lower_case) else: model = BertForQuestionAnswering.from_pretrained(args.output_dir) tokenizer = BertTokenizer.from_pretrained( args.output_dir, do_lower_case=args.do_lower_case) # 再次将GPU加入10.25 model.to(device) # 这部分就是进行相应的预测(用于生成预测文件) if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0): eval_examples = \ read_examples(input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative) if args.debug: eval_examples = eval_examples[:100] eval_features = convert_examples_to_features( examples=eval_examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, is_training=False) logger.info("***** Running predictions *****") logger.info(" Num orig examples = %d", len(eval_examples)) logger.info(" Num split examples = %d", len(eval_features)) logger.info(" Batch size = %d", args.predict_batch_size) all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long) eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) # Run prediction for full data eval_sampler = SequentialSampler(eval_data) eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size) model.eval() all_results = [] logger.info("Start evaluating") for input_ids, input_mask, segment_ids, example_indices in tqdm( eval_dataloader, desc="Evaluating", 
disable=args.local_rank not in [-1, 0]): if len(all_results) % 1000 == 0: logger.info("Processing example: %d" % (len(all_results))) input_ids = input_ids.to(device) input_mask = input_mask.to(device) segment_ids = segment_ids.to(device) with torch.no_grad(): batch_start_logits, batch_end_logits = model( input_ids, segment_ids, input_mask) for i, example_index in enumerate(example_indices): start_logits = batch_start_logits[i].detach().cpu().tolist() end_logits = batch_end_logits[i].detach().cpu().tolist() eval_feature = eval_features[example_index.item()] unique_id = int(eval_feature.unique_id) all_results.append( RawResult(unique_id=unique_id, start_logits=start_logits, end_logits=end_logits)) middle_result = os.path.join(args.output_dir, 'middle_result.pkl') pickle.dump([eval_examples, eval_features, all_results], open(middle_result, 'wb')) output_prediction_file = os.path.join(args.output_dir, "predictions.json") output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json") if (args.loss_type == 'double'): write_predictions_couple_labeling( eval_examples, eval_features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold) elif (args.loss_type == 'single'): write_predictions_single_labeling( eval_examples, eval_features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold) elif (args.loss_type == 'origin') or (args.task == 'multi' and args.loss_type == 'squad'): write_predictions(eval_examples, eval_features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold) else: raise ValueError('{} dataset and {} loss is not support'.format( args.task, args.loss_type))
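# The `RawResult` container collected during evaluation above is, in the reference
# BERT SQuAD scripts, a plain namedtuple keyed by each feature's unique_id; a sketch
# of that definition (assumed, since it is not part of this snippet):
import collections

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])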
def predict(in_file, out_file):
    xvals, yvals = utils.load_data(in_file)
    network = build_network()
    predictions = utils.predict(xvals, network, 'circle.tflearn')
    print('Accuracy: {}%'.format(utils.get_accuracy(yvals, predictions)))
    utils.write_predictions(xvals, predictions, out_file)
#-------------------------------------------------------------------------------
# dictionary = p1.bag_of_words(train_texts)
#
# train_final_features = p1.extract_final_features(train_texts, dictionary)
# val_final_features = p1.extract_final_features(val_texts, dictionary)
# test_final_features = p1.extract_final_features(test_texts, dictionary)
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Section 3.13
#
# Modify the code below to extract your best features from the submission data
# and then classify it using your most accurate classifier.
#-------------------------------------------------------------------------------
submit_texts = [
    sample['text'] for sample in utils.load_data('reviews_submit.tsv')
]

# 1. Extract your preferred features from the train and submit data
dictionary = p1.bag_of_words(submit_texts)
train_final_features = p1.extract_final_features(train_texts, dictionary)
submit_final_features = p1.extract_final_features(submit_texts, dictionary)

# 2. Train your most accurate classifier
final_thetas = p1.average_perceptron(train_final_features, train_labels, T=50)

# 3. Classify and write out the submit predictions.
submit_predictions = p1.classify(submit_final_features, *final_thetas)
utils.write_predictions('reviews_submit.tsv', submit_predictions)
#-------------------------------------------------------------------------------
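# The course module `p1` is not included in this listing. A minimal sketch of what a
# `bag_of_words` dictionary builder typically looks like (hypothetical, for illustration
# only): it maps each distinct token to a feature index.
def bag_of_words(texts):
    """Hypothetical: build a word -> feature-index dictionary from raw texts."""
    dictionary = {}
    for text in texts:
        for word in text.lower().split():
            if word not in dictionary:
                dictionary[word] = len(dictionary)
    return dictionary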
def evaluate(args, model, tokenizer, prefix=""): if prefix == 'test': eval_file = args.test_file else: eval_file = args.dev_file DatasetClass = WeakSupervisorDataset dataset = DatasetClass(eval_file, args.max_seq_length, tokenizer, args.load_small, is_training=False) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) predict_dir = os.path.join(args.output_dir, 'predictions') if not os.path.exists(predict_dir) and args.local_rank in [-1, 0]: os.makedirs(predict_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) # Note that DistributedSampler samples randomly # eval_sampler = SequentialSampler( # dataset) if args.local_rank == -1 else DistributedSampler(dataset) eval_sampler = SequentialSampler(dataset) eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) # multi-gpu evaluate if args.n_gpu > 1: model = torch.nn.DataParallel(model) # Eval! logger.info("***** Running evaluation {} *****".format(prefix)) logger.info(" Num examples = %d", len(dataset)) logger.info(" Batch size = %d", args.eval_batch_size) all_results = [] start_time = timeit.default_timer() for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() example_ids = batch['example_id'] batch = { k: v.to(args.device) for k, v in batch.items() if k != 'example_id' } with torch.no_grad(): inputs = { 'input_ids': batch['input_ids'], 'attention_mask': batch['input_mask'] } if args.model_type != 'distilbert': # XLM don't use segment_ids inputs[ 'token_type_ids'] = None if args.model_type == 'xlm' else batch[ 'segment_ids'] # example_ids = batch['example_id'] outputs = model(**inputs) for i, example_id in enumerate(example_ids): result = RawResult( unique_id=example_id, start_logits=to_list(outputs[0][i]), end_logits=to_list(outputs[1][i]), retrieval_logits=[1]) # retrieval_logits is not used all_results.append(result) examples = dataset.all_examples features = dataset.all_features # assert len(examples) == len(dataset), (len(examples), len(dataset)) # assert len(features) == len(dataset), (len(features), len(dataset)) evalTime = timeit.default_timer() - start_time logger.info(" Evaluation done in total %f secs (%f sec per example)", evalTime, evalTime / len(dataset)) # Compute predictions output_prediction_file = os.path.join( predict_dir, "instance_predictions_{}.json".format(prefix)) output_nbest_file = os.path.join( predict_dir, "instance_nbest_predictions_{}.json".format(prefix)) output_final_prediction_file = os.path.join( predict_dir, "final_predictions_{}.json".format(prefix)) if args.version_2_with_negative: output_null_log_odds_file = os.path.join( predict_dir, "instance_null_odds_{}.json".format(prefix)) else: output_null_log_odds_file = None all_predictions = write_predictions( examples, features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold) write_weak_supervisor_predictions(all_predictions, output_final_prediction_file) eval_metrics = weak_supervisor_eval(eval_file, output_final_prediction_file) metrics_file = os.path.join(predict_dir, "metrics_{}.json".format(prefix)) with open(metrics_file, 'w') as fout: json.dump(eval_metrics, fout) return eval_metrics
def evaluate(args, model, tokenizer, max_depth, prefix=""): r""" Evaluate the model """ dataset, examples, features = load_and_cache_examples(args, tokenizer, max_depth=max_depth, evaluate=True, output_examples=True) if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) # Note that DistributedSampler samples randomly eval_sampler = SequentialSampler( dataset) if args.local_rank == -1 else DistributedSampler(dataset) eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, num_workers=args.dataloader_workers) # multi-gpu evaluate if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel): model = torch.nn.DataParallel(model) # Eval! logger.info("***** Running evaluation {} *****".format(prefix)) logger.info(" Num examples = %d", len(dataset)) logger.info(" Batch size = %d", args.eval_batch_size) all_results = [] start_time = timeit.default_timer() for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(args.device) for t in batch) with torch.no_grad(): inputs = { 'input_ids': batch[0], 'attention_mask': batch[1], 'token_type_ids': batch[2], 'xpath_tags_seq': batch[4], 'xpath_subs_seq': batch[5], } feature_indices = batch[3] outputs = model(**inputs) for i, feature_index in enumerate(feature_indices): eval_feature = features[feature_index.item()] unique_id = int(eval_feature.unique_id) result = RawResult(unique_id=unique_id, start_logits=to_list(outputs[0][i]), end_logits=to_list(outputs[1][i])) all_results.append(result) eval_time = timeit.default_timer() - start_time logger.info(" Evaluation done in total %f secs (%f sec per example)", eval_time, eval_time / len(dataset)) # Compute predictions output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(prefix)) output_tag_prediction_file = os.path.join( args.output_dir, "tag_predictions_{}.json".format(prefix)) output_nbest_file = os.path.join( args.output_dir, "nbest_predictions_{}.json".format(prefix)) output_result_file = os.path.join( args.output_dir, "qas_eval_results_{}.json".format(prefix)) output_file = os.path.join(args.output_dir, "eval_matrix_results_{}".format(prefix)) write_predictions(examples, features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_tag_prediction_file, output_nbest_file, args.verbose_logging, tokenizer) # Evaluate evaluate_options = EvalOpts(data_file=args.predict_file, root_dir=args.root_dir, pred_file=output_prediction_file, tag_pred_file=output_tag_prediction_file, result_file=output_result_file, out_file=output_file) results = evaluate_on_squad(evaluate_options) return results
def main(): parser = argparse.ArgumentParser() parser.add_argument('--input_dir', required=True) parser.add_argument('--model_dir', required=True) parser.add_argument('--output_dir', required=True) parser.add_argument('--gpu_num', default=1, type=int) parser.add_argument('--batch_size', default=10, type=int) parser.add_argument('--n_best_size', default=20, type=int) parser.add_argument('--max_answer_length', default=30, type=int) conf = parser.parse_args() if os.path.isfile(conf.model_dir): model_path = conf.model_dir else: model_path = get_best_model_path(conf.model_dir) with open(os.path.join(os.path.dirname(model_path), "config.json"), "r") as f: model_conf = json.load(f) max_seq_length = model_conf['max_seq_length'] doc_stride = model_conf['doc_stride'] max_query_length = model_conf['max_query_length'] do_lower_case = model_conf['do_lower_case'] examples = read_squad_examples(conf.input_dir) tokenizer = FullTokenizer(os.path.join(os.path.dirname(model_path), 'vocab.txt'), do_lower_case=do_lower_case) generator = data_generator(tokenizer, examples, conf.batch_size, max_seq_length, doc_stride, max_query_length, for_predict=True) data_list = [f for f in generator] data_size = len(data_list) if conf.gpu_num == 1: results = predict(data_list, model_path, show_summary=True, gpuid=None) else: with concurrent.futures.ProcessPoolExecutor( max_workers=conf.gpu_num) as executor: per_data = int(ceil(data_size / conf.gpu_num)) futures = [] for idx in range(conf.gpu_num): sub_data_list = data_list[per_data * idx:min(per_data * (idx + 1), data_size)] future = executor.submit(predict, sub_data_list, model_path, show_summary=idx == 0, gpuid=idx) futures.append(future) results = [] for future in futures: results.extend(future.result()) features = convert_examples_to_features(examples=examples, tokenizer=tokenizer, max_seq_length=max_seq_length, doc_stride=doc_stride, max_query_length=max_query_length, insert_unk=False) output_prediction_file = os.path.join(conf.output_dir, "predictions.json") output_nbest_file = os.path.join(conf.output_dir, "nbest_predictions.json") output_prediction_with_answer_file = os.path.join(conf.output_dir, "ans_predictions.json") write_predictions(examples, features, results, conf.n_best_size, conf.max_answer_length, do_lower_case, output_prediction_file, output_nbest_file, output_prediction_with_answer_file)
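# For reference, the multi-GPU branch above partitions `data_list` into contiguous
# chunks of size ceil(len(data_list) / gpu_num), one per worker. A toy, self-contained
# illustration of that slicing (values made up):
from math import ceil

data_list = list(range(10))
gpu_num = 3
per_data = int(ceil(len(data_list) / gpu_num))
chunks = [data_list[per_data * i:min(per_data * (i + 1), len(data_list))]
          for i in range(gpu_num)]
print(chunks)  # -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]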
def main(args): # set up logging and device args.save_dir = utils.get_save_dir(args.save_dir, args.name, training=True) logger = utils.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") n_gpu = torch.cuda.device_count() else: torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) n_gpu = 1 # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.distributed.init_process_group(backend='nccl') logger.info( "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}". format(device, n_gpu, bool(args.local_rank != -1), args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError( "Invalid gradient_accumulation_steps parameter: {}, should be >= 1" .format(args.gradient_accumulation_steps)) args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if n_gpu > 0: torch.cuda.manual_seed_all(args.seed) if not args.do_train and not args.do_predict: raise ValueError( "At least one of `do_train` or `do_predict` must be True.") if args.do_train: if not args.train_file: raise ValueError( "If `do_train` is True, then `train_file` must be specified.") if args.do_predict: if not args.predict_file: raise ValueError( "If `do_predict` is True, then `predict_file` must be specified." ) if os.path.exists(args.output_dir) and os.listdir( args.output_dir) and args.do_train: raise ValueError( "Output directory () already exists and is not empty.") os.makedirs(args.output_dir, exist_ok=True) tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) # Generating the dictionaries dep_dict, pos_dict, ent_dict, total_features = generate_dictionary( args.train_ling_features_file, args.eval_ling_features_file, args.test_ling_features_file) # from IPython import embed; embed() # Generating total_dictionary total_dict = convert_string_features_to_array(total_features, dep_dict, pos_dict, ent_dict) # from IPython import embed; embed() train_examples = None num_train_optimization_steps = None if args.do_train: train_examples = read_squad_examples( input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative, total_dictionary=total_dict) num_train_optimization_steps = int( len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs if args.local_rank != -1: num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size( ) # Prepare model model = BertForQuestionAnsweringLing.from_pretrained( args.bert_model, cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank)) if args.fp16: model.half() model.to(device) if args.local_rank != -1: try: from apex.parallel import DistributedDataParallel as DDP except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." 
) model = DDP(model) elif n_gpu > 1: model = torch.nn.DataParallel(model) # Prepare optimizer param_optimizer = list(model.named_parameters()) # hack to remove pooler, which is not used # thus it produce None grad that break apex param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]] no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01 }, { 'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] if args.fp16: try: from apex.optimizer import FP16_Optimizer from apex.optimizers import FusedAdam except ImportError: raise ImportError( "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training." ) optimizer = FusedAdam(optimizer_grouped_parameters, lr=args.learning_rate, bias_correction=False, max_grad_norm=1.0) if args.loss_scale == 0: optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True) else: optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale) else: optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=num_train_optimization_steps) global_step = 0 # load training features cached_train_features_file = args.train_file + '_{0}_{1}_{2}_{3}'.format( list(filter(None, args.bert_model.split('/'))).pop(), str(args.max_seq_length), str(args.doc_stride), str(args.max_query_length)) train_features = None print(cached_train_features_file) try: with open(cached_train_features_file, "rb") as reader: train_features = pickle.load(reader) except: train_features = convert_examples_to_features( examples=train_examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, is_training=True) if args.local_rank == -1 or torch.distributed.get_rank() == 0: logger.info(" Saving train features into cached file %s", cached_train_features_file) with open(cached_train_features_file, "wb") as writer: pickle.dump(train_features, writer) # load eval features eval_examples = read_squad_examples( input_file=args.predict_file, is_training=False, version_2_with_negative=args.version_2_with_negative, total_dictionary=total_dict) eval_features = convert_examples_to_features( examples=eval_examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, is_training=False) test_examples = read_squad_examples( input_file=args.test_file, is_training=False, version_2_with_negative=args.version_2_with_negative, total_dictionary=total_dict) test_features = convert_examples_to_features( examples=test_examples, tokenizer=tokenizer, max_seq_length=args.max_seq_length, doc_stride=args.doc_stride, max_query_length=args.max_query_length, is_training=False) if args.do_train: logger.info("***** Running training *****") logger.info(" Num orig examples = %d", len(train_examples)) logger.info(" Num split examples = %d", len(train_features)) logger.info(" Batch size = %d", args.train_batch_size) logger.info(" Num steps = %d", num_train_optimization_steps) all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long) all_start_positions = torch.tensor( [f.start_position for f in 
train_features], dtype=torch.long) all_end_positions = torch.tensor( [f.end_position for f in train_features], dtype=torch.long) # from IPython import embed; embed() all_ling_features = torch.tensor( [f.ling_features for f in train_features], dtype=torch.float) train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_ling_features, all_start_positions, all_end_positions) steps_till_eval = args.eval_steps if args.local_rank == -1: train_sampler = RandomSampler(train_data) else: train_sampler = DistributedSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size) model.train() best_F1 = 0 for _ in trange(int(args.num_train_epochs), desc="Epoch"): for step, batch in enumerate( tqdm(train_dataloader, desc="Iteration")): if n_gpu == 1: batch = tuple( t.to(device) for t in batch) # multi-gpu does scattering it-self input_ids, input_mask, segment_ids, ling_features, start_positions, end_positions = batch # from IPython import embed; embed() loss = model(input_ids, segment_ids, input_mask, ling_features, start_positions, end_positions) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. if args.gradient_accumulation_steps > 1: loss = loss / args.gradient_accumulation_steps if args.fp16: optimizer.backward(loss) else: loss.backward() if (step + 1) % args.gradient_accumulation_steps == 0: if args.fp16: # modify learning rate with special warm up BERT uses # if args.fp16 is False, BertAdam is used and handles this automatically lr_this_step = args.learning_rate * warmup_linear( global_step / num_train_optimization_steps, args.warmup_proportion) for param_group in optimizer.param_groups: param_group['lr'] = lr_this_step optimizer.step() optimizer.zero_grad() global_step += 1 # add to tensorboard loss_val = loss.item() tbx.add_scalar('train/NLL', loss_val, global_step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], global_step) steps_till_eval -= args.train_batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint logger.info('Evaluating at step {}...'.format(step)) # ema.assign(model) results, _ = evaluate(model, eval_examples, eval_features, device, args, logger, args.version_2_with_negative, args.dev_eval_file) # saver.save(step, model, results[args.metric_name], device) # ema.resume(model) # Log to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) logger.info('Dev {}'.format(results_str)) # Log to TensorBoard logger.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar('dev/{}'.format(k), v, global_step) """ util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals) """ if results['F1'] > best_F1: best_F1 = results['F1'] model_to_save = model.module if hasattr( model, 'module') else model # Only save the model it-self output_model_file = os.path.join( args.output_dir, "pytorch_model_best.bin") torch.save(model_to_save.state_dict(), output_model_file) #model.to(device) # Save a trained model """ model_to_save = model.module if hasattr(model, 'module') else model # Only save the model it-self output_model_file = os.path.join(args.output_dir, "pytorch_model.bin") if args.do_train: torch.save(model_to_save.state_dict(), output_model_file) # Load a trained model that you have fine-tuned model_state_dict = torch.load(output_model_file) model = BertForQuestionAnsweringLing.from_pretrained(args.bert_model, 
state_dict=model_state_dict) else: model = BertForQuestionAnsweringLing.from_pretrained(args.bert_model) model.to(device) """ # load the best trained model and eval on the eval set and test set best_model_file = os.path.join(args.output_dir, "pytorch_model_best.bin") model_state_dict = torch.load(best_model_file) model = BertForQuestionAnsweringLing.from_pretrained( args.bert_model, state_dict=model_state_dict) model.to(device) if args.do_predict and (args.local_rank == -1 or torch.distributed.get_rank() == 0): logger.info('Evaluating at the best model') results, all_results = evaluate(model, eval_examples, eval_features, device, args, logger, args.version_2_with_negative, args.dev_eval_file) logger.info('Write the best eval results') output_prediction_file = os.path.join(args.output_dir, "predictions.json") output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json") write_predictions(eval_examples, eval_features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold, 'dev') logger.info('Test set at the best model') results, all_results = evaluate(model, test_examples, test_features, device, args, logger, args.version_2_with_negative, args.test_eval_file) logger.info('Write the best test set results') output_prediction_file = os.path.join(args.output_dir, "predictions_test.json") output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_test.json") output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_test.json") write_predictions(test_examples, test_features, all_results, args.n_best_size, args.max_answer_length, args.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, args.verbose_logging, args.version_2_with_negative, args.null_score_diff_threshold, 'test') """
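# The `warmup_linear` schedule referenced in the fp16 branch of the training loop above
# is not defined in this snippet. In older pytorch-pretrained-bert examples it is a small
# function of training progress x (assumed here): linear ramp-up during the warmup
# fraction, then linear decay toward zero.
def warmup_linear(x, warmup=0.002):
    if x < warmup:
        return x / warmup
    return 1.0 - x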
plt.subplot(223)
plt.plot(n_trees_list, cv_means_cas)
plt.xlabel("Number of trees")
plt.ylabel("Mean RMSLE for casual")
plt.subplot(224)
plt.plot(n_trees_list, cv_stds_cas)
plt.xlabel("Number of trees")
plt.ylabel("Standard dev RMSLE for casual")
plt.tight_layout()
plt.show()

# Make predictions according to best result
cv_summary = np.add(cv_means_cas, cv_means_reg)
clf_reg = RandomForestRegressor(n_estimators=n_trees_list[np.argmin(cv_summary)])
clf_cas = RandomForestRegressor(n_estimators=n_trees_list[np.argmin(cv_summary)])
test_data = utils.get_data("test.csv")
clf_reg.fit(data, registered_result)
clf_cas.fit(data, casual_result)
pred_test_reg = clf_reg.predict(test_data)
pred_test_cas = clf_cas.predict(test_data)
pred_test = np.add(pred_test_reg, pred_test_cas)
utils.write_predictions(pred_test, "res_RF_reg_casual.csv")
plt.xlabel("Number of trees") plt.ylabel("Mean RMSLE for casual") plt.subplot(224) plt.plot(n_trees_list, cv_stds_cas) plt.xlabel("Number of trees") plt.ylabel("Standard dev RMSLE for casual") plt.tight_layout() plt.show() # Make predictions according to best result cv_summary = np.add(cv_means_cas, cv_means_reg) clf_reg = RandomForestRegressor( n_estimators=n_trees_list[np.argmin(cv_summary)]) clf_cas = RandomForestRegressor( n_estimators=n_trees_list[np.argmin(cv_summary)]) test_data = utils.get_data("test.csv") clf_reg.fit(data, registered_result) clf_cas.fit(data, casual_result) pred_test_reg = clf_reg.predict(test_data) pred_test_cas = clf_cas.predict(test_data) pred_test = np.add(pred_test_reg, pred_test_cas) utils.write_predictions(pred_test, "res_RF_reg_casual.csv")
def exec_demo(demo_params): """ Train the crf with different size of the train set Tune the hyperparameter over the development set Then test the best model :param demo_params: :return: """ logger = log.setup_logger(__name__) #ignore this line... It's a long story. feature_type = "ver1" # extract base parameters demo_id, name, train_file, dev_file, test_file, output_folder = utils.extract_base_demo_params( demo_params) print_log( logger, "\n".join([str((key, demo_params[key])) for key in list(demo_params)])) different_sizes_perc = list(range(10, 101, 10)) # define the scoring function for the grid search my_scorer = sklearn.metrics.make_scorer(metrics.my_scorer.get_evaluation) # track some result from the search used for tuning the hyperparameter delta size_evaluations = {} train_data_partitions = {} fscores = {} # pre-processing the data (remove tags and other stuff) print_log(logger, "Making datasets...") train_data = data_parsers.make_dataset.parse_file(open(train_file)) dev_data = data_parsers.make_dataset.parse_file(open(dev_file)) test_data = data_parsers.make_dataset.parse_file(open(test_file)) # compute the maximum delta possible (from the length of the longest word # in the train and development set) max_delta = max(utils.find_max_len(train_data), utils.find_max_len(dev_data)) print_log(logger, "max delta: %s, len train set:%s" % (max_delta, len(train_data))) # train the model for different train sizes for size in different_sizes_perc: print_log( logger, "train the model with percentage of the train set: %02d%%" % size) train_data_shuffled = copy.deepcopy(train_data) random.shuffle(train_data_shuffled) current_size = round(len(train_data) * size / 100) print_log(logger, "current train set size: %d" % current_size) train_data_partition = train_data_shuffled[:current_size] print_log( logger, "train set: " + "; ".join(list(map(str, train_data_partition[0:5]))) + "...") size_evaluations[size] = {} train_data_partitions[size] = train_data_partition current_max_delta = utils.find_max_len(train_data_partition) print_log(logger, "current max delta: %s" % current_max_delta) for delta in range(1, current_max_delta + 1): print_log(logger, "train the model with delta: %d" % delta) X_train, y_train = features.extract_features.get_features_and_labels( train_data_partition, delta, feature_type) X_dev, y_dev = features.extract_features.get_features_and_labels( dev_data, delta, feature_type) X_test, y_test = features.extract_features.get_features_and_labels( test_data, delta, feature_type) crf = sklearn_crfsuite.CRF( algorithm='ap', all_possible_transitions=True, all_possible_states=False, ) crf.fit(X_train, y_train) y_dev_pred = crf.predict(X_dev) delta_evaluation = metrics.evaluation.get_evaluation( feature_type, y_dev, y_dev_pred) print_log( logger, "F-score on development set: %s" % delta_evaluation["F-score"]) size_evaluations[size][delta] = (delta_evaluation["Precision"], delta_evaluation["Recall"], delta_evaluation["F-score"]) # find delta that yields best F-score sizes = list(size_evaluations.keys()) sizes.sort() deltas = [] for size in sizes: max_fscore = max(size_evaluations[size].values()) max_delta_for_size = [ i for i in size_evaluations[size] if size_evaluations[size][i] == max_fscore ][0] deltas.append(max_delta_for_size) print_log( logger, "\nBest delta=%s for train size perc=%s%%. 
" "\nOn development set:" "\n\tPrecision=%s" "\n\tRecall=%s" "\n\tF-score=%s" % (max_delta_for_size, size, size_evaluations[size][max_delta_for_size][0], size_evaluations[size][max_delta_for_size][1], size_evaluations[size][max_delta_for_size][2])) test_evaluations = {} print_log(logger, "Test models with different sizes of training set") for size, best_delta in zip(sizes, deltas): print_log(logger, "Train with size: %d and delta: %s" % (size, best_delta)) cur_train_set = train_data_partitions[size] print_log( logger, "train set: " + "; ".join(list(map(str, cur_train_set[0:5]))) + "...") X_train, y_train = features.extract_features.get_features_and_labels( cur_train_set, best_delta, feature_type) X_test, y_test = features.extract_features.get_features_and_labels( test_data, best_delta, feature_type) crf = sklearn_crfsuite.CRF( algorithm='ap', all_possible_transitions=True, all_possible_states=False, ) crf.fit(X_train, y_train) y_test_pred = crf.predict(X_test) delta_evaluation = metrics.evaluation.get_evaluation( feature_type, y_test, y_test_pred) test_evaluations[size] = (delta_evaluation["Precision"], delta_evaluation["Recall"], delta_evaluation["F-score"]) print_log( logger, "train score (delta=%s): F-score, : %s" % (best_delta, delta_evaluation["F-score"])) # save some result from the tests curpath = output_folder + "/size_%02d_delta_%02d" % (size, best_delta) os.makedirs(curpath) curpath = curpath\ +"/"+name+"_" \ + "size_%02d_delta_%02d"%(size,best_delta) utils.write_model(crf, open(curpath + ".model", "wb+")) utils.write_predictions(feature_type, open(test_file), y_test_pred, open(curpath + ".pred", "w+")) utils.write_evaluation(delta_evaluation, open(curpath + ".eval", "w+")) utils.write_fails(open(test_file), y_test, y_test_pred, open(curpath + ".fails", "w+"), feature_type) details.print_details(crf, file=open(curpath + ".details", "w+")) freport = open(output_folder + "/report.txt", "w+") for size, best_delta in zip(sizes, deltas): print( "Best delta=%s for train size perc=%s%%. " "\nOn development set:" "\n\tPrecision=%s" "\n\tRecall=%s" "\n\tF-score=%s" "\nOn test set:" "\n\tPrecision=%s" "\n\tRecall=%s" "\n\tF-score=%s" % (best_delta, size, size_evaluations[size][best_delta][0], size_evaluations[size][best_delta][1], size_evaluations[size][best_delta][2], test_evaluations[size][0], test_evaluations[size][1], test_evaluations[size][2]) + "\n" + "-" * 50, file=freport)
clf = RandomForestRegressor(n_estimators=n_trees)

# cross-validation evaluation
scores = cross_validation.cross_val_score(clf, data, result_vect, cv=10,
                                          scoring=rmsle_scorer)
scores = -scores
cv_means.append(np.mean(scores))
cv_stds.append(np.std(scores))

# Plots mean and std depending on number of trees
plt.subplot(211)
plt.plot(n_trees_list, cv_means)
plt.xlabel("Number of trees")
plt.ylabel("Mean RMSLE")
plt.subplot(212)
plt.plot(n_trees_list, cv_stds)
plt.xlabel("Number of trees")
plt.ylabel("Standard dev RMSLE")
plt.tight_layout()
plt.show()

# Make predictions according to best result
clf = RandomForestRegressor(n_estimators=n_trees_list[np.argmin(cv_means)])
test_data = utils.get_data("test.csv")
clf.fit(data, result_vect)
pred_test = clf.predict(test_data)
utils.write_predictions(pred_test, "res_RF_all.csv")
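# The `rmsle_scorer` passed to cross_val_score above is not defined in this snippet.
# A plausible sketch (an assumed implementation, not the original): the root mean squared
# logarithmic error wrapped with make_scorer. With greater_is_better=False, scikit-learn
# negates the scores, which is why the script flips the sign with `scores = -scores`.
import numpy as np
from sklearn.metrics import make_scorer

def rmsle(y_true, y_pred):
    """Root mean squared logarithmic error."""
    return np.sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))

rmsle_scorer = make_scorer(rmsle, greater_is_better=False)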
def exec_demo(demo_params): """ Execute grid search over the param_grid defined in demo_params, using the data from the crowd-sourced annotations. :param demo_params: :return: """ logger = log.setup_logger(__name__) #ignore this line... It's a long story. feature_type = "ver1" # extract base parameters demo_id, name, train_file, dev_file, test_file, output_folder = utils.extract_base_demo_params( demo_params) print_log( logger, "\n".join([str((key, demo_params[key])) for key in list(demo_params)])) train_file_extra_points = demo_params["train_file_extra_points"] param_grid = demo_params["param_grid"] # define the scoring function for the grid search my_scorer = sklearn.metrics.make_scorer(metrics.my_scorer.get_evaluation) # track some result from the grid search used for tuning the hyperparameter delta fscores = {} epsilons_list = {} max_iterations_list = {} best_eval = {"F-score": 0} # pre-processing the data (remove tags and other stuff) print_log(logger, "Making datasets...") task1_train_data = data_parsers.make_dataset.parse_file(open(train_file)) dev_data = data_parsers.make_dataset.parse_file(open(dev_file)) test_data = data_parsers.make_dataset.parse_file(open(test_file)) extra_points_train_data = data_parsers.make_dataset.parse_file( open(train_file_extra_points)) train_data = task1_train_data + extra_points_train_data print_log(logger, "train data size: %s" % len(train_data)) print_log(logger, "development data size: %s" % len(dev_data)) print_log(logger, "test data size: %s" % len(test_data)) # compute the maximum delta possible (from the length of the longest word # in the train and development set) max_delta = max(utils.find_max_len(train_data), utils.find_max_len(dev_data)) if max_delta > settings.MAX_ALLOWABLE_DELTA: max_delta = settings.MAX_ALLOWABLE_DELTA print_log(logger, "max delta: %s" % max_delta) # repeat the grid search for each possible value of delta for delta in range(1, max_delta + 1): os.makedirs(output_folder + "/%02d" % delta, exist_ok=True) print_log(logger, "Training with delta=%s" % delta) X_train, y_train = features.extract_features.get_features_and_labels( train_data, delta, feature_type) X_dev, y_dev = features.extract_features.get_features_and_labels( dev_data, delta, feature_type) X_test, y_test = features.extract_features.get_features_and_labels( test_data, delta, feature_type) model = utils.run_grid_search(X_train, y_train, X_dev, y_dev, param_grid, my_scorer) best_cv_epsilon = model.best_params_["epsilon"] best_cv_max_iterations = model.best_params_["max_iterations"] # the best score will be considered in order to pick the best model fscores[delta] = model.best_score_ epsilons_list[delta] = best_cv_epsilon max_iterations_list[delta] = best_cv_max_iterations print_log( logger, "Best params for delta %02d: max_iterations=%d\tepsilon=%.2E" % (delta, best_cv_max_iterations, best_cv_epsilon)) print_log(logger, "Best CV score: " + str(model.best_score_)) # test the model on the test set. NOTICE: the result will not be considered for the choice # of the hyperparameter delta! 
print_log(logger, "***Predict test with the grid search model:***") y_test_pred = model.predict(X_test) test_eval = metrics.evaluation.get_evaluation(feature_type, y_test, y_test_pred) print_log( logger, "F-score on test (grid search with delta=%s): %s" % (delta, test_eval["F-score"])) # save some result from the grid search curpath = output_folder + "/%02d" % delta + "/" + name + "_" + "%02d" % delta utils.write_model(model, open(curpath + "_gridsearch.model", "wb+")) utils.write_predictions(feature_type, open(test_file), y_test_pred, open(curpath + ".pred", "w+")) utils.write_evaluation(test_eval, open(curpath + ".eval", "w+")) utils.write_fails(open(test_file), y_test, y_test_pred, open(curpath + ".fails", "w+"), feature_type) details.print_gridsearch_details(model, file=open( curpath + "_gridsearch.details", "w+")) print_log(logger, "#" * 50) print_log(logger, "-" * 50) max_fscore = max(fscores.values()) max_fscore_delta = [i for i in fscores.keys() if fscores[i] == max_fscore][0] best_model_num = max_fscore_delta best_epsilon = epsilons_list[best_model_num] best_max_iterations = max_iterations_list[best_model_num] freport = open(output_folder + "/report.txt", "w+") print_log( logger, "The best model found is the one with delta: %s" % best_model_num) print_log( logger, "With best parameters: max_iterations=%s, epsilon=%s" % (best_max_iterations, best_epsilon)) print_log(logger, "CV F-score: %s" % max_fscore) print("The best model found is the one with delta: %s" % best_model_num, file=freport) print("With best parameters: max_iterations=%s, epsilon=%s" % (best_max_iterations, best_epsilon), file=freport) print("CV F-score: %s" % max_fscore, file=freport) best_model_path = output_folder + "/%02d" % best_model_num + "/" + name + "_" + "%02d" % best_model_num + "_gridsearch.model" best_model = pickle.load(open(best_model_path, "rb")) X_test, y_test = features.extract_features.get_features_and_labels( test_data, best_model_num, feature_type) y_pred = best_model.predict(X_test) delta_evaluation = metrics.evaluation.get_evaluation( feature_type, y_test, y_pred) print_log( logger, "delta: %s\tF-score, : %s" % (best_model_num, delta_evaluation["F-score"])) print("F-score on test set: %s" % delta_evaluation["F-score"], file=freport)
'''Trains SupportVectorMachine and uses it to write predictions.
'''
from sklearn import svm

from utils import load_data, write_predictions

if __name__ == "__main__":
    ids, data, labels = load_data()
    clf = svm.SVC().fit(data, labels)
    write_predictions(clf)
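# The `write_predictions(clf)` helper imported from `utils` above is not shown;
# presumably it loads the held-out data itself and writes one predicted label per id.
# A rough, hypothetical sketch under that assumption (`load_test_data` is invented
# here for illustration and is not part of the original utils module):
import csv

def write_predictions(clf, out_file="predictions.csv"):
    test_ids, test_data = load_test_data()  # hypothetical loader
    with open(out_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["id", "label"])
        writer.writerows(zip(test_ids, clf.predict(test_data)))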