def main():
    """Load a trained encoder / attention-decoder pair and score it on the held-out split."""
    input_lang, output_lang, pairs = prepare_data('ques', 'ans', '../debug.json', reverse=False)
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                  dropout_p=0.1, max_length=1000).to(device)
    # 90/10 train/test split; only the test portion is used here.
    split = int(len(pairs) * 0.9)
    pairs_train, pairs_test = pairs[:split], pairs[split:]
    encoder.load_state_dict(torch.load('model/encoder-0.model'))
    encoder.eval()
    attn_decoder.load_state_dict(torch.load('model/decoder-0.model'))
    attn_decoder.eval()
    evaluate_all(encoder, attn_decoder, pairs_test, max_length=1000,
                 input_lang=input_lang, output_lang=output_lang,
                 n=len(pairs_test))
    # show_plot(loss_history)
    print('done test')
def main():
    """Load a saved Transformer checkpoint and run evaluation."""
    input_lang, output_lang, pairs = prepare_data('ques', 'ans', '../test.json', reverse=False)
    model = Transformer(
        src_vocab_size=input_lang.n_words,
        src_max_len=MAX_LENGTH,
        tgt_vocab_size=output_lang.n_words,
        tgt_max_len=MAX_LENGTH,
    ).to(device)
    # 90/10 split; NOTE: evaluation below runs on the *training* pairs.
    split = int(len(pairs) * 0.9)
    pairs_train, pairs_test = pairs[:split], pairs[split:]
    model.load_state_dict(torch.load('model/transformer-0.model'))
    model.eval()
    evaluate_all(model, pairs_train, max_length=100,
                 input_lang=input_lang, output_lang=output_lang,
                 n=len(pairs_train))
    # show_plot(loss_history)
    print('done test')
def main():
    """Entry point: greet, parse CLI arguments, and dispatch on the requested action."""
    print(HELLO_MSG)
    args = parse_args()
    print(args)
    # Dispatch table replaces the if/elif chain; unknown actions are ignored,
    # matching the original behavior.
    handlers = {"run": launch_on_all_nodes, "eval": evaluate_all}
    handler = handlers.get(args.action)
    if handler is not None:
        handler()
def human_performance(args, params):
    """Score averaged human annotations against the test-set ground truth."""
    x_gt, ground_truths, processor, dl = load.load_test(params, fit_processor=True)
    ground_truths, probs = human_gt_and_probs(params, x_gt, ground_truths,
                                              processor, review_indiv=True)
    evaluate.evaluate_all(ground_truths, probs, processor.classes,
                          model_title='Human Performance Average',
                          plot_flag=args.plot)
def train(model, data, words, params):
    """Run the pairwise-ranking training loop for params.epochs epochs (Python 2).

    Every quarter of an epoch (per utils.checkIfQuarter) and at each epoch
    end, optionally checkpoints the model (params.save) and/or runs
    evaluate_all (params.evaluate). Interruptible via Ctrl-C.
    """
    start_time = time.time()
    counter = 0  # running suffix for checkpoint filenames
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                # Each item is a sentence pair; embed both sides before the step.
                for i in batch:
                    i[0].populate_embeddings(words)
                    i[1].populate_embeddings(words)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)
                cost = model.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)
                if np.isnan(cost) or np.isinf(cost):
                    # NOTE(review): only logs; training continues after a NaN cost.
                    print 'NaN detected'
                if (utils.checkIfQuarter(uidx, len(kf))):
                    if (params.save):
                        counter += 1
                        utils.saveParams(model, params.outfile + str(counter) + '.pickle')
                    if (params.evaluate):
                        evaluate_all(model, words)
                        sys.stdout.flush()
                # undo batch to save RAM
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
                # print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost
            # End-of-epoch checkpoint/evaluation mirrors the quarter-epoch hook.
            if (params.save):
                counter += 1
                utils.saveParams(model, params.outfile + str(counter) + '.pickle')
            if (params.evaluate):
                evaluate_all(model, words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def train(model, data, words, params):
    """Pairwise-ranking training loop (Python 2); duplicate of the variant above.

    Checkpoints (params.save) and/or evaluates (params.evaluate) every quarter
    epoch and at each epoch boundary; Ctrl-C stops training cleanly.
    """
    start_time = time.time()
    counter = 0  # checkpoint filename suffix
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                # Embed both sentences of each pair before computing the step.
                for i in batch:
                    i[0].populate_embeddings(words)
                    i[1].populate_embeddings(words)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)
                cost = model.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)
                if np.isnan(cost) or np.isinf(cost):
                    # NOTE(review): logs only; a NaN cost does not stop training.
                    print 'NaN detected'
                if (utils.checkIfQuarter(uidx, len(kf))):
                    if (params.save):
                        counter += 1
                        utils.saveParams(model, params.outfile + str(counter) + '.pickle')
                    if (params.evaluate):
                        evaluate_all(model, words)
                        sys.stdout.flush()
                # undo batch to save RAM
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
                # print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost
            if (params.save):
                counter += 1
                utils.saveParams(model, params.outfile + str(counter) + '.pickle')
            if (params.evaluate):
                evaluate_all(model, words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def agreement(args, params):
    """Estimate inter-reviewer agreement by repeatedly sampling human labels."""
    _, ground_truths, classes = load.load_test(params)
    NUM_REPETITIONS = 10
    gt_all = []
    probs_all = []
    for _ in range(NUM_REPETITIONS):
        gt, probs = get_ground_truths_and_human_probs(ground_truths,
                                                      params["num_reviewers"])
        gt_all.append(gt)
        probs_all.append(probs)
    # Stack repetitions: ground truths along columns, probabilities along rows.
    stacked_gt = np.concatenate(tuple(gt_all), axis=1)
    stacked_probs = np.concatenate(tuple(probs_all), axis=0)
    evaluate.evaluate_all(stacked_gt, stacked_probs, classes,
                          model_title='Human Agreement')
def run_and_evaluate(**suite_params):
    """Run the configured entailment suite and write a per-experiment summary CSV.

    The experiment type is read from the config's [DEFAULT] section; when it
    is missing or unreadable, falls back to cross-validation (the base
    EntailmentSuite already constructed).
    """
    suite = EntailmentSuite(**suite_params)
    try:
        # NOTE(review): eval() on config text is unsafe for untrusted configs;
        # consider ast.literal_eval or plain get().
        exp_type = eval(suite.cfgparser.get('DEFAULT', 'type'))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; also avoid shadowing the builtin `type`.
        logging.info(
            "Warning: type of experiment not specified. Assuming cross-validation."
        )
        exp_type = "cv"
    if exp_type == "heldout":
        suite = EntailmentSuiteHeldOut(**suite_params)
    elif exp_type == "traintest":
        suite = EntailmentSuiteTrainTest(**suite_params)
    elif exp_type == "heldoutstrict":
        suite = EntailmentSuiteHeldOutStrict(**suite_params)
    suite.start()
    for experiment in suite.cfgparser.sections():
        logging.info("Running experiment: %s", experiment)
        experiment_path = os.path.join(
            eval(suite.cfgparser.get('DEFAULT', 'path')), experiment)
        params = suite.get_params(experiment_path)
        path = os.path.join(params['path'], params['name'])
        rows = evaluate.evaluate_all(path)
        evaluate.write_summary(rows, os.path.join(path, 'analysis.csv'))
def run_and_evaluate(**suite_params):
    """Run the configured entailment suite and summarize each experiment.

    Reads the experiment type from the [DEFAULT] config section; missing or
    unreadable types fall back to cross-validation.
    """
    suite = EntailmentSuite(**suite_params)
    try:
        # NOTE(review): eval() on config text is unsafe for untrusted configs.
        exp_type = eval(suite.cfgparser.get('DEFAULT', 'type'))
    except Exception:
        # Narrowed from a bare `except:` (which would also catch SystemExit /
        # KeyboardInterrupt); `exp_type` avoids shadowing the builtin `type`.
        logging.info("Warning: type of experiment not specified. Assuming cross-validation.")
        exp_type = "cv"
    if exp_type == "heldout":
        suite = EntailmentSuiteHeldOut(**suite_params)
    elif exp_type == "traintest":
        suite = EntailmentSuiteTrainTest(**suite_params)
    elif exp_type == "heldoutstrict":
        suite = EntailmentSuiteHeldOutStrict(**suite_params)
    suite.start()
    for experiment in suite.cfgparser.sections():
        logging.info("Running experiment: %s", experiment)
        experiment_path = os.path.join(eval(suite.cfgparser.get('DEFAULT', 'path')), experiment)
        params = suite.get_params(experiment_path)
        path = os.path.join(params['path'], params['name'])
        rows = evaluate.evaluate_all(path)
        evaluate.write_summary(rows, os.path.join(path, 'analysis.csv'))
def test(self, dataloader):
    """Load the best-validation-epoch checkpoint and evaluate it on the test set.

    Rebuilds the network, restores saved parameters, runs inference without
    gradients, then records metrics, ROC/PR curves, and raw prediction
    probabilities for the 'test' split.
    """
    epoch = self.best_valid_epoch
    # Rebuild the model (optionally wrapped for multi-GPU) in eval mode.
    if self.data_parallel:
        model = nn.DataParallel(self.custom_net(
            **self.custom_net_args)).to(self.device).eval()
    else:
        model = self.custom_net(**self.custom_net_args).to(
            self.device).eval()
    params_path = os.path.join(self.params_dir, self.descriptor)
    print(
        'For test set predictions, loading model params from params_path=',
        params_path)
    check_point = torch.load(params_path)
    model.load_state_dict(check_point['params'])
    with torch.no_grad():
        epoch_loss, pred_epoch, gr_truth_epoch, volume_accs_epoch = self.iterate_through_batches(
            model, dataloader, epoch, training=False)
    # Accumulate metrics keyed by epoch, then persist curves and probabilities.
    self.eval_results_test = evaluate.evaluate_all(self.eval_results_test,
                                                   epoch, self.label_meanings,
                                                   gr_truth_epoch, pred_epoch)
    self.plot_roc_and_pr_curves('test', epoch, pred_epoch, gr_truth_epoch)
    self.save_all_pred_probs('test', epoch, pred_epoch, gr_truth_epoch,
                             volume_accs_epoch)
    print("{:5s} {:<3d} {:11s} {:.3f}".format('Epoch', epoch, 'Test Loss',
                                              epoch_loss))
def human_gt_and_probs(params, x_gt, ground_truths, processor, review_indiv=False):
    """Collect ground truths and human probabilities across every test reviewer.

    Optionally (review_indiv=True) also evaluates each reviewer individually.
    Returns the column-stacked ground truths and row-stacked probabilities.
    """
    gt_all = []
    probs_all = []
    for i in TEST_REVIEWS:
        params["epi_ext"] = "_rev" + str(i) + ".episodes.json"
        x_rev, probs, dl = load.load_x_y_with_processor(params, processor)
        gt_i, rev_i = get_matching_indices(x_gt, x_rev)
        gt = ground_truths[:, gt_i]
        probs = probs[rev_i]
        if review_indiv is True:
            evaluate.evaluate_all(
                gt, probs, processor.classes,
                model_title='Human Performance with review ' + str(i))
        gt_all.append(gt)
        probs_all.append(probs)
    return (np.concatenate(tuple(gt_all), axis=1),
            np.concatenate(tuple(probs_all), axis=0))
def main():
    """Train the encoder / attention-decoder, evaluating and checkpointing each epoch."""
    input_lang, output_lang, pairs = prepare_data('ques', 'ans', '../data.json', reverse=False)
    encoder = Encoder(input_lang.n_words, MAX_LENGTH).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                                  dropout_p=0.1, max_length=MAX_LENGTH).to(device)
    # 90/10 train/test split, 10 epochs.
    split = int(len(pairs) * 0.9)
    pairs_train, pairs_test = pairs[:split], pairs[split:]
    for i in range(10):
        encoder.train()
        attn_decoder.train()
        train(encoder, attn_decoder, len(pairs_train), pairs=pairs_train,
              input_lang=input_lang, output_lang=output_lang, print_every=10)
        encoder.eval()
        attn_decoder.eval()
        evaluate_all(encoder, attn_decoder, pairs_test, max_length=MAX_LENGTH,
                     input_lang=input_lang, output_lang=output_lang,
                     n=len(pairs_test))
        # Snapshot both halves of the model after every epoch.
        torch.save(encoder.state_dict(), 'model/encoder-' + str(i) + '.model')
        torch.save(attn_decoder.state_dict(), 'model/decoder-' + str(i) + '.model')
    # show_plot(loss_history)
    print('done training')
def valid(self, dataloader, epoch):
    """Run one validation pass for `epoch` and update early-stopping state.

    Records the epoch's validation loss, accumulates per-label metrics via
    evaluate.evaluate_all, and lets early_stopping_check decide whether to
    snapshot/stop.
    """
    model = self.model.eval()
    with torch.no_grad():
        epoch_loss, pred_epoch, gr_truth_epoch, volume_accs_epoch = self.iterate_through_batches(
            model, dataloader, epoch, training=False)
    self.valid_loss[epoch] = epoch_loss
    self.eval_results_valid = evaluate.evaluate_all(
        self.eval_results_valid, epoch, self.label_meanings,
        gr_truth_epoch, pred_epoch)
    self.early_stopping_check(epoch, pred_epoch, gr_truth_epoch,
                              volume_accs_epoch)
    print("{:5s} {:<3d} {:11s} {:.3f}".format('Epoch', epoch, 'Valid Loss',
                                              epoch_loss))
def run_and_evaluate(**suite_params):
    """Start the entailment suite and write an analysis summary per experiment."""
    suite = EntailmentSuite(**suite_params)
    suite.start()
    for experiment in suite.cfgparser.sections():
        logging.info("Running experiment: %s", experiment)
        base_dir = eval(suite.cfgparser.get('DEFAULT', 'path'))
        experiment_path = os.path.join(base_dir, experiment)
        params = suite.get_params(experiment_path)
        path = os.path.join(params['path'], params['name'])
        rows = evaluate.evaluate_all(path)
        evaluate.write_summary(rows, os.path.join(path, 'analysis.csv'))
def predict():
    """
    Uses the models to make the predictions and displays results on the page
    """
    article = get_article(request.form['article_url'])
    if article is None:
        return "The article URL is invalid/not supported. " + \
            "Try one of the following news sources: {}".format(TRUSTED_SOURCES)
    predictions = evaluate_all(article.text, article.title, BASE_PATH, True)
    # Raw sentiment is on a 0-4 scale; rescale to a percentage string.
    sentiment = '{:.1f}% positive'.format(float(predictions['sentiment']) * 100 / 4)
    return render_template('index.html',
                           sentiment=sentiment,
                           fake=predictions['fake'],
                           emotion=predictions['emotion'],
                           category=predictions['category'])
def plot_bleu(model_filter=None, token_filter=None, opt_filter=None):
    """Plot BLEU score vs. final validation loss for every matching model run.

    Iterates the cross product of models x tokenizers x optimizers (narrowed
    by the optional filters), reads each run's training-history CSV, writes a
    (filename, bleu, val_loss) CSV, and saves an annotated scatter plot.
    """
    # Calculate all scores first
    bleu_scores = evaluate_all(model_filter, token_filter, opt_filter)
    print('Finished calculating BLEU scores: %s' % "\n".join(bleu_scores.keys()))
    # Output path encodes whichever filters were applied.
    file_out = "plots/bleu"
    if model_filter is not None:
        file_out += '_' + model_filter
    if token_filter is not None:
        file_out += '_' + token_filter
    if opt_filter is not None:
        file_out += '_' + opt_filter
    labels = []
    xs = []
    ys = []
    plt.figure()  # reset the plot
    with open(file_out + '.csv', mode='w') as csv_file:  # overwrite any existing file
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        # For each, lookup the val_loss and plot them
        for model_name, model_class in models.items():
            if model_filter is None or model_filter == model_name:
                for token_id, tokenizer in tokenizers.items():
                    if token_filter is None or token_filter == token_id:
                        for opt_id, optimizer in optimizer_opts.items():
                            if opt_filter is None or opt_filter == opt_id:
                                # save each one
                                label = model_name + '_' + token_id + '_' + opt_id
                                filename = label + '_' + version
                                try:
                                    # col 2 is assumed to be val_loss; take the last epoch.
                                    history = genfromtxt('checkpoints/' + filename + '.csv',
                                                         delimiter=',', skip_header=1)
                                    bleu = bleu_scores[filename]
                                    val_loss = history[:, 2][-1]
                                    csv_writer.writerow(
                                        [filename, bleu, val_loss])
                                    print("bleu=%s, val_loss=%s" % (bleu, val_loss))
                                    labels.append(label)
                                    xs.append(bleu)
                                    ys.append(val_loss)
                                    # if isinstance(bleu, numbers.Number):
                                    #     plt.plot(bleu, val_loss, label=filename, markersize=12)
                                    print("Plotted: " + filename)
                                except UserWarning as uw:
                                    # print(uw)
                                    traceback.print_exc()
                                    # No model trained yet
                                    print('No val_los history: ' + filename)
                                except Exception as e:
                                    # print(e)
                                    traceback.print_exc()
                                    # No model trained yet
                                    print('No model logs for: ' + filename)
    if not labels:
        print("No matching data for filter %s, %s, %s" %
              (model_filter, token_filter, opt_filter))
    else:
        plt.scatter(xs, ys, marker='o')
        for label, x, y in zip(labels, xs, ys):
            plt.annotate(label,
                         xy=(x, y), xytext=(-20, 20),
                         textcoords='offset points', ha='right', va='bottom',
                         bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
                         arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
        # summarize history for loss
        plt.style.use('seaborn-whitegrid')
        plt.title('BLEU-1 vs val_loss')
        plt.ylabel('val_loss')
        plt.xlabel('BLEU')
        # plt.show()
        plt.subplots_adjust(left=0.2, right=0.9, top=0.9, bottom=0.1)
        plt.savefig(file_out + '.png')
        print("Wrote plot to " + file_out)
for l in [0, 1, 10, 100]: params['CLR_c k={} l={}'.format( k, l)] = [CLRcRegressor(k, l, constr_id=constr_id), X, y] params['CLR_c k={} l={} ens=10'.format(k, l)] = [ RegressorEnsemble(CLRcRegressor(k, l, constr_id=constr_id)), X, y ] params['kplane k={} l={} w=size'.format( k, l)] = [KPlaneRegressor(k, l, weighted='size'), X, y] params['kplane k={} l={} w=size ens=10'.format(k, l)] = [ RegressorEnsemble(KPlaneRegressor(k, l, weighted='size')), X, y ] results = evaluate_all( params, file_name="results/{}-tmp1.csv".format(args.dataset), n_jobs=args.n_jobs, gl_parallel=args.global_parallel, ) results = results.sort_values('test_mse_mean') add_params = {} # assuming ensembles are always best algos = [CLRpRegressor, KPlaneRegressor] algo_names = ['CLR_p', 'kplane'] for algo, algo_name in zip(algos, algo_names): for idx in results.index: if algo_name in idx: k = int(idx.split()[1].split('=')[1]) l = int(idx.split()[2].split('=')[1]) w = idx.split()[3].split('=')[1] == 'True' f = idx.split()[4].split('=')[1] == 'True'
import evaluate

# Script entry point: run the full evaluation suite over every model saved
# under the given directory.
if __name__ == '__main__':
    evaluate.evaluate_all(models_dir='models/robotics_klcoeff')
def train(self, data, words, params):
    """Pairwise training loop with an initial baseline evaluation (Python 2).

    Evaluates once before any updates, then trains for params.epochs epochs,
    checkpointing (params.save) and/or evaluating (params.evaluate) every
    quarter epoch and at each epoch end. Ctrl-C stops training cleanly.
    """
    start_time = time.time()
    evaluate_all(self, words)  # baseline metrics before any training step
    counter = 0  # checkpoint filename suffix
    try:
        for eidx in xrange(params.epochs):
            kf = self.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                # Embed both sentences of each pair (True flag per project API;
                # presumably "add unseen words" — confirm against populate_embeddings).
                for i in batch:
                    i[0].populate_embeddings(words, True)
                    i[1].populate_embeddings(words, True)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = self.getpairs(batch, params)
                cost = self.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)
                if np.isnan(cost) or np.isinf(cost):
                    # NOTE(review): logs only; training continues after NaN.
                    print 'NaN detected'
                if utils.check_if_quarter(uidx, len(kf)):
                    if params.save:
                        counter += 1
                        self.save_params(
                            params.outfile + str(counter) + '.pickle', words)
                    if params.evaluate:
                        evaluate_all(self, words)
                # Undo batch embeddings to save RAM.
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
            # End-of-epoch checkpoint/evaluation.
            if params.save:
                counter += 1
                self.save_params(params.outfile + str(counter) + '.pickle', words)
            if params.evaluate:
                evaluate_all(self, words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def train(self, data, words, params):
    """Pairwise training loop with optional word-scrambling augmentation (Python 2).

    With probability params.scramble each pair is scrambled instead of being
    embedded normally. Unlike the sibling variants, a NaN/Inf cost aborts the
    process via sys.exit(0). Checkpoints/evaluates every quarter epoch and at
    each epoch end.
    """
    start_time = time.time()
    evaluate_all(self, words)  # baseline metrics before training
    counter = 0  # checkpoint filename suffix
    try:
        for eidx in xrange(params.epochs):
            kf = self.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                for i in batch:
                    if params.scramble:
                        # Bernoulli draw decides scramble vs. normal embedding.
                        n = np.random.binomial(1, params.scramble, 1)[0]
                        if n > 0:
                            self.scramble(i[0], words)
                            self.scramble(i[1], words)
                        else:
                            i[0].populate_embeddings(words)
                            i[1].populate_embeddings(words)
                    else:
                        i[0].populate_embeddings(words)
                        i[1].populate_embeddings(words)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = self.get_pairs(batch, params)
                cost = self.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)
                if np.isnan(cost) or np.isinf(cost):
                    # Hard abort on numeric blow-up (exit code 0, as written).
                    print 'NaN detected. Exiting.'
                    sys.exit(0)
                if (check_quarter(uidx, len(kf))):
                    if (params.save):
                        counter += 1
                        self.save_params(params.outfile + str(counter) + '.pickle')
                    if (params.evaluate):
                        evaluate_all(self, words)
                #undo batch to save RAM
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
            if (params.save):
                counter += 1
                self.save_params(params.outfile + str(counter) + '.pickle')
            if (params.evaluate):
                evaluate_all(self, words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def train(self, data, words, params):
    """Megabatch training loop with best-so-far checkpointing (Python 2).

    Groups up to params.mb_batchsize minibatches into one "megabatch" per
    update, averages the per-minibatch costs, and saves parameters only when
    evaluate_all's score improves on the best seen (old_v).
    """
    start_time = time.time()
    evaluate_all(self, words, params)  # baseline metrics before training
    old_v = 0  # best evaluation score seen so far
    try:
        for eidx in xrange(params.epochs):
            kf = self.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            lkf = len(kf)
            uidx = 0
            while (len(kf) > 0):
                # Assemble a megabatch from the next mb_batchsize minibatches;
                # idxs keeps each minibatch's positions within the megabatch.
                megabatch = []
                idxs = []
                idx = 0
                for i in range(params.mb_batchsize):
                    if len(kf) > 0:
                        arr = [data[t] for t in kf[0][1]]
                        # NOTE(review): this comprehension's `i` shadows the
                        # enclosing loop variable `i` (harmless here, but fragile).
                        curr_idxs = [i + idx for i in range(len(kf[0][1]))]
                        kf.pop(0)
                        megabatch.extend(arr)
                        idxs.append(curr_idxs)
                        idx += len(curr_idxs)
                uidx += len(idxs)
                for i in megabatch:
                    if params.wordtype == "words":
                        if params.scramble > 0:
                            # Bernoulli draw: scramble vs. normal embedding.
                            n = np.random.binomial(1, params.scramble, 1)[0]
                            if n > 0:
                                i[0].populate_embeddings_scramble(words)
                                i[1].populate_embeddings_scramble(words)
                            else:
                                i[0].populate_embeddings(words, True)
                                i[1].populate_embeddings(words, True)
                        else:
                            i[0].populate_embeddings(words, True)
                            i[1].populate_embeddings(words, True)
                    else:
                        # Character n-gram (n=3) embeddings.
                        i[0].populate_embeddings_ngrams(words, 3, True)
                        i[1].populate_embeddings_ngrams(words, 3, True)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = self.get_pairs(megabatch, params)
                # One gradient step per minibatch slice; report the mean cost.
                cost = 0
                for i in idxs:
                    cost += self.train_function(g1x[i], g2x[i], p1x[i], p2x[i],
                                                g1mask[i], g2mask[i],
                                                p1mask[i], p2mask[i])
                cost = cost / len(idxs)
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                if utils.check_if_quarter(uidx - len(idxs), uidx, lkf):
                    if params.evaluate:
                        v = evaluate_all(self, words, params)
                    if params.save:
                        # Keep only the best-scoring parameters.
                        if v > old_v:
                            old_v = v
                            self.save_params(params.outfile + '.pickle', words)
                # Undo megabatch embeddings to save RAM.
                for i in megabatch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
            # End-of-epoch evaluation and best-model checkpoint.
            if params.evaluate:
                v = evaluate_all(self, words, params)
            if params.save:
                if v > old_v:
                    old_v = v
                    self.save_params(params.outfile + '.pickle', words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def train(self, data, words, params):
    """Megabatch training loop with best-so-far checkpointing (Python 3 port).

    Same structure as the Python 2 megabatch variant: groups minibatches into
    a megabatch, averages per-minibatch costs, and saves parameters only when
    evaluate_all's score improves on the best seen. Contains commented-out
    machinery for sampling per-sentence costs to JSON.
    """
    start_time = time.time()
    evaluate_all(self, words, params)  # baseline metrics before training
    old_v = 0  # best evaluation score seen so far
    try:
        for eidx in range(params.epochs):
            kf = self.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            lkf = len(kf)
            uidx = 0
            sentence_samples = []  # only used by the commented-out sampling code
            while(len(kf) > 0):
                # Assemble a megabatch from up to mb_batchsize minibatches.
                megabatch = []
                idxs = []
                idx = 0
                for i in range(params.mb_batchsize):
                    if len(kf) > 0:
                        arr = [data[t] for t in kf[0][1]]
                        # NOTE(review): the comprehension's `i` shadows the
                        # enclosing loop variable `i`.
                        curr_idxs = [i + idx for i in range(len(kf[0][1]))]
                        kf.pop(0)
                        megabatch.extend(arr)
                        idxs.append(curr_idxs)
                        idx += len(curr_idxs)
                uidx += len(idxs)
                for i in megabatch:
                    if params.wordtype == "words":
                        if params.scramble > 0:
                            # Bernoulli draw: scramble vs. normal embedding.
                            n = np.random.binomial(1, params.scramble, 1)[0]
                            if n > 0:
                                i[0].populate_embeddings_scramble(words)
                                i[1].populate_embeddings_scramble(words)
                            else:
                                i[0].populate_embeddings(words, True)
                                i[1].populate_embeddings(words, True)
                        else:
                            i[0].populate_embeddings(words, True)
                            i[1].populate_embeddings(words, True)
                    else:
                        # Character n-gram (n=3) embeddings.
                        i[0].populate_embeddings_ngrams(words, 3, True)
                        i[1].populate_embeddings_ngrams(words, 3, True)
                # get_pairs here also returns the raw sentence strings.
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask), (g1_s, g2_s, p1_s, p2_s) = self.get_pairs(megabatch, params)
                cost = 0
                for i in idxs:
                    # cc1,cc2 = self.cost_each_data(g1x[i], g2x[i], p1x[i], p2x[i], g1mask[i], g2mask[i], p1mask[i], p2mask[i])
                    cost += self.train_function(g1x[i], g2x[i], p1x[i], p2x[i],
                                                g1mask[i], g2mask[i],
                                                p1mask[i], p2mask[i])
                    # for j in range(len(i)):
                    #     try:
                    #         sentence_samples.append({'orig':g1_s[i[j]],'para':g2_s[i[j]], 'neg_orign':p1_s[i[j]], 'neg_para':p2_s[i[j]], 'orig_cost':str(cc1[j]),'para_cost':str(cc2[j])})
                    #     except IndexError:
                    #         print(j,i[j])
                cost = cost / len(idxs)
                if np.isnan(cost) or np.isinf(cost):
                    print('NaN detected')
                if utils.check_if_quarter(uidx-len(idxs), uidx, lkf):
                    if params.evaluate:
                        v = evaluate_all(self, words, params)
                    if params.save:
                        # Keep only the best-scoring parameters.
                        if v > old_v:
                            old_v = v
                            self.save_params(params.outfile + '.pickle', words)
                # Undo megabatch embeddings to save RAM.
                for i in megabatch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
            # End-of-epoch evaluation and best-model checkpoint.
            if params.evaluate:
                v = evaluate_all(self, words, params)
            if params.save:
                if v > old_v:
                    old_v = v
                    self.save_params(params.outfile + '.pickle', words)
            print('Epoch ', (eidx + 1), 'Cost ', cost)
            #with open("../data/sampled_samples/%s_epoch_%d.json" % (params.model, eidx),'w') as f:
            #    json_result = json.dumps(sentence_samples, indent=2)
            #    f.write(json_result)
    except KeyboardInterrupt:
        print("Training interupted")
    end_time = time.time()
    print("total time:", (end_time - start_time))
from evaluate import evaluate_all

# Run the full evaluation suite immediately when this script executes.
evaluate_all()
y = abalone_data.iloc[:, 8].as_matrix().astype(np.float) constr_id = 10 elif args.dataset == 'auto-mpg': data = pd.read_csv('data/auto-mpg.data', header=None, sep='\s+', na_values='?') data = data.dropna() X = pd.get_dummies(data.iloc[:,1:-1], columns=[7]).as_matrix().astype(np.float) y = data[0].as_matrix().astype(np.float) constr_id = 5 else: print("Dataset is not supported") sys.exit(0) X, y = preprocess_data(X, y) params = {} for k in [2, 8]: for ens in range(1, 21): params['kplane k={} ens={}'.format(k, ens)] = [ RegressorEnsemble(KPlaneRegressor(k, 100), n_estimators=ens), X, y, 10, 20] params['CLR_p k={} ens={}'.format(k, ens)] = [ RegressorEnsemble(CLRpRegressor(k, 10, weighted=True), n_estimators=ens), X, y, 10, 20] params['CLR_c k={} ens={}'.format(k, ens)] = [ RegressorEnsemble(CLRcRegressor(k, 10, constr_id=constr_id), n_estimators=ens), X, y, 10, 20] results = evaluate_all( params, file_name="results_ens/{}.csv".format(args.dataset), n_jobs=args.n_jobs, gl_parallel=args.global_parallel, )
for min_samples_leaf in [1, 10, 30, 50]: params['rf md={}, mf={}, mss={}, msl={}'.format( max_depth, max_features, min_samples_split, min_samples_leaf)] = [ RandomForestRegressor( n_estimators=30, max_depth=max_depth, max_features=max_features, min_samples_leaf=min_samples_leaf, min_samples_split=min_samples_split, n_jobs=n_jobs), X, y, 3, 1 ] results = evaluate_all( params, file_name="results/patient-claims-rf.csv", n_jobs=args.n_jobs, gl_parallel=args.global_parallel, ) if args.run_clrs: print("Run clrs") if args.n_jobs == 1: for k in [2, 4, 6, 8]: for l in [0, 1, 10, 100, 1000, 10000]: tm = time.time() gen_clrs(k, l, X, y, max_iter=5, n_estimators=10) print("k={}, l={}, time={}".format(k, l, time.time() - tm)) tm = time.time() gen_clrs(k, l, X,
def predict(self, data, words, params): start_time = time.time() evaluate_all(self, words, params) end_time = time.time() print "total time:", (end_time - start_time)