def write_grids(self, grid, l0, s1, l2, final):
    # l0, s1, l2: score grids of shape
    # (batch_size, num_sample_sets, context_len, num_alt_utts).
    # grid is a flat list of alternative utterances, strided first by batch
    # index and then by sample set.
    batch_size, num_sample_sets, context_len, num_alt_utts = l0.shape
    stride = [len(grid) // batch_size,
              len(grid) // (batch_size * num_sample_sets)]
    with gzip.open(config.get_file_path('grids.0.jsons.gz'), 'a') as outfile:
        for i in range(batch_size):
            final_dist = final[i, :].tolist()
            sample_sets = []
            for ss in range(num_sample_sets):
                loc = i * stride[0] + ss * stride[1]
                utts = [inst.input for inst in grid[loc:loc + num_alt_utts]]
                l0_grid = l0[i, ss, :, :].tolist()
                s1_grid = s1[i, ss, :, :].tolist()
                l2_grid = l2[i, ss, :, :].tolist()
                sample_sets.append({'utts': utts, 'L0': l0_grid,
                                    'S1': s1_grid, 'L2': l2_grid})
            # One JSON record per batch instance, one line each.
            json.dump({'final': final_dist, 'sets': sample_sets}, outfile)
            outfile.write('\n')
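# A minimal reader sketch for the file produced by write_grids above (not part
# of the original module; the path and record layout are taken from the writer,
# everything else is an assumption): one gzip-compressed JSON record per line.
import gzip
import json

def read_grids(path='grids.0.jsons.gz'):
    """Yield one {'final': [...], 'sets': [...]} record per batch instance."""
    with gzip.open(path, 'r') as infile:
        for line in infile:
            yield json.loads(line)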
def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []
    batches = iterators.gen_batches(eval_instances,
                                    batch_size=self.options.eval_batch_size)

    # Truncate the distributions dump; output_to_preds appends to it per batch.
    with gzip.open(config.get_file_path('dists.b64.gz'), 'w'):
        pass

    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Eval batch', len(batches))
    for i, batch in enumerate(batches):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(i)
        batch = list(batch)

        feed_dict = self.vectorize_inputs(batch)
        feed_dict.update(self.vectorize_labels(batch))
        output = self.run_predict(feed_dict)
        predictions_batch = self.output_to_preds(output, batch, sample=random)
        predictions.extend(predictions_batch)
        labels = self.vectorize_labels(batch)
        scores_batch = self.output_to_scores(output, labels)
        scores.extend(scores_batch)
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()

    return predictions, scores
def output_to_preds(self, output, batch, sample='ignored'):
    card_names = [rank + suit
                  for rank in cards_env.RANKS
                  for suit in cards_env.SUITS]
    num_cards = len(card_names)
    card_loc_rows, p2_loc = output
    assert card_loc_rows.shape[1:] == (num_cards, NUM_LOCS + 2), card_loc_rows.shape
    assert p2_loc.shape[1:] == (NUM_LOCS,), p2_loc.shape

    with gzip.open(config.get_file_path('dists.b64.gz'), 'a') as outfile:
        for row in summarize_output(card_loc_rows, p2_loc):
            outfile.write(row)
            outfile.write('\n')

    card_loc_indices = card_loc_rows.argmax(axis=2)
    p2_loc_indices = p2_loc.argmax(axis=1)
    preds = []
    for i, inst in enumerate(batch):
        cards_to_loc_pred = {name: loc_index_to_coord(idx, card=True)
                             for name, idx in zip(card_names, card_loc_indices[i])}
        p2_loc_pred = loc_index_to_coord(p2_loc_indices[i])
        state = world.build_world(inst.input['walls'], cards_to_loc_pred,
                                  p2_loc=p2_loc_pred)
        preds.append(state.__dict__)
    return preds
def train(self, training_instances, validation_instances=None, metrics=None):
    id_tag = (self.id + ': ') if self.id else ''
    if self.options.verbosity >= 2:
        print(id_tag + 'Training priors')
    self.train_priors(training_instances, listener_data=self.options.listener)
    self.dataset = training_instances

    xs, ys = self._data_to_arrays(training_instances, init_vectorizer=True)
    self._build_model()

    if self.options.verbosity >= 2:
        print(id_tag + 'Training conditional model')
    summary_path = config.get_file_path('losses.tfevents')
    if summary_path:
        writer = summary.SummaryWriter(summary_path)
    else:
        writer = None

    progress.start_task('Iteration', self.options.train_iters)
    for iteration in range(self.options.train_iters):
        progress.progress(iteration)
        self.model.fit(xs, ys, batch_size=self.options.batch_size,
                       num_epochs=self.options.train_epochs,
                       summary_writer=writer,
                       step=iteration * self.options.train_epochs)
        validation_results = self.validate(validation_instances, metrics,
                                           iteration=iteration)
        if writer is not None:
            step = (iteration + 1) * self.options.train_epochs
            self.on_iter_end(step, writer)
            for key, value in validation_results.iteritems():
                tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                writer.log_scalar(step, tag, value)
    if writer is not None:
        writer.flush()
    progress.end_task()
def main():
    options = config.options()
    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = color_instances.SOURCES[options.data_source].train_data(
        listener=options.listener)[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = color_instances.SOURCES[options.data_source].test_data(
        options.listener)[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.aic]
    if options.listener and not isinstance(test_data[0].output, numbers.Integral):
        m.append(metrics.squared_error)
    elif isinstance(test_data[0].output, (tuple, list)):
        m.append(metrics.prec1)
        # The output is a sequence in this branch, so check its first element
        # (a sequence itself can never be a string).
        if test_data[0].output and isinstance(test_data[0].output[0], basestring):
            m.append(metrics.bleu)
    else:
        m.append(metrics.accuracy)
        if test_data[0].output and isinstance(test_data[0].output, basestring):
            m.append(metrics.bleu)

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        with open(config.get_file_path('model.p'), 'wb') as outfile:
            learner.dump(outfile)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='dev',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'dev')
def train(self, training_instances, validation_instances=None, metrics=None,
          keep_params=False):
    id_tag = (self.id + ': ') if self.id else ''
    if self.options.verbosity >= 2:
        print(id_tag + 'Training priors')
    self.train_priors(training_instances, listener_data=self.options.listener)
    self.dataset = training_instances

    # Only initialize the vectorizer on the first call; later calls reuse it.
    xs, ys = self._data_to_arrays(training_instances,
                                  init_vectorizer=not hasattr(self, 'model'))

    if not hasattr(self, 'model') or not keep_params:
        if self.options.verbosity >= 2:
            print(id_tag + 'Building model')
        if keep_params:
            warnings.warn("keep_params was passed, but the model hasn't been "
                          "built; initializing all parameters.")
        self._build_model()
    else:
        if not hasattr(self.options, 'reset_optimizer_vars') or \
                self.options.reset_optimizer_vars:
            if self.options.verbosity >= 2:
                print(id_tag + 'Resetting optimizer')
            self.model.reset_optimizer()

    if self.options.verbosity >= 2:
        print(id_tag + 'Training conditional model')
    if hasattr(self, 'writer'):
        writer = self.writer
    else:
        summary_path = config.get_file_path('losses.tfevents')
        if summary_path:
            writer = summary.SummaryWriter(summary_path)
        else:
            writer = None
        self.writer = writer
    # step_base carries the global step across repeated train() calls.
    if not hasattr(self, 'step_base'):
        self.step_base = 0

    progress.start_task('Iteration', self.options.train_iters)
    for iteration in range(self.options.train_iters):
        progress.progress(iteration)
        self.model.fit(xs, ys, batch_size=self.options.batch_size,
                       num_epochs=self.options.train_epochs,
                       summary_writer=writer,
                       step=self.step_base + iteration * self.options.train_epochs)
        validation_results = self.validate(validation_instances, metrics,
                                           iteration=iteration)
        if writer is not None:
            step = self.step_base + (iteration + 1) * self.options.train_epochs
            self.on_iter_end(step, writer)
            for key, value in validation_results.iteritems():
                tag = 'val/' + key.split('.', 1)[1].replace('.', '/')
                writer.log_scalar(step, tag, value)
    self.step_base += self.options.train_iters * self.options.train_epochs
    if writer is not None:
        writer.flush()
    progress.end_task()
def output_to_preds(self, output, batch, sample='ignored'):
    assert output.shape[1:] == (NUM_LOCS,), output.shape
    with gzip.open(config.get_file_path('dists.b64.gz'), 'a') as outfile:
        # This model predicts only the p2 location; pad the dump with an
        # all-zeros 52-card grid so the record shape matches models that
        # also predict card locations.
        for row in summarize_output(
                np.zeros((output.shape[0], 52, NUM_LOCS + 2)), output):
            outfile.write(row)
            outfile.write('\n')
    p2_loc_indices = output.argmax(axis=1)
    return [loc_index_to_coord(p2_loc_indices[i])
            for i, inst in enumerate(batch)]
def output_html_dists():
    options = config.options(read=True)

    with gzip.open(config.get_file_path('dists.b64.gz'), 'r') as infile:
        rows = list(infile)
    with config.open('dists.js', 'w') as outfile:
        write_json_dists(rows, outfile)
        write_json_ents(rows, outfile)

    with config.open('data.eval.jsons', 'r') as infile:
        insts = list(infile)
    with config.open('predictions.eval.jsons', 'r') as infile:
        preds = list(infile)
    try:
        with config.open('samples.eval.jsons', 'r') as infile:
            samples = list(infile)
    except IOError:
        samples = None
    with config.open('insts.js', 'w') as outfile:
        write_json_insts(insts, preds, samples, outfile,
                         listener=options.listener)

    shutil.copy('dists.html', config.get_file_path('dists.html'))
def __init__(self, module, loss, optimizer, optimizer_params, vectorizer):
    self.get_options()
    self.module = cu(module)
    self.loss = cu(loss)
    self.optimizer_class = optimizer
    self.optimizer_params = optimizer_params
    self.build_optimizer()
    self.vectorizer = vectorizer

    summary_path = config.get_file_path('monitoring.tfevents')
    if summary_path:
        self.summary_writer = summary.SummaryWriter(summary_path)
    else:
        self.summary_writer = None
    self.step = 0
    self.last_timestamp = datetime.datetime.now()
def output_to_preds(self, output, batch, sample=False):
    _, predictions, samples = output
    indices = samples if sample else predictions

    # Construct a surrogate p2-location score grid: every cell gets a large
    # negative score (walls even lower), and only the agent's known location
    # is set to 0.
    p2_loc_arrays = -11.0 * (np.array([inst.input['walls'] for inst in batch]) + 2.0)
    for i, inst in enumerate(batch):
        p2_loc_arrays[i][inst.input['loc']] = 0.0
    p2_loc_linear = p2_loc_arrays.reshape([p2_loc_arrays.shape[0], NUM_LOCS])
    with gzip.open(config.get_file_path('dists.b64.gz'), 'a') as outfile:
        # Pad with an all-zeros 52-card grid to match the shared dump format.
        for row in summarize_output(
                np.zeros((p2_loc_linear.shape[0], 52, NUM_LOCS + 2)),
                p2_loc_linear):
            outfile.write(row)
            outfile.write('\n')

    return sanitize_preds(self.seq_vec.unvectorize_all(indices))
def main():
    options = config.options()
    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    # A negative size option means "use the full split" (slice with None).
    train_size = options.train_size if options.train_size >= 0 else None
    test_size = options.test_size if options.test_size >= 0 else None

    train_data = datasets.SOURCES[options.data_source].train_data()[:train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[options.data_source].test_data()[:test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[name] for name in options.metrics]

    if options.load:
        learner.load(options.load)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model')
        if model_path:
            learner.dump(model_path)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')
    if options.output_train_samples:
        samples = learner.predict(train_data, random=True)
        config.dump(samples, 'samples.train.jsons', lines=True)

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')
    if options.output_test_samples:
        samples = learner.predict(test_data, random=True)
        config.dump(samples, 'samples.eval.jsons', lines=True)
def main():
    options = config.options()
    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = datasets.SOURCES[options.data_source].train_data()[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[options.data_source].test_data()[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[name] for name in options.metrics]

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model.pkl')
        if model_path:
            with open(model_path, 'wb') as outfile:
                learner.dump(outfile)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')
def train(self, training_instances, validation_instances='ignored',
          metrics='ignored'):
    self.build_graph()
    env = gym.make(cards_env.register())
    self.init_params()

    if self.options.verbosity >= 1:
        progress.start_task('Epoch', self.options.pg_train_epochs)
    for epoch in range(self.options.pg_train_epochs):
        if self.options.verbosity >= 1:
            progress.progress(epoch)
        batches = iterators.iter_batches(training_instances,
                                         self.options.pg_batch_size)
        num_batches = (len(training_instances) - 1) // self.options.pg_batch_size + 1
        if self.options.verbosity >= 1:
            progress.start_task('Batch', num_batches)
        try:
            for batch_num, batch in enumerate(batches):
                if self.options.verbosity >= 1:
                    progress.progress(batch_num)
                step = epoch * num_batches + batch_num
                self.train_one_batch(list(batch), env, t=step)
                # Checkpoint every 10 policy-gradient steps.
                if step % 10 == 0:
                    check_prefix = config.get_file_path('checkpoint')
                    self.saver.save(self.session, check_prefix, global_step=step)
        except KeyboardInterrupt:
            self.summary_writer.flush()
            raise
        if self.options.verbosity >= 1:
            progress.end_task()
    if self.options.verbosity >= 1:
        progress.end_task()
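# Hypothetical restore counterpart to the periodic checkpointing above (assumes
# the same TensorFlow 1.x Saver/session setup; not part of the original class):
import os
import tensorflow as tf

def restore_latest(saver, session, check_prefix):
    """Restore the most recent checkpoint saved under check_prefix, if any."""
    latest = tf.train.latest_checkpoint(os.path.dirname(check_prefix))
    if latest is not None:
        saver.restore(session, latest)
    return latest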
def patch(model):
    def __quickpickle_setstate__(self, state):
        self.__dict__ = state

    def __quickpickle_getstate__(self):
        state = dict(self.__dict__)
        del state['__getstate__']
        del state['__setstate__']
        state['quickpickle'] = True
        return state

    # Defined but never attached to the model; only the precomputed value is
    # stored below as model.quickpickle_numparams.
    def __quickpickle_numparams__(self):
        return self.quickpickle_numparams

    model.__getstate__ = types.MethodType(__quickpickle_getstate__, model)
    model.__setstate__ = types.MethodType(__quickpickle_setstate__, model)
    model.quickpickle_numparams = model.num_params


if __name__ == '__main__':
    sys.setrecursionlimit(50000)
    options = config.options(read=True)
    if options.load:
        modelfile = options.load
    else:
        modelfile = config.get_file_path('model.p')
    with open(modelfile, 'rb') as infile, \
            config.open('quickpickle.p', 'wb') as outfile:
        model = pickle.load(infile)
        patch(model)
        pickle.dump(model, outfile)
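# Sketch of the consumer side (assumed, not in the original script): the
# quickpickled model is an ordinary pickle, so reading it back needs only:
import pickle

with open('quickpickle.p', 'rb') as infile:  # path relative to the run dir
    model = pickle.load(infile)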
def main():
    options = config.options()
    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_datasets = []
    validation_datasets = []
    test_datasets = []

    # Broadcast singleton size options across all data sources.
    if len(options.train_size) == 1:
        options.train_size = options.train_size * len(options.data_source)
    else:
        assert len(options.train_size) == len(options.data_source)
    if len(options.validation_size) == 1:
        options.validation_size = options.validation_size * len(options.data_source)
    else:
        assert len(options.validation_size) == len(options.data_source)
    if len(options.test_size) == 1:
        options.test_size = options.test_size * len(options.data_source)
    else:
        assert len(options.test_size) == len(options.data_source)

    for source, train_size, validation_size, test_size in zip(
            options.data_source, options.train_size,
            options.validation_size, options.test_size):
        train_insts = color_instances.SOURCES[source].train_data(
            listener=options.listener)[:train_size]
        if validation_size:
            assert validation_size < len(train_insts), \
                ('No training data after validation split! (%d <= %d)' %
                 (len(train_insts), validation_size))
            validation_insts = train_insts[-validation_size:]
            validation_datasets.append(validation_insts)
            train_insts = train_insts[:-validation_size]
        else:
            validation_datasets.append(None)
        train_datasets.append(train_insts)
        test_insts = color_instances.SOURCES[source].test_data(
            options.listener)[:test_size]
        test_datasets.append(test_insts)

    learner = learners.new(options.learner)

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.aic]
    example_inst = get_example_inst(test_datasets, train_datasets)
    if options.listener and not isinstance(example_inst.output, numbers.Integral):
        m.append(metrics.squared_error)
    elif isinstance(example_inst.output, (tuple, list)):
        m.append(metrics.prec1)
        # The output is a sequence in this branch, so check its first element
        # (a sequence itself can never be a string).
        if example_inst.output and isinstance(example_inst.output[0], basestring):
            m.extend([metrics.bleu, metrics.wer,
                      metrics.token_perplexity_macro,
                      metrics.token_perplexity_micro])
    else:
        m.append(metrics.accuracy)
        if example_inst.output and isinstance(example_inst.output, basestring):
            m.extend([metrics.bleu, metrics.wer,
                      metrics.token_perplexity_macro,
                      metrics.token_perplexity_micro])

    multi_train = (len(options.data_source) > 1)
    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
        train_results = None
    else:
        train_results = None
        if hasattr(learner, '_data_to_arrays'):
            # XXX: is there a better way to ensure that the vocabulary is
            # defined before training starts?
            for train_insts in train_datasets[1:]:
                learner._data_to_arrays(train_insts, init_vectorizer=True)
        for i, (source, train_insts, validation_insts) in enumerate(
                zip(options.data_source, train_datasets, validation_datasets)):
            if not train_insts:
                continue
            if i > 0:
                learner.train(train_insts, validation_insts, metrics=m,
                              keep_params=True)
            else:
                learner.train(train_insts, validation_insts, metrics=m)
            with open(config.get_file_path('model.p'), 'wb') as outfile:
                learner.dump(outfile)
            if multi_train:
                split_id = 'train_' + source
            else:
                split_id = 'train'
            train_results = evaluate.evaluate(learner, train_insts, metrics=m,
                                              split_id=split_id,
                                              write_data=options.output_train_data)
            if options.verbosity != 0:
                output.output_results(train_results, split_id)

    test_results = None
    for i, (source, test_insts) in enumerate(zip(options.data_source,
                                                 test_datasets)):
        if not test_insts:
            continue
        if multi_train:
            split_id = 'eval_' + source
        else:
            split_id = 'eval'
        test_results = evaluate.evaluate(learner, test_insts, metrics=m,
                                         split_id=split_id,
                                         write_data=options.output_test_data)
        if options.verbosity != 0:
            output.output_results(test_results, split_id)

    return train_results, test_results
def main():
    options = config.options()

    with thutils.device_context(options.device):
        progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

        SG = iterators.SizedGenerator

        if not hasattr(options, 'verbosity') or options.verbosity >= 2:
            print('Pre-calculating dataset sizes')
        train_data = SG(lambda: islice(dataset(options.train_file), 0,
                                       nin(options.train_size)),
                        length=None)
        if not hasattr(options, 'verbosity') or options.verbosity >= 4:
            print('Training set size: {}'.format(len(train_data)))

        validation_data = None
        if options.validation_file:
            validation_data = SG(lambda: islice(dataset(options.validation_file), 0,
                                                nin(options.validation_size)),
                                 length=None)
            if not hasattr(options, 'verbosity') or options.verbosity >= 4:
                print('Validation set size: {}'.format(len(validation_data)))

        eval_data = SG(lambda: islice(dataset(options.eval_file), 0,
                                      nin(options.eval_size)),
                       length=None)
        if not hasattr(options, 'verbosity') or options.verbosity >= 4:
            print('Eval set size: {}'.format(len(eval_data)))

        learner = learners.new(options.learner)

        m = [metrics.METRICS[name] for name in options.metrics]

        if options.load:
            with open(options.load, 'rb') as infile:
                learner.load(infile)
        else:
            learner.train(train_data, validation_data, metrics=m)
            model_path = config.get_file_path('model.pkl')
            if model_path:
                with open(model_path, 'wb') as outfile:
                    learner.dump(outfile)

        train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                          split_id='train',
                                          write_data=options.output_train_data,
                                          pass_split=True)
        output.output_results(train_results, 'train')

        eval_results = evaluate.evaluate(learner, eval_data, metrics=m,
                                         split_id='eval',
                                         write_data=options.output_eval_data,
                                         pass_split=True)
        output.output_results(eval_results, 'eval')