def main(opt): """ Spawns 1 process per GPU """ nb_gpu = len(opt.gpuid) logger = get_logger(opt.log_file) mp = torch.multiprocessing.get_context('spawn') # Create a thread to listen for errors in the child processes. error_queue = mp.SimpleQueue() error_handler = ErrorHandler(error_queue) # Train with multiprocessing. procs = [] for i in range(nb_gpu): opt.gpu_rank = i opt.device_id = i procs.append( mp.Process(target=run, args=( opt, error_queue, ), daemon=True)) procs[i].start() logger.info(" Starting process pid: %d " % procs[i].pid) error_handler.add_child(procs[i].pid) for p in procs: p.join()
def init_model(model, use_gpu=False):
    opt = FakeOpt(model=model, gpu=0 if use_gpu else -1)
    translator = build_translator(opt, report_score=False,
                                  logger=get_logger(), use_output=False)
    return translator
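# Hypothetical usage of init_model; the checkpoint path is illustrative, and
# FakeOpt is assumed to supply the remaining Translator defaults. The
# translate() keyword arguments mirror the calls made elsewhere in this repo:
#
#     translator = init_model('model.pt', use_gpu=torch.cuda.is_available())
#     out = io.StringIO()
#     translator.translate(src_data_iter=['hello world'], src_dir='',
#                          batch_size=1, out_file=out)
#     print(out.getvalue())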
def _modify(corpus=None, neurons=None, values=None, model=None):
    opt = FakeOpt(model=model)
    translator = build_translator(opt, report_score=False,
                                  logger=get_logger(), use_output=False)

    sources, toggles = zip(*corpus)
    print('Toggles before unpacking:')
    print(toggles[:10])
    _, toggles = zip(*toggles)
    print(sources[:10], toggles[:10])

    def intervene(layer_data, sentence_index, index):
        for (layer, neuron), value in zip(neurons, values):
            if index == layer:
                for i in toggles[sentence_index]:
                    tqdm.write('Successfully modifying %d %d %d %f'
                               % (i, layer, neuron, value))
                    layer_data[i][0][neuron] = value
        return layer_data

    modified = []
    for i, source in enumerate(tqdm(sources)):
        stream = io.StringIO()

        # Logging:
        tqdm.write('Source: %s' % ' '.join(source))
        tqdm.write('Target: %s' % ' '.join(source[j] for j in toggles[i]))

        translator.translate(src_data_iter=[' '.join(source)],
                             src_dir='',
                             batch_size=1,
                             attn_debug=False,
                             intervention=lambda l, j: intervene(l, i, j),
                             out_file=stream)
        translation = stream.getvalue()

        # Logging:
        tqdm.write('Result: %s' % translation)

        modified.append(translation.strip().split(' '))
    return modified
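# Input shapes inferred from the unpacking above (all concrete values are
# illustrative): each corpus entry pairs a tokenized source sentence with a
# (tag, token-indices) tuple, where the indices pick the tokens whose
# activations get overwritten; neurons/values say where and with what.
#
#     corpus = [(['the', 'cat', 'sat'], ('toggle', [1]))]
#     neurons = [(1, 250)]  # (layer, neuron) pairs to modify
#     values = [5.0]        # value written at each (layer, neuron)
#     modified = _modify(corpus=corpus, neurons=neurons, values=values,
#                        model='model.pt')  # hypothetical checkpoint path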
def translate(sentences=None, modifications=None, model=None):
    global translator
    global translator_model_name
    if model != translator_model_name:
        opt = FakeOpt(model=model)
        translator = build_translator(opt, report_score=False,
                                      logger=get_logger(), use_output=False)
        translator_model_name = model

    def intervene(layer_data, sentence_index, index):
        for token, layer, neuron, value in modifications[sentence_index]:
            if layer == index:
                print('Successfully flipping %d %d %d %f'
                      % (token, layer, neuron, value))
                layer_data[token][0][neuron] = value
        return layer_data

    modified = []
    dumps = []
    # NB: passing streams around and returning them like this is hacky; the
    # plumbing could be deduplicated later, but everything works as-is.
    for i, source in enumerate(sentences):
        stream = io.StringIO()
        layer_dump, scores, predictions = translator.translate(
            src_data_iter=[source],
            src_dir='',
            batch_size=1,
            attn_debug=False,
            intervention=lambda l, j: intervene(l, i, j),
            out_file=stream)
        translation = stream.getvalue()
        sys.stdout.flush()
        modified.append(translation)
        dumps.append(layer_dump)
    return modified, dumps
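# Hypothetical call: modifications is indexed by sentence, and each entry
# lists the (token, layer, neuron, value) tuples applied while decoding that
# sentence. The checkpoint path is illustrative.
#
#     outputs, dumps = translate(
#         sentences=['the cat sat'],
#         modifications=[[(1, 1, 250, 5.0)]],  # token 1, layer 1, neuron 250
#         model='model.pt')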
def preprocess_main(opt):
    logger = get_logger(opt.log_file)
    src_nfeats = inputters.get_num_features(opt.data_type, opt.train_src,
                                            'src')
    tgt_nfeats = inputters.get_num_features(opt.data_type, opt.train_tgt,
                                            'tgt')
    logger.info(" * number of source features: %d." % src_nfeats)
    logger.info(" * number of target features: %d." % tgt_nfeats)

    logger.info("Building `Fields` object...")
    fields = inputters.get_fields(opt.data_type, src_nfeats, tgt_nfeats)

    logger.info("Building & saving training data...")
    train_dataset_files = build_save_dataset('train', fields, opt, logger)

    logger.info("Building & saving vocabulary...")
    build_save_vocab(train_dataset_files, fields, opt, logger)

    logger.info("Building & saving validation data...")
    build_save_dataset('valid', fields, opt, logger)
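# Typical invocation, as a sketch; the flags are the standard OpenNMT-py
# preprocessing options that the calls above assume, and the paths are
# illustrative:
#
#     python preprocess.py -train_src data/src-train.txt \
#         -train_tgt data/tgt-train.txt -valid_src data/src-val.txt \
#         -valid_tgt data/tgt-val.txt -save_data data/demo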
def __init__(self, opt, model_id, tokenizer_opt=None, load=False,
             timeout=-1, on_timeout="to_cpu", model_root="./"):
    """
    Args:
        opt: (dict) options for the Translator
        model_id: (int) model id
        tokenizer_opt: (dict) options for the tokenizer, or None
        load: (bool) whether to load the model during __init__
        timeout: (int) seconds before running `do_timeout`;
            negative values mean no timeout
        on_timeout: (str) in ["to_cpu", "unload"]: what to do on
            timeout (see function `do_timeout`)
        model_root: (str) path to the model directory;
            it must contain the model and tokenizer file
    """
    self.model_root = model_root
    self.opt = self.parse_opt(opt)
    if self.opt.n_best > 1:
        raise ValueError("Values of n_best > 1 are not supported")
    self.model_id = model_id
    self.tokenizer_opt = tokenizer_opt
    self.timeout = timeout
    self.on_timeout = on_timeout

    self.unload_timer = None
    self.user_opt = opt
    self.tokenizer = None
    self.logger = get_logger(opt.log_file)

    if load:
        self.load()
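# A sketch of constructing a ServerModel directly; the opt dict mirrors a
# "models" entry in the server's conf file, and all paths and values are
# illustrative (parse_opt is assumed to fill in remaining defaults):
#
#     sm = ServerModel({'model': 'model.pt', 'beam_size': 5, 'gpu': -1},
#                      model_id=0, load=False, timeout=600,
#                      on_timeout='to_cpu', model_root='./available_models')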
def intervene(layer_data, layer_index):
    rnn_size = layer_data.shape[2]
    start_range = layer_index * rnn_size
    end_range = start_range + rnn_size
    neurons = [n - start_range for n in neurons_to_ablate
               if start_range <= n < end_range]
    layer_data[:, :, neurons] = 0
    return layer_data


translator.translate(src_path=opt.src,
                     tgt_path=opt.tgt,
                     src_dir=opt.src_dir,
                     batch_size=opt.batch_size,
                     attn_debug=opt.attn_debug,
                     intervention=lambda l, i: intervene(l, i))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='ablate.py',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    onmt.opts.add_md_help_argument(parser)
    onmt.opts.translate_opts(parser)
    parser.add_argument('-neurons-to-ablate', dest='neurons',
                        type=str, default="")

    opt = parser.parse_args()
    logger = get_logger(opt.log_file)
    main(opt)
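# Worked example of the index arithmetic in intervene (rnn_size = 500 is an
# assumed hidden size): global neuron ids run layer after layer, rnn_size
# entries per layer, so for layer_index = 2 the window is [1000, 1500) and a
# global id such as 1337 maps to local index 1337 - 1000 = 337:
#
#     start_range = 2 * 500
#     local = [n - start_range for n in [42, 512, 1337]
#              if start_range <= n < start_range + 500]
#     assert local == [337]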
def main(opt):
    logger = get_logger(opt.log_file)
    opt = training_opt_postprocessing(opt, logger)

    # Load checkpoint if we resume from a previous training run.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)
        model_opt = checkpoint['opt']
    else:
        checkpoint = None
        model_opt = opt

    # Peek at the first dataset to determine the data_type.
    # (All datasets have the same data_type.)
    first_dataset = next(lazily_load_dataset("train", opt, logger))
    data_type = first_dataset.data_type

    # Load fields generated during the preprocess phase.
    fields = _load_fields(first_dataset, data_type, opt, checkpoint, logger)

    # Report src/tgt features.
    src_features, tgt_features = _collect_report_features(fields)
    for j, feat in enumerate(src_features):
        logger.info(' * src feature %d size = %d'
                    % (j, len(fields[feat].vocab)))
    for j, feat in enumerate(tgt_features):
        logger.info(' * tgt feature %d size = %d'
                    % (j, len(fields[feat].vocab)))

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint, logger)
    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)

    # Build optimizer.
    optim = build_optim(model, opt, checkpoint)

    # Build model saver.
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)

    trainer = build_trainer(opt, model, fields, optim, data_type, logger,
                            model_saver=model_saver)

    def train_iter_fct():
        return build_dataset_iter(
            lazily_load_dataset("train", opt, logger), fields, opt)

    def valid_iter_fct():
        return build_dataset_iter(
            lazily_load_dataset("valid", opt, logger), fields, opt)

    # Do training.
    trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                  opt.valid_steps)

    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
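# Typical entry point for this training loop, as a sketch using standard
# OpenNMT-py flags (paths and step counts are illustrative):
#
#     python train.py -data data/demo -save_model demo-model \
#         -train_steps 100000 -valid_steps 10000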
def forward(self, input):
    laplacian = input.exp() + self.eps
    output = input.clone()
    for b in range(input.size(0)):
        lap = laplacian[b].masked_fill(
            torch.eye(input.size(1)).cuda().ne(0), 0)
        lap = -lap + torch.diag(lap.sum(0))
        # store roots on diagonal
        lap[0] = input[b].diag().exp()
        inv_laplacian = lap.inverse()

        factor = inv_laplacian.diag().unsqueeze(1)\
                              .expand_as(input[b]).transpose(0, 1)
        term1 = input[b].exp().mul(factor).clone()
        term2 = input[b].exp().mul(inv_laplacian.transpose(0, 1)).clone()
        term1[:, 0] = 0
        term2[0] = 0
        output[b] = term1 - term2
        roots_output = input[b].diag().exp().mul(
            inv_laplacian.transpose(0, 1)[0])
        output[b] = output[b] + torch.diag(roots_output)
    return output


if __name__ == "__main__":
    logger = get_logger('StructuredAttention.log')
    dtree = MatrixTree()
    q = torch.rand(1, 5, 5).cuda()
    marg = dtree.forward(q)
    logger.info(marg.sum(1))
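# Quick sanity check on the marginals (a sketch; assumes a CUDA device, as in
# the demo above). By the Matrix-Tree theorem the output holds marginal edge
# probabilities for each batch element, so summing over heads should come out
# close to 1 per token, which is what the logger call above is printing:
#
#     marg = MatrixTree().forward(torch.rand(1, 5, 5).cuda())
#     print(marg.sum(1))  # expect values near 1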
filtered_enc_embeddings, enc_count = match_embeddings(
    enc_vocab, embeddings_enc, opt)
filtered_dec_embeddings, dec_count = match_embeddings(
    dec_vocab, embeddings_dec, opt)
logger.info("\nMatching: ")
match_percent = [_['match'] / (_['match'] + _['miss']) * 100
                 for _ in [enc_count, dec_count]]
logger.info("\t* enc: %d match, %d missing, (%.2f%%)"
            % (enc_count['match'], enc_count['miss'], match_percent[0]))
logger.info("\t* dec: %d match, %d missing, (%.2f%%)"
            % (dec_count['match'], dec_count['miss'], match_percent[1]))

logger.info("\nFiltered embeddings:")
# logger.info does not render extra positional args the way print() does,
# so format the sizes into the message explicitly.
logger.info("\t* enc: %s" % str(filtered_enc_embeddings.size()))
logger.info("\t* dec: %s" % str(filtered_dec_embeddings.size()))

enc_output_file = opt.output_file + ".enc.pt"
dec_output_file = opt.output_file + ".dec.pt"
logger.info("\nSaving embedding as:\n\t* enc: %s\n\t* dec: %s"
            % (enc_output_file, dec_output_file))
torch.save(filtered_enc_embeddings, enc_output_file)
torch.save(filtered_dec_embeddings, dec_output_file)
logger.info("\nDone.")


if __name__ == "__main__":
    logger = get_logger('embeddings_to_torch.log')
    main()
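# The saved .pt tensors are meant to be fed back into training as pretrained
# embeddings (a sketch; OpenNMT-py exposes this through the -pre_word_vecs_enc
# and -pre_word_vecs_dec flags, and the paths here are illustrative):
#
#     python train.py -data data/demo -save_model demo-model \
#         -pre_word_vecs_enc out.enc.pt -pre_word_vecs_dec out.dec.pt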
fields = onmt.inputters.load_fields_from_vocab(checkpoint['vocab'])

model_opt = checkpoint['opt']
for arg in dummy_opt.__dict__:
    if arg not in model_opt:
        model_opt.__dict__[arg] = dummy_opt.__dict__[arg]

model = onmt.model_builder.build_base_model(
    model_opt, fields, use_gpu(opt), checkpoint)
encoder = model.encoder
decoder = model.decoder

encoder_embeddings = encoder.embeddings.word_lut.weight.data.tolist()
decoder_embeddings = decoder.embeddings.word_lut.weight.data.tolist()

logger.info("Writing source embeddings")
write_embeddings(opt.output_dir + "/src_embeddings.txt",
                 src_dict, encoder_embeddings)

logger.info("Writing target embeddings")
write_embeddings(opt.output_dir + "/tgt_embeddings.txt",
                 tgt_dict, decoder_embeddings)

logger.info('... done.')
logger.info('Converting model...')


if __name__ == "__main__":
    logger = get_logger('extract_embeddings.log')
    main()
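# A sketch of inspecting the emitted files; write_embeddings is assumed to
# produce plain text with one token per line followed by its vector
# components, so the first line should hold the first vocab entry:
#
#     with open('src_embeddings.txt') as f:
#         token, *vec = f.readline().split()
#         print(token, len(vec))  # expected: first vocab entry, emb_size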