def __init__(self, train_path, test_path, source_dic_path, target_dic_path,
             model_type):
    self.train_path = train_path
    self.test_path = test_path
    self.source_dic_path = source_dic_path
    self.target_dic_path = target_dic_path
    self.model_type = ModelType(model_type)

    self.source_dic = load_dic(self.source_dic_path)
    self.target_dic = load_dic(self.target_dic_path)

    _record_reader = {
        ModelType.CLASSIFICATION_MODE: self._read_classification_record,
        ModelType.REGRESSION_MODE: self._read_regression_record,
        ModelType.RANK_MODE: self._read_rank_record,
    }

    # Check the wrapped attribute, not the raw argument: `model_type` may be
    # passed in as a plain int mode and is normalized by ModelType() above.
    assert isinstance(self.model_type, ModelType)
    self.record_reader = _record_reader[self.model_type.mode]

    self.is_infer = False

    self.train_data_csv = "/home/kesci/input/qichedashi/train_set.csv"
    self.dev_data_csv = "/home/kesci/input/qichedashi/final_round_dev_set.csv"
    self.test_data_csv = "/home/kesci/input/qichedashi/final_round_test_set.csv"
    self.NEG = 3
    self.train_samples = 200000
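# Usage sketch (illustration only, not part of the original code): build a
# rank-mode Dataset and pull a few records. The file paths below are
# hypothetical placeholders; `Dataset` and `ModelType.RANK_MODE` come from
# the surrounding module, and `dataset.train` is assumed to be a generator
# factory in the usual PaddlePaddle reader style.
def _example_build_dataset():
    dataset = Dataset(
        train_path="./data/rank/train.txt",
        test_path="./data/rank/test.txt",
        source_dic_path="./data/vocab.txt",
        target_dic_path="./data/vocab.txt",
        model_type=ModelType.RANK_MODE)
    # Inspect the first few parsed records.
    for i, record in enumerate(dataset.train()):
        print(record)
        if i >= 2:
            break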
def __init__(self, train_paths, test_paths, source_dic_path, target_dic_path):
    self.train_paths = train_paths
    self.test_paths = test_paths
    self.source_dic_path = source_dic_path
    self.target_dic_path = target_dic_path

    self.source_dic = load_dic(self.source_dic_path)
    self.target_dic = load_dic(self.target_dic_path)

    # This variant only reads classification records.
    self.record_reader = self._read_classification_record
    self.is_infer = False
def __init__(self, model_path):
    logger.info("create DSSM model")

    self.source_dic_path = config.config["source_dic_path"]
    self.target_dic_path = config.config["target_dic_path"]

    dnn_dims = config.config["dnn_dims"]
    layer_dims = [int(i) for i in dnn_dims.split(',')]
    model_arch = ModelArch(config.config["model_arch"])
    share_semantic_generator = config.config[
        "share_network_between_source_target"]
    share_embed = config.config["share_embed"]
    class_num = config.config["class_num"]

    prediction = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [self.source_dic_path, self.target_dic_path]
        ],
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed,
        is_infer=True)()

    # Load the trained parameters. The tar archive must be opened in binary
    # mode, and `with` ensures the handle is closed.
    logger.info("load model parameters from %s" % model_path)
    with open(model_path, "rb") as f:
        self.parameters = paddle.parameters.Parameters.from_tar(f)
    self.inferer = paddle.inference.Inference(
        output_layer=prediction, parameters=self.parameters)
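# Usage sketch (illustration only): run the inferer built above on a small
# batch. The class name `Inferer`, the model path, and the word ids are all
# hypothetical placeholders; each record is a (source_word_ids,
# target_word_ids) pair matching the network's two inputs.
def _example_infer():
    inferer = Inferer("./models/dssm_classification_pass_00000.tar")
    batch = [([2, 4, 7], [3, 8]),
             ([5, 1], [9, 6, 2])]
    # Delegate to PaddlePaddle v2's batch inference.
    predictions = inferer.inferer.infer(input=batch)
    print(predictions)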
def __init__(self, train_path, test_path, source_dic_path, target_dic_path,
             model_type):
    self.train_path = train_path
    self.test_path = test_path
    self.source_dic_path = source_dic_path
    self.target_dic_path = target_dic_path
    self.model_type = ModelType(model_type)

    self.source_dic = load_dic(self.source_dic_path)
    self.target_dic = load_dic(self.target_dic_path)

    _record_reader = {
        ModelType.CLASSIFICATION_MODE: self._read_classification_record,
        ModelType.REGRESSION_MODE: self._read_regression_record,
        ModelType.RANK_MODE: self._read_rank_record,
    }

    # As above: validate and dispatch on the normalized ModelType.
    assert isinstance(self.model_type, ModelType)
    self.record_reader = _record_reader[self.model_type.mode]

    self.is_infer = False
def __init__(self, param_path):
    # `layer_dims` and `args` are module-level globals parsed from the
    # command line, as in the original PaddlePaddle DSSM example.
    prediction = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [args.source_dic_path, args.target_dic_path]
        ],
        model_type=args.model_type,
        model_arch=args.model_arch,
        share_semantic_generator=args.share_network_between_source_target,
        class_num=args.class_num,
        share_embed=args.share_embed,
        is_infer=True)()

    # Load the trained parameters (tar archives are read in binary mode).
    logger.info("Load the trained model from %s." % param_path)
    with open(param_path, "rb") as f:
        self.parameters = paddle.parameters.Parameters.from_tar(f)
    self.inferer = paddle.inference.Inference(
        output_layer=prediction, parameters=self.parameters)
def train(train_data_path=None,
          test_data_path=None,
          source_dic_path=None,
          target_dic_path=None,
          model_type=ModelType.create_classification(),
          model_arch=ModelArch.create_cnn(),
          batch_size=10,
          num_passes=10,
          share_semantic_generator=False,
          share_embed=False,
          class_num=None,
          num_workers=1,
          use_gpu=False):
    '''
    Train the DSSM. `layer_dims` and `args` are module-level globals parsed
    from the command line.
    '''
    default_train_path = './data/rank/train.txt'
    default_test_path = './data/rank/test.txt'
    default_dic_path = './data/vocab.txt'
    if not model_type.is_rank():
        default_train_path = './data/classification/train.txt'
        default_test_path = './data/classification/test.txt'

    use_default_data = not train_data_path
    if use_default_data:
        train_data_path = default_train_path
        test_data_path = default_test_path
        source_dic_path = default_dic_path
        target_dic_path = default_dic_path

    dataset = reader.Dataset(
        train_path=train_data_path,
        test_path=test_data_path,
        source_dic_path=source_dic_path,
        target_dic_path=target_dic_path,
        model_type=model_type)

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train, buf_size=1000),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(dataset.test, buf_size=1000),
        batch_size=batch_size)

    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    cost, prediction, label = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [source_dic_path, target_dic_path]
        ],
        model_type=model_type,
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed)()

    parameters = paddle.parameters.create(cost)

    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=1e-3,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # AUC evaluation only applies to the (binary) classification modes.
    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label)
        if not model_type.is_rank() else None,
        parameters=parameters,
        update_equation=adam_optimizer)

    if model_type.is_classification() or model_type.is_regression():
        feeding = {'source_input': 0, 'target_input': 1, 'label_input': 2}
    else:
        feeding = {
            'source_input': 0,
            'left_target_input': 1,
            'right_target_input': 2,
            'label_input': 3
        }

    def _event_handler(event):
        '''
        End-of-batch handler: log training cost, run periodic evaluation on
        the test set, and checkpoint the parameters.
        '''
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % args.num_batches_to_log == 0:
                logger.info("Pass %d, Batch %d, Cost %f, %s" %
                            (event.pass_id, event.batch_id, event.cost,
                             event.metrics))

            # test model
            if event.batch_id > 0 and \
                    event.batch_id % args.num_batches_to_test == 0:
                if test_reader is not None:
                    if model_type.is_classification():
                        result = trainer.test(
                            reader=test_reader, feeding=feeding)
                        logger.info("Test at Pass %d, %s" %
                                    (event.pass_id, result.metrics))
                    else:
                        result = None

            # save model (tar archives are written in binary mode)
            if event.batch_id > 0 and \
                    event.batch_id % args.num_batches_to_save_model == 0:
                model_desc = "{type}_{arch}".format(
                    type=str(args.model_type), arch=str(args.model_arch))
                with open("%sdssm_%s_pass_%05d.tar" %
                          (args.model_output_prefix, model_desc,
                           event.pass_id), "wb") as f:
                    parameters.to_tar(f)

    trainer.train(
        reader=train_reader,
        event_handler=_event_handler,
        feeding=feeding,
        num_passes=num_passes)

    logger.info("Training has finished.")
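# Usage sketch (illustration only): kick off rank-mode training on the
# default ./data/rank files. This assumes `ModelType.create_rank()` exists
# alongside `create_classification()` above, and that `args` has already
# been parsed at module level, since `train()` reads its logging and
# checkpoint intervals from it.
def _example_train_rank():
    train(
        model_type=ModelType.create_rank(),
        model_arch=ModelArch.create_cnn(),
        batch_size=32,
        num_passes=5,
        use_gpu=False)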
def train(train_data_paths=None,
          test_data_paths=None,
          source_dic_path=None,
          target_dic_path=None,
          model_arch=ModelArch.create_rnn(),
          batch_size=10,
          num_passes=10,
          share_semantic_generator=False,
          share_embed=False,
          class_num=2,
          num_workers=1,
          use_gpu=False):
    """
    Train the DSSM (classification variant).
    """
    default_train_paths = [
        "./data/classification/train/right.txt",
        "./data/classification/train/wrong.txt"
    ]
    default_test_paths = [
        "./data/classification/test/right.txt",
        "./data/classification/test/wrong.txt"
    ]
    default_dic_path = "./data/vocab.txt"

    layer_dims = [int(i) for i in config.config['dnn_dims'].split(',')]

    use_default_data = not train_data_paths
    if use_default_data:
        train_data_paths = default_train_paths
        test_data_paths = default_test_paths
        source_dic_path = default_dic_path
        target_dic_path = default_dic_path

    dataset = reader.Dataset(
        train_paths=train_data_paths,
        test_paths=test_data_paths,
        source_dic_path=source_dic_path,
        target_dic_path=target_dic_path)

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train, buf_size=1000),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(dataset.test, buf_size=1000),
        batch_size=batch_size)

    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    # Build the DSSM network.
    cost, prediction, label = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [source_dic_path, target_dic_path]
        ],
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed)()

    parameters = paddle.parameters.create(cost)

    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=1e-3,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label),
        parameters=parameters,
        update_equation=adam_optimizer)

    feeding = {"source_input": 0, "target_input": 1, "label_input": 2}

    def _event_handler(event):
        """
        End-of-batch handler: log training cost, run periodic evaluation on
        the test set, and checkpoint the parameters.
        """
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % config.config['num_batches_to_log'] == 0:
                logger.info("Pass %d, Batch %d, Cost %f, %s" %
                            (event.pass_id, event.batch_id, event.cost,
                             event.metrics))

            # test model
            if event.batch_id > 0 and \
                    event.batch_id % config.config['num_batches_to_test'] == 0:
                if test_reader is not None:
                    result = trainer.test(reader=test_reader, feeding=feeding)
                    logger.info("Test at Pass %d, %s" %
                                (event.pass_id, result.metrics))

            # save model (tar archives are written in binary mode)
            if event.batch_id > 0 and \
                    event.batch_id % config.config['num_batches_to_save_model'] == 0:
                model_desc = "classification_{arch}".format(
                    arch=str(model_arch))
                model_path = "%sdssm_%s_pass_%05d.tar" % (
                    config.config['model_output_prefix'], model_desc,
                    event.pass_id)
                with open(model_path, "wb") as f:
                    parameters.to_tar(f)
                logger.info("save model: %s" % model_path)

        # (Optional) evaluate and checkpoint at the end of every pass:
        # if isinstance(event, paddle.event.EndPass):
        #     result = trainer.test(reader=test_reader, feeding=feeding)
        #     logger.info("Test with pass %d, %s" %
        #                 (event.pass_id, result.metrics))
        #     with open("./data/output/endpass/dssm_params_pass" +
        #               str(event.pass_id) + ".tar", "wb") as f:
        #         parameters.to_tar(f)

    trainer.train(
        reader=train_reader,
        event_handler=_event_handler,
        feeding=feeding,
        num_passes=num_passes)

    logger.info("Training has finished.")
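# Usage sketch (illustration only): train the classification variant on the
# two-file inputs ("right"/"wrong" answers) its defaults expect. The paths
# are placeholders matching the defaults above; logging and checkpoint
# intervals come from `config.config`, which must be populated beforehand.
def _example_train_classification():
    train(
        train_data_paths=[
            "./data/classification/train/right.txt",
            "./data/classification/train/wrong.txt"
        ],
        test_data_paths=[
            "./data/classification/test/right.txt",
            "./data/classification/test/wrong.txt"
        ],
        source_dic_path="./data/vocab.txt",
        target_dic_path="./data/vocab.txt",
        model_arch=ModelArch.create_rnn(),
        batch_size=32,
        num_passes=3)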