def __init__(self,
             dnn_layer_dims,
             dnn_input_dim,
             lr_input_dim,
             model_type=ModelType.create_classification(),
             is_infer=False):
    '''
    Record the configuration, declare the input layers and assemble the
    DNN and LR sub-models into the final model.

    @dnn_layer_dims: list of integer
        dimension of each layer in the DNN
    @dnn_input_dim: int
        size of the DNN input layer
    @lr_input_dim: int
        size of the LR input layer
    @model_type:
        object queried through is_classification() / is_regression()
        to decide which output model is built
    @is_infer: bool
        whether to build an inference model
    '''
    # Plain configuration, kept on the instance for the builder methods.
    self.is_infer = is_infer
    self.model_type = model_type
    self.lr_input_dim = lr_input_dim
    self.dnn_input_dim = dnn_input_dim
    self.dnn_layer_dims = dnn_layer_dims

    # Inputs are declared before either sub-model is built.
    self._declare_input_layers()

    self.dnn = self._build_dnn_submodel_(self.dnn_layer_dims)
    self.lr = self._build_lr_submodel_()

    # model's prediction
    # The two checks are independent; when neither matches, `self.model`
    # is simply left unset.
    task = self.model_type
    if task.is_classification():
        self.model = self._build_classification_model(self.dnn, self.lr)
    if task.is_regression():
        self.model = self._build_regression_model(self.dnn, self.lr)
def __init__(self,
             dnn_layer_dims,
             dnn_input_dim,
             lr_input_dim,
             model_type=ModelType.create_classification(),
             is_infer=False):
    '''
    Store the settings, declare the input layers, then build the DNN and
    LR sub-models and combine them into the output model.

    @dnn_layer_dims: list of integer
        dims of each layer in the dnn
    @dnn_input_dim: int
        size of the dnn's input layer
    @lr_input_dim: int
        size of the lr's input layer
    @model_type:
        object whose is_classification() / is_regression() answers
        select the output model to build
    @is_infer: bool
        whether to build an inference model
    '''
    self.model_type = model_type
    self.is_infer = is_infer
    self.dnn_layer_dims = dnn_layer_dims
    self.dnn_input_dim = dnn_input_dim
    self.lr_input_dim = lr_input_dim

    # Input layers must exist before the sub-models are built.
    self._declare_input_layers()

    self.dnn = self._build_dnn_submodel_(self.dnn_layer_dims)
    self.lr = self._build_lr_submodel_()

    # model's prediction
    # TODO(superjom) rename it to prediction
    kind = self.model_type
    # Both conditions are always evaluated; if neither holds, `self.model`
    # is not assigned at all.
    if kind.is_classification():
        self.model = self._build_classification_model(self.dnn, self.lr)
    if kind.is_regression():
        self.model = self._build_regression_model(self.dnn, self.lr)
def __init__(self,
             dnn_dims=[],
             vocab_sizes=[],
             model_type=ModelType.create_classification(),
             model_arch=ModelArch.create_rnn(),
             share_semantic_generator=False,
             class_num=2,
             share_embed=False,
             is_infer=False):
    """
    Initialize the DSSM network configuration.

    Both ``dnn_dims`` and ``vocab_sizes`` have to be supplied: the empty
    defaults do not satisfy the assertions below.

    :param dnn_dims: list of int (dimensions of each layer in the semantic
        vector generator); must contain more than one entry.
    :param vocab_sizes: 2d tuple (size of both left and right items); must
        contain exactly two entries.
    :param model_type: classification. The value is wrapped in
        ``ModelType`` and stored, but this constructor always binds
        ``model_type_creater`` to ``_build_classification_model``.
    :param model_arch: model architecture; wrapped in ``ModelArch`` and
        used to pick one of ``create_rnn`` / ``create_cnn`` / ``create_fc``.
    :param share_semantic_generator: bool (whether to share the semantic
        vector generator for both left and right.)
    :param class_num: number of categories.
    :param share_embed: bool (whether to share the embeddings between left
        and right.)
    :param is_infer: inference
    """
    assert len(vocab_sizes) == 2, (
        "vocab sizes specify the sizes left and right inputs, dim is 2.")
    assert len(dnn_dims) > 1, "more than two layers is needed."

    self.dnn_dims = dnn_dims
    self.vocab_sizes = vocab_sizes
    self.share_semantic_generator = share_semantic_generator
    self.share_embed = share_embed
    self.model_type = ModelType(model_type)
    self.model_arch = ModelArch(model_arch)
    self.class_num = class_num
    self.is_infer = is_infer

    logger.warning("build DSSM model with config of %s, %s" %
                   (self.model_type, self.model_arch))
    logger.info("vocabulary sizes: %s" % str(self.vocab_sizes))

    _model_arch = {
        "rnn": self.create_rnn,
        "cnn": self.create_cnn,
        "fc": self.create_fc,
    }
    # Key the lookup on the ModelArch-wrapped value (the one stored on the
    # instance and logged above) rather than on the raw argument, which is
    # not guaranteed to stringify to one of the keys.
    arch_name = str(self.model_arch)

    def _model_arch_creater(emb, prefix=""):
        # Direct indexing: an unknown architecture raises a KeyError naming
        # it, instead of "'NoneType' object is not callable".
        sent_vec = _model_arch[arch_name](emb, prefix)
        return self.create_dnn(sent_vec, prefix)

    self.model_arch_creater = _model_arch_creater
    self.model_type_creater = self._build_classification_model
def train(data_path=None,
          model_type=ModelType.create_classification(),
          batch_size=100,
          num_passes=50,
          class_num=None,
          num_workers=1,
          use_gpu=False):
    '''
    Train the DNN.

    Builds the network on top of `create_dnn`, attaches a classification
    or regression head depending on `model_type`, and runs the trainer.
    At the end of every pass the model is tested and its parameters are
    written to a tar file.

    @data_path: forwarded to reader.train() / reader.test()
    @model_type: object answering is_classification() / is_regression();
        selects the output layer, the label type and the cost
    @batch_size: int
        batch size for both the training and the test reader
    @num_passes: int
        number of training passes
    @class_num: int
        size of the softmax layer and of the integer label; only used for
        classification
    @num_workers: int
        passed to paddle.init as trainer_count
    @use_gpu: bool
        passed to paddle.init

    Raises ValueError when `model_type` is neither classification nor
    regression.

    Besides its arguments the function reads the module-level names
    `feature_dim`, `create_dnn`, `reader` and `args.model_output_prefix`.
    '''
    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    # network config
    input_layer = paddle.layer.data(
        name='input_layer',
        type=paddle.data_type.dense_vector(feature_dim))
    dnn = create_dnn(input_layer)

    # Use the `model_type` argument itself; the argument used to be ignored
    # in favour of the global `args.model_type`.
    is_classification = model_type.is_classification()
    if is_classification:
        prediction = paddle.layer.fc(
            input=dnn, size=class_num, act=paddle.activation.Softmax())
        label = paddle.layer.data(
            name='label', type=paddle.data_type.integer_value(class_num))
        cost = paddle.layer.classification_cost(
            input=prediction, label=label)
    elif model_type.is_regression():
        prediction = paddle.layer.fc(
            input=dnn, size=1, act=paddle.activation.Linear())
        label = paddle.layer.data(
            name='label', type=paddle.data_type.dense_vector(1))
        cost = paddle.layer.mse_cost(input=prediction, label=label)
    else:
        # Without this, cost/prediction/label would be handed to paddle as
        # None further down.
        raise ValueError("unsupported model type: %s" % str(model_type))

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label),
        parameters=parameters,
        update_equation=optimizer)

    feeding = {'input_layer': 0, 'label': 1}

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Single-argument print(): same output on Python 2 and 3.
                print("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost,
                    event.metrics))

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(
                    reader.test(data_path, feature_dim + 1,
                                is_classification),
                    batch_size=batch_size),
                feeding=feeding)
            print("Test %d, Cost %f, %s" % (event.pass_id, result.cost,
                                            result.metrics))

            model_desc = "{type}".format(type=str(model_type))
            # A tar archive is binary data, so open the file in binary mode.
            with open("%sdnn_%s_pass_%05d.tar" %
                      (args.model_output_prefix, model_desc,
                       event.pass_id), "wb") as f:
                parameters.to_tar(f)

    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                reader.train(data_path, feature_dim + 1, is_classification),
                buf_size=batch_size * 10),
            batch_size=batch_size),
        feeding=feeding,
        event_handler=event_handler,
        num_passes=num_passes)
def __init__(self,
             dnn_dims=[],
             vocab_sizes=[],
             model_type=ModelType.create_classification(),
             model_arch=ModelArch.create_cnn(),
             share_semantic_generator=False,
             class_num=None,
             share_embed=False,
             is_infer=False):
    """
    Store the DSSM configuration and bind the architecture and task
    builders that match ``model_arch`` and ``model_type``.

    Both ``dnn_dims`` and ``vocab_sizes`` have to be supplied: the empty
    defaults do not satisfy the assertions below.

    :param dnn_dims: The dimension of each layer in the semantic vector
                     generator.
    :type dnn_dims: list of int
    :param vocab_sizes: The size of left and right items.
    :type vocab_sizes: A list having 2 elements.
    :param model_type: The type of task to train the DSSM model. The value
                       should be "rank: 0", "regression: 1" or
                       "classification: 2".
    :type model_type: int
    :param model_arch: A value indicating the model architecture to use.
    :type model_arch: int
    :param share_semantic_generator: A flag indicating whether to share the
                                     semantic vector between the left and
                                     the right item.
    :type share_semantic_generator: bool
    :param share_embed: A flag indicating whether to share the embeddings
                        between the left and the right item.
    :type share_embed: bool
    :param class_num: The number of categories.
    :type class_num: int
    :param is_infer: Stored on the instance as ``is_infer``.
    :type is_infer: bool
    """
    assert len(vocab_sizes) == 2, (
        "The vocab_sizes specifying the sizes left and right inputs. \n"
        "Its dimension should be 2.")
    assert len(dnn_dims) > 1, ("In the DNN model, more than two layers "
                               "are needed.")

    self.dnn_dims = dnn_dims
    self.vocab_sizes = vocab_sizes
    self.share_semantic_generator = share_semantic_generator
    self.share_embed = share_embed
    self.model_type = ModelType(model_type)
    self.model_arch = ModelArch(model_arch)
    self.class_num = class_num
    self.is_infer = is_infer

    logger.warning("Build DSSM model with config of %s, %s" %
                   (self.model_type, self.model_arch))
    logger.info("The vocabulary size is : %s" % str(self.vocab_sizes))

    # bind model architecture
    _model_arch = {
        "cnn": self.create_cnn,
        "fc": self.create_fc,
        "rnn": self.create_rnn,
    }
    # Key the lookup on the ModelArch-wrapped value, exactly as the task
    # dispatch below keys on the ModelType-wrapped value. The raw argument
    # is not guaranteed to stringify to one of the keys.
    arch_name = str(self.model_arch)

    def _model_arch_creater(emb, prefix=""):
        # Direct indexing: an unknown architecture raises a KeyError naming
        # it, instead of "'NoneType' object is not callable".
        sent_vec = _model_arch[arch_name](emb, prefix)
        return self.create_dnn(sent_vec, prefix)

    self.model_arch_creater = _model_arch_creater

    # bind the builder for the task type
    _model_type = {
        "classification": self._build_classification_model,
        "rank": self._build_rank_model,
        "regression": self._build_regression_model,
    }
    print("model type: ", str(self.model_type))
    self.model_type_creater = _model_type[str(self.model_type)]
def train(train_data_path=None,
          test_data_path=None,
          source_dic_path=None,
          target_dic_path=None,
          model_type=ModelType.create_classification(),
          model_arch=ModelArch.create_cnn(),
          batch_size=10,
          num_passes=10,
          share_semantic_generator=False,
          share_embed=False,
          class_num=None,
          num_workers=1,
          use_gpu=False):
    '''
    Train the DSSM.

    @train_data_path: path of the training data. When it is empty, ALL four
        paths (train, test, source dictionary, target dictionary) are
        replaced by the bundled defaults, whatever was passed for the
        other three.
    @test_data_path: path of the test data
    @source_dic_path: path of the source dictionary
    @target_dic_path: path of the target dictionary
    @model_type: task type; selects the default data set, the feeding
        layout and whether the AUC evaluator is attached
    @model_arch: forwarded to DSSM
    @batch_size: batch size of the training and test readers
    @num_passes: number of training passes
    @share_semantic_generator: forwarded to DSSM
    @share_embed: forwarded to DSSM
    @class_num: forwarded to DSSM
    @num_workers: passed to paddle.init as trainer_count
    @use_gpu: passed to paddle.init

    Besides its arguments the function reads the module-level names
    `layer_dims`, `load_dic`, `reader`, `logger` and several `args`
    fields (logging / testing / saving intervals, and the pieces of the
    output file name).
    '''
    default_train_path = './data/rank/train.txt'
    default_test_path = './data/rank/test.txt'
    default_dic_path = './data/vocab.txt'
    if not model_type.is_rank():
        default_train_path = './data/classification/train.txt'
        default_test_path = './data/classification/test.txt'

    use_default_data = not train_data_path

    if use_default_data:
        train_data_path = default_train_path
        test_data_path = default_test_path
        source_dic_path = default_dic_path
        target_dic_path = default_dic_path

    dataset = reader.Dataset(
        train_path=train_data_path,
        test_path=test_data_path,
        source_dic_path=source_dic_path,
        target_dic_path=target_dic_path,
        model_type=model_type, )

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train, buf_size=1000),
        batch_size=batch_size)

    test_reader = paddle.batch(
        paddle.reader.shuffle(dataset.test, buf_size=1000),
        batch_size=batch_size)

    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    cost, prediction, label = DSSM(
        dnn_dims=layer_dims,
        vocab_sizes=[
            len(load_dic(path))
            for path in [source_dic_path, target_dic_path]
        ],
        model_type=model_type,
        model_arch=model_arch,
        share_semantic_generator=share_semantic_generator,
        class_num=class_num,
        share_embed=share_embed)()

    parameters = paddle.parameters.create(cost)

    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=1e-3,
        regularization=paddle.optimizer.L2Regularization(rate=1e-3),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    trainer = paddle.trainer.SGD(
        cost=cost,
        # No AUC evaluator is attached for the rank task.
        extra_layers=paddle.evaluator.auc(input=prediction, label=label)
        if not model_type.is_rank() else None,
        parameters=parameters,
        update_equation=adam_optimizer)

    # Classification / regression samples carry one target; every other
    # task type (i.e. rank) carries a left and a right target.
    if model_type.is_classification() or model_type.is_regression():
        feeding = {'source_input': 0, 'target_input': 1, 'label_input': 2}
    else:
        feeding = {
            'source_input': 0,
            'left_target_input': 1,
            'right_target_input': 2,
            'label_input': 3
        }

    def _event_handler(event):
        '''
        Define batch handler
        '''
        if isinstance(event, paddle.event.EndIteration):
            # output train log
            if event.batch_id % args.num_batches_to_log == 0:
                logger.info(
                    "Pass %d, Batch %d, Cost %f, %s" %
                    (event.pass_id, event.batch_id, event.cost,
                     event.metrics))

            # test model (only the classification task is tested here)
            if event.batch_id > 0 and \
                    event.batch_id % args.num_batches_to_test == 0:
                if test_reader is not None:
                    if model_type.is_classification():
                        result = trainer.test(
                            reader=test_reader, feeding=feeding)
                        logger.info("Test at Pass %d, %s" %
                                    (event.pass_id, result.metrics))

            # save model
            if event.batch_id > 0 and \
                    event.batch_id % args.num_batches_to_save_model == 0:
                model_desc = "{type}_{arch}".format(
                    type=str(args.model_type), arch=str(args.model_arch))
                # A tar archive is binary data, so open the file in binary
                # mode.
                with open("%sdssm_%s_pass_%05d.tar" %
                          (args.model_output_prefix, model_desc,
                           event.pass_id), "wb") as f:
                    parameters.to_tar(f)

    trainer.train(
        reader=train_reader,
        event_handler=_event_handler,
        feeding=feeding,
        num_passes=num_passes)

    logger.info("Training has finished.")
def __init__(self,
             dnn_dims=[],
             vocab_sizes=[],
             model_type=ModelType.create_classification(),
             model_arch=ModelArch.create_cnn(),
             share_semantic_generator=False,
             class_num=None,
             share_embed=False,
             is_infer=False):
    '''
    Store the DSSM configuration and bind the architecture and task
    builders that match `model_arch` and `model_type`.

    Both `dnn_dims` and `vocab_sizes` have to be supplied: the empty
    defaults do not satisfy the assertions below.

    @dnn_dims: list of int
        dimensions of each layer in the semantic vector generator.
    @vocab_sizes: 2-d tuple
        size of both left and right items.
    @model_type: int
        type of task, should be 'rank: 0', 'regression: 1' or
        'classification: 2'
    @model_arch: int
        model architecture
    @share_semantic_generator: bool
        whether to share the semantic vector generator for both left and
        right.
    @share_embed: bool
        whether to share the embeddings between left and right.
    @class_num: int
        number of categories.
    @is_infer: bool
        stored on the instance as `is_infer`.
    '''
    assert len(
        vocab_sizes
    ) == 2, "vocab_sizes specify the sizes left and right inputs, and dim should be 2."
    assert len(dnn_dims) > 1, "more than two layers is needed."

    self.dnn_dims = dnn_dims
    self.vocab_sizes = vocab_sizes
    self.share_semantic_generator = share_semantic_generator
    self.share_embed = share_embed
    self.model_type = ModelType(model_type)
    self.model_arch = ModelArch(model_arch)
    self.class_num = class_num
    self.is_infer = is_infer

    logger.warning("build DSSM model with config of %s, %s" %
                   (self.model_type, self.model_arch))
    logger.info("vocabulary sizes: %s" % str(self.vocab_sizes))

    # bind model architecture
    _model_arch = {
        'cnn': self.create_cnn,
        'fc': self.create_fc,
        'rnn': self.create_rnn,
    }
    # Key the lookup on the ModelArch-wrapped value, exactly as the task
    # dispatch below keys on the ModelType-wrapped value. The raw argument
    # is not guaranteed to stringify to one of the keys.
    arch_name = str(self.model_arch)

    def _model_arch_creater(emb, prefix=''):
        # Direct indexing: an unknown architecture raises a KeyError naming
        # it, instead of "'NoneType' object is not callable".
        sent_vec = _model_arch[arch_name](emb, prefix)
        return self.create_dnn(sent_vec, prefix)

    self.model_arch_creater = _model_arch_creater

    # build model type
    _model_type = {
        'classification': self._build_classification_model,
        'rank': self._build_rank_model,
        'regression': self._build_regression_model,
    }
    # Single-argument print(): valid on Python 2 and 3, and emits the same
    # text the former `print a, b` statement did (label, separator space,
    # value).
    print('model type:  %s' % str(self.model_type))
    self.model_type_creater = _model_type[str(self.model_type)]