def _init_pred(self, instance, infer_model_path):
    """Prepare a task instance for prediction and return its compiled program.

    Args:
        instance: task instance to predict with. Its ``config``,
            ``task_layer`` and ``reader`` mappings are updated in place.
        infer_model_path: path handed to ``instance.load`` to obtain the
            inference program.

    Returns:
        The program returned by ``instance.load``, wrapped in
        ``fluid.CompiledProgram(...).with_data_parallel()``.
    """
    config = instance.config

    # Fall back to <save_path>/<instance name> when no output path is given.
    if 'pred_output_path' not in config:
        config['pred_output_path'] = os.path.join(
            config.get('save_path', '.'), instance.name)
    output_dir = config['pred_output_path']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    backbone = self.Backbone(self.bb_conf, phase='pred')
    instance.task_layer['pred'] = instance.Paradigm(
        config, phase='pred', backbone_config=self.bb_conf)

    # The merged attributes are not used afterwards; the call itself is kept.
    _names, _shapes_and_dtypes, _positions = merge_input_attrs(
        backbone.inputs_attr,
        instance.task_layer['pred'].inputs_attrs['reader'],
        insert_taskid=False,
        insert_batchsize=False,
        insert_seqlen=False,
        insert_batchsize_x_seqlen=False)

    loaded_prog = instance.load(infer_model_path)
    compiled_prog = fluid.CompiledProgram(loaded_prog).with_data_parallel()

    # Create the prediction reader lazily, only if none was set before.
    if instance.reader['pred'] is None:
        instance.reader['pred'] = instance.Reader(config, phase='pred')

    return compiled_prog
def build_predict_forward(self, pred_backbone, pred_head):
    """Build the computation graph for evaluation and prediction.

    Args:
        pred_backbone: a Backbone object with phase == 'predict'. For
            evaluating a model during training, the predict backbone should
            be kept the same as the train backbone.
        pred_head: a Head object with phase == 'predict'. For evaluating a
            model during training, the predict head should be kept the same
            as the train head.

    Returns:
        The value returned by ``self._build_head`` for the 'predict' phase:
        a dict mapping output names to graph variables, or ``None``.
    """
    self._pred_head = pred_head
    self._pred_backbone = pred_backbone

    pred_task_attr_from_reader = helper.encode_inputs(
        self._pred_head.inputs_attrs['reader'], self.name)

    # Merge the reader attributes required by the backbone and by the head.
    pred_input_names, pred_shape_and_dtypes, pred_name_to_position = \
        reader_helper.merge_input_attrs(
            pred_backbone.inputs_attr, pred_task_attr_from_reader,
            insert_taskid=False)
    pred_input_attrs = [
        [i, j, k]
        for i, (j, k) in zip(pred_input_names, pred_shape_and_dtypes)
    ]
    self._pred_shape_and_dtypes = pred_shape_and_dtypes
    self._pred_name_to_position = pred_name_to_position

    pred_prog = fluid.Program()
    self._pred_prog = pred_prog
    pred_init_prog = fluid.Program()
    self._pred_init_prog = pred_init_prog

    with fluid.program_guard(pred_prog, pred_init_prog):
        pred_net_inputs = reader_helper.create_net_inputs(pred_input_attrs)
        pred_bb_output_vars = pred_backbone.build(pred_net_inputs)
    self._pred_net_inputs = pred_net_inputs

    # Prepare predict vars for saving the inference model.
    with fluid.program_guard(pred_prog, pred_init_prog):
        cur_inputs = helper.decode_inputs(pred_net_inputs, self.name)
        self._pred_input_name_list, self._pred_input_varname_list = \
            zip(*[[k, v.name] for k, v in cur_inputs.items()])

        pred_task_inputs = {'backbone': pred_bb_output_vars,
                            'reader': cur_inputs}
        scope = self.name + '.'
        with fluid.unique_name.guard(scope):
            output_vars = self._build_head(
                pred_task_inputs, phase='predict', scope=scope)

    if output_vars is not None:
        self._pred_fetch_name_list, self._pred_fetch_list = \
            zip(*output_vars.items())
    else:
        self._pred_fetch_name_list = []
        # Previously only `_pred_fetch_var_list` was set on this path, so
        # `_pred_fetch_list` (the attribute set on the success path) stayed
        # undefined. Both are set now; the old name is kept for
        # compatibility with any code that reads it.
        self._pred_fetch_list = []
        self._pred_fetch_var_list = []

    if not self._multi_task:
        self._init_exe_prog(for_train=False)
        self._exe.run(self._pred_init_prog)

    return output_vars
def _init_train(self):
    """Build the joint multi-task training graph and run parameter init.

    For every task instance this creates the train reader and paradigm,
    checks their I/O against the backbone, merges all input attributes into
    one joint input, builds backbone and task layers in the default main
    program, and builds the prediction graph of target instances in a
    separate program. It then builds the loss selected by ``__task_id``,
    optionally attaches the configured optimizer, and runs the default
    startup program.

    Results are stored on ``self`` (``_joint_iterator_fn``, ``_net_inputs``,
    ``train_backbone``, ``train_program``, ``saver_program``, ``main_inst``,
    ``fetches``, ``has_init_train``, ``has_init_pred``) and on the instances.

    Raises:
        ValueError: if none of ``self.instances`` is marked as a target task.
    """
    instances = self.instances
    Backbone = self.Backbone
    bb_conf = self.bb_conf
    bb_name = self.bb_name
    dev_count = self.dev_count
    num_instances = len(instances)

    # The first target instance acts as the main task; its config drives
    # save path, epochs, batch size and optimizer settings below.
    main_inst = None
    for inst in instances:
        if inst.is_target:
            main_inst = inst
            inst.is_first_target = True
            break
    if main_inst is None:
        # Fail explicitly instead of with an AttributeError on None below.
        raise ValueError(
            'at least one task instance must be a target task to train.')
    main_conf = main_inst.config

    # makedirs also creates save_path itself when it is missing; checking the
    # ckpt directory covers the case where save_path exists without it.
    ckpt_dir = os.path.join(main_conf['save_path'], 'ckpt')
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    # prepare backbone
    train_backbone = Backbone(bb_conf, phase='train')
    pred_backbone = Backbone(bb_conf, phase='pred')

    # create reader and task paradigm for each instance,
    # then check i/o across reader, backbone and task_layer
    task_attrs = []
    pred_task_attrs = []
    for inst in instances:
        train_reader = inst.Reader(inst.config, phase='train')
        inst.reader['train'] = train_reader
        train_parad = inst.Paradigm(
            inst.config, phase='train', backbone_config=bb_conf)
        inst.task_layer['train'] = train_parad
        task_attr_from_reader = _encode_inputs(
            train_parad.inputs_attrs['reader'], inst.name)
        task_attrs.append(task_attr_from_reader)

        _check_io(train_backbone.inputs_attr, train_reader.outputs_attr,
                  in_name=bb_name+'_backbone', out_name='reader.train')
        _check_io(train_parad.inputs_attrs['reader'],
                  train_reader.outputs_attr,
                  in_name='task_paradigm.train.reader',
                  out_name='reader.train')
        _check_io(train_parad.inputs_attrs['backbone'],
                  train_backbone.outputs_attr,
                  in_name='task_paradigm.train.backbone',
                  out_name=bb_name+'_backbone')

        if inst.is_target:
            if 'pred_file' not in inst.config:
                inst.config['pred_file'] = ''
            pred_reader = inst.Reader(inst.config, phase='pred')
            pred_parad = inst.Paradigm(
                inst.config, phase='pred', backbone_config=bb_conf)
            inst.task_layer['pred'] = pred_parad
            task_attr_from_reader = _encode_inputs(
                pred_parad.inputs_attrs['reader'], inst.name)
            pred_task_attrs.append(task_attr_from_reader)
            _check_io(pred_backbone.inputs_attr, pred_reader.outputs_attr,
                      in_name=bb_name+'_backbone', out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['reader'],
                      pred_reader.outputs_attr,
                      in_name='task_paradigm.pred.reader',
                      out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['backbone'],
                      pred_backbone.outputs_attr,
                      in_name='task_paradigm.pred.backbone',
                      out_name=bb_name+'_backbone')

    # merge reader input attrs from backbone and task_instances
    joint_input_names, joint_shape_and_dtypes, name_to_position = \
        merge_input_attrs(train_backbone.inputs_attr, task_attrs)
    pred_joint_input_names, pred_joint_shape_and_dtypes, _ = \
        merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs,
                          insert_taskid=False, insert_batchsize=False,
                          insert_seqlen=False,
                          insert_batchsize_x_seqlen=False)
    # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]

    if DEBUG:
        print('----- for debug -----')
        print('joint input names:')
        print(joint_input_names)
        print('joint input shape and dtypes:')
        print(joint_shape_and_dtypes)

    # load data
    for inst in instances:
        print(inst.name+": preparing data...", end='')
        inst.reader['train'].load_data()
        print('ok!')

    # merge dataset iterators and create net input vars
    iterators = []
    prefixes = []
    mrs = []
    for inst in instances:
        iterators.append(inst.reader['train'].iterator())
        prefixes.append(inst.name)
        mrs.append(inst.mix_ratio)

    joint_iterator_fn = create_joint_iterator_fn(
        iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position,
        dev_count=dev_count, verbose=VERBOSE, return_type='dict')
    self._joint_iterator_fn = joint_iterator_fn

    input_attrs = [
        [i, j, k]
        for i, (j, k) in zip(joint_input_names, joint_shape_and_dtypes)
    ]
    pred_input_attrs = [
        [i, j, k]
        for i, (j, k) in zip(pred_joint_input_names,
                             pred_joint_shape_and_dtypes)
    ]
    # `async` is a reserved word since Python 3.7, so writing it as a literal
    # keyword argument is a SyntaxError there. Passing it through dict
    # unpacking hands the callee the very same keyword and value.
    net_inputs = create_net_inputs(input_attrs, **{'async': False})
    self._net_inputs = net_inputs

    # build backbone and task layers
    train_prog = fluid.default_main_program()
    train_init_prog = fluid.default_startup_program()
    bb_output_vars = train_backbone.build(
        net_inputs, scope_name='__paddlepalm_')
    assert sorted(bb_output_vars.keys()) == \
        sorted(train_backbone.outputs_attr.keys())

    # The prediction graph lives in its own program pair.
    pred_prog = fluid.Program()
    pred_init_prog = fluid.Program()

    with fluid.program_guard(main_program=pred_prog,
                             startup_program=pred_init_prog):
        pred_net_inputs = create_net_inputs(pred_input_attrs)
        pred_bb_output_vars = pred_backbone.build(
            pred_net_inputs, scope_name='__paddlepalm_')

    fluid.framework.switch_main_program(train_prog)
    fluid.framework.switch_startup_program(train_init_prog)

    task_output_vars = {}
    for inst in instances:
        task_inputs = {'backbone': bb_output_vars}
        task_inputs_from_reader = _decode_inputs(net_inputs, inst.name)
        task_inputs['reader'] = task_inputs_from_reader

        scope = inst.task_reuse_scope + '/'
        with fluid.unique_name.guard(scope):
            output_vars = inst.build_task_layer(
                task_inputs, phase='train', scope=scope)
            output_vars = {inst.name+'/'+key: val
                           for key, val in output_vars.items()}
            old = len(task_output_vars)  # for debug
            task_output_vars.update(output_vars)
            # every prefixed output name must be new to the joint dict
            assert len(task_output_vars) - old == len(output_vars)  # for debug

        # prepare predict vars for saving inference model
        if inst.is_target:
            with fluid.program_guard(pred_prog, pred_init_prog):
                cur_inputs = _decode_inputs(pred_net_inputs, inst.name)
                inst.pred_input = cur_inputs
                pred_task_inputs = {'backbone': pred_bb_output_vars,
                                    'reader': cur_inputs}
                scope = inst.task_reuse_scope + '/'
                with fluid.unique_name.guard(scope):
                    inst.build_task_layer(
                        pred_task_inputs, phase='pred', scope=scope)

    task_fetches = {k: v.name for k, v in task_output_vars.items()}
    fetches = task_fetches
    fetches['__task_id'] = net_inputs['__task_id'].name

    # compute loss: the one-hot task id selects one entry of the
    # concatenated per-task losses
    task_id_var = net_inputs['__task_id']
    task_id_vec = fluid.one_hot(task_id_var, num_instances)
    losses = fluid.layers.concat(
        [task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
    loss = layers.reduce_sum(task_id_vec * losses)

    main_reader = main_inst.reader['train']

    num_examples = main_reader.num_examples
    for inst in instances:
        max_train_steps = int(
            main_conf['num_epochs'] * inst.mix_ratio *
            (num_examples // main_conf['batch_size'] // dev_count))
        if inst.is_target:
            print('{}: expected train steps {}.'.format(
                inst.name, max_train_steps))
        inst.steps_pur_epoch = \
            inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count
        inst.expected_train_steps = max_train_steps

    global_max_train_steps = int(
        main_conf['num_epochs'] * sum(mrs) *
        (num_examples // main_conf['batch_size'] // dev_count))
    print('Estimated overall train steps {}.'.format(global_max_train_steps))

    if 'warmup_proportion' in main_conf and main_conf['warmup_proportion'] > 0:
        warmup_steps = int(
            global_max_train_steps * main_conf['warmup_proportion'])
        print('Warmup steps: '+str(warmup_steps))
    else:
        warmup_steps = 0

    # build optimizer
    if 'optimizer' in main_conf:
        optim_mod = importlib.import_module(
            OPTIMIZER_DIR + '.' + main_conf['optimizer'])
        optimize = getattr(optim_mod, OPTIMIZE_METHOD)
        # NOTE(review): max_train_steps holds the value computed for the last
        # instance in the loop above, while warmup_steps is derived from
        # global_max_train_steps -- confirm this mix is intended.
        optimize(loss, main_conf, max_train_steps, warmup_steps,
                 fluid.default_main_program())

        loss.persistable = True
        if main_conf.get('use_ema', False):
            assert 'ema_decay' in main_conf, "ema_decay should be set when use_ema is enabled."
            ema = fluid.optimizer.ExponentialMovingAverage(
                main_conf['ema_decay'])
            ema.update()

    # prepare for train
    self.train_backbone = train_backbone
    self.train_program = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(loss_name=loss.name)
    self.saver_program = fluid.default_main_program()

    self.main_inst = main_inst
    self.fetches = fetches
    self.has_init_train = True
    self.has_init_pred = True

    self.exe.run(fluid.default_startup_program())
    print("\nRandomly initialize parameters...\n")
def build_forward(self, backbone, task_head):
    """Build the forward computation graph for training.

    The graph is built from the input layer up to the loss node.

    Args:
        backbone: a Backbone object with phase == 'train', which is used to
            extract multi-level text features, e.g., contextual word
            embedding and sentence embedding.
        task_head: a Head object with phase == 'train', which is used to
            build task specific output layers.

    Returns:
        loss_var: a Variable object. The computational graph variable (node)
        of the loss, i.e. ``reduce_sum`` over the head's 'loss' output.
    """
    self._task_head = task_head
    self._backbone = backbone
    self._build_forward = True

    task_attr_from_reader = helper.encode_inputs(
        self._task_head.inputs_attrs['reader'], self.name)

    # merge reader input attrs from backbone and task head
    input_names, shape_and_dtypes, name_to_position = \
        reader_helper.merge_input_attrs(
            backbone.inputs_attr, task_attr_from_reader, insert_taskid=False)
    self._shape_and_dtypes = shape_and_dtypes
    self._name_to_position = name_to_position
    self._input_names = input_names

    if DEBUG:
        # These prints used to reference `joint_input_names` and
        # `joint_shape_and_dtypes`, which do not exist in this method and
        # raised NameError whenever DEBUG was enabled.
        print('----- for debug -----')
        print('joint input names:')
        print(input_names)
        print('joint input shape and dtypes:')
        print(shape_and_dtypes)

    input_attrs = [
        [i, j, k] for i, (j, k) in zip(input_names, shape_and_dtypes)
    ]

    train_prog = fluid.Program()
    train_init_prog = fluid.Program()

    # When the programs are locked, nothing is rebound on self and the graph
    # is built without entering a program guard of our own.
    if not self._lock_prog:
        self._train_prog = train_prog
        self._train_init_prog = train_init_prog

    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            net_inputs = reader_helper.create_net_inputs(
                input_attrs, is_async=False)
            bb_output_vars = backbone.build(net_inputs)
    else:
        net_inputs = reader_helper.create_net_inputs(
            input_attrs, is_async=False)
        bb_output_vars = backbone.build(net_inputs)
    self._net_inputs = net_inputs
    assert sorted(bb_output_vars.keys()) == \
        sorted(backbone.outputs_attr.keys())

    task_output_vars = {}
    task_inputs = {'backbone': bb_output_vars}
    task_inputs_from_reader = helper.decode_inputs(net_inputs, self.name)
    task_inputs['reader'] = task_inputs_from_reader

    scope = self.name+'.'
    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            with fluid.unique_name.guard(scope):
                output_vars = self._build_head(
                    task_inputs, phase='train', scope=scope)
    else:
        with fluid.unique_name.guard(scope):
            output_vars = self._build_head(
                task_inputs, phase='train', scope=scope)

    # Prefix every head output with the trainer name.
    output_vars = {self.name+'.'+key: val for key, val in output_vars.items()}
    old = len(task_output_vars)  # for debug
    task_output_vars.update(output_vars)
    assert len(task_output_vars) - old == len(output_vars)  # for debug

    task_fetches = {k: v.name for k, v in task_output_vars.items()}
    self._fetches = task_fetches
    self._fetch_names, self._fetch_list = zip(*self._fetches.items())

    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            loss_var = fluid.layers.reduce_sum(
                task_output_vars[self.name+'.loss'])
    else:
        loss_var = fluid.layers.reduce_sum(
            task_output_vars[self.name+'.loss'])

    self._loss_var = loss_var

    if not self._multi_task:
        self._init_exe_prog(for_train=True)

    return loss_var
def build_forward(self, backbone, task_head):
    """Build the forward computation graph for training.

    The graph is built from the input layer up to the loss node.

    Args:
        backbone: a Backbone object with phase == 'train', which is used to
            extract multi-level text features, e.g., contextual word
            embedding and sentence embedding.
        task_head: a Head object with phase == 'train', which is used to
            build task specific output layers.

    Returns:
        loss_var: a Variable object. The computational graph variable (node)
        of the loss, i.e. ``reduce_sum`` over the head's 'loss' output.
    """
    self._task_head = task_head
    self._backbone = backbone
    self._build_forward = True

    task_attr_from_reader = helper.encode_inputs(
        self._task_head.inputs_attrs['reader'], self.name)

    # merge reader input attrs from backbone and task head
    input_names, shape_and_dtypes, name_to_position = \
        reader_helper.merge_input_attrs(
            backbone.inputs_attr, task_attr_from_reader, insert_taskid=False)
    self._shape_and_dtypes = shape_and_dtypes
    self._name_to_position = name_to_position
    self._input_names = input_names

    if DEBUG:
        # The original prints referenced `joint_input_names` and
        # `joint_shape_and_dtypes`; neither name is defined in this method,
        # so enabling DEBUG raised NameError.
        print('----- for debug -----')
        print('joint input names:')
        print(input_names)
        print('joint input shape and dtypes:')
        print(shape_and_dtypes)

    input_attrs = [
        [i, j, k] for i, (j, k) in zip(input_names, shape_and_dtypes)
    ]

    train_prog = fluid.Program()
    train_init_prog = fluid.Program()

    # With locked programs, self keeps its current programs and no program
    # guard is entered here while building.
    if not self._lock_prog:
        self._train_prog = train_prog
        self._train_init_prog = train_init_prog

    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            net_inputs = reader_helper.create_net_inputs(
                input_attrs, is_async=False)
            bb_output_vars = backbone.build(net_inputs)
    else:
        net_inputs = reader_helper.create_net_inputs(
            input_attrs, is_async=False)
        bb_output_vars = backbone.build(net_inputs)
    self._net_inputs = net_inputs
    assert sorted(bb_output_vars.keys()) == sorted(
        backbone.outputs_attr.keys())

    task_output_vars = {}
    task_inputs = {'backbone': bb_output_vars}
    task_inputs_from_reader = helper.decode_inputs(net_inputs, self.name)
    task_inputs['reader'] = task_inputs_from_reader

    scope = self.name + '.'
    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            with fluid.unique_name.guard(scope):
                output_vars = self._build_head(
                    task_inputs, phase='train', scope=scope)
    else:
        with fluid.unique_name.guard(scope):
            output_vars = self._build_head(
                task_inputs, phase='train', scope=scope)

    # Prefix every head output with the trainer name.
    output_vars = {
        self.name + '.' + key: val
        for key, val in output_vars.items()
    }
    old = len(task_output_vars)  # for debug
    task_output_vars.update(output_vars)
    assert len(task_output_vars) - old == len(output_vars)  # for debug

    task_fetches = {k: v.name for k, v in task_output_vars.items()}
    self._fetches = task_fetches
    self._fetch_names, self._fetch_list = zip(*self._fetches.items())

    if not self._lock_prog:
        with fluid.program_guard(train_prog, train_init_prog):
            loss_var = fluid.layers.reduce_sum(
                task_output_vars[self.name + '.loss'])
    else:
        loss_var = fluid.layers.reduce_sum(
            task_output_vars[self.name + '.loss'])

    self._loss_var = loss_var

    if not self._multi_task:
        self._init_exe_prog(for_train=True)

    return loss_var