예제 #1
0
 def process_item(self):
     """Parse a subprogram (FUNCTION/SUBROUTINE) statement line.

     Extracts prefix, name, argument list, BIND spec and (for functions)
     the RESULT name from the current item's line, stores them on ``self``,
     and delegates to ``BeginStatement.process_item``.
     """
     clsname = self.__class__.__name__.lower()
     item = self.item
     line = item.get_line()
     m = self.match(line)
     i = line.lower().find(clsname)
     # Fixed: Python 3 removed the backtick repr syntax; use repr() instead.
     assert i != -1, repr((clsname, line))
     self.prefix = line[:i].rstrip()
     self.name = line[i:m.end()].lstrip()[len(clsname):].strip()
     line = line[m.end():].lstrip()
     args = []
     if line.startswith('('):
         i = line.find(')')
         assert i != -1, repr(line)
         line2 = item.apply_map(line[:i+1])
         for a in line2[1:-1].split(','):
             a = a.strip()
             if not a:
                 continue
             args.append(a)
         line = line[i+1:].lstrip()
     suffix = item.apply_map(line)
     self.bind, suffix = parse_bind(suffix, item)
     self.result = None
     if isinstance(self, Function):
         self.result, suffix = parse_result(suffix, item)
         if suffix:
             assert self.bind is None, repr(self.bind)
             # BUG FIX: the text remaining after RESULT(...) is a BIND spec,
             # so it must be parsed with parse_bind, not parse_result.
             self.bind, suffix = parse_bind(suffix, item)
         if self.result is None:
             # A function without an explicit RESULT clause uses its own name.
             self.result = self.name
     assert not suffix, repr(suffix)
     self.args = args
     self.typedecl = None
     return BeginStatement.process_item(self)
예제 #2
0
 def process_item(self):
     """Parse a subprogram (FUNCTION/SUBROUTINE) statement line.

     Extracts prefix, name, argument list, BIND spec and (for functions)
     the RESULT name from the current item's line, stores them on ``self``,
     and delegates to ``BeginStatement.process_item``.
     """
     clsname = self.__class__.__name__.lower()
     item = self.item
     line = item.get_line()
     m = self.match(line)
     i = line.lower().find(clsname)
     # Fixed: Python 3 removed the backtick repr syntax; use repr() instead.
     assert i != -1, repr((clsname, line))
     self.prefix = line[:i].rstrip()
     self.name = line[i:m.end()].lstrip()[len(clsname):].strip()
     line = line[m.end():].lstrip()
     args = []
     if line.startswith('('):
         i = line.find(')')
         assert i != -1, repr(line)
         line2 = item.apply_map(line[:i + 1])
         for a in line2[1:-1].split(','):
             a = a.strip()
             if not a:
                 continue
             args.append(a)
         line = line[i + 1:].lstrip()
     suffix = item.apply_map(line)
     self.bind, suffix = parse_bind(suffix, item)
     self.result = None
     if isinstance(self, Function):
         self.result, suffix = parse_result(suffix, item)
         if suffix:
             assert self.bind is None, repr(self.bind)
             # BUG FIX: the text remaining after RESULT(...) is a BIND spec,
             # so it must be parsed with parse_bind, not parse_result.
             self.bind, suffix = parse_bind(suffix, item)
         if self.result is None:
             # A function without an explicit RESULT clause uses its own name.
             self.result = self.name
     assert not suffix, repr(suffix)
     self.args = args
     self.typedecl = None
     return BeginStatement.process_item(self)
예제 #3
0
def infer_process(exe, program, reader, fetch_vars, dataset):
    """
    Run the inference loop over *reader* and collect prediction results.

    :param exe: the fluid Executor
    :param program: the infer_program
    :param reader: data reader yielding feed batches
    :param fetch_vars: variables fetched from each run of *program*
    :param dataset: vocabulary/label mapping passed through to the parser
    :return: the list of prediction result
    """
    def _words_if_empty(batch):
        # For an empty batch (total lod length is 0) hand back the raw words
        # tensor so the caller can bypass the network; otherwise return None.
        words = batch[0]['words']
        if words.lod()[0][-1] == 0:
            return words
        return None

    predictions = []
    for batch in reader():
        empty_words = _words_if_empty(batch)
        if empty_words:
            # Empty input: feed the words tensor straight to the parser
            # (used in both argument positions) instead of running the net.
            predictions += utils.parse_result(empty_words, empty_words, dataset)
            continue

        words, crf_decode = exe.run(
            program,
            fetch_list=fetch_vars,
            feed=batch,
            return_numpy=False,
            use_program_cache=True,
        )
        predictions += utils.parse_result(words, crf_decode, dataset)
    return predictions
예제 #4
0
def do_infer(args):
    """Run ERNIE sequence-labeling inference over args.test_data.

    Builds the inference program, restores parameters from
    args.init_checkpoint, then prints one "(char, tag)(char, tag)..." line
    per input sentence.

    :param args: parsed command-line namespace (use_cuda, ernie_config_path,
        test_data, init_checkpoint, ...).
    :raises ValueError: if args.init_checkpoint is not set.
    """
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()

    # define network and reader
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            # NOTE(review): is_prediction=False even though this path only
            # infers — confirm this is intended (the graph may need the
            # label inputs wired in for crf_decode).
            infer_ret = creator.create_ernie_model(args,
                                                   ernie_config,
                                                   is_prediction=False)
    infer_program = infer_program.clone(for_test=True)
    print(args.test_data)
    pyreader, reader = creator.create_pyreader(
        args,
        file_name=args.test_data,
        feed_list=infer_ret['feed_list'],
        mode="ernie",
        place=place,
        iterable=True,
        return_reader=True,
        for_test=True)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    if not args.init_checkpoint:
        raise ValueError(
            "args 'init_checkpoint' should be set if only doing test or infer!"
        )
    utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # create dict: both maps are keyed by the *stringified* id, as expected
    # by utils.parse_result.
    id2word_dict = dict([(str(word_id), word)
                         for word, word_id in reader.vocab.items()])
    id2label_dict = dict([(str(label_id), label)
                          for label, label_id in reader.label_map.items()])
    Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
    dataset = Dataset(id2word_dict, id2label_dict)

    # make prediction
    for data in pyreader():
        (words, crf_decode) = exe.run(
            infer_program,
            fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
            feed=data[0],
            return_numpy=False)
        # User should notice that words had been clipped if long than args.max_seq_len
        results = utils.parse_result(words, crf_decode, dataset)
        for sent, tags in results:
            result_list = [
                '(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)
            ]
            print(''.join(result_list))
예제 #5
0
def test_inference_model(model_dir, text_list, dataset):
    """
    Load a saved inference model and print LAC tagging results for text_list.

    :param model_dir: model's dir
    :param text_list: a list of input text, which decode as unicode
    :param dataset: provides word_to_ids() and the id/label maps used by
        utils.parse_result
    :return: None; prints one "(char, tag)..." line per input text
    """
    # NOTE(review): reads the module-level `args` (use_cuda) even though it
    # is not a parameter — confirm `args` is parsed before this is called.
    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # transfer text data to input tensor: one int64 id array per text
    lod = []
    for text in text_list:
        lod.append(
            np.array(dataset.word_to_ids(text.strip())).astype(np.int64))
    base_shape = [[len(c) for c in lod]]
    tensor_words = fluid.create_lod_tensor(lod, base_shape, place)

    # for empty input, output the same empty
    if (sum(base_shape[0]) == 0):
        # Placeholder so the parse step below still receives a tensor.
        crf_decode = [tensor_words]
    else:
        # load inference model
        inference_scope = fluid.core.Scope()
        with fluid.scope_guard(inference_scope):
            [inferencer, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
                 model_dir,
                 exe,
                 model_filename='model.pdmodel',
                 params_filename='params.pdparams',
             )
            assert feed_target_names[0] == "words"
            print("Load inference model from %s" % (model_dir))

            # get lac result
            crf_decode = exe.run(
                inferencer,
                feed={feed_target_names[0]: tensor_words},
                fetch_list=fetch_targets,
                return_numpy=False,
                use_program_cache=True,
            )

    # parse the crf_decode result
    result = utils.parse_result(tensor_words, crf_decode[0], dataset)
    for i, (sent, tags) in enumerate(result):
        result_list = ['(%s, %s)' % (ch, tag) for ch, tag in zip(sent, tags)]
        print(''.join(result_list))
예제 #6
0
def execute_selection(sql, values=None):
    """Run a SELECT query and return the parsed result.

    :param sql: SQL query string, optionally with placeholders
    :param values: value bound to the query placeholder, or None for a
        parameter-less query
    :returns: query result as produced by parse_result
    """
    # `connetion_params` (sic) is the module-level connection config.
    with closing(mysql.connector.connect(**connetion_params)) as db:
        with closing(db.cursor(dictionary=True, buffered=True)) as cursor:
            # Fixed: compare against None with `is`, not `==` (PEP 8).
            if values is None:
                cursor.execute(sql)
            else:
                # NOTE(review): `values` is wrapped in a one-element list,
                # so callers must pass a single scalar value, not a
                # parameter sequence — confirm against call sites.
                cursor.execute(sql, [values])
            data = parse_result(cursor)
            return data
def main(args):
    """
    Main Function

    Drives ERNIE-based sequence labeling: builds train/test/infer programs
    according to the do_train / do_test / do_infer flags, loads checkpoints
    or pretraining params, runs the training loop with periodic save/eval,
    then performs a final test evaluation and/or inference.
    """
    # NOTE(review): the `args` parameter is immediately overwritten by a
    # fresh parse of sys.argv, so the caller's value is ignored — confirm
    # this is intended.
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        random_seed=args.random_seed)

    # NOTE(review): the message mentions `do_val`, but the condition checks
    # do_train/do_test/do_infer — the wording looks stale.
    if not (args.do_train or args.do_test or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        num_train_examples = reader.get_num_examples(args.train_set)
        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                train_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='train_reader')
                train_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.train_set,
                                          args.batch_size,
                                          args.epoch,
                                          shuffle=True,
                                          phase="train"))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                train_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=False)

                optimizer = fluid.optimizer.Adam(learning_rate=args.lr)
                # Global-norm gradient clipping at 1.0 before minimize.
                fluid.clip.set_gradient_clip(
                    clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
                optimizer.minimize(train_ret["loss"])

        lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size)
        print("Theoretical memory usage in training: %.3f - %.3f %s" %
              (lower_mem, upper_mem, unit))

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                test_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='test_reader')
                test_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.test_set,
                                          args.batch_size,
                                          phase='test',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                test_ret = create_model(args,
                                        embeddings,
                                        labels=labels,
                                        is_prediction=False)

        test_program = test_program.clone(for_test=True)

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_prog):
            with fluid.unique_name.guard():
                # create ernie_pyreader
                infer_pyreader, ernie_inputs, words, labels = ernie_pyreader(
                    args, pyreader_name='infer_reader')
                infer_pyreader.decorate_tensor_provider(
                    reader.data_generator(args.infer_set,
                                          args.batch_size,
                                          phase='infer',
                                          epoch=1,
                                          shuffle=False))
                # get ernie_embeddings
                embeddings = ernie_encoder(ernie_inputs,
                                           ernie_config=ernie_config)
                # user defined model based on ernie embeddings
                infer_ret = create_model(args,
                                         embeddings,
                                         labels=labels,
                                         is_prediction=True)
                # Expose the input words so the infer loop can fetch them
                # alongside crf_decode.
                infer_ret["words"] = words

        infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)
        elif args.init_pretraining_params:
            utils.init_pretraining_params(exe, args.init_pretraining_params,
                                          startup_prog)
    elif args.do_test or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing test or infer!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, startup_prog)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        while True:
            try:
                steps += 1
                # Only fetch metrics every skip_steps steps to keep the
                # common iteration cheap.
                if steps % args.skip_steps == 0:
                    fetch_list = [
                        train_ret["loss"],
                        train_ret["num_infer_chunks"],
                        train_ret["num_label_chunks"],
                        train_ret["num_correct_chunks"],
                    ]
                else:
                    fetch_list = []

                start_time = time.time()
                outputs = exe.run(program=train_program, fetch_list=fetch_list)
                end_time = time.time()
                if steps % args.skip_steps == 0:
                    loss, nums_infer, nums_label, nums_correct = outputs
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f, "
                        "pyreader queue_size: %d " %
                        (steps, loss, precision, recall, f1_score,
                         end_time - start_time, train_pyreader.queue.size()))

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    print("\tsaving model as %s" % (save_path))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_program, test_pyreader, test_ret)

            # End of data: save a final checkpoint and leave the loop.
            except fluid.core.EOFException:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on test set
    if args.do_test:
        evaluate(exe, test_program, test_pyreader, test_ret)

    if args.do_infer:
        # create dict: both maps keyed by stringified id, as parse_result expects
        id2word_dict = dict([(str(word_id), word)
                             for word, word_id in reader.vocab.items()])
        id2label_dict = dict([(str(label_id), label)
                              for label, label_id in reader.label_map.items()])
        Dataset = namedtuple("Dataset", ["id2word_dict", "id2label_dict"])
        dataset = Dataset(id2word_dict, id2label_dict)

        infer_pyreader.start()
        while True:
            try:
                (words, crf_decode) = exe.run(
                    infer_program,
                    fetch_list=[infer_ret["words"], infer_ret["crf_decode"]],
                    return_numpy=False)
                # User should notice that words had been clipped if long than args.max_seq_len
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_pyreader.reset()
                break
예제 #8
0
파일: analyze.py 프로젝트: mcai/heo
         lambda r: r.stats['SimulationTimeInSeconds']),
        ('Throughput', lambda r: r.stats['Throughput']),
        ('Average Packet Delay', lambda r: r.stats['AveragePacketDelay']),
        ('Payload Throughput', lambda r: r.stats['PayloadThroughput']),
        ('Average Payload Packet Delay',
         lambda r: r.stats['AveragePayloadPacketDelay']),
    ])


for data_packet_injection_rate in synthesized_data_packet_injection_rate_range:
    results = []

    for traffic in synthesized_traffic_range:
        results.append(
            parse_result(working_directory(traffic, '', max_cycles, num_nodes,
                                           'XY', 'Random',
                                           data_packet_injection_rate, -1, -1),
                         bench=traffic))
        results.append(
            parse_result(working_directory(traffic, '', max_cycles, num_nodes,
                                           'OddEven', 'BufferLevel',
                                           data_packet_injection_rate, -1, -1),
                         bench=traffic))

        for aco_selection_alpha in aco_selection_alpha_range:
            for reinforcement_factor in reinforcement_factor_range:
                results.append(
                    parse_result(working_directory(traffic, '', max_cycles,
                                                   num_nodes, 'OddEven', 'ACO',
                                                   data_packet_injection_rate,
                                                   aco_selection_alpha,
                                                   reinforcement_factor),
예제 #9
0
def main(args):
    """
    Train / evaluate / infer a LAC-style sequence labeling model.

    Builds the programs requested by args.do_train / do_test / do_infer,
    loads checkpoints, runs the epoch-based training loop with periodic
    save/eval, optionally emits CE (continuous-evaluation) KPI lines, then
    performs a final test evaluation and/or inference.
    """

    startup_program = fluid.Program()
    if args.random_seed is not None:
        startup_program.random_seed = args.random_seed

    # prepare dataset
    dataset = reader.Dataset(args)

    if args.do_train:
        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                train_ret = create_model(args, "train_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                train_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(paddle.reader.shuffle(
                        dataset.file_reader(args.train_data),
                        buf_size=args.traindata_shuffle_buffer),
                                 batch_size=args.batch_size))

                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.base_learning_rate)
                optimizer.minimize(train_ret["avg_cost"])

    if args.do_test:
        test_program = fluid.Program()
        with fluid.program_guard(test_program, startup_program):
            with fluid.unique_name.guard():
                test_ret = create_model(args, "test_reader",
                                        dataset.vocab_size, dataset.num_labels)
                test_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.test_data),
                                 batch_size=args.batch_size))
        test_program = test_program.clone(
            for_test=True)  # to share parameters with train model

    if args.do_infer:
        infer_program = fluid.Program()
        with fluid.program_guard(infer_program, startup_program):
            with fluid.unique_name.guard():
                infer_ret = create_model(args, "infer_reader",
                                         dataset.vocab_size,
                                         dataset.num_labels)
                infer_ret["pyreader"].decorate_paddle_reader(
                    paddle.batch(dataset.file_reader(args.infer_data),
                                 batch_size=args.batch_size))
        infer_program = infer_program.clone(for_test=True)

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = multiprocessing.cpu_count()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    # load checkpoints
    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    elif args.do_test:
        if not args.init_checkpoint:
            raise ValueError(
                "args 'init_checkpoint' should be set if only doing validation or testing!"
            )
        utils.init_checkpoint(exe, args.init_checkpoint, test_program)
    # NOTE(review): unlike the test branch above, init_checkpoint is used
    # here without checking it is set — an unset value presumably fails
    # inside utils.init_checkpoint; confirm whether a guard is wanted.
    if args.do_infer:
        utils.init_checkpoint(exe, args.init_checkpoint, infer_program)

    # do start to train
    if args.do_train:
        num_train_examples = dataset.get_num_examples(args.train_data)
        max_train_steps = args.epoch * num_train_examples // args.batch_size
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        ce_info = []
        batch_id = 0
        for epoch_id in range(args.epoch):
            train_ret["pyreader"].start()
            ce_time = 0
            try:
                while True:
                    start_time = time.time()
                    avg_cost, nums_infer, nums_label, nums_correct = exe.run(
                        train_program,
                        fetch_list=[
                            train_ret["avg_cost"],
                            train_ret["num_infer_chunks"],
                            train_ret["num_label_chunks"],
                            train_ret["num_correct_chunks"],
                        ],
                    )
                    end_time = time.time()
                    train_ret["chunk_evaluator"].reset()
                    train_ret["chunk_evaluator"].update(
                        nums_infer, nums_label, nums_correct)
                    precision, recall, f1_score = train_ret[
                        "chunk_evaluator"].eval()
                    batch_id += 1
                    print(
                        "[train] batch_id = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f "
                        % (batch_id, avg_cost, precision, recall, f1_score,
                           end_time - start_time))
                    ce_time += end_time - start_time
                    ce_info.append(
                        [ce_time, avg_cost, precision, recall, f1_score])

                    # save checkpoints
                    if (batch_id % args.save_model_per_batches == 0):
                        save_path = os.path.join(args.model_save_dir,
                                                 "step_" + str(batch_id))
                        fluid.io.save_persistables(exe, save_path,
                                                   train_program)

                    # evaluate
                    if (batch_id % args.valid_model_per_batches
                            == 0) and args.do_test:
                        evaluate(exe, test_program, test_ret)

            except fluid.core.EOFException:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(batch_id))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_ret["pyreader"].reset()
                # No `break` here: EOF ends this epoch's inner while-loop and
                # the for-loop proceeds to the next epoch (original author
                # left a "# break?" question here).
    if args.do_train and args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        # NOTE(review): bare `except` silently masks any error (even missing
        # ce_info entries) — kept as-is, but a narrower IndexError would be
        # safer.
        try:
            # Second-to-last record is used so the (possibly partial) final
            # batch does not skew the CE metrics.
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))

    # only test
    if args.do_test:
        evaluate(exe, test_program, test_ret)

    if args.do_infer:
        infer_ret["pyreader"].start()
        while True:
            try:
                (
                    words,
                    crf_decode,
                ) = exe.run(infer_program,
                            fetch_list=[
                                infer_ret["words"],
                                infer_ret["crf_decode"],
                            ],
                            return_numpy=False)
                results = utils.parse_result(words, crf_decode, dataset)
                for result in results:
                    print(result)
            except fluid.core.EOFException:
                infer_ret["pyreader"].reset()
                break
예제 #10
0
                    vocab,
                    pred_useful,
                    pred_verbs,
                    pred_args,
                    do_post=use_post_heuristics)
                pred_for_summarize[yid].append(
                    get_ordered_pred_for_summarize(filtered_chunks))

    else:
        srl_predictor = get_srl_predictor()
        dump_srl_raw_results = {}
        for yid in sents:
            dump_srl_raw_results[yid] = []
            for sent in sents[yid]:
                srl = srl_predictor.predict_json({'sentence': sent})
                srl_chunks = parse_result(srl)
                dump_srl_raw_results[yid].append((sent, srl_chunks))
                srl_post_heuristics(srl_chunks, yid, vocab, pred_useful,
                                    pred_verbs, pred_args)

        print("Dumping SRL raw results")
        with open("raw_srl.pkl", 'wb') as raw_file:
            pickle.dump(dump_srl_raw_results, raw_file)

    print("Key sentence:")
    evaluate_keysent(gt, pred_useful)

    print("Verbs:")
    for sota_pred in (None, sota_pred_keysent):
        for fuzzy, partial_ratio in ((False, False), (True, False), (True,
                                                                     True)):