示例#1
0
文件: bert.py 项目: tomzhang/gnes
 def post_init(self):
     """Start an in-process BertServer and wait on its ``is_ready`` signal.

     The server options are parsed from ``self._bert_args`` and the
     resulting server object is stored on ``self.bert_server``.
     """
     # Function-scope import: bert_serving is only loaded when this runs.
     from bert_serving.server import BertServer, get_args_parser

     parsed_args = get_args_parser().parse_args(self._bert_args)
     self.bert_server = BertServer(parsed_args)
     self.bert_server.start()
     # Do not return until the server's is_ready object has been waited on.
     self.bert_server.is_ready.wait()
示例#2
0
def run_benchmark(args):
    """Benchmark BertServer throughput for every non-empty ``test_*`` sweep.

    For each attribute ``test_<name>`` on ``args`` that holds a non-empty
    collection, the option ``<name>`` is overridden with each candidate
    value in turn. For every candidate a fresh server is started,
    ``args.num_client`` benchmark clients are run against it, and the mean
    client speed (``client_batch_size / avg_time``) is recorded.

    Results are appended as a markdown table to
    ``benchmark-<num_worker>[-fp16].result`` in the working directory.

    :param args: parsed benchmark arguments. This function reads
        ``client_vocab_file``, ``wait_till_ready``, ``num_client``,
        ``client_batch_size``, ``num_worker``, ``fp16`` and every
        ``test_*`` attribute.
    """
    from copy import deepcopy
    from bert_serving.server import BertServer

    # load vocabulary: the unique whitespace-separated tokens of the file
    with open(args.client_vocab_file, encoding='utf8') as fp:
        vocab = list(set(vv for v in fp for vv in v.strip().split()))
    print('vocabulary size: %d' % len(vocab))

    # select those non-empty test cases
    all_exp_names = [
        k.replace('test_', '') for k, v in vars(args).items()
        if k.startswith('test_') and v
    ]

    for exp_name in all_exp_names:
        # set common args; work on a copy so the caller's args stay untouched
        cargs = deepcopy(args)
        exp_vars = vars(args)['test_%s' % exp_name]
        avg_speed = []

        for cvar in exp_vars:
            # override exp args
            setattr(cargs, exp_name, cvar)
            server = BertServer(cargs)
            server.start()
            try:
                # sleep until server is ready (fixed pause, not a handshake)
                time.sleep(cargs.wait_till_ready)

                all_clients = [
                    BenchmarkClient(cargs, vocab)
                    for _ in range(cargs.num_client)
                ]
                for bc in all_clients:
                    bc.start()

                clients_speed = []
                for bc in all_clients:
                    bc.join()
                    clients_speed.append(
                        cargs.client_batch_size / bc.avg_time)
            finally:
                # Always stop the server, even when a client fails, so it
                # does not leak into the next configuration's run.
                server.close()

            max_speed = int(max(clients_speed))
            min_speed = int(min(clients_speed))
            cavg_speed = int(mean(clients_speed))

            print('avg speed: %d\tmax speed: %d\tmin speed: %d' %
                  (cavg_speed, max_speed, min_speed),
                  flush=True)

            avg_speed.append(cavg_speed)

        result_path = 'benchmark-%d%s.result' % (
            args.num_worker, '-fp16' if args.fp16 else '')
        # append mode: results of successive sweeps accumulate in one file
        with open(result_path, 'a') as fw:
            print('\n|`%s`\t|samples/s|\n|---|---|' % exp_name, file=fw)
            for cvar, cavg_speed in zip(exp_vars, avg_speed):
                print('|%s\t|%d|' % (cvar, cavg_speed), file=fw)
            # for additional plotting
            print('\n%s = %s\n%s = %s' %
                  (exp_name, exp_vars, 'speed', avg_speed),
                  file=fw)
示例#3
0
def get_model(TUNED_FLAG=False):
    """Start a CPU BertServer on ports 5555/5556, then request its shutdown.

    The server is configured for the ``english_L-12_H-768_A-12/`` model
    with four workers. Nothing is returned.

    :param TUNED_FLAG: when truthy, additionally pass the fine-tuned model
        directory ``/tmp/mrpc_output/`` and checkpoint ``model.ckpt-343``.
    """
    args = [
        '-model_dir',
        'english_L-12_H-768_A-12/',
        '-port',
        '5555',
        '-port_out',
        '5556',
        '-max_seq_len',
        'NONE',
        '-mask_cls_sep',
        # Option names need the leading dash; a bare 'num_worker' would be
        # handed to argparse as a stray positional argument.
        '-num_worker',
        '4',
        '-cpu',
    ]
    if TUNED_FLAG:
        args.extend([
            '-tuned_model_dir',
            '/tmp/mrpc_output/',
            '-ckpt_name',
            'model.ckpt-343',
        ])

    bert_args = get_args_parser().parse_args(args)
    server = BertServer(bert_args)
    server.start()
    # NOTE(review): shutdown is requested immediately after start, and the
    # upstream BertServer.shutdown may expect an args namespace rather than
    # a ``port`` keyword -- confirm against the installed bert_serving.
    BertServer.shutdown(port=5555)
示例#4
0
    def _init_bert_client(model_dir, max_seq_len, device_map,
                          num_worker) -> BertClient:
        """Initialize bert client for sentence embeddings and avoid restarting bert-server if already running.

        For more information, see: https://github.com/hanxiao/bert-as-service
        Bert-server can take a long time to start, take over stdout during training, and create many temp log files.
        It's highly recommended to run bert-server beforehand from command-line in a dedicated folder:
        e.g:
        ~/gym-summarizer/data/bert $
            bert-serving-start -model_dir uncased_L-12_H-768_A-12/ -max_seq_len 40 -device_map 1 2 3 4 -num_worker 4

        :param model_dir: directory containing bert model
        :param max_seq_len: max sequence length for bert
        :param device_map: device id(s) for ``-device_map``; either a single
            value or a list/tuple of values
        :param num_worker: number of server workers for ``-num_worker``
        :return bc: bert-client
        """

        try:
            bc = BertClient()
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit must propagate instead of spawning a server.
            # NOTE(review): BertClient() may block rather than raise when no
            # server is reachable -- confirm this fallback is ever reached.
            from bert_serving.server.helper import get_args_parser
            from bert_serving.server import BertServer

            # argparse only accepts strings, so coerce every value; a
            # sequence of device ids becomes one argument per id.
            if isinstance(device_map, (list, tuple)):
                device_ids = [str(device) for device in device_map]
            else:
                device_ids = [str(device_map)]

            args = get_args_parser().parse_args([
                '-model_dir', str(model_dir),
                '-max_seq_len', str(max_seq_len),
                '-device_map', *device_ids,
                '-num_worker', str(num_worker),
            ])
            server = BertServer(args)
            server.start()
            bc = BertClient()

        return bc
def main():
    """Run a BertServer built from the run arguments and join it."""
    from bert_serving.server.helper import get_run_args
    from bert_serving.server import BertServer

    bert_server = BertServer(get_run_args())
    bert_server.start()
    # join() is called so this function waits on the running server.
    bert_server.join()
示例#6
0
def start_server(max_seq_len, pretrained_model):
    """Start a CPU BertServer for ``pretrained_model`` on ports 5555/5556.

    :param max_seq_len: value for ``-max_seq_len``; converted with ``str``
        before being handed to the argument parser.
    :param pretrained_model: model directory passed as ``-model_dir``.
    """
    parser = get_args_parser()
    # One option (plus its value, if any) per line for readability.
    cli_args = [
        '-model_dir', pretrained_model,
        '-port', '5555',
        '-port_out', '5556',
        '-pooling_strategy', 'NONE',
        '-show_tokens_to_client',
        '-max_seq_len', str(max_seq_len),
        '-mask_cls_sep',
        '-cpu',
    ]
    bert_server = BertServer(parser.parse_args(cli_args))
    bert_server.start()
示例#7
0
 def __init__(self, model_path):
     """Build a CPU BertServer for the model at ``model_path``.

     The server is stored on ``self._server``; it is not started here.

     :param model_path: model directory passed as ``-model_dir``.
     """
     parser = get_args_parser()
     # For details, see: https://github.com/hanxiao/bert-as-service
     cli_args = [
         '-num_worker', '4',
         '-model_dir', model_path,
         '-port', '5555',
         '-port_out', '5556',
         '-max_seq_len', 'NONE',
         '-mask_cls_sep',
         '-cpu',
     ]
     self._server = BertServer(parser.parse_args(cli_args))
def main():
    """Start a CPU BertServer for the Chinese BERT model in ``../data``."""
    parser = get_args_parser()
    # NOTE(review): 86500/86501 are above the 65535 TCP port limit --
    # confirm the intended port numbers.
    cli_args = [
        '-model_dir', r'../data/chinese_L-12_H-768_A-12',
        '-port', '86500',
        '-port_out', '86501',
        '-max_seq_len', '512',
        '-mask_cls_sep',
        '-cpu',
    ]
    bert_server = BertServer(parser.parse_args(cli_args))
    bert_server.start()
示例#9
0
def main():
    """Start a single-worker CPU BertServer for the uncased base model."""
    parser = get_args_parser()
    cli_args = [
        '-model_dir', './uncased_L-12_H-768_A-12',
        '-port', '5555',
        '-port_out', '5556',
        '-max_seq_len', '25',
        '-num_worker', '1',
        '-mask_cls_sep',
        '-cpu',
    ]
    bert_server = BertServer(parser.parse_args(cli_args))
    bert_server.start()
示例#10
0
def main():
    """Start a single-worker CPU BertServer from the ``./biobert`` model.

    The checkpoint ``model.ckpt-1000000`` is selected via ``-ckpt_name``.
    """
    parser = get_args_parser()
    cli_args = [
        '-model_dir', './biobert',
        '-ckpt_name', 'model.ckpt-1000000',
        '-port', '5555',
        '-port_out', '5556',
        '-max_seq_len', '30',
        '-num_worker', '1',
        '-mask_cls_sep',
        '-cpu',
    ]
    bert_server = BertServer(parser.parse_args(cli_args))
    bert_server.start()
示例#11
0
def main():
    """Run a BertServer as a context manager with TF eager mode disabled."""
    from bert_serving.server.helper import get_run_args
    from bert_serving.server import BertServer

    import tensorflow as tf

    # Eager execution is switched off before the server is constructed.
    tf.compat.v1.disable_eager_execution()

    run_args = get_run_args()
    with BertServer(run_args) as bert_server:
        bert_server.join()
示例#12
0
def run_benchmark(args):
    """Benchmark BertServer throughput for every ``test_*`` sweep on ``args``.

    For each attribute ``test_<name>`` on ``args``, the option ``<name>``
    is overridden with each candidate value in turn. For every candidate a
    fresh server is started, ``args.num_client`` benchmark clients are run
    against it, and the mean client speed
    (``client_batch_size / avg_time``) is recorded.

    Results are written (overwriting any previous file) to
    ``benchmark-<num_worker>[-fp16].result`` as tab-separated rows, and
    flushed after each measurement.

    :param args: parsed benchmark arguments. This function reads
        ``client_vocab_file``, ``wait_till_ready``, ``num_client``,
        ``client_batch_size``, ``num_worker``, ``fp16`` and every
        ``test_*`` attribute.
    """
    from copy import deepcopy
    from bert_serving.server import BertServer

    # load vocabulary: the unique whitespace-separated tokens of the file
    with open(args.client_vocab_file, encoding='utf8') as fp:
        vocab = list(set(vv for v in fp for vv in v.strip().split()))
    print('vocabulary size: %d' % len(vocab))

    all_exp_names = [
        k.replace('test_', '') for k in vars(args)
        if k.startswith('test_')
    ]
    result_path = 'benchmark-%d%s.result' % (
        args.num_worker, '-fp16' if args.fp16 else '')

    # The context manager closes the result file even if a run raises.
    with open(result_path, 'w') as fw:
        for exp_name in all_exp_names:
            # set common args; work on a copy so the caller's args stay
            # untouched
            cargs = deepcopy(args)
            exp_vars = vars(args)['test_%s' % exp_name]
            avg_speed = []
            fw.write('\n%s\tsamples/s\n' % exp_name)
            for cvar in exp_vars:
                # override exp args
                setattr(cargs, exp_name, cvar)
                server = BertServer(cargs)
                server.start()
                try:
                    # sleep until server is ready (fixed pause)
                    time.sleep(cargs.wait_till_ready)

                    all_clients = [
                        BenchmarkClient(cargs, vocab)
                        for _ in range(cargs.num_client)
                    ]
                    for bc in all_clients:
                        bc.start()

                    clients_speed = []
                    for bc in all_clients:
                        bc.join()
                        clients_speed.append(
                            cargs.client_batch_size / bc.avg_time)
                finally:
                    # Always stop the server, even when a client fails, so
                    # it does not leak into the next configuration's run.
                    server.close()

                max_speed = int(max(clients_speed))
                min_speed = int(min(clients_speed))
                cavg_speed = int(mean(clients_speed))

                print('avg speed: %d\tmax speed: %d\tmin speed: %d' %
                      (cavg_speed, max_speed, min_speed),
                      flush=True)
                fw.write('%s\t%d\n' % (cvar, cavg_speed))
                # flush per measurement so partial results reach the file
                fw.flush()
                avg_speed.append(cavg_speed)

            # for plotting
            fw.write('%s\n%s\n' % (exp_vars, avg_speed))
            fw.flush()
def main():
    """Start a BertServer for ``uncased_L-12_H-768_A-12`` with default options."""
    parser = get_args_parser()
    # Only the model directory is passed. Flags that are currently left
    # out (previously listed here, commented out):
    #   -port 5555, -port_out 5556, -max_seq_len NONE, -mask_cls_sep, -cpu
    cli_args = ['-model_dir', 'uncased_L-12_H-768_A-12']
    bert_server = BertServer(parser.parse_args(cli_args))
    bert_server.start()
示例#14
0
 def __init__(self):
     """Create and start a CPU BertServer for the multilingual cased model.

     The server is stored on ``self.server``; a message is printed once
     ``start()`` has been called.
     """
     parser = get_args_parser()
     cli_args = [
         '-model_dir',
         '/Data_HDD/zhipengye/projects/bert/multi_cased_L-12_H-768_A-12',
         '-port', '5555',
         '-port_out', '5556',
         '-max_seq_len', 'NONE',
         '-mask_cls_sep',
         '-cpu',
     ]
     self.server = BertServer(parser.parse_args(cli_args))
     self.server.start()
     print('bert sever has started')