Exemplo n.º 1
0
def reload_model():
    """Reload the module-level model from a checkpoint named in the request.

    Reads ``model_checkpoint`` and ``model_type`` from the request params
    (``get_params()``), resolves the checkpoint path relative to the
    directory of the CLI-supplied ``args.model_checkpoint``, and rebinds
    the module-level ``model``, ``tokenizer`` and ``checkpoint_fn``.

    Returns:
        Response: a confirmation message naming the loaded checkpoint.

    Raises:
        InvalidUsage: wraps any exception raised during loading, including
            the exception type, message, line number and file name so the
            API client gets a useful error.
    """
    global model, tokenizer, checkpoint_fn
    try:
        start = time.time()
        # Was `logging.info` (root logger); use the module logger consistently.
        logger.info('prediction requested')
        params = get_params()
        logger.debug(json.dumps(params, indent=2))
        # Resolve the requested checkpoint next to the CLI-provided one.
        full_model_checkpoint = os.path.join(
            os.path.dirname(args.model_checkpoint),
            params['model_checkpoint'])
        model, tokenizer, checkpoint_fn = load_model(
            model_checkpoint=full_model_checkpoint,
            model_type=params['model_type'])

        response = Response(
            f'loaded model {params["model_checkpoint"]} successfully')
        # Lazy %-args: formatting only happens if the level is enabled.
        logger.info('Time spent handling the request: %f', time.time() - start)
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(tb)
        # Only the traceback object is needed; exc_type/exc_obj were unused.
        _, _, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        raise InvalidUsage('%s: %s @line %s in %s' %
                           (type(e).__name__, str(e), exc_tb.tb_lineno, fname))
    return response
Exemplo n.º 2
0
def load():
    """Delegate to :func:`interact.load_model` and return its result."""
    loaded = interact.load_model()
    return loaded
Exemplo n.º 3
0
        ])
        return res

    return _context_fetcher


if __name__ == "__main__":
    # Script entry point: configure logging, seed RNGs, load the model,
    # and (best-effort) build a Wikipedia context fetcher before serving.
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__file__)

    args = get_args()
    # Seed all RNG sources for reproducible sampling.
    random.seed(args.seed)
    torch.random.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    model, tokenizer, checkpoint_fn = load_model(
        model_checkpoint=args.model_checkpoint, model_type=args.model)

    try:
        logger.info('create wikipedia context fetcher ...')
        context_fetcher = create_wikipedia_context_fetcher(
            wikipedia_file=args.wikipedia_dump,
            base_url=args.entity_linking_service_url)
    except IOError as e:
        # NOTE(review): the message string below has no %s placeholder, so
        # `% args.spacy_model` raises TypeError ("not all arguments converted")
        # the moment this handler fires — drop the `% args.spacy_model` (the
        # caught exception `e` is also unused). Left unchanged here.
        logger.warning(
            'could not create a context fetcher. Please provide a context with every request.'
            % args.spacy_model)
        # Fall back to per-request contexts when the fetcher is unavailable.
        context_fetcher = None

    logger.info('Starting the API')
    if args.deploy:
        logger.info('use deployment server')
Exemplo n.º 4
0
def load():
    """Load the interaction model and the candidate set.

    Returns:
        tuple: ``(model_artifacts, candidate_set)`` — the result of
        ``interact.load_model()`` followed by the candidate set read from
        ``../data/candidate_set.txt``.
    """
    model_artifacts = interact.load_model()
    candidates = load_candidate_set("../data/candidate_set.txt")
    return model_artifacts, candidates
def pred_attacks(df,
                 model_path,
                 args,
                 output_clm,
                 context='title',
                 attacks_clm='premise_counter_premise_pairs',
                 post_clm='post',
                 rand_premise_idx=-1,
                 rand_premises_clm='',
                 baseline=False,
                 random=False):
    """Generate counter-attacks for every premise/counter pair in *df*.

    For each row and each pair in ``row[attacks_clm]``, encodes an argument
    context (title, post sentences, or both, per *context*), optionally
    conditions on weak premises, and samples a counter via
    ``interact.sample_sequence``.

    Args:
        df: DataFrame with title/post/premise columns (iterated row-wise).
        model_path: checkpoint path passed to ``interact.load_model``.
        args: decoding/config namespace forwarded to sampling and trimming.
        output_clm: name of the column to write predictions into.
        context: 'title', 'title+full_post', or anything else for post-only.
        attacks_clm: column holding (premises, counter) pairs.
        post_clm: column holding the post's sentences.
        rand_premise_idx: index into the random-premise column; ``None``
            means the column holds a single premise (default -1 = last).
        rand_premises_clm: column used when *random* is True.
        baseline: sample without any weak-premise conditioning.
        random: condition on ``rand_premises_clm`` instead of the pair's
            own premises. NOTE: this parameter shadows the ``random``
            module inside this function (kept for interface compatibility).

    Returns:
        The same DataFrame with predictions written to ``df[output_clm]``.
    """
    model, tokenizer = interact.load_model(model_path)

    predictions = []
    for _, row in df.iterrows():  # row index was unused
        post_attacks = []
        for premise_counter in row[attacks_clm]:

            # Encode the argument context once per pair.
            if context == 'title+full_post':
                argument = ([tokenizer.encode(row['title'])] +
                            [tokenizer.encode(sent) for sent in row[post_clm]])
            elif context == 'title':
                argument = [tokenizer.encode(row['title'])]
            else:
                argument = [tokenizer.encode(sent) for sent in row[post_clm]]

            if baseline:
                # No weak-premise conditioning at all.
                argument = trim_argument(argument, 510, args)
                pred_counter = interact.sample_sequence(argument, [],
                                                        tokenizer,
                                                        model,
                                                        args,
                                                        baseline=True)
            elif random:
                # Condition on a (randomly selected) premise column instead
                # of the pair's own premises. Was `!= None`; use identity.
                if rand_premise_idx is not None:
                    weak_premise_encoded = [
                        tokenizer.encode(
                            row[rand_premises_clm][rand_premise_idx])
                    ]
                else:
                    weak_premise_encoded = [
                        tokenizer.encode(row[rand_premises_clm])
                    ]

                argument = trim_argument(argument, 510, args)
                pred_counter = interact.sample_sequence(
                    argument, weak_premise_encoded, tokenizer, model, args)
            else:
                # Condition on the pair's own weak premises.
                weak_premise_encoded = [
                    tokenizer.encode(premise) for premise in premise_counter[0]
                ]
                argument = trim_argument(argument, 510, args)
                pred_counter = interact.sample_sequence(
                    argument, weak_premise_encoded, tokenizer, model, args)

            post_attacks.append([premise_counter[0], pred_counter])

        predictions.append(post_attacks)

    df[output_clm] = predictions

    return df
def perform_attacks_hua_df(df,
                           model_path,
                           args,
                           output_clm,
                           context,
                           weak_premise_clm,
                           weak_premise_idx=None,
                           baseline=False):
    """Generate one counter-attack per row of the Hua-style DataFrame *df*.

    Encodes each row's claim/post as the argument context, optionally
    conditions on the row's weak premises, samples a counter via
    ``interact.sample_sequence`` and strips echoed source text.

    Args:
        df: DataFrame with 'claim', 'post' and *weak_premise_clm* columns.
        model_path: checkpoint path passed to ``interact.load_model``.
        args: decoding/config namespace; ``args.premise_extra`` reserves
            token budget for the encoded weak premises.
        output_clm: name of the column to write predictions into.
        context: 'title', 'title+full_post', or anything else for post-only.
        weak_premise_clm: column holding the weak premise(s).
        weak_premise_idx: optional index selecting one premise group.
        baseline: sample without weak-premise conditioning.

    Returns:
        The same DataFrame with predictions written to ``df[output_clm]``.
    """
    model, tokenizer = interact.load_model(model_path)

    # Renamed from `pred_attacks`, which shadowed the sibling function.
    predicted_attacks = []
    for _, row in df.iterrows():
        if baseline:
            sentences = [row['claim']] + row['post']
            argument = [tokenizer.encode(sentence) for sentence in sentences]
            argument = trim_argument(argument, 510, args)

            pred_counter = interact.sample_sequence(argument, [],
                                                    tokenizer,
                                                    model,
                                                    args,
                                                    baseline=True)

            # Strip any echoed source text from the generation.
            pred_counter = pred_counter.replace(row['claim'].lower(), '')
            for sent in row['post']:
                pred_counter = pred_counter.replace(sent.lower(), '')
        else:
            # Encode the argument context per the requested mode.
            if context == 'title+full_post':
                argument = ([tokenizer.encode(row['claim'])] +
                            [tokenizer.encode(sent) for sent in row['post']])
            elif context == 'title':
                argument = [tokenizer.encode(row['claim'])]
            else:
                argument = [tokenizer.encode(sent) for sent in row['post']]

            # Was `!= None`; use identity comparison for None.
            if weak_premise_idx is not None:
                premises = row[weak_premise_clm][weak_premise_idx]
            else:
                premises = row[weak_premise_clm]
            weak_premise_encoded = [
                tokenizer.encode(premise) for premise in premises
            ]

            if args.premise_extra:
                # Reserve room for the weak premises within the 510 budget.
                premise_tokens = sum(len(p) for p in weak_premise_encoded)
                argument = trim_argument(argument, 510 - premise_tokens, args)
            else:
                argument = trim_argument(argument, 510, args)

            pred_counter = interact.sample_sequence(argument,
                                                    weak_premise_encoded,
                                                    tokenizer, model, args)

            # Strip echoed weak premises from the generation.
            for p in row[weak_premise_clm]:
                pred_counter = pred_counter.replace(p.lower(), '')

        predicted_attacks.append(pred_counter)

    df[output_clm] = predicted_attacks

    return df