Example #1
import json

from tqdm import tqdm

# opt, args, model, data_loader and text_encoder are assumed to be set up
# as in the COMET interactive scripts; BeamSampler and GreedySampler are
# the COMET sampler classes.
sample_inputs = []
val = {"e1": args.input, "r": "Causes"}
sample_inputs.append(val)

if "bs" in opt.eval:
    opt.eval.pop("bs")
if "k" in opt.eval:
    opt.eval.pop("k")

if "beam" in args.sampling_algorithm:
    opt.eval.sample = "beam"
    opt.eval.bs = int(args.sampling_algorithm.split("-")[1])
    sampler = BeamSampler(opt, data_loader)
else:
    opt.eval.sample = "greedy"
    opt.eval.bs = 1
    sampler = GreedySampler(opt, data_loader)

outputs = []

# Generate ConceptNet tuples for each (e1, relation) query.
for input_case in tqdm(sample_inputs):
    e1 = input_case["e1"]
    r = input_case["r"]

    output = interactive.get_conceptnet_sequence(e1, model, sampler,
                                                 data_loader, text_encoder, r)

    outputs.append(output)

with open(args.output_file, "w") as f:
    json.dump(outputs, f)
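
The query list is not limited to a single relation. A small illustrative extension of the driver above, one query per relation (the relation names are ordinary ConceptNet relations, chosen here purely for illustration):

for r in ["Causes", "HasSubevent", "MotivatedByGoal"]:
    sample_inputs.append({"e1": args.input, "r": r})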
Example #2
relation = ['AtLocation', 'HasPrerequisite', 'Desires', 'UsedFor']
sampling_algorithm = args.sampling_algorithm

# %%
sampler = interactive.set_sampler(opt, sampling_algorithm, data_loader)

# Fall back to querying all relations if any requested one is unknown.
for rel in relation:
    if rel not in data.conceptnet_data.conceptnet_relations:
        relation = "all"
        break

start = time.time()
logging.info('Getting Outputs from ConceptNet...')

outputs = [
    interactive.get_conceptnet_sequence(input_event, comet_model, sampler,
                                        data_loader, text_encoder, relation)
    for input_event in input_events
]

logging.info('Total Time Used: %.2fs', time.time() - start)

# %% [markdown]
# Use bert-as-service to embed the output phrases from ConceptNet.

# %%
start = time.time()
logging.info('Getting Happy Elements Encodings...')
num_beam = int(sampling_algorithm.split('-')[1])  # e.g. "beam-10" -> 10

# Assumption: each output maps relation -> {"beams": [...]}; collect the
# per-relation beam lists for the embedding step below.
embeddings = [
    beams for x in outputs for beams in [
        x[rel]["beams"] for rel in x
    ]
]
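
The markdown cell above names bert-as-service for the actual encoding step. A minimal sketch of that call, assuming a bert-serving server is already running and that `embeddings` holds the per-relation beam lists built above:

from bert_serving.client import BertClient

bc = BertClient()  # assumes `bert-serving-start` is up and reachable

# Flatten the per-relation beam lists into one list of phrases, then encode.
phrases = [p for beams in embeddings for p in beams]
vectors = bc.encode(phrases)  # numpy array of shape (len(phrases), dim)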
Example #3
def _get_result(self, event, category):
    # Query COMET for `event` under the given relation `category`,
    # then post-process the raw beams.
    raw_result = interactive.get_conceptnet_sequence(
        event, self._model, self._sampler, self._data_loader,
        self._text_encoder, category)
    return self.all_beams_cleanup(raw_result)
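
all_beams_cleanup itself is not part of this snippet. A hypothetical sketch of such a helper, assuming the relation -> {"beams": [...]} return shape seen in the other examples; the filtering rule is an assumption, not the original code:

def all_beams_cleanup(self, raw_result):
    # Hypothetical post-processing: trim whitespace and drop empty or
    # "none" generations, keeping the relation -> beams mapping.
    return {
        rel: [b.strip() for b in res["beams"] if b.strip() not in ("", "none")]
        for rel, res in raw_result.items()
    }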
Example #4
import nltk
import tqdm

# Cache COMET queries so repeated n-grams are only generated once.
# `raw_data`, `tokenizer`, `get_ngrams`, `model`, `sampler`, `data_loader`,
# `text_encoder` and `relation` are assumed to be set up elsewhere.
results_cache = {}
for sample in tqdm.tqdm(raw_data):
    # Each dialogue turn looks like "speaker: utterance".
    for turn in sample[0]:
        content = turn.split(':', 1)[1].strip()
        sents = nltk.sent_tokenize(content)
        for sent in sents:
            # Drop the trailing punctuation, then split on commas.
            sent = sent.strip()[:-1]
            subsents = sent.split(',')
            for subsent in subsents:
                tokens = tokenizer.tokenize(subsent)
                candidates = get_ngrams(tokens)
                for cand in candidates:
                    if cand not in results_cache:
                        outputs = interactive.get_conceptnet_sequence(
                            cand, model, sampler, data_loader,
                            text_encoder, relation)
                        results_cache[cand] = outputs
    # Apply the same treatment to the questions.
    for qa in sample[1]:
        ques = qa['question'][:-1]  # drop the trailing question mark
        subqs = ques.split(',')
        for subq in subqs:
            subq = subq.strip()
            tokens = tokenizer.tokenize(subq)
            candidates = get_ngrams(tokens)
            for cand in candidates:
                if cand not in results_cache:
                    outputs = interactive.get_conceptnet_sequence(
                        cand, model, sampler, data_loader, text_encoder,
                        relation)
                    results_cache[cand] = outputs
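
get_ngrams is used above but not shown. A minimal sketch of what it plausibly does; the body and the max_n parameter are assumptions, only the name comes from the snippet:

def get_ngrams(tokens, max_n=3):
    # Hypothetical reconstruction: every contiguous n-gram up to max_n words,
    # joined back into a whitespace-separated phrase.
    return [
        " ".join(tokens[i:i + n])
        for n in range(1, max_n + 1)
        for i in range(len(tokens) - n + 1)
    ]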
Example #5
def _get_sequence(self, event, relations, sampler):
    # Thin wrapper over the COMET interactive API; `relations` may be a
    # single relation, a list of relations, or "all" (see Example #2).
    return interactive.get_conceptnet_sequence(event, self._model, sampler,
                                               self._data_loader,
                                               self._text_encoder,
                                               relations)
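
A hedged usage sketch to round this off; `wrapper` and its _model/_data_loader/_text_encoder attributes are illustrative stand-ins, assumed to be initialized via the COMET interactive API as in the earlier examples:

# Hypothetical driver; `opt` and `data_loader` set up as in Examples #1-#2.
sampler = interactive.set_sampler(opt, "beam-5", data_loader)
result = wrapper._get_sequence("going for a run",
                               ["Causes", "HasSubevent"], sampler)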