Пример #1
0
def what_baseline(question, story, return_type, sch_flag=False):
    """Pick the story sentence that best answers a 'what' question.

    Scores each sentence by lemma overlap with the question keywords
    (after stop-word removal) plus an optional similarity score, and
    returns the highest-scoring original sentence with its index.

    Args:
        question: mapping with at least 'text', 'par' and 'qid' keys.
        story: mapping with 'text', 'sid' and (when sch_flag) 'sch' keys.
        return_type: expected answer type ('quotation', 'verb', 'noun');
            used to bias the keyword list.
        sch_flag: when True, match against the story's 'sch' rendering
            instead of its raw text.

    Returns:
        (best_sentence, index): the original (pronoun-unresolved)
        sentence text and its position in the story.
    """
    source = story['sch'] if sch_flag else story['text']
    # Keep the original sentences for the return value, but do the
    # keyword matching on a pronoun-resolved copy.
    text_actual = get_sents(source)
    sents = get_sents(utils.resolve_pronouns(source))

    keywords, pattern = get_keywords_pattern_tuple(question['text'],
                                                   question['par'])

    # Bias the keyword list by expected answer type.
    if return_type == 'quotation':
        keywords.append('said')
    elif return_type == 'noun' and 'day' in question['text']:
        keywords += [
            'Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday',
            'Thursday', 'Friday'
        ]

    # Words that must NOT be treated as stop words for this question
    # type.  (The original replaced them with '' inside the stop list;
    # filtering them out is equivalent, since no token is ''.)
    rem_list = {'in', 'he', 'she', 'him', 'her', 'before', 'after'}
    stop_words_cust = [w for w in stop_words if w not in rem_list]
    ignorable = set(stop_words_cust + [':', '’', ',', '.', '!', "'", '"', '?'])

    eligible_sents = []
    for i, sent in enumerate(sents):
        words = nltk.word_tokenize(sent)
        words_pos = nltk.pos_tag(words)
        content_words = [w for w in words if w not in ignorable]
        sim_score = utils.get_similarity(list(set(content_words)),
                                         list(set(keywords)),
                                         story['sid'], question['qid'])
        # NOTE(review): lemmatization runs over *all* tokens (words_pos),
        # not the stop-word-filtered list — preserved from the original.
        lemmas = [lmtzr.lemmatize(w, pos=penn2wn(t)) for w, t in words_pos]

        quant = len(set(lemmas) & set(keywords))
        if sim_score:
            quant += sim_score
        eligible_sents.append((quant, text_actual[i], i))

    # Highest score wins; max() returns the first maximum, matching the
    # original stable reverse sort's tie-breaking on sentence order.
    _, best, index = max(eligible_sents, key=operator.itemgetter(0))
    return best, index
Пример #2
0
def get_ids_from_property_value(data: EntitiesData, property_name: str, property_value: str, fix_data_delegate: Callable[[str], str] = None, match_exact: bool = False) -> List[str]:
    """Return the ids of entries whose *property_name* value matches *property_value*.

    Both the needle and every haystack value are normalized through
    *fix_data_delegate* before comparison.

    Args:
        data: mapping of {entry_id: entry_content}; each entry_content
            is itself a mapping that contains *property_name*.
        property_name: key to compare within each entry.
        property_value: value to search for.
        fix_data_delegate: normalizer applied to both sides before
            comparing; defaults to the module-level __fix_property_value.
        match_exact: when True, return only ids whose normalized value
            equals the normalized needle.  Otherwise substring matches
            are ranked by utils.get_similarity, best score first, ties
            broken alphabetically by property value.

    Returns:
        List of matching entry ids (possibly empty).
    """
    # data structure: {id: content}
    # fixed_data structure: {description: id}
    if not data or not property_name or not property_value:
        print(f'- get_ids_from_property_value: invalid data or property info. Return empty list.')
        return []

    if not fix_data_delegate:
        fix_data_delegate = __fix_property_value

    fixed_value = fix_data_delegate(property_value)
    # {entry_id: normalized property value}, skipping falsy raw values.
    fixed_data = {entry_id: fix_data_delegate(entry_data[property_name]) for entry_id, entry_data in data.items() if entry_data[property_name]}

    if match_exact:
        return [key for key, value in fixed_data.items() if value == fixed_value]

    # Group substring matches by similarity score.  (The original also
    # tested startswith(), but that is subsumed by the `in` check, and
    # re-tested match_exact inside this branch where it is always False.)
    similarity_map = {}
    for entry_id, entry_property in fixed_data.items():
        if fixed_value in entry_property:
            similarity_value = utils.get_similarity(entry_property, fixed_value)
            similarity_map.setdefault(similarity_value, []).append((entry_id, entry_property))

    results = []
    # Best similarity first; within one score, order by property value.
    for similarity_value in sorted(similarity_map.keys(), reverse=True):
        entries = sorted(similarity_map[similarity_value], key=lambda entry: entry[1])
        results.extend(entry_id for entry_id, _ in entries)
    return results
Пример #3
0
    def __init__(self,
                 model_to_explain,
                 original_molecule,
                 discount_factor,
                 fp_len,
                 fp_rad,
                 similarity_set=None,
                 weight_sim=0.5,
                 similarity_measure="tanimoto",
                 **kwargs):
        """Set up an ESOL counterfactual environment.

        Caches the model's prediction for the original molecule and
        builds the similarity machinery used to score candidates.
        """
        super(CF_Esol, self).__init__(**kwargs)

        self.model_to_explain = model_to_explain
        self.discount_factor = discount_factor
        self.weight_sim = weight_sim
        self.target = original_molecule.y

        # Prediction of the model on the unmodified molecule.
        self.orig_pred, _ = model_to_explain(original_molecule.x,
                                             original_molecule.edge_index)

        # L1 distance between two predictions, detached from the graph.
        self.distance = lambda a, b: F.l1_loss(a, b).detach()
        self.base_loss = self.distance(self.orig_pred, self.target).item()
        # Sign of how far a prediction moved away from the original one.
        self.gain = lambda pred: torch.sign(
            self.distance(pred, self.orig_pred)).item()

        similarity_parts = get_similarity(similarity_measure,
                                          model_to_explain,
                                          original_molecule,
                                          fp_len,
                                          fp_rad)
        self.similarity, self.make_encoding, self.original_encoding = \
            similarity_parts
Пример #4
0
def when_baseline(question, story, kw_adds, sch_flag=False):
    """Pick the story sentence that best answers a 'when' question.

    Scores each sentence by lemma overlap with the question keywords
    (plus *kw_adds*) and an optional similarity score, returning the
    highest-scoring original sentence and its index.

    Args:
        question: mapping with 'text', 'par' and 'qid' keys.
        story: mapping with 'text', 'sid' and (when sch_flag) 'sch' keys.
        kw_adds: extra keywords to append for this question type.
        sch_flag: when True, match against the 'sch' rendering instead
            of the pronoun-resolved raw text.

    Returns:
        (best_sentence, index).
    """
    if sch_flag:
        # NOTE(review): unlike the non-sch branch, 'sch' text is not run
        # through resolve_pronouns — preserved from the original.
        text = story['sch']
        text_actual = get_sents(story['sch'])
    else:
        text = utils.resolve_pronouns(story['text'])
        text_actual = get_sents(story['text'])

    sents = get_sents(text)

    # Diagnostic: sentence segmentation should be stable under pronoun
    # resolution; dump both lists when it is not.
    if len(sents) != len(text_actual):
        print(len(sents), len(text_actual))
        print(sents)
        print(text_actual)

    keywords, pattern = get_keywords_pattern_tuple(question['text'],
                                                   question['par'])
    keywords += kw_adds

    ignorable = set(stop_words + [':', '`', '’', ',', '.', '!', "'", '"', '?'])

    eligible_sents = []
    for i, sent in enumerate(sents):
        words = nltk.word_tokenize(sent)
        words_pos = nltk.pos_tag(words)
        content_words = [w for w in words if w not in ignorable]
        sim_score = utils.get_similarity(list(set(content_words)),
                                         list(set(keywords)),
                                         story['sid'], question['qid'])

        # NOTE(review): lemmatization covers *all* tokens, not just the
        # stop-word-filtered ones — preserved from the original.
        lemmas = [lmtzr.lemmatize(w, pos=penn2wn(t)) for w, t in words_pos]

        quant = len(set(lemmas) & set(keywords))
        if sim_score:
            quant += sim_score
        eligible_sents.append((quant, text_actual[i], i))

    # Highest score wins; max() returns the first maximum, matching the
    # original stable reverse sort's tie-breaking on sentence order.
    _, best, index = max(eligible_sents, key=operator.itemgetter(0))
    return best, index
Пример #5
0
    def _compare_mention_with_node(self,
                                   mention_candidate,
                                   node,
                                   threshold,
                                   groundings,
                                   modes=None,
                                   initial_mode_weight=1.0,
                                   verbose=0):
        """Ground *mention_candidate* against one ontology *node*.

        Tries a backoff sequence of similarity modes, keeping the best
        weighted similarity seen so far in *groundings* (mutated in
        place, keyed by the node's grounding key).  Each successive
        backoff mode is down-weighted by 10%.

        Args:
            mention_candidate: mention to ground.
            node: ontology node to compare against.
            threshold: similarity above which no further backoff mode is
                tried (used only to stop the backoff, not to filter).
            groundings: dict updated in place with
                {grounding_key: (similarity, mode)}.
            modes: ordered similarity modes to try; defaults to the
                standard four-step backoff sequence.
            initial_mode_weight: weight applied to the first mode.
            verbose: >0 enables diagnostic printing.
        """
        if modes is None:
            modes = [
                utils.SimilarityMode.
                COMPARE_MENTION_KEYWORDS_TO_EXAMPLES_AVG_USING_BERT,
                utils.SimilarityMode.COMPARE_MENTION_KEYWORDS_TO_EXEMPLARS_AVG,
                utils.SimilarityMode.COMPARE_MENTION_STRING_TO_EXEMPLARS_AVG,
                utils.SimilarityMode.COMPARE_MENTION_STRING_TO_TYPE_NAME,
            ]

        # true backoff approach to ground this node
        groundings_key = Grounder.get_grounding_key(node)
        mode_weight = initial_mode_weight
        for mode in modes:
            similarity = utils.get_similarity(mention_candidate, node, mode,
                                              self._ontology.embeddings)
            similarity *= mode_weight

            if verbose > 0:
                print(
                    "Ontology node: {}\nExemplars:{}\nProcesses:{}\nParticipant"
                    "s:{}\nProperties:{}\nGrounding mode: {}\nSimilarity: {}"
                    "\nMode weight: {}".format(
                        node.get_path(), node.get_exemplars_with_weights(),
                        node.get_processes(), node.get_participants(),
                        node.get_properties(), mode.name, similarity,
                        mode_weight))

            # Keep only the best (similarity, mode) pair per grounding key.
            if groundings_key not in groundings \
                    or similarity > groundings[groundings_key][0]:
                if verbose > 0 and groundings_key in groundings:
                    print("Updating previous score {}".format(
                        groundings[groundings_key][0]))
                groundings[groundings_key] = (similarity, mode)

            # use threshold only to determine backoff strategy
            if similarity > threshold:
                break
            if verbose > 0:
                # BUG FIX: the original called .format(mode.name) on a
                # string with no placeholder, silently dropping the name.
                print("-- No grounding with mode {}.  Backing off...".format(
                    mode.name))

            mode_weight *= 0.9
Пример #6
0
  def __init__(self,
               model_to_explain,
               original_graph,
               discount_factor,
               weight_sim=0.5,
               similarity_measure="neural_encoding",
               **kwargs):
    """Set up a Cycliq counterfactual environment.

    Targets the class opposite to the original graph's label and builds
    the similarity machinery used to score candidate counterfactuals.
    """
    super(CF_Cycliq, self).__init__(**kwargs)

    # Optimise towards the label the original graph does NOT have.
    self.class_to_optimise = 1 - original_graph.y.item()

    self.model_to_explain = model_to_explain
    self.discount_factor = discount_factor
    self.weight_sim = weight_sim

    similarity_parts = get_similarity(similarity_measure,
                                      model_to_explain,
                                      original_graph)
    self.similarity, self.make_encoding, self.original_encoding = \
        similarity_parts
Пример #7
0
    def __init__(self,
                 model_to_explain,
                 original_molecule,
                 discount_factor,
                 fp_len,
                 fp_rad,
                 similarity_set=None,
                 weight_sim=0.5,
                 similarity_measure="tanimoto",
                 **kwargs):
        """Set up a Tox21 counterfactual environment.

        Targets the class opposite to the original molecule's label and
        builds the similarity machinery used to score candidates.
        """
        super(CF_Tox21, self).__init__(**kwargs)

        # Optimise towards the label the original molecule does NOT have.
        self.class_to_optimise = 1 - original_molecule.y.item()

        self.model_to_explain = model_to_explain
        self.discount_factor = discount_factor
        self.weight_sim = weight_sim

        similarity_parts = get_similarity(similarity_measure,
                                          model_to_explain,
                                          original_molecule,
                                          fp_len,
                                          fp_rad)
        self.similarity, self.make_encoding, self.original_encoding = \
            similarity_parts
Пример #8
0
def main():
    """Compute Duality Diagram Similarity (DDS) of every taskonomy task
    against pascal_voc_segmentation features, for several kernels and
    distance functions, over bootstrap resamples of the image set, and
    save the resulting affinity matrices.
    """
    parser = argparse.ArgumentParser(
        description=
        'Computing Duality Diagram Similarity between Taskonomy Tasks')
    parser.add_argument(
        '-d',
        '--dataset',
        help=
        'image dataset to use for computing DDS: options are [pascal_5000, taskonomy_5000, nyuv2]',
        default="pascal_5000",
        type=str)
    parser.add_argument('-fd',
                        '--feature_dir',
                        help='path to saved features root directory',
                        default="./features/",
                        type=str)
    parser.add_argument('-fdt',
                        '--feature_dir_taskonomy',
                        help='path to saved features from taskonomy models',
                        default="./features/taskonomy_activations/",
                        type=str)
    parser.add_argument('-fdp',
                        '--feature_dir_pascal',
                        help='path to saved features from pascal models',
                        default="./features/pascal_activations/",
                        type=str)
    parser.add_argument('-sd',
                        '--save_dir',
                        help='path to save the DDS results',
                        default="./results/DDScomparison_pascal",
                        type=str)
    parser.add_argument('-n',
                        '--num_images',
                        help='number of images to compute DDS',
                        default=200,
                        type=int)
    parser.add_argument('-i',
                        '--num_iters',
                        help='number of iterations for bootstrap',
                        default=100,
                        type=int)
    args = vars(parser.parse_args())

    num_images = args['num_images']
    dataset = args['dataset']
    taskonomy_feats_path = os.path.join(args['feature_dir_taskonomy'], dataset)
    pascal_feats_path = os.path.join(args['feature_dir_pascal'], dataset)
    num_repetitions = args['num_iters']
    features_filename = os.path.join(
        "./features", "taskonomy_pascal_feats_" + args['dataset'] + ".npy")
    # nyuv2 is the only dataset without 5000 images.
    num_total_images = 1449 if dataset == 'nyuv2' else 5000
    save_dir = os.path.join(args['save_dir'], dataset)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    task_list = list_of_tasks.split(' ')
    print(task_list)

    taskonomy_data = get_features(taskonomy_feats_path, pascal_feats_path,
                                  features_filename)

    # setting up DDS using Q,D,f,g
    kernel_type = ['rbf', 'lap', 'linear']  # possible kernels (f in DDS)
    dist_type = ['pearson', 'euclidean', 'cosine']  # possible distances
    feature_norm_type = ['znorm']  # possible normalizations (Q,D in DDS)

    def bootstrap_indices():
        """Fixed-seed bootstrap: num_repetitions draws of num_images indices.

        Re-seeding here keeps the resamples identical across methods,
        matching the original per-method np.random.seed(1993) calls.
        """
        np.random.seed(1993)
        return [
            np.random.choice(range(num_total_images),
                             num_images,
                             replace=False) for _ in range(num_repetitions)
        ]

    def compute_ablation(method_names, similarity_fn):
        """One (num_repetitions x num_tasks) affinity matrix per
        (method, feature_norm) combination, comparing every task against
        pascal_voc_segmentation on each bootstrap resample."""
        ablation = {}
        for method_name in method_names:
            ablation[method_name] = {}
            for feature_norm in feature_norm_type:
                indices = bootstrap_indices()
                print(method_name, feature_norm)
                affinity_matrix = np.zeros((num_repetitions, len(task_list)),
                                           float)
                for i in tqdm(range(num_repetitions)):
                    method = method_name + "__" + feature_norm
                    start = time.time()
                    for index1, task1 in enumerate(task_list):
                        affinity_matrix[i, index1] = similarity_fn(
                            taskonomy_data[task1][indices[i], :],
                            taskonomy_data['pascal_voc_segmentation'][
                                indices[i], :], method_name, feature_norm)
                    end = time.time()
                    print("Method is ", method)
                    print("Time taken is ", end - start)
                ablation[method_name][feature_norm] = affinity_matrix
        return ablation

    # Kernel-based DDS (f in DDS).
    np.save(os.path.join(save_dir, 'kernels.npy'),
            compute_ablation(kernel_type, get_similarity))

    # Distance-function-based DDS (RDM comparison).
    np.save(os.path.join(save_dir, 'rdms.npy'),
            compute_ablation(dist_type, get_similarity_from_rdms))
Пример #9
0
 def is_looping(self, threshold=0.9):
     """Return True when the two most recent results are more similar
     than *threshold* (i.e. the process appears to be looping)."""
     history = self.results
     if len(history) < 2:
         return False
     return get_similarity(history[-1], history[-2]) > threshold
Пример #10
0
import sys

import utils

# Interactive entry point: classify a yoga pose from a camera capture or
# an image file, then report the pose and its cosine similarity.

print('\n\nWelcome to the Yoga pose estimator')
print('----------------------------------\n')

print('Please enter 1 or 2: ')
print('1. Yoga pose from web camera')
# BUG FIX: a non-numeric entry used to crash with ValueError, and an
# invalid choice fell through with `path` undefined, crashing later in
# utils.classify_pose.  Both cases now exit with a clear message.
try:
    choice = int(input('2. Yoga picture from computer\n'))
except ValueError:
    choice = None

if choice == 1:
    print('Press q to start the count down timer and esc to exit once the picture is taken')
    utils.create_img_from_cam()
    path = 'images/camera.jpg'
elif choice == 2:
    path = input('Please enter the path of your image: ')
else:
    print('Choice is invalid')
    sys.exit(1)

pose = utils.classify_pose(path)
print("Yoga pose: ", pose)
print("Cosine similarity: ", "{:.2f}".format(utils.get_similarity(path, pose)))
utils.show_vectors(path)
Пример #11
0
def main():
    """Compute pairwise Duality Diagram Similarity (DDS) between all
    taskonomy tasks for several kernels and distance measures, saving
    one symmetric affinity matrix per (method, normalization) pair.
    """
    parser = argparse.ArgumentParser(description='Computing Duality Diagram Similarity between Taskonomy Tasks')
    parser.add_argument('-d', '--dataset', help='image dataset to use for computing DDS: options are [pascal_5000, taskonomy_5000, nyuv2]', default="taskonomy_5000", type=str)
    parser.add_argument('-fd', '--feature_dir', help='path to saved features from taskonomy models', default="./features/", type=str)
    parser.add_argument('-sd', '--save_dir', help='path to save the DDS results', default="./results/DDScomparison_taskonomy", type=str)
    parser.add_argument('-n', '--num_images', help='number of images to compute DDS', default=200, type=int)
    args = vars(parser.parse_args())

    num_images = args['num_images']
    dataset = args['dataset']
    features_filename = os.path.join(args['feature_dir'], "taskonomy_pascal_feats_" + args['dataset'] + ".npy")
    save_dir = os.path.join(args['save_dir'], dataset)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    task_list = list_of_tasks.split(' ')

    # features from taskonomy models for the first num_images images
    taskonomy_data = get_features(features_filename, num_images)

    # setting up DDS using Q,D,f,g
    kernel_type = ['rbf', 'lap', 'linear']  # possible kernels (f in DDS)
    dist_type = ['pearson', 'euclidean', 'cosine']  # possible distances
    feature_norm_type = ['None', 'centering', 'znorm', 'group_norm', 'instance_norm', 'layer_norm', 'batch_norm']  # possible normalizations (Q,D in DDS)

    def symmetric_affinity(similarity_fn, method_name, feature_norm):
        """Fill the symmetric task-by-task affinity matrix for one
        (method, normalization) combination."""
        matrix = np.zeros((len(task_list), len(task_list)), float)
        start = time.time()
        for index1, task1 in tqdm(enumerate(task_list)):
            for index2, task2 in enumerate(task_list):
                if index1 > index2:
                    continue  # symmetric: compute the upper triangle only
                matrix[index1, index2] = similarity_fn(taskonomy_data[task1],
                                                       taskonomy_data[task2],
                                                       method_name,
                                                       feature_norm)
                matrix[index2, index1] = matrix[index1, index2]
        end = time.time()
        print("Method is ", method_name + "__" + feature_norm)
        print("Time taken is ", end - start)
        return matrix

    # Kernel-based DDS.
    affinity_ablation = {}
    for kernel in kernel_type:
        affinity_ablation[kernel] = {}
        for feature_norm in feature_norm_type:
            affinity_ablation[kernel][feature_norm] = symmetric_affinity(
                get_similarity, kernel, feature_norm)
    # BUG FIX: the original re-saved this file inside the kernel loop,
    # repeatedly rewriting partial results; a single save suffices.
    np.save(os.path.join(save_dir, 'kernels.npy'), affinity_ablation)

    # Distance-function-based DDS (RDM comparison).
    affinity_ablation = {}
    for dist in dist_type:
        affinity_ablation[dist] = {}
        for feature_norm in feature_norm_type:
            affinity_ablation[dist][feature_norm] = symmetric_affinity(
                get_similarity_from_rdms, dist, feature_norm)
    np.save(os.path.join(save_dir, 'rdms.npy'), affinity_ablation)