Example #1
    def extract(self, item, query, target_keys=None):

        if target_keys:
            self.print_instance_difficulty(item, query, target_keys)

        item_hrr = HRR(data=item)
        query_hrr = HRR(data=query)
        noisy_hrr = item_hrr.convolve(~query_hrr)
        return self.associate(noisy_hrr.v, target_keys)
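
All of the examples in this section lean on an HRR class that is never defined here. For reference, below is a minimal sketch of such a class in the style of Plate's Holographic Reduced Representations, assuming FFT-based circular convolution for convolve/*, the involution (an approximate inverse) for ~, and cosine similarity for compare. The names mirror the usage in these examples; the implementation details are assumptions, not the original code.

    import numpy as np

    class HRR(object):
        """Minimal Holographic Reduced Representation (sketch only)."""

        def __init__(self, dimension=None, data=None):
            if data is not None:
                self.v = np.asarray(data, dtype=float)
            else:
                # Random vector with expected unit norm.
                self.v = np.random.normal(0.0, 1.0 / np.sqrt(dimension),
                                          dimension)

        def convolve(self, other):
            # Circular convolution: elementwise product in Fourier space.
            return HRR(data=np.real(np.fft.ifft(
                np.fft.fft(self.v) * np.fft.fft(other.v))))

        __mul__ = convolve

        def __add__(self, other):
            return HRR(data=self.v + other.v)

        def __invert__(self):
            # Involution x[-n mod d]: approximate inverse under convolution.
            return HRR(data=np.roll(self.v[::-1], 1))

        def compare(self, other):
            # Cosine similarity between the two vectors.
            norm = np.linalg.norm(self.v) * np.linalg.norm(other.v)
            return float(np.dot(self.v, other.v) / norm) if norm else 0.0

        def normalize(self):
            norm = np.linalg.norm(self.v)
            if norm:
                self.v = self.v / norm

        def make_unitary(self):
            # Set every Fourier coefficient to unit magnitude, so that
            # convolving with this vector preserves norms exactly.
            coefs = np.fft.fft(self.v)
            self.v = np.real(np.fft.ifft(coefs / np.abs(coefs)))
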
Example #2
    def get_stats(self, clean_result_vector, goal, other_answers, fp):
        size = np.linalg.norm(clean_result_vector)

        if not goal:
            comparisons = self.find_matches(clean_result_vector,
                                            self.semantic_pointers)

            largest_match = max(comparisons, key=lambda x: x[1])
            return (largest_match[0], largest_match[1], size)
        else:
            comparisons = self.find_matches(clean_result_vector,
                                            self.semantic_pointers,
                                            exempt=[goal])

            if other_answers:
                invalids = []
                valids = []
                for c in comparisons:
                    if c[0] in other_answers:
                        valids.append(c)
                    else:
                        invalids.append(c)

                # Guard against the (rare) case where every non-goal key is
                # an acceptable answer, leaving no invalid matches.
                if invalids:
                    max_invalid = max(invalids, key=lambda x: x[1])
                else:
                    max_invalid = (None, -np.inf)
                max_invalid_key, max_invalid_match = max_invalid

                if len(valids) == 0:
                    second_key, second_match = max_invalid
                else:
                    max_valid = max(valids, key=lambda x: x[1])
                    max_valid_key, max_valid_match = max_valid

                    if max_invalid_match > max_valid_match:
                        second_key, second_match = max_invalid
                    else:
                        second_key, second_match = max_valid

            else:
                second_key, second_match = max(comparisons, key=lambda x: x[1])
                max_invalid_match = second_match

            hrr_vec = HRR(data=self.semantic_pointers[goal])
            target_match = hrr_vec.compare(HRR(data=clean_result_vector))

            if target_match > second_match:
                clean_result = goal
            else:
                clean_result = second_key

            return (clean_result, target_match, second_match, size,
                    max_invalid_match)
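
get_stats separates the similarity to the intended goal from the strongest competitor, and additionally tracks the best invalid match as an error statistic. The following standalone trace of that selection logic uses made-up keys and similarity scores:

    # Hypothetical (key, similarity) pairs, goal already exempted, as
    # find_matches yields them.
    comparisons = [('dog', 0.41), ('cat', 0.38), ('car', 0.12)]
    other_answers = ['dog']   # other acceptable answers for this query

    valids = [c for c in comparisons if c[0] in other_answers]
    invalids = [c for c in comparisons if c[0] not in other_answers]

    max_invalid = max(invalids, key=lambda x: x[1])        # ('cat', 0.38)
    if valids:
        second = max(max(valids, key=lambda x: x[1]), max_invalid,
                     key=lambda x: x[1])                   # ('dog', 0.41)
    else:
        second = max_invalid

    target_match = 0.55  # made-up similarity between cleaned vector and goal
    clean_result = 'goal' if target_match > second[1] else second[0]
    print("result: %s" % clean_result)                     # result: goal
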
Example #3
    def get_stats(self, clean_result_vector, goal, other_answers, fp):
        size = np.linalg.norm(clean_result_vector)

        if not goal:
            comparisons = self.find_matches(clean_result_vector,
                                            self.semantic_pointers)

            largest_match = max(comparisons, key=lambda x: x[1])
            return (largest_match[0], largest_match[1], size)
        else:
            comparisons = self.find_matches(
                clean_result_vector, self.semantic_pointers, exempt=[goal])

            if other_answers:
                invalids = []
                valids = []
                for c in comparisons:
                    if c[0] in other_answers:
                        valids.append(c)
                    else:
                        invalids.append(c)

                # Guard against the (rare) case where every non-goal key is
                # an acceptable answer, leaving no invalid matches.
                if invalids:
                    max_invalid = max(invalids, key=lambda x: x[1])
                else:
                    max_invalid = (None, -np.inf)
                max_invalid_key, max_invalid_match = max_invalid

                if len(valids) == 0:
                    second_key, second_match = max_invalid
                else:
                    max_valid = max(valids, key=lambda x: x[1])
                    max_valid_key, max_valid_match = max_valid

                    if max_invalid_match > max_valid_match:
                        second_key, second_match = max_invalid
                    else:
                        second_key, second_match = max_valid

            else:
                second_key, second_match = max(comparisons, key=lambda x: x[1])
                max_invalid_match = second_match

            hrr_vec = HRR(data=self.semantic_pointers[goal])
            target_match = hrr_vec.compare(HRR(data=clean_result_vector))

            if target_match > second_match:
                clean_result = goal
            else:
                clean_result = second_key

            return (clean_result, target_match,
                    second_match, size, max_invalid_match)
Example #4
    def create_role_hrrs(self):

        role_hrrs = {}
        for role in self.sentence_symbols:

            role_hrrs[role] = HRR(self.dimension)

            if self.unitary:
                role_hrrs[role].make_unitary()

        return role_hrrs
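
Whether these role vectors are made unitary matters for the deep sentence tests later on, where roles are convolved together to address embedded fillers: a unitary vector's involution is its exact inverse, and binding with it preserves norms. A quick check of both properties, assuming the HRR sketch given after Example #1:

    import numpy as np

    role = HRR(512)
    role.make_unitary()
    filler = HRR(512)

    bound = role * filler
    # Binding with a unitary role preserves the filler's norm...
    print("norms: %.3f vs %.3f" % (np.linalg.norm(filler.v),
                                   np.linalg.norm(bound.v)))
    # ...and unbinding with ~role recovers the filler exactly.
    print("similarity: %.3f" % filler.compare(bound * ~role))
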
Example #5
    def run(self):
        expression = self.expression

        dimension = len(self.id_vectors.values()[0])
        expression = expression.replace('!id', 'p0')

        num_ids = expression.count('id')
        expression = expression.replace('id', '%s')
        temp_names = ['id' + str(i) for i in range(num_ids)]
        expression = expression % tuple(temp_names)

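        # After the '%s' substitution, each placeholder name ('id0', 'id1',
        # ...) still contains the substring 'id', so this count equals
        # num_ids; the one extra key sampled becomes the target bound to p0.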
        chosen_id_keys = self.rng.sample(self.id_vectors,
                                         expression.count('id') + 1)

        chosen_id_vectors = [
            HRR(data=self.id_vectors[key]) for key in chosen_id_keys
        ]
        target_key = chosen_id_keys[0]

        names_dict = dict(zip(['p0'] + temp_names, chosen_id_vectors))
        names_keys_dict = dict(zip(['p0'] + temp_names, chosen_id_keys))

        query_vectors = nf.find_query_vectors(expression, 'p0')
        query_expression = '*'.join(query_vectors)

        temp_names = expression.replace('*', '+').split('+')
        temp_names = [tn.strip() for tn in temp_names]
        unitary_names = [u for u in temp_names if u[-1:] == "u"]

        vocab = Vocabulary(dimension, unitary=unitary_names)
        for n, v in names_dict.iteritems():
            vocab.add(n, v)

        print "expression:", expression
        print "query_expression:", query_expression
        print "unitary_names:", unitary_names
        print "target_key:", target_key
        print "name_keys_dict:", names_keys_dict

        test_vector = eval(expression, {}, vocab)
        test_vector.normalize()

        query_vector = eval(query_expression, {}, vocab)

        result, correct, valid, exact = self.test_link(query_vector.v,
                                                       test_vector.v,
                                                       None,
                                                       target_key,
                                                       self.output_file,
                                                       return_vec=False,
                                                       answers=[target_key])
Example #6
    def print_instance_difficulty(self, item, query, target_keys):

        if target_keys:
            # Print data about how difficult the current instance is
            correct_key = target_keys[0]

            item_hrr = HRR(data=item)
            query_hrr = HRR(data=query)
            noisy_hrr = item_hrr.convolve(~query_hrr)

            correct_hrr = HRR(data=self.index_vectors[correct_key])
            sim = noisy_hrr.compare(correct_hrr)
            dot = np.dot(noisy_hrr.v, correct_hrr.v)
            norm = np.linalg.norm(noisy_hrr.v)
            print "Statistics when extraction computed exactly:"
            print ("Cosine Similarity between extracted vector "
                   "(before assoc) and correct index vector: "), sim
            print ("Dot product between extracted vector (before assoc) "
                   "and correct index vector: "), dot
            print "Norm of extracted vector (before assoc): ", norm

            self.ideal_dot = dot

            hrrs = [(key, HRR(data=iv))
                    for key, iv in self.index_vectors.iteritems()
                    if key != correct_key]

            sims = [noisy_hrr.compare(h) for (k, h) in hrrs]
            dots = [np.dot(noisy_hrr.v, h.v) for (k, h) in hrrs]
            sim = max(sims)
            dot = max(dots)

            print "Cosine Similarity of closest incorrect index vector ", sim
            print "Dot product of closest incorrect index vector ", dot

            self.second_dot = dot
Example #7
    def __init__(self,
                 index_vectors,
                 stored_vectors,
                 bootstrapper,
                 threshold=0.3,
                 output_dir='.'):

        self.index_vectors = index_vectors
        self.stored_vectors = stored_vectors
        self.threshold = threshold
        self.dimension = len(index_vectors.values()[0])
        self.num_items = len(index_vectors)
        self.hrr_vecs = collections.OrderedDict([
            (key, HRR(data=self.index_vectors[key]))
            for key in self.index_vectors
        ])

        self.similarities = collections.OrderedDict(
            zip(self.index_vectors, [0 for i in range(len(index_vectors))]))

        self.return_vec = True

        self.bootstrapper = bootstrapper
        self.output_dir = output_dir
Example #8
    def print_instance_difficulty(self, item, query, target_keys):

        if target_keys:
            # Print data about how difficult the current instance is
            correct_key = target_keys[0]

            item_hrr = HRR(data=item)
            query_hrr = HRR(data=query)
            noisy_hrr = item_hrr.convolve(~query_hrr)

            correct_hrr = HRR(data=self.index_vectors[correct_key])
            sim = noisy_hrr.compare(correct_hrr)
            dot = np.dot(noisy_hrr.v, correct_hrr.v)
            norm = np.linalg.norm(noisy_hrr.v)
            print "Statistics when extraction computed exactly:"
            print(
                "Cosine Similarity between extracted vector "
                "(before assoc) and correct index vector: "), sim
            print(
                "Dot product between extracted vector (before assoc) "
                "and correct index vector: "), dot
            print "Norm of extracted vector (before assoc): ", norm

            self.ideal_dot = dot

            hrrs = [(key, HRR(data=iv))
                    for key, iv in self.index_vectors.iteritems()
                    if key != correct_key]

            sims = [noisy_hrr.compare(h) for (k, h) in hrrs]
            dots = [np.dot(noisy_hrr.v, h.v) for (k, h) in hrrs]
            sim = max(sims)
            dot = max(dots)

            print "Cosine Similarity of closest incorrect index vector ", sim
            print "Dot product of closest incorrect index vector ", dot

            self.second_dot = dot
Example #9
    def findAllParents(self,
                       start_key,
                       target_key=None,
                       rtype=[],
                       use_HRR=False,
                       print_output=False):

        if print_output:
            print >> self.output_file, \
                "In find all parents, useHRR=", use_HRR

            print >> self.output_file, "Start:", start_key

            if target_key is not None:
                print >> self.output_file, "Target:", target_key

        use_vecs = use_HRR and self.extractor.return_vec

        level = 0
        if use_vecs:
            layerA = [self.semantic_pointers[start_key]]

            if target_key:
                target_vector = self.semantic_pointers[target_key]
                target_hrr = HRR(data=target_vector)
        else:
            layerA = [start_key]

        layerB = []
        parents = set()

        while len(layerA) > 0:
            word = layerA.pop()

            # test whether we've found the target
            found = False
            if use_vecs:
                word_hrr = HRR(data=word)
                # target_hrr exists only when a target_key was supplied.
                found = (target_key is not None and
                         target_hrr.compare(word_hrr) >
                         self.decision_threshold)
            else:
                found = word == target_key

            if found:
                if print_output:
                    print >> self.output_file, target_key, \
                        "found at level ", level

                return level

            if use_vecs:
                key = self.get_key_from_vector(word, self.semantic_pointers)
            else:
                key = word

            if key:
                if key in parents:
                    continue

                if level > 0:
                    parents.add(key)

                    if print_output:
                        print >> self.output_file, key, \
                            "found at level ", level

                links = []

                if not use_HRR:
                    links = [
                        r[1] for r in self.corpus_dict[word] if r[0] in rtype
                    ]
                else:

                    for symbol in rtype:
                        answers = [
                            r[1] for r in self.corpus_dict[key]
                            if r[0] == symbol
                        ]
                        relation_vec = self.relation_type_vectors[symbol]

                        if len(answers) == 0:
                            target = None
                        else:
                            target = answers[0]

                        relations = filter(
                            lambda x: x[0] in self.relation_type_vectors,
                            self.corpus_dict[key])

                        num_relations = len(relations)

                        if use_vecs:
                            result = self.test_link(
                                relation_vec,
                                word,
                                key,
                                target,
                                self.output_file,
                                return_vec=True,
                                depth=level,
                                num_relations=num_relations,
                                answers=answers)

                            links.append(result)

                        else:
                            results = self.test_link(
                                relation_vec,
                                None,
                                key,
                                target,
                                self.output_file,
                                return_vec=False,
                                depth=level,
                                num_relations=num_relations,
                                answers=answers)

                            if answers:
                                results = results[0]

                            links.extend(results)

                if len(links) > 0:
                    layerB.extend(links)

            if len(layerA) == 0:
                level = level + 1
                layerA = layerB
                layerB = []

        if target_key is None:
            return list(parents)
        else:
            return -1
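
Stripped of the HRR machinery, the traversal above is a breadth-first search over the chosen relation types that uses two alternating layers to track depth. A minimal symbolic version with a hypothetical corpus (a dict mapping each key to its (relation, key) pairs):

    def find_all_parents(corpus, start, rtypes):
        # Breadth-first over the relation types in rtypes, recording the
        # level at which each parent is first reached.
        layer_a, layer_b = [start], []
        parents, level = {}, 0
        while layer_a:
            word = layer_a.pop()
            if word in parents:
                continue
            if level > 0:
                parents[word] = level
            layer_b.extend(r[1] for r in corpus.get(word, [])
                           if r[0] in rtypes)
            if not layer_a:
                level += 1
                layer_a, layer_b = layer_b, []
        return parents

    corpus = {'dog': [('@', 'canine')], 'canine': [('@', 'animal')]}
    print(find_all_parents(corpus, 'dog', ['@']))
    # {'canine': 1, 'animal': 2}
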
Example #10
    def plot_simulation(self, target_keys):
        then = datetime.datetime.now()

        correct_key = None
        if target_keys:
            correct_key = target_keys[0]

        sim = self.simulator
        t = sim.trange()

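        # Elements of a unit-norm d-dimensional vector have scale ~1/sqrt(d);
        # these limits leave the input plots 5x headroom.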
        max_val = 5.0 / np.sqrt(self.dimension)

        gs = gridspec.GridSpec(7, 2)
        fig = plt.figure(figsize=(10, 10))

        ax = plt.subplot(gs[0, 0])

        plt.plot(t, self.data[self.D_probe], label='D')
        title = 'Input to associative memory'
        ax.text(.01, 1.20, title, horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-max_val, max_val))

        ax = plt.subplot(gs[0, 1])
        plt.plot(t, self.data[self.output_probe], label='Output')
        title = 'Output of associative memory'
        ax.text(.01, 1.20, title, horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-max_val, max_val))

        ax = plt.subplot(gs[1:3, :])

        if len(self.index_vectors) < 1000:
            for key, v in self.index_vectors.iteritems():
                input_sims = np.dot(self.data[self.D_probe], v)
                label = str(key[1])
                if key == correct_key:
                    plt.plot(t, input_sims, '--', label=label + '*')
                else:
                    plt.plot(t, input_sims, label=label)

            title = (
                'Dot product between id vectors and input to assoc memory.\n'
                'Target %s is dashed line.' % str(correct_key))

            ax.text(.01, 0.80, title, horizontalalignment='left',
                    transform=ax.transAxes)
            # plt.legend(bbox_to_anchor=(-0.03, 0.5), loc='center right')
            if self.ideal_dot:
                ax.text(.01, 0.10, "Ideal dot: " + str(self.ideal_dot),
                        horizontalalignment='left', transform=ax.transAxes)
            if self.second_dot:
                ax.text(.99, 0.10, "Second dot: " + str(self.second_dot),
                        horizontalalignment='right', transform=ax.transAxes)

            plt.ylim((-1.0, 1.5))
            plt.axhline(1.0, ls=':', c='k')

        ax = plt.subplot(gs[3:5, :])
        for key, p in self.assoc_probes.iteritems():
            if key == correct_key:
                plt.plot(t, self.data[p], '--')
            else:
                plt.plot(t, self.data[p])

        title = (
            'Decoded values of association populations.\n' +
            'Target %s is dashed line.' % str(correct_key))

        ax.text(.01, 0.80, title, horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-0.2, 1.5))
        plt.axhline(y=1.0, ls=':', c='k')

        ax = plt.subplot(gs[5:7, :])
        before_ls = '--'
        after_ls = '-'
        before_norms = [np.linalg.norm(v) for v in self.data[self.D_probe]]
        after_norms = [np.linalg.norm(v) for v in self.data[self.output_probe]]

        plt.plot(t, before_norms, before_ls, c='g', label='Norm - Before')
        plt.plot(t, after_norms, after_ls, c='g', label='Norm - After')

        if correct_key is not None:
            correct_index_hrr = HRR(data=self.index_vectors[correct_key])
            correct_stored_hrr = HRR(data=self.stored_vectors[correct_key])

            before_sims = [correct_index_hrr.compare(HRR(data=i))
                           for i in self.data[self.D_probe]]

            after_sims = [correct_stored_hrr.compare(HRR(data=o))
                          for o in self.data[self.output_probe]]

            plt.plot(t, before_sims, before_ls, c='b',
                     label='Cosine Sim - Before')
            plt.plot(t, after_sims, after_ls, c='b',
                     label='Cosine Sim - After')

        title = 'Before and After Associative Memory'
        ax.text(.01, 0.90, title, horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-1.0, 1.5))
        plt.legend(loc=4, prop={'size': 6})
        plt.axhline(y=1.0, ls=':', c='k')
        ax.set_xlabel('Time (s)')

        date_time_string = str(datetime.datetime.now()).split('.')[0]
        date_time_string = reduce(lambda y, z: string.replace(y, z, "_"),
                                  [date_time_string, ":", ".", " ", "-"])

        plot_name = 'neural_extraction_' + date_time_string + ".png"
        plot_path = os.path.join(self.output_dir, plot_name)

        plt.savefig(plot_path)

        symlink_name = os.path.join(
            self.output_dir, 'latest_neural_extraction')
        make_sym_link(plot_name, symlink_name)

        now = datetime.datetime.now()
        self.write_to_runtime_file(now - then, "plot")

        plt.close(fig)
Example #11
    def form_knowledge_base(self, id_vecs=True, unitary=False):

        # Check existence of corpus
        if self.corpus_dict is None:
            raise Exception("Attempted to form the knowledge "
                            "base without a corpus.")

        print "Number of items in knowledge base:", len(self.corpus_dict)

        if not id_vecs:
            print "Processing Corpus"
            self.processCorpus()

        print "Generating relation type vectors"
        print "Using relation types: ", self.relation_symbols

        self.relation_type_vectors = {symbol: HRR(self.dimension)
                                      for symbol in self.relation_symbols}
        if unitary:
            for k, h in self.relation_type_vectors.iteritems():
                h.make_unitary()

        if id_vecs:
            key_order = self.corpus_dict.keys()
        else:
            # Order words by the dependencies of their definitions
            # Only have to do this if we're not using ID-vectors
            key_order = []
            resolved = set(self.relation_symbols)

            dependencies = {}
            for key in self.corpus_dict.keys():
                dependencies[key] = set(
                    [tag[1] for tag in self.corpus_dict[key]
                     if tag[0] in self.relation_symbols])

            while len(key_order) < (len(self.corpus_dict)
                                    + len(self.relation_symbols)):

                resolvable = set()

                for key in dependencies:
                    if dependencies[key].issubset(resolved):
                        resolvable.add(key)

                # add the resolvable keys to the order list and resolved set
                key_order.extend(resolvable)
                resolved = resolved.union(resolvable)

                # remove resolved tags from the dependency dictionary
                for r in resolvable:
                    del dependencies[r]

                # if no items are resolvable, we're stuck
                if len(resolvable) == 0:
                    break

            del resolved
            del resolvable
            if len(key_order) < len(self.corpus_dict):
                raise Exception("Dependency resolution failed.")

        self.semantic_pointers = collections.OrderedDict()

        print "Generating ID-vectors"

        if id_vecs:
            self.id_vectors = collections.OrderedDict()

            for key in key_order:
                self.id_vectors[key] = HRR(self.dimension)
        else:
            self.id_vectors = self.semantic_pointers

        print "Generating HRR vectors"
        for key in key_order:
            relations = filter(
                lambda x: x[0] in self.relation_symbols,
                self.corpus_dict[key])

            if len(relations) == 0:
                self.semantic_pointers[key] = HRR(self.dimension)
                continue

            semantic_pointer = HRR(data=np.zeros(self.dimension))

            for n in range(self.sp_noise):
                semantic_pointer += HRR(self.dimension)

            for relation in relations:

                id_vector = self.id_vectors[relation[1]]

                relation_type_vector = self.relation_type_vectors[relation[0]]

                pair = id_vector * relation_type_vector

                semantic_pointer += pair

            if self.normalize:
                semantic_pointer.normalize()

            self.semantic_pointers[key] = semantic_pointer

        # convert all vectors from hrrs to numpy ndarrays
        for k in key_order:
            h = self.semantic_pointers[k]
            self.semantic_pointers[k] = h.v

        if id_vecs:
            for k in key_order:
                h = self.id_vectors[k]
                self.id_vectors[k] = h.v

        for k in self.relation_type_vectors:
            h = self.relation_type_vectors[k]
            self.relation_type_vectors[k] = h.v
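
In vector terms, the loop above builds each semantic pointer as a superposition of bound pairs, semantic_pointer = sum(relation_type * id_vector) over the key's relations, with optional noise and normalization. A compact sketch of the same construction, and of the unbinding it supports, using hypothetical data and the HRR sketch from Example #1:

    import numpy as np

    dimension = 512
    id_vectors = {key: HRR(dimension) for key in ('animal', 'fur')}
    relation_types = {sym: HRR(dimension) for sym in ('@', '~')}
    relations = [('@', 'animal'), ('~', 'fur')]   # hypothetical corpus entry

    semantic_pointer = HRR(data=np.zeros(dimension))
    for sym, child in relations:
        semantic_pointer += relation_types[sym] * id_vectors[child]
    semantic_pointer.normalize()

    # Unbinding a relation type points back at the child's id vector.
    extracted = semantic_pointer * ~relation_types['@']
    print("match: %.3f" % extracted.compare(id_vectors['animal']))
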
Example #12
    def form_knowledge_base(self, id_vecs=True, unitary=False):

        # Check existence of corpus
        if self.corpus_dict is None:
            raise Exception("Attempted to form the knowledge "
                            "base without a corpus.")

        print "Number of items in knowledge base:", len(self.corpus_dict)

        if not id_vecs:
            print "Processing Corpus"
            self.processCorpus()

        print "Generating relation type vectors"
        print "Using relation types: ", self.relation_symbols

        self.relation_type_vectors = {
            symbol: HRR(self.dimension)
            for symbol in self.relation_symbols
        }
        if unitary:
            for k, h in self.relation_type_vectors.iteritems():
                h.make_unitary()

        if id_vecs:
            key_order = self.corpus_dict.keys()
        else:
            # Order words by the dependencies of their definitions
            # Only have to do this if we're not using ID-vectors
            key_order = []
            resolved = set(self.relation_symbols)

            dependencies = {}
            for key in self.corpus_dict.keys():
                dependencies[key] = set([
                    tag[1] for tag in self.corpus_dict[key]
                    if tag[0] in self.relation_symbols
                ])

            while len(key_order) < (len(self.corpus_dict) +
                                    len(self.relation_symbols)):

                resolvable = set()

                for key in dependencies:
                    if dependencies[key].issubset(resolved):
                        resolvable.add(key)

                # add the resolvable keys to the order list and resolved set
                key_order.extend(resolvable)
                resolved = resolved.union(resolvable)

                # remove resolved tags from the dependency dictionary
                for r in resolvable:
                    del dependencies[r]

                # if no items are resolvable, we're stuck
                if len(resolvable) == 0:
                    break

            del resolved
            del resolvable
            if len(key_order) < len(self.corpus_dict):
                raise Exception("Dependency resolution failed.")

        self.semantic_pointers = collections.OrderedDict()

        print "Generating ID-vectors"

        if id_vecs:
            self.id_vectors = collections.OrderedDict()

            for key in key_order:
                self.id_vectors[key] = HRR(self.dimension)
        else:
            self.id_vectors = self.semantic_pointers

        print "Generating HRR vectors"
        for key in key_order:
            relations = filter(lambda x: x[0] in self.relation_symbols,
                               self.corpus_dict[key])

            if len(relations) == 0:
                self.semantic_pointers[key] = HRR(self.dimension)
                continue

            semantic_pointer = HRR(data=np.zeros(self.dimension))

            for n in range(self.sp_noise):
                semantic_pointer += HRR(self.dimension)

            for relation in relations:

                id_vector = self.id_vectors[relation[1]]

                relation_type_vector = self.relation_type_vectors[relation[0]]

                pair = id_vector * relation_type_vector

                semantic_pointer += pair

            if self.normalize:
                semantic_pointer.normalize()

            self.semantic_pointers[key] = semantic_pointer

        # convert all vectors from hrrs to numpy ndarrays
        for k in key_order:
            h = self.semantic_pointers[k]
            self.semantic_pointers[k] = h.v

        if id_vecs:
            for k in key_order:
                h = self.id_vectors[k]
                self.id_vectors[k] = h.v

        for k in self.relation_type_vectors:
            h = self.relation_type_vectors[k]
            self.relation_type_vectors[k] = h.v
Example #13
    def findAllParents(self, start_key, target_key=None, rtype=[],
                       use_HRR=False, print_output=False):

        if print_output:
            print >> self.output_file, \
                "In find all parents, useHRR=", use_HRR

            print >> self.output_file, "Start:", start_key

            if target_key is not None:
                print >> self.output_file, "Target:", target_key

        use_vecs = use_HRR and self.extractor.return_vec

        level = 0
        if use_vecs:
            layerA = [self.semantic_pointers[start_key]]

            if target_key:
                target_vector = self.semantic_pointers[target_key]
                target_hrr = HRR(data=target_vector)
        else:
            layerA = [start_key]

        layerB = []
        parents = set()

        while len(layerA) > 0:
            word = layerA.pop()

            # test whether we've found the target
            found = False
            if use_vecs:
                word_hrr = HRR(data=word)
                # target_hrr exists only when a target_key was supplied.
                found = (target_key is not None and
                         target_hrr.compare(word_hrr) >
                         self.decision_threshold)
            else:
                found = word == target_key

            if found:
                if print_output:
                    print >> self.output_file, target_key, \
                        "found at level ", level

                return level

            if use_vecs:
                key = self.get_key_from_vector(word, self.semantic_pointers)
            else:
                key = word

            if key:
                if key in parents:
                    continue

                if level > 0:
                    parents.add(key)

                    if print_output:
                        print >> self.output_file, key, \
                            "found at level ", level

                links = []

                if not use_HRR:
                    links = [r[1] for r in self.corpus_dict[word]
                             if r[0] in rtype]
                else:

                    for symbol in rtype:
                        answers = [r[1] for r in self.corpus_dict[key]
                                   if r[0] == symbol]
                        relation_vec = self.relation_type_vectors[symbol]

                        if len(answers) == 0:
                            target = None
                        else:
                            target = answers[0]

                        relations = filter(
                            lambda x: x[0] in self.relation_type_vectors,
                            self.corpus_dict[key])

                        num_relations = len(relations)

                        if use_vecs:
                            result = self.test_link(
                                relation_vec, word, key, target,
                                self.output_file,
                                return_vec=True, depth=level,
                                num_relations=num_relations,
                                answers=answers)

                            links.append(result)

                        else:
                            results = self.test_link(
                                relation_vec, None, key, target,
                                self.output_file,
                                return_vec=False, depth=level,
                                num_relations=num_relations, answers=answers)

                            if answers:
                                results = results[0]

                            links.extend(results)

                if len(links) > 0:
                    layerB.extend(links)

            if len(layerA) == 0:
                level = level + 1
                layerA = layerB
                layerB = []

        if target_key is None:
            return list(parents)
        else:
            return -1
Example #14
    def run(self):
        self.dimension = len(self.id_vectors.values()[0])

        self.role_hrrs = self.create_role_hrrs()
        self.pos_map = self.create_pos_map()

        score = defaultdict(float)

        for i in range(self.num_trials):
            title = "New Sentence Test"
            if self.deep:
                title += "- Deep"

            tools.print_header(self.output_file, title)

            sentence = self.generate_sentence()

            if self.deep:
                embed = self.rng.sample(sentence.keys(), 1)[0]

                embedded_sentence = self.generate_sentence()

                del sentence[embed]

                for role in embedded_sentence.keys():
                    sentence[embed + role] = embedded_sentence[role]

            tag_vectors = {}
            sentence_hrr = HRR(data=np.zeros(self.dimension))

            # Pick role-fillers and create HRR representing the sentence
            # Also store the hrr to use as the query to extract each synset
            # included in the sentence.
            for role in sentence:
                tag_hrr = [self.role_hrrs[x] for x in role]
                tag_hrr = reduce(lambda x, y: x * y, tag_hrr)

                synset = sentence[role]

                sentence_hrr += tag_hrr * HRR(data=self.id_vectors[synset])

                tag_vectors[role] = tag_hrr.v

            sentence_hrr.normalize()

            sentence_vector = sentence_hrr.v

            print >> self.output_file, "Roles in sentence:"
            print >> self.output_file, sentence

            # ask about parts of the sentence
            sentence_score = defaultdict(float)
            sentence_length = defaultdict(float)
            for role in sentence.keys():

                answer = sentence[role]

                self.current_start_key = None
                self.current_target_keys = [answer]
                self.current_num_relations = len(sentence)

                print >> self.output_file, "\nTesting ", role

                result, correct, valid, exact = self.test_link(
                    tag_vectors[role],
                    sentence_vector,
                    None,
                    answer,
                    output_file=self.output_file,
                    return_vec=False,
                    num_relations=len(sentence),
                    answers=[answer])

                depth = len(role)
                if correct:
                    sentence_score[depth] += 1
                    print >> self.output_file, "Correct."
                else:
                    print >> self.output_file, "Incorrect."

                sentence_length[depth] += 1

                if self.short:
                    break

            for d in sentence_length:
                sentence_percent = sentence_score[d] / sentence_length[d]

                print >> self.output_file, \
                    "Percent correct for current sentence at depth %d: %f" \
                    % (d, sentence_percent)

                score[d] = score[d] + sentence_percent

        for d in score:
            print "Sentence test score at depth %d: %f out of %d" \
                % (d, score[d], self.num_trials)

            percent = score[d] / self.num_trials

            title = "Sentence Test Summary - Depth = %d" % d
            tools.print_header(self.output_file, title)
            print >> self.output_file, "Correct: ", score[d]
            print >> self.output_file, "Total: ", self.num_trials
            print >> self.output_file, "Percent: ", percent
            tools.print_footer(self.output_file, title)

            self.add_data("sentence_score_%d" % d, percent)
Example #15
    def run(self):
        self.dimension = len(self.id_vectors.values()[0])

        self.role_hrrs = self.create_role_hrrs()
        self.pos_map = self.create_pos_map()

        score = defaultdict(float)

        for i in range(self.num_trials):
            title = "New Sentence Test"
            if self.deep:
                title += "- Deep"

            tools.print_header(self.output_file, title)

            sentence = self.generate_sentence()

            if self.deep:
                embed = self.rng.sample(sentence.keys(), 1)[0]

                embedded_sentence = self.generate_sentence()

                del sentence[embed]

                for role in embedded_sentence.keys():
                    sentence[embed + role] = embedded_sentence[role]

            tag_vectors = {}
            sentence_hrr = HRR(data=np.zeros(self.dimension))

            # Pick role-fillers and create HRR representing the sentence
            # Also store the hrr to use as the query to extract each synset
            # included in the sentence.
            for role in sentence:
                tag_hrr = [self.role_hrrs[x] for x in role]
                tag_hrr = reduce(lambda x, y: x * y, tag_hrr)

                synset = sentence[role]

                sentence_hrr += tag_hrr * HRR(data=self.id_vectors[synset])

                tag_vectors[role] = tag_hrr.v

            sentence_hrr.normalize()

            sentence_vector = sentence_hrr.v

            print >> self.output_file, "Roles in sentence:"
            print >> self.output_file, sentence

            # ask about parts of the sentence
            sentence_score = defaultdict(float)
            sentence_length = defaultdict(float)
            for role in sentence.keys():

                answer = sentence[role]

                self.current_start_key = None
                self.current_target_keys = [answer]
                self.current_num_relations = len(sentence)

                print >> self.output_file, "\nTesting ", role

                result, correct, valid, exact = self.test_link(
                    tag_vectors[role], sentence_vector, None, answer,
                    output_file=self.output_file, return_vec=False,
                    num_relations=len(sentence), answers=[answer])

                depth = len(role)
                if correct:
                    sentence_score[depth] += 1
                    print >> self.output_file, "Correct."
                else:
                    print >> self.output_file, "Incorrect."

                sentence_length[depth] += 1

                if self.short:
                    break

            for d in sentence_length:
                sentence_percent = sentence_score[d] / sentence_length[d]

                print >> self.output_file, \
                    "Percent correct for current sentence at depth %d: %f" \
                    % (d, sentence_percent)

                score[d] = score[d] + sentence_percent

        for d in score:
            print "Sentence test score at depth %d: %f out of %d" \
                % (d, score[d], self.num_trials)

            percent = score[d] / self.num_trials

            title = "Sentence Test Summary - Depth = %d" % d
            tools.print_header(self.output_file, title)
            print >> self.output_file, "Correct: ", score[d]
            print >> self.output_file, "Total: ", self.num_trials
            print >> self.output_file, "Percent: ", percent
            tools.print_footer(self.output_file, title)

            self.add_data("sentence_score_%d" % d, percent)
Example #16
    def find_matches(self, vector, vector_dict, exempt=[]):
        hrr_vec = HRR(data=vector)

        for key in vector_dict.keys():
            if key not in exempt:
                yield (key, hrr_vec.compare(HRR(data=vector_dict[key])))
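
Note that find_matches is a generator, so callers either loop over it or reduce it in one pass, as get_stats does with max. A standalone usage sketch, reusing the body above as a free function over made-up vectors and the HRR sketch from Example #1:

    import numpy as np

    pointers = {key: HRR(512).v for key in ('dog', 'cat', 'car')}
    probe = pointers['dog'] + 0.05 * np.random.randn(512)  # noisy copy

    def find_matches(vector, vector_dict, exempt=()):
        hrr_vec = HRR(data=vector)
        for key in vector_dict.keys():
            if key not in exempt:
                yield (key, hrr_vec.compare(HRR(data=vector_dict[key])))

    # max consumes the generator in a single pass.
    best = max(find_matches(probe, pointers, exempt=['car']),
               key=lambda x: x[1])
    print("best match: %s (%.3f)" % best)
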
Example #17
    def plot_simulation(self, target_keys):
        then = datetime.datetime.now()

        correct_key = None
        if target_keys:
            correct_key = target_keys[0]

        sim = self.simulator
        t = sim.trange()

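        # Elements of a unit-norm d-dimensional vector have scale ~1/sqrt(d);
        # these limits leave the input plots 5x headroom.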
        max_val = 5.0 / np.sqrt(self.dimension)

        gs = gridspec.GridSpec(7, 2)
        fig = plt.figure(figsize=(10, 10))

        ax = plt.subplot(gs[0, 0])

        plt.plot(t, self.data[self.D_probe], label='D')
        title = 'Input to associative memory'
        ax.text(.01,
                1.20,
                title,
                horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-max_val, max_val))

        ax = plt.subplot(gs[0, 1])
        plt.plot(t, self.data[self.output_probe], label='Output')
        title = 'Output of associative memory'
        ax.text(.01,
                1.20,
                title,
                horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-max_val, max_val))

        ax = plt.subplot(gs[1:3, :])

        if len(self.index_vectors) < 1000:
            for key, v in self.index_vectors.iteritems():
                input_sims = np.dot(self.data[self.D_probe], v)
                label = str(key[1])
                if key == correct_key:
                    plt.plot(t, input_sims, '--', label=label + '*')
                else:
                    plt.plot(t, input_sims, label=label)

            title = (
                'Dot product between id vectors and input to assoc memory.\n'
                'Target %s is dashed line.' % str(correct_key))

            ax.text(.01,
                    0.80,
                    title,
                    horizontalalignment='left',
                    transform=ax.transAxes)
            # plt.legend(bbox_to_anchor=(-0.03, 0.5), loc='center right')
            if self.ideal_dot:
                ax.text(.01,
                        0.10,
                        "Ideal dot: " + str(self.ideal_dot),
                        horizontalalignment='left',
                        transform=ax.transAxes)
            if self.second_dot:
                ax.text(.99,
                        0.10,
                        "Second dot: " + str(self.second_dot),
                        horizontalalignment='right',
                        transform=ax.transAxes)

            plt.ylim((-1.0, 1.5))
            plt.axhline(1.0, ls=':', c='k')

        ax = plt.subplot(gs[3:5, :])
        for key, p in self.assoc_probes.iteritems():
            if key == correct_key:
                plt.plot(t, self.data[p], '--')
            else:
                plt.plot(t, self.data[p])

        title = ('Decoded values of association populations.\n' +
                 'Target %s is dashed line.' % str(correct_key))

        ax.text(.01,
                0.80,
                title,
                horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-0.2, 1.5))
        plt.axhline(y=1.0, ls=':', c='k')

        ax = plt.subplot(gs[5:7, :])
        before_ls = '--'
        after_ls = '-'
        before_norms = [np.linalg.norm(v) for v in self.data[self.D_probe]]
        after_norms = [np.linalg.norm(v) for v in self.data[self.output_probe]]

        plt.plot(t, before_norms, before_ls, c='g', label='Norm - Before')
        plt.plot(t, after_norms, after_ls, c='g', label='Norm - After')

        if correct_key is not None:
            correct_index_hrr = HRR(data=self.index_vectors[correct_key])
            correct_stored_hrr = HRR(data=self.stored_vectors[correct_key])

            before_sims = [
                correct_index_hrr.compare(HRR(data=i))
                for i in self.data[self.D_probe]
            ]

            after_sims = [
                correct_stored_hrr.compare(HRR(data=o))
                for o in self.data[self.output_probe]
            ]

            plt.plot(t,
                     before_sims,
                     before_ls,
                     c='b',
                     label='Cosine Sim - Before')
            plt.plot(t,
                     after_sims,
                     after_ls,
                     c='b',
                     label='Cosine Sim - After')

        title = 'Before and After Associative Memory'
        ax.text(.01,
                0.90,
                title,
                horizontalalignment='left',
                transform=ax.transAxes)
        plt.ylim((-1.0, 1.5))
        plt.legend(loc=4, prop={'size': 6})
        plt.axhline(y=1.0, ls=':', c='k')
        ax.set_xlabel('Time (s)')

        date_time_string = str(datetime.datetime.now()).split('.')[0]
        date_time_string = reduce(lambda y, z: string.replace(y, z, "_"),
                                  [date_time_string, ":", ".", " ", "-"])

        plot_name = 'neural_extraction_' + date_time_string + ".png"
        plot_path = os.path.join(self.output_dir, plot_name)

        plt.savefig(plot_path)

        symlink_name = os.path.join(self.output_dir,
                                    'latest_neural_extraction')
        make_sym_link(plot_name, symlink_name)

        now = datetime.datetime.now()
        self.write_to_runtime_file(now - then, "plot")

        plt.close(fig)