Пример #1
0
    def expand_terms(self, terms, limit_per_term=10):
        """
        Augment a list of (term, weight) pairs with their direct ConceptNet
        neighbors, then normalize the weights.

        The rationale is recall: a term too rare to have its own vector in
        the AssocSpace can still be approximated through the terms it is
        connected to.

        For every input term, up to `limit_per_term` edges are fetched from
        `self.finder`. The endpoint opposite the term is added with a weight
        of `term weight * edge weight * 0.1`, negated when the relation
        starts with '/r/Not'. Edges where neither endpoint matches the term
        are skipped.

        Returns the original pairs followed by the neighbor pairs, with every
        weight divided by the sum of absolute weights. Returns an empty list
        when that sum is zero.
        """
        self.load()
        weighted = terms[:]
        for source, source_weight in terms:
            edges = self.finder.lookup(source, limit=limit_per_term)
            for edge in edges:
                # Pick whichever endpoint is *not* the term we looked up.
                if field_match(edge['start'], source):
                    other = edge['end']
                elif field_match(edge['end'], source):
                    other = edge['start']
                else:
                    continue

                scaled = source_weight * edge['weight'] * 0.1
                # Negative relations push the neighbor away from the term.
                if edge['rel'].startswith('/r/Not'):
                    scaled = scaled * -1
                weighted.append((other, scaled))

        norm = sum(abs(value) for (_, value) in weighted)
        if norm == 0:
            return []
        return [(name, value / norm) for (name, value) in weighted]
Пример #2
0
    def expand_terms(self, terms, limit_per_term=10):
        """
        Extend weighted terms with terms one ConceptNet edge away, at a
        reduced weight, and rescale so the absolute weights sum to 1.

        Looking at neighbors improves the recall of the AssocSpace: an
        infrequent term without a vector of its own can be approximated by
        the vectors of the terms adjacent to it.

        `terms` is a list of (term, weight) pairs. At most `limit_per_term`
        edges are examined per term. Each usable edge contributes the
        endpoint opposite the term, weighted by
        `weight * edge['weight'] * 0.1` and sign-flipped for relations that
        start with '/r/Not'.

        Returns a list of (term, normalized weight) pairs, or [] when the
        total absolute weight is zero.
        """
        self.load()
        collected = terms[:]
        for base_term, base_weight in terms:
            for link in self.finder.lookup(base_term, limit=limit_per_term):
                if field_match(link['start'], base_term):
                    adjacent = link['end']
                elif field_match(link['end'], base_term):
                    adjacent = link['start']
                else:
                    # The edge does not actually touch this term; ignore it.
                    continue

                adjacent_weight = base_weight * link['weight'] * 0.1
                is_negated = link['rel'].startswith('/r/Not')
                if is_negated:
                    adjacent_weight *= -1
                collected.append((adjacent, adjacent_weight))

        magnitude = sum(abs(w) for (_, w) in collected)
        if magnitude == 0:
            return []
        return [(t, w / magnitude) for (t, w) in collected]
Пример #3
0
def expand_terms(terms, limit_per_term=20):
    """
    Collect the English neighbors of the given terms, with weights.

    For each term, up to `limit_per_term` edges are looked up through the
    module-level FINDER. An edge is used when one endpoint matches the term
    and the second `split_uri` component of the opposite endpoint is 'en'
    (presumably the language code); the opposite endpoint is the neighbor.

    A neighbor's weight is the edge weight capped at 10, negated when the
    relation starts with '/r/Not'. The input terms themselves are not part
    of the result.

    `terms` may be any iterable of term URIs.

    Returns a list of (neighbor, weight, normalized_weight) triples, where
    normalized_weight is the weight divided by the sum of absolute weights.
    Returns [] when that sum is zero (including when no neighbor is found).
    """
    results = []
    for term in terms:
        for edge in FINDER.lookup(term, limit=limit_per_term):
            start = edge['start']
            end = edge['end']
            if field_match(start, term) and split_uri(end)[1] == 'en':
                neighbor = end
            elif field_match(end, term) and split_uri(start)[1] == 'en':
                neighbor = start
            else:
                continue

            # Cap very heavy edges so a single edge cannot dominate.
            neighbor_weight = 1.0 * min(10, edge['weight'])
            if edge['rel'].startswith('/r/Not'):
                neighbor_weight *= -1
            results.append((neighbor, neighbor_weight))

    total_weight = sum(abs(weight) for (_, weight) in results)
    if total_weight == 0:
        return []
    return [
        (neighbor, weight, weight / total_weight)
        for (neighbor, weight) in results
    ]
Пример #4
0
def transform_directed_edge(edge, node):
    """
    Annotate an edge, in place, from the point of view of `node`.

    The endpoint whose '@id' matches `node` is stored under edge['node'] and
    the opposite endpoint under edge['other']. The start of the edge is
    checked before the end.

    Returns the same (mutated) edge dict.
    Raises ValueError when neither endpoint matches `node`.
    """
    # Try each orientation in turn: (endpoint that is the node, the other one).
    for own_key, opposite_key in (('start', 'end'), ('end', 'start')):
        if field_match(edge[own_key]['@id'], node):
            edge['node'] = edge[own_key]
            edge['other'] = edge[opposite_key]
            return edge

    raise ValueError("Neither the start nor end of this edge matches "
                     "the node %r: %r" % (node, edge))
Пример #5
0
def transform_directed_edge(edge, node):
    """
    Record on `edge` which endpoint is `node` and which is the other one.

    Compares the '@id' of the start, then of the end, against `node`. The
    matching endpoint is copied to edge['node'] and the remaining endpoint
    to edge['other']. The edge is modified in place and also returned.

    Raises ValueError if `node` matches neither endpoint.
    """
    if field_match(edge['start']['@id'], node):
        matched_key, remaining_key = 'start', 'end'
    elif field_match(edge['end']['@id'], node):
        matched_key, remaining_key = 'end', 'start'
    else:
        message = (
            "Neither the start nor end of this edge matches "
            "the node %r: %r" % (node, edge)
        )
        raise ValueError(message)

    edge['node'] = edge[matched_key]
    edge['other'] = edge[remaining_key]
    return edge
Пример #6
0
 def passes_filter(label, filter):
     """
     Decide whether `label` is accepted by `filter`.

     A filter of None accepts everything. Otherwise the decision is
     delegated to `field_match(label, filter)` and its result is returned.
     """
     return filter is None or field_match(label, filter)
Пример #7
0
 def passes_filter(label, filter):
     """
     Return True when no filter is given; otherwise return the result of
     `field_match(label, filter)`.
     """
     if filter is not None:
         return field_match(label, filter)
     # No filter means nothing is excluded.
     return True