def build_relation(parts_dict):
    """
    Build the relation uri for an assertion described by a dictionary.

    args:
    parts_dict -- mapping providing "polarity" (compared against 0) and
    "relname" (the relation name)

    Returns normalize_uri('/r/<relname>') when the polarity is greater than
    0 and normalize_uri('/r/Not<relname>') otherwise.
    """
    polarity = parts_dict["polarity"]
    relname = parts_dict["relname"]
    # 'ConceptuallyRelatedTo' is folded into 'RelatedTo'.
    if relname == 'ConceptuallyRelatedTo':
        relname = 'RelatedTo'

    prefix = '/r/' if polarity > 0 else '/r/Not'
    return normalize_uri(prefix + relname)
Пример #2
0
def build_relation(parts_dict):
    """
    Return the normalized relation uri for the given assertion parts.

    args:
    parts_dict -- mapping with a "polarity" value and a "relname" string

    The relation name "ConceptuallyRelatedTo" is treated as "RelatedTo".
    A polarity that is not greater than 0 produces the "Not"-prefixed
    relation.
    """
    polarity = parts_dict["polarity"]
    name = parts_dict["relname"]
    if name == "ConceptuallyRelatedTo":
        name = "RelatedTo"

    # Non-positive polarity yields the negated relation.
    if not polarity > 0:
        return normalize_uri("/r/Not" + name)
    return normalize_uri("/r/" + name)
Пример #3
0
def build_relation(raw_assertion):
    """
    Build the relation uri for a raw assertion object.

    args:
    raw_assertion -- object exposing frame.frequency.value (the polarity)
    and frame.relation.name (the relation name)

    Returns the normalized '/r/<name>' uri for a polarity above 0, and the
    normalized '/r/Not<name>' uri otherwise.
    """
    polarity = raw_assertion.frame.frequency.value
    relname = raw_assertion.frame.relation.name
    # 'ConceptuallyRelatedTo' is folded into 'RelatedTo'.
    if relname == 'ConceptuallyRelatedTo':
        relname = 'RelatedTo'

    if polarity > 0:
        path = '/r/' + relname
    else:
        path = '/r/Not' + relname
    return normalize_uri(path)
def build_sources(raw_assertion):
    """
    Collect the weighted sources of a raw assertion.

    args:
    raw_assertion -- object exposing sentence.activity.name,
    creator.username and votes.all(); each vote has user.username and vote

    Returns a list of ([source uris], weight) tuples: the creator together
    with the activity at weight 1, followed by one entry per vote weighted
    by that vote's value.
    """
    activity_name = raw_assertion.sentence.activity.name

    contributor = normalize_uri(
        u'/s/contributor/omcs/'+raw_assertion.creator.username)
    origin = normalize_uri(u'/s/activity/omcs/'+activity_name)

    vote_sources = [
        ([normalize_uri('/s/contributor/omcs/'+vote.user.username),
          normalize_uri(u'/s/activity/omcs/vote')], vote.vote)
        for vote in raw_assertion.votes.all()
    ]
    return [([contributor, origin], 1)] + vote_sources
Пример #5
0
def build_relation(parts_dict):
    """
    Compute the normalized relation uri from the parts of an assertion.

    args:
    parts_dict -- mapping providing "polarity" and "relname"

    'ConceptuallyRelatedTo' is replaced by 'RelatedTo'. The relation is
    prefixed with 'Not' unless the polarity is greater than 0.
    """
    polarity = parts_dict["polarity"]
    relname = parts_dict["relname"]
    if relname == 'ConceptuallyRelatedTo':
        relname = 'RelatedTo'

    path = '/r/'+relname if polarity > 0 else '/r/Not'+relname
    return normalize_uri(path)
def build_relation(raw_assertion):
    """
    Return the normalized relation uri of a raw assertion.

    args:
    raw_assertion -- object exposing frame.frequency.value and
    frame.relation.name

    A frequency value above 0 gives '/r/<name>'; any other value gives the
    negated form '/r/Not<name>'. 'ConceptuallyRelatedTo' is treated as
    'RelatedTo'.
    """
    polarity = raw_assertion.frame.frequency.value
    name = raw_assertion.frame.relation.name
    if name == 'ConceptuallyRelatedTo':
        name = 'RelatedTo'

    # Guard clause for the negated case; the positive case falls through.
    if not polarity > 0:
        return normalize_uri('/r/Not'+name)
    return normalize_uri('/r/'+name)
Пример #7
0
def build_sources(parts_dict):
    """
    Collect the weighted sources described by an assertion dictionary.

    args:
    parts_dict -- mapping providing "activity", "creator" and "votes";
    each vote is indexed as vote[0] (username) and vote[1] (vote value)

    Returns a list of ([source uris], weight) tuples: the creator with the
    activity at weight 1, then one entry per vote weighted by vote[1].
    """
    activity = parts_dict["activity"]

    creator = normalize_uri(u'/s/contributor/omcs/'+parts_dict["creator"])
    origin = normalize_uri(u'/s/activity/omcs/'+activity)
    sources = [([creator, origin], 1)]

    for vote in parts_dict["votes"]:
        voter, value = vote[0], vote[1]
        voter_node = normalize_uri('/s/contributor/omcs/'+voter)
        vote_node = normalize_uri(u'/s/activity/omcs/vote')
        sources.append(([voter_node, vote_node], value))
    return sources
def build_sources(raw_assertion):
    """
    Build the list of weighted sources for a raw assertion.

    args:
    raw_assertion -- object exposing sentence.activity.name,
    creator.username and votes.all(); each vote has user.username and vote

    The first entry pairs the creator with the activity at weight 1. Each
    vote then adds an entry pairing the voter with the vote activity,
    weighted by the vote's value.
    """
    activity = raw_assertion.sentence.activity.name

    creator_uri = u'/s/contributor/omcs/' + raw_assertion.creator.username
    creator_node = normalize_uri(creator_uri)
    activity_node = normalize_uri(u'/s/activity/omcs/' + activity)
    sources = [([creator_node, activity_node], 1)]

    for vote in raw_assertion.votes.all():
        voter_node = normalize_uri(
            '/s/contributor/omcs/' + vote.user.username)
        vote_node = normalize_uri(u'/s/activity/omcs/vote')
        sources.append(([voter_node, vote_node], vote.vote))
    return sources
Пример #9
0
def build_sources(parts_dict, preposition_fix=False):
    """
    Collect the weighted sources described by an assertion dictionary.

    args:
    parts_dict -- mapping providing "activity", "creator" and "votes";
    each vote is indexed as vote[0] (username) and vote[1] (vote value)
    preposition_fix -- (optional) when true, '/s/rule/preposition_fix' is
    appended to the creator/activity source list

    Returns a list of ([source uris], weight) tuples: the creator entry at
    weight 1, then one entry per vote weighted by vote[1].
    """
    activity = parts_dict["activity"]

    creator_node = normalize_uri(u'/s/contributor/omcs/'+parts_dict["creator"])
    activity_node = normalize_uri(u'/s/activity/omcs/'+activity)

    origin = [creator_node, activity_node]
    if preposition_fix:
        origin.append('/s/rule/preposition_fix')
    sources = [(origin, 1)]

    for vote in parts_dict["votes"]:
        voter, value = vote[0], vote[1]
        voter_node = normalize_uri('/s/contributor/omcs/'+voter)
        vote_node = normalize_uri(u'/s/activity/omcs/vote')
        sources.append(([voter_node, vote_node], value))
    return sources
Пример #10
0
def handle_triple(line):
    """
    Convert one triple line into an edge and write it out.

    args:
    line -- whitespace-separated text whose first three fields are expected
    to be <...>-bracketed values (subject, predicate, object)

    The line is skipped (nothing is written, None is returned) when it has
    fewer than three fields, when one of the first three fields is not
    bracketed, when it matches one of the exclusion filters below, or when
    map_web_relation() returns None for the predicate. Otherwise each
    original-to-generated mapping is written to sw_map once (tracked in
    sw_map_used) and the resulting edge is passed to writer.write().
    """
    items = line.split()
    # Blank or truncated lines have fewer than three fields; skip them
    # instead of failing with IndexError in the loop below.
    if len(items) < 3:
        return
    for i in range(3):
        if not (items[i].startswith('<') and items[i].endswith('>')):
            return
        # Strip the surrounding angle brackets.
        items[i] = items[i][1:-1]
    subj, pred, obj = items[:3]

    # Exclusion filters on the predicate, subject and object.
    if ('foaf/0.1/homepage' in pred or obj == 'work' or '_Feature' in obj
            or '#Thing' in obj or '__' in subj or '__' in obj
            or 'List_of' in subj or 'List_of' in obj):
        return

    subj_concept = make_concept_uri(translate_wp_url(subj), 'en')
    obj_concept = make_concept_uri(translate_wp_url(obj), 'en')
    webrel = map_web_relation(pred)
    if webrel is None:
        return
    rel = normalize_uri('/r/'+webrel)

    # Record every (original, generated) pair only the first time it is seen.
    for mapping in ((pred, rel), (subj, subj_concept), (obj, obj_concept)):
        if mapping not in sw_map_used:
            sw_map_used.add(mapping)
            sw_map.write({'from': mapping[0], 'to': mapping[1]})

    edge = make_edge(rel, subj_concept, obj_concept,
                     dataset='/d/dbpedia/en',
                     license='/l/CC/By-SA',
                     sources=['/s/dbpedia/3.7'],
                     context='/ctx/all',
                     weight=0.5)
    writer.write(edge)
Пример #11
0
def handle_triple(line):
    """
    Turn a single triple line into an edge and hand it to the writer.

    args:
    line -- whitespace-separated text; the first three fields must each be
    wrapped in '<' and '>' (subject, predicate, object)

    Returns None in every case. Nothing is written when the line has fewer
    than three fields, when a field is not bracketed, when the exclusion
    filters match, or when map_web_relation() yields None. Otherwise the
    uri mappings are written to sw_map (once each, tracked through
    sw_map_used) and the edge is written with writer.write().
    """
    fields = line.split()
    # A blank or truncated line cannot hold a triple; skipping it here
    # avoids an IndexError when reading the first three fields.
    if len(fields) < 3:
        return

    uris = []
    for field in fields[:3]:
        if not (field.startswith('<') and field.endswith('>')):
            return
        # Keep the value without its angle brackets.
        uris.append(field[1:-1])
    subj, pred, obj = uris

    # Exclusion filters on the predicate, subject and object.
    if ('foaf/0.1/homepage' in pred or obj == 'work' or '_Feature' in obj
            or '#Thing' in obj or '__' in subj or '__' in obj
            or 'List_of' in subj or 'List_of' in obj):
        return

    subj_concept = make_concept_uri(translate_wp_url(subj), 'en')
    obj_concept = make_concept_uri(translate_wp_url(obj), 'en')
    webrel = map_web_relation(pred)
    if webrel is None:
        return
    rel = normalize_uri('/r/' + webrel)

    # Write each (original, generated) mapping the first time it appears.
    mappings = ((pred, rel), (subj, subj_concept), (obj, obj_concept))
    for original, generated in mappings:
        if (original, generated) not in sw_map_used:
            sw_map_used.add((original, generated))
            sw_map.write({'from': original, 'to': generated})

    edge = make_edge(rel,
                     subj_concept,
                     obj_concept,
                     dataset='/d/dbpedia/en',
                     license='/l/CC/By-SA',
                     sources=['/s/dbpedia/3.7'],
                     context='/ctx/all',
                     weight=0.5)
    writer.write(edge)
Пример #12
0
    def _create_node_by_type(self, uri, properties=None):
        """
        creates generic node object,
        parses uri, takes out args, identifies type of node and runs relevant
        method

        args:
        uri -- identifier of intended node, used in index
        properties -- (optional) properties passed on to the creation method;
        a fresh empty dict is used when omitted

        raises:
        ValueError -- if the normalized uri contains fewer than two '/'
        characters, or if no '_create_<type>_node' method exists for the
        type segment of the uri
        """
        # A shared mutable default would leak state between calls, so the
        # dict is created per call.
        if properties is None:
            properties = {}

        # Apply normalization to the URI here. All downstream functions can
        # assume it's normalized.
        uri = normalize_uri(uri)

        if uri.count('/') < 2:
            raise ValueError("""
            The URI %r is too short. You can't create the root or
            a type with this method.
            """ % uri)
        _, _type, rest = uri.split('/', 2)
        # Check if this is a web_concept
        if uri.startswith('http'):
            return self._create_web_concept_node(
                '/web_concept/%s' % uri, uri, properties)
        # The None default lets an unknown type reach the ValueError below
        # instead of surfacing as an AttributeError from getattr.
        method = getattr(self, '_create_%s_node' % _type, None)
        if method is None:
            raise ValueError("I don't know how to create type %r" % _type)
        return method(uri, rest, properties)
Пример #13
0
    def get_node(self, uri):
        """
        looks up a single node by uri in the nodes collection

        args:
        uri -- the uri of the node in question; it is normalized before the
        lookup

        returns whatever self.db.nodes.find_one yields for the query
        {'uri': <normalized uri>}
        """
        query = {'uri': normalize_uri(uri)}
        return self.db.nodes.find_one(query)
Пример #14
0
    def get_prefix(self, uri_prefix):
        """
        returns the result of get_regex for nodes whose uri begins with
        uri_prefix

        args:
        uri_prefix -- the prefix which the uri of the nodes must have; it is
        normalized and anchored with '^' before being handed to get_regex
        """
        normalized_prefix = normalize_uri(uri_prefix)
        # NOTE(review): the prefix is used as a regex without escaping.
        anchored = '^' + normalized_prefix
        return self.get_regex(anchored)
Пример #15
0
    def get_node(self, uri):
        """
        fetches the node stored under the given uri

        args:
        uri -- the uri of the node in question

        the uri is normalized first; the return value is the result of
        self.db.nodes.find_one for that uri
        """
        normalized = normalize_uri(uri)
        nodes = self.db.nodes
        return nodes.find_one({'uri': normalized})
Пример #16
0
    def get_prefix(self, uri_prefix):
        """
        delegates to get_regex with a pattern matching uris that start with
        uri_prefix

        args:
        uri_prefix -- the prefix which the uri of the nodes must have

        """
        prefix = normalize_uri(uri_prefix)
        # Anchor the normalized prefix at the start of the uri.
        # NOTE(review): the prefix is not regex-escaped.
        pattern = '^' + prefix
        return self.get_regex(pattern)
Пример #17
0
def build_sources(parts_dict, preposition_fix=False):
    """
    Build the list of weighted sources for an assertion dictionary.

    args:
    parts_dict -- mapping providing "activity", "creator" and "votes";
    each vote is indexed as vote[0] (username) and vote[1] (vote value)
    preposition_fix -- (optional) when true, the first source list also
    contains '/s/rule/preposition_fix'

    The first entry has weight 1; every vote adds an entry pairing the
    voter with the vote activity, weighted by vote[1].
    """
    activity = parts_dict["activity"]

    creator = normalize_uri(u'/s/contributor/omcs/' + parts_dict["creator"])
    origin = normalize_uri(u'/s/activity/omcs/' + activity)
    rule_nodes = ['/s/rule/preposition_fix'] if preposition_fix else []
    sources = [([creator, origin] + rule_nodes, 1)]

    for vote in parts_dict["votes"]:
        username, vote_int = vote[0], vote[1]
        voter_node = normalize_uri('/s/contributor/omcs/' + username)
        vote_node = normalize_uri(u'/s/activity/omcs/vote')
        sources.append(([voter_node, vote_node], vote_int))
    return sources
Пример #18
0
    def get_or_create_source(self, source_list):
        """
        convenience function that looks up a source node and creates it when
        the lookup returns nothing truthy.

        args:
        source_list -- list of source components ex. for '/source/contributor/omcs/bedume'
        source_list would be ['contributor','omcs','bedume']
        """
        path = "/".join(source_list)
        uri = normalize_uri("/source/" + path)

        existing = self.get_node(uri)
        if existing:
            return existing
        return self._create_node_by_type(uri, {})
Пример #19
0
def handle_raw_assertion(raw, writer):
    """
    Build edges from one raw assertion and write them with writer.write().

    args:
    raw -- raw assertion object; the code reads language_id, frame
    (goodness, frequency.value, text, relation.name), sentence.activity.name,
    text1, text2 and score from it
    writer -- object whose write() method receives each edge

    Nothing is written when the frame's goodness is below 1 or when the
    activity name contains 'rubycommons'. Any exception (including the
    language assertion failing) is caught, its traceback is printed, and the
    function returns without raising.
    """
    try:
        lang = raw.language_id
        # Only 'ja' assertions are expected here; a failed assert is caught
        # by the handler at the bottom.
        assert lang == 'ja'
        if raw.frame.goodness < 1: return
        polarity = raw.frame.frequency.value
        activity = raw.sentence.activity.name
        if 'rubycommons' in activity: return

        # build the assertion: fill the {1} and {2} slots of the frame text
        # with the bracketed raw texts
        frame_text = raw.frame.text
        frame_text = frame_text.replace('{1}', '[[%s]]' % raw.text1).replace(
            '{2}', '[[%s]]' % raw.text2)

        activity_node = normalize_uri(u'/s/site/nadya.jp')

        startText = ' '.join(JA.normalize_list(raw.text1))
        endText = ' '.join(JA.normalize_list(raw.text2))
        # Report start texts that were changed by normalization.
        if startText != raw.text1:
            print raw.text1.encode('utf-8'), '=>', startText.encode('utf-8')
        # NOTE(review): the result of this call is discarded, and `end` is
        # reassigned inside the loop below before it is used.
        normalize_uri('/text/' + lang + '/' + startText)
        end = normalize_uri('/text/' + lang + '/' + endText)

        relname = raw.frame.relation.name
        if relname == 'ConceptuallyRelatedTo':
            relname = 'RelatedTo'

        # A polarity that is not above 0 selects the 'Not'-prefixed relation.
        if polarity > 0:
            relation = normalize_uri('/r/' + relname)
        else:
            relation = normalize_uri('/r/Not' + relname)

        dataset = normalize_uri('/d/nadya.jp')
        score = raw.score

        # Single source: the site node, weighted by score / 5.
        sources = [([activity_node], score / 5.)]

        for source_list, weight in sources:
            # Sources mentioning 'commons2_reject' force the weight to -1.
            if 'commons2_reject' in ' '.join(source_list):
                weight = -1
            start = make_concept_uri(startText, lang)
            end = make_concept_uri(endText, lang)
            edge = make_edge(relation,
                             start,
                             end,
                             dataset,
                             LICENSE,
                             source_list,
                             '/ctx/all',
                             frame_text,
                             weight=weight)
            writer.write(edge)
    except Exception:
        # Best effort: print the traceback and carry on with the next call.
        import traceback
        traceback.print_exc()
Пример #20
0
    def _any_to_uri(self, obj):
        """
        Coerces a uri string, an object supporting item access or the legacy
        value 0 into a uri string.

        args:
        obj -- the object to be converted

        Strings go through normalize_uri, objects with __getitem__ yield
        obj['uri'] and a value equal to 0 maps to u'/'. Anything else raises
        TypeError.
        """
        if isinstance(obj, basestring):
            return normalize_uri(obj)
        if hasattr(obj, '__getitem__'):
            return obj['uri']
        if obj == 0:
            # backwards compatibility
            return u'/'
        raise TypeError
Пример #21
0
def handle_raw_assertion(raw, writer):
    """
    Build edges from one raw assertion and write them with writer.write().

    args:
    raw -- raw assertion object; the code reads language_id, frame
    (goodness, frequency.value, text, relation.name), sentence.activity.name,
    text1, text2 and score from it
    writer -- object whose write() method receives each edge

    Assertions whose frame goodness is below 1, or whose activity name
    contains 'rubycommons', are skipped. Every exception raised inside the
    body (including a failed language assert) is caught and its traceback
    printed; nothing is re-raised.
    """
    try:
        lang = raw.language_id
        # Only 'ja' is expected; an AssertionError ends up in the handler
        # at the bottom.
        assert lang == 'ja'
        if raw.frame.goodness < 1: return
        polarity = raw.frame.frequency.value
        activity = raw.sentence.activity.name
        if 'rubycommons' in activity: return

        # build the assertion: substitute the bracketed raw texts for the
        # {1} and {2} slots of the frame text
        frame_text = raw.frame.text
        frame_text = frame_text.replace('{1}', '[[%s]]' % raw.text1).replace('{2}', '[[%s]]' % raw.text2)

        activity_node = normalize_uri(u'/s/site/nadya.jp')
        
        startText = ' '.join(JA.normalize_list(raw.text1))
        endText = ' '.join(JA.normalize_list(raw.text2))
        # Report start texts that normalization changed.
        if startText != raw.text1:
            print raw.text1.encode('utf-8'), '=>',  startText.encode('utf-8')
        # NOTE(review): this call's result is discarded, and `end` is
        # overwritten in the loop below before being used.
        normalize_uri('/text/'+lang+'/'+startText)
        end = normalize_uri('/text/'+lang+'/'+endText)

        relname = raw.frame.relation.name
        if relname == 'ConceptuallyRelatedTo':
            relname = 'RelatedTo'

        # Polarity not above 0 selects the 'Not'-prefixed relation.
        if polarity > 0:
            relation = normalize_uri('/r/'+relname)
        else:
            relation = normalize_uri('/r/Not'+relname)

        dataset = normalize_uri('/d/nadya.jp')
        score = raw.score

        # Single source: the site node, weighted by score/5.
        sources = [([activity_node], score/5.)]

        for source_list, weight in sources:
            # Sources mentioning 'commons2_reject' force the weight to -1.
            if 'commons2_reject' in ' '.join(source_list):
                weight = -1
            start = make_concept_uri(startText, lang)
            end = make_concept_uri(endText, lang)
            edge = make_edge(relation, start, end, dataset, LICENSE, source_list, '/ctx/all', frame_text, weight=weight)
            writer.write(edge)
    except Exception:
        # Best effort: print the traceback instead of propagating.
        import traceback
        traceback.print_exc()
Пример #22
0
    def get_node_w_score(self, uri):
        """
        functions in the same manner as get_node,
        also queries the justification collection in order to find the
        score of the queried node.

        args:
        uri -- the uri of the node in question

        returns the node document with an added 'score' key (the 'value' of
        the matching justification document, or None when there is none);
        returns None when no node with that uri is found
        """
        uri = normalize_uri(uri)
        node = self.db.nodes.find_one({'uri': uri})
        # A missing node used to fail with TypeError on the item assignment
        # below; report it as None instead.
        if node is None:
            return None

        # NOTE(review): the justification is looked up by the full uri; the
        # original also computed uri[1:] but never used it -- confirm which
        # key the justification collection expects.
        justification = self.db.justification.find_one({'_id': uri})
        if justification is None:
            node['score'] = None
        else:
            node['score'] = justification['value']
        return node
Пример #23
0
    def get_regex(self, uri_regex):
        """
        returns a generator of nodes whose uri regular expression matches uri_regex

        args:
        uri_regex -- the regex which the uri of the nodes must match; it is
        passed through normalize_uri before querying

        nodes are fetched in batches of at most 100, each batch restricted
        to uris greater than the last uri yielded so far
        """
        uri_regex = normalize_uri(uri_regex)
        latest_result = ''
        while True:
            hasMore = False
            # The '$gt' paging below is only correct when every batch comes
            # back in ascending uri order, so the order is requested
            # explicitly rather than left to the server.
            batch = self.db.nodes.find(
                {'uri': {'$regex': uri_regex, '$gt': latest_result}}
            ).sort('uri', 1).limit(100)
            for node in batch:
                yield node
                hasMore = True
                latest_result = node['uri']
            # An empty batch means every matching node has been yielded.
            if not hasMore:
                break
Пример #24
0
    def get_regex(self, uri_regex):
        """
        returns a generator of nodes whose uri regular expression matches uri_regex

        args:
        uri_regex -- the regex which the uri of the nodes must match
        (normalized with normalize_uri before use)

        the nodes collection is queried repeatedly, at most 100 nodes per
        query, continuing after the last uri that was yielded
        """
        uri_regex = normalize_uri(uri_regex)
        latest_result = ''
        while True:
            query = {'uri': {'$regex': uri_regex, '$gt': latest_result}}
            # Sort ascending by uri: resuming from `latest_result` would
            # skip nodes if a batch arrived in any other order.
            cursor = self.db.nodes.find(query).sort('uri', 1).limit(100)

            hasMore = False
            for node in cursor:
                yield node
                hasMore = True
                latest_result = node['uri']
            # Stop once a query returns no further nodes.
            if not hasMore:
                break
Пример #25
0
def handle_raw_assertion(raw, writer):
    """
    Build edges from one raw assertion and write them with writer.write().

    args:
    raw -- raw assertion object; the code reads language_id, frame
    (goodness, frequency.value, text, relation.name), sentence.activity.name,
    creator.username, text1, text2 and votes.all() from it
    writer -- object whose write() method receives each edge

    Nothing is written when the frame's goodness is below 1, when the
    language id starts with 'zh', or when the activity name contains
    'rubycommons'. One edge is produced per source (the creator plus one per
    vote) unless a 'bedume' source touches a flagged concept or place. Any
    exception is caught and its traceback printed; nothing is re-raised.
    """
    try:
        lang = raw.language_id
        if raw.frame.goodness < 1: return
        if lang.startswith('zh'): return
        polarity = raw.frame.frequency.value
        activity = raw.sentence.activity.name
        if 'rubycommons' in activity: return

        # build the assertion: the {%} slot becomes 'not' unless the
        # polarity is above 0, then {1} and {2} receive the bracketed texts
        frame_text = raw.frame.text
        if polarity > 0:
            frame_text = frame_text.replace('{%}', '')
        else:
            frame_text = frame_text.replace('{%}', 'not')
        frame_text = frame_text.replace('{1}', '[[%s]]' % raw.text1).replace('{2}', '[[%s]]' % raw.text2)

        creator_node = normalize_uri(u'/s/contributor/omcs/'+raw.creator.username)
        activity_node = normalize_uri(u'/s/activity/omcs/'+activity)
        
        startText = raw.text1
        endText = raw.text2
        # NOTE(review): this call's result is discarded, and `end` is
        # overwritten in the loop below before being used.
        normalize_uri('/text/'+lang+'/'+raw.text1)
        end = normalize_uri('/text/'+lang+'/'+raw.text2)

        relname = raw.frame.relation.name
        if relname == 'ConceptuallyRelatedTo':
            relname = 'RelatedTo'

        # Polarity not above 0 selects the 'Not'-prefixed relation.
        if polarity > 0:
            relation = normalize_uri('/r/'+relname)
        else:
            relation = normalize_uri('/r/Not'+relname)

        dataset = normalize_uri('/d/conceptnet/4/'+lang)

        # The creator counts with weight 1; each vote adds its own source
        # weighted by the vote's value.
        sources = [([creator_node, activity_node], 1)]

        for vote in raw.votes.all():
            sources.append(([normalize_uri('/s/contributor/omcs/'+vote.user.username),
                             normalize_uri(u'/s/activity/omcs/vote')], vote.vote))
        
        for source_list, weight in sources:
            bad = False
            # Sources mentioning 'commons2_reject' force the weight to -1.
            if 'commons2_reject' in ' '.join(source_list):
                weight = -1
            start = make_concept_uri(startText, lang)
            end = make_concept_uri(endText, lang)
            # For 'bedume' sources, drop the edge when either concept uri
            # ends with a flagged concept or place (spaces become '_').
            if 'bedume' in ' '.join(source_list):
                for flagged in BEDUME_FLAGGED_CONCEPTS + BEDUME_FLAGGED_PLACES:
                    check = '/'+flagged.replace(' ', '_')
                    if start.endswith(check) or end.endswith(check):
                        bad = True
                        print "flagged:", str(raw)
                        break
            if not bad:
                edge = make_edge(relation, start, end, dataset, LICENSE, source_list, '/ctx/all', frame_text, weight=weight)
                writer.write(edge)
    except Exception:
        # Best effort: print the traceback instead of propagating.
        import traceback
        traceback.print_exc()
Пример #26
0
 def _any_to_uri(self, obj):
     """
     Returns the normalized uri for a string argument.

     args:
     obj -- the string to be converted; any non-string raises TypeError
     """
     if not isinstance(obj, basestring):
         raise TypeError
     return normalize_uri(obj)
Пример #27
0
def build_sources(raw_assertion):
    """
    Build the single weighted source of a raw assertion.

    args:
    raw_assertion -- object exposing a numeric score

    Returns a one-element list holding ([site node], score/5.), where the
    site node is normalize_uri(u'/s/site/nadya.jp').
    """
    score = raw_assertion.score
    site_node = normalize_uri(u'/s/site/nadya.jp')
    weight = score/5.
    return [([site_node], weight)]
Пример #28
0
def test_normalize_uri():
    """Check normalize_uri on ' one two' and that re-normalizing is stable."""
    expected = u'one_two'
    assert normalize_uri(' one two') == expected
    # Normalizing an already normalized value must not change it.
    normalized_once = normalize_uri(' one two')
    assert normalize_uri(normalized_once) == expected
Пример #29
0
def test_normalize_uri():
    """Verify the normalized form of ' one two' and its idempotence."""
    raw_text = ' one two'
    assert normalize_uri(raw_text) == u'one_two'
    # Applying normalize_uri twice gives the same result as applying it once.
    assert normalize_uri(normalize_uri(raw_text)) == u'one_two'
def build_data_set(raw_assertion):
    """
    Return the normalized dataset uri '/d/conceptnet/4/<language_id>'.

    args:
    raw_assertion -- object exposing language_id
    """
    return normalize_uri('/d/conceptnet/4/'+raw_assertion.language_id)
def build_data_set(raw_assertion):
    """
    Build the dataset uri for the language of a raw assertion.

    args:
    raw_assertion -- object exposing language_id

    Returns normalize_uri('/d/conceptnet/4/' + language_id).
    """
    dataset_path = '/d/conceptnet/4/' + raw_assertion.language_id
    return normalize_uri(dataset_path)
Пример #32
0
def build_data_set(parts_dict):
    """
    Return the normalized dataset uri "/d/conceptnet/4/<lang>".

    args:
    parts_dict -- mapping providing the language under the key "lang"
    """
    return normalize_uri("/d/conceptnet/4/" + parts_dict["lang"])
Пример #33
0
 def _any_to_uri(self, obj):
     """
     Converts a string into its normalized uri.

     args:
     obj -- the object to be converted; only strings are accepted, every
     other type raises TypeError
     """
     if isinstance(obj, basestring):
         uri = normalize_uri(obj)
         return uri
     raise TypeError
Пример #34
0
def build_sources(parts_dict):
    """
    Build the single weighted source for an assertion dictionary.

    args:
    parts_dict -- mapping providing a numeric "score"

    Returns [([site node], score / 5.0)], where the site node is
    normalize_uri(u"/s/site/nadya.jp").
    """
    score = parts_dict["score"]
    site_node = normalize_uri(u"/s/site/nadya.jp")
    return [([site_node], score / 5.0)]
Пример #35
0
def build_data_set():
    """Return the normalized form of the fixed dataset uri '/d/nadya.jp'."""
    dataset = normalize_uri('/d/nadya.jp')
    return dataset
Пример #36
0
def build_sources(raw_assertion):
    """
    Return the weighted source list of a raw assertion.

    args:
    raw_assertion -- object exposing a numeric score

    The list has one entry: the normalized u'/s/site/nadya.jp' node with the
    weight score/5.
    """
    score = raw_assertion.score
    source_nodes = [normalize_uri(u'/s/site/nadya.jp')]
    return [(source_nodes, score/5.)]
def build_sources(parts_dict):
    """
    Return the weighted source list for an assertion dictionary.

    args:
    parts_dict -- mapping providing a numeric "score"

    The list has one entry: the normalized u'/s/site/nadya.jp' node with the
    weight score / 5.
    """
    score = parts_dict["score"]
    source_nodes = [normalize_uri(u'/s/site/nadya.jp')]
    weight = score / 5.
    return [(source_nodes, weight)]
Пример #38
0
def build_data_set():
    """Return normalize_uri applied to the fixed dataset path "/d/nadya.jp"."""
    dataset_uri = normalize_uri("/d/nadya.jp")
    return dataset_uri
Пример #39
0
def build_data_set(parts_dict):
    """
    Build the dataset uri for the language named in parts_dict.

    args:
    parts_dict -- mapping providing the language under the key 'lang'

    Returns normalize_uri('/d/conceptnet/4/' + lang).
    """
    dataset_path = '/d/conceptnet/4/'+parts_dict['lang']
    return normalize_uri(dataset_path)