import re
from conceptnet5.graph import JSONWriterGraph
from conceptnet5.english_nlp import normalize
from pymongo import Connection
from types import *

# Shared graph handle used by everything below in this module.
GRAPH = JSONWriterGraph('json_data/goalnet')

# Source nodes referenced by this reader; each one is justified immediately
# after it is created.
goalnet = GRAPH.get_or_create_node(u'/source/rule/goalnet')
GRAPH.justify(0, goalnet)
omics = GRAPH.get_or_create_node(u'/source/activity/omics')
GRAPH.justify(0, omics)


def output_steps(goal, steps, source):
    """
    Add the assertions describing `goal` and its ordered `steps` to GRAPH.

    Every entry of `steps` is looked up (or created) as an 'en' concept, and
    the resulting list becomes the arguments of a '/relation/Sequence'
    assertion.  That sequence assertion is then paired with the 'en' concept
    for `goal` as the arguments of a '/relation/HasSteps' assertion.  Both
    assertions carry the dataset 'goalnet/en' and the license 'CC-By-SA'.

    `source` is not read by any of the statements below.
    """
    # add raw assertions
    args = []
    for step in steps:
        args.append(GRAPH.get_or_create_concept('en', step))
    raw_sequence = GRAPH.get_or_create_assertion(
        '/relation/Sequence', args,
        {'dataset': 'goalnet/en', 'license': 'CC-By-SA'}
    )
    args = [GRAPH.get_or_create_concept('en', goal)]
    args.append(raw_sequence)
    raw_assertion = GRAPH.get_or_create_assertion(
        '/relation/HasSteps', args,
        {'dataset': 'goalnet/en', 'license': 'CC-By-SA'}
    )

    # add assertions
    # The argument list is reset and the goal text is normalized, stripped
    # and lower-cased in preparation for a second set of assertions.
    # NOTE(review): the source text ends right after this setup; the
    # statements that consume `args` and the normalized `goal` are not
    # present here -- confirm against the full reader before relying on it.
    args = []
    goal = normalize(goal).strip().lower()
""" Parse the ReVerb dataset and put assertions to ConceptNet 5 """ from conceptnet5.graph import JSONWriterGraph from conceptnet5.english_nlp import normalize, normalize_topic, tokenize, untokenize from urlparse import urlparse import urllib import codecs import nltk import os import re GRAPH = JSONWriterGraph('json_data/reverb') reverb = GRAPH.get_or_create_node(u'/source/rule/reverb') GRAPH.justify('/', reverb, 0.5) reverb_object = GRAPH.get_or_create_node( u'/source/rule/extract_reverb_objects') #reverb_prep = GRAPH.get_or_create_node(u'/source/rule/extract_reverb_prepositions') reverb_triple = GRAPH.get_or_create_node( u'/source/rule/reverb_present_tense_triples') wikipedia = GRAPH.get_or_create_node(u'/source/web/en.wikipedia.org') GRAPH.justify('/', reverb_object, 0.2) GRAPH.justify('/', reverb_triple, 0.5) GRAPH.justify('/', wikipedia) TYPE_WORDS = ('type', 'kind', 'sort', 'variety', 'one') # Search for non-namespaced Wikipedia sources. WIKIPEDIA_SOURCE = re.compile( r'(http://en.wikipedia.org/wiki/([^:]|:_)+)(\||$)')