示例#1
0
def get_phrases(doc):
    """Code each sentence of one document with PETRARCH2 and collect phrases.

    Args:
        doc: Indexable record, read as ``doc[0]`` = article id, ``doc[1]`` =
            raw date (handed to ``date_formatter``), ``doc[2]`` = JSON string
            with a top-level ``'sentences'`` list, ``doc[3]`` = document id.
            Every sentence entry must provide the keys ``'sen_id'``,
            ``'tree'`` and ``'sentence'``.

    Returns:
        Tuple ``(article_id, phrases_json, doc_id)``. ``phrases_json`` is a
        JSON-encoded list with one ``{sentence_text: phrase_dict}`` entry per
        sentence; ``phrase_dict`` has the keys ``"nouns"``, ``"noun_coding"``,
        ``"verbs"`` and ``"verb_coding"``. Fields that could not be extracted
        for a sentence are left empty (``[]`` for the noun fields, ``""`` for
        the verb fields).

    Raises:
        KeyError: if the document JSON or a sentence lacks a required key.
        ValueError: if ``doc[2]`` is not valid JSON.
    """
    article_id = doc[0]
    date = date_formatter(doc[1])
    doc_id = doc[3]
    sentences = json.loads(doc[2])['sentences']

    phrases_output = []
    for sentence in sentences:
        sentence_id = sentence['sen_id']
        sentence_tree = sentence['tree']
        sentence_text = sentence['sentence']
        parsed = utilities._format_parsed_str(sentence_tree)
        coding_input = {
            doc_id: {
                u'sents': {
                    sentence_id: {u'content': sentence_text, u'parsed': parsed},
                },
                u'meta': {u'date': date.encode()},
            },
        }

        # Reset all per-sentence state so a failure on this sentence can never
        # leak nouns or codings carried over from the previous sentence.
        coded = {}
        nouns = []
        noun_coding = []
        verbs = ""
        verb_coding = ""

        # Best-effort extraction: any failure inside the coder or while
        # walking its result just leaves the remaining fields empty.
        try:
            coded = petrarch2.do_coding(coding_input)
            verb_meta = coded[doc_id]['meta']['verbs']
            noun_pairs = verb_meta['nouns']
            nouns = [pair[0] for pair in noun_pairs]
            noun_coding = [pair[1] for pair in noun_pairs]
            # list(...) keeps this subscriptable on Python 3 dict views too.
            verbs = list(verb_meta['eventtext'].values())[0]
        except Exception:
            print("No eventtext")
            verbs = ""

        # The verb code is the third element of the first eventtext key.
        # IndexError covers an empty eventtext mapping or a short key;
        # TypeError covers a coder result that is not a mapping.
        try:
            event_keys = list(coded[doc_id]['meta']['verbs']['eventtext'].keys())
            verb_coding = event_keys[0][2]
        except (KeyError, IndexError, TypeError) as e:
            print(e)
            verb_coding = ""

        phrase_dict = {
            "nouns": nouns,
            "noun_coding": noun_coding,
            "verbs": verbs,
            "verb_coding": verb_coding,
        }
        phrases_output.append({sentence_text: phrase_dict})

    return (article_id, json.dumps(phrases_output), doc_id)
示例#2
0
# Handle on the `phrases` attribute of `db`
# (presumably a MongoDB collection -- confirm against where `db` is created).
db_phrases = db.phrases

# SQLite connection to the input database.
conn = sqlite3.connect(input_db)
c = conn.cursor()
try:
    c.execute("SELECT id,date,output,mongo_id FROM json_test_table")
except sqlite3.Error:
    # Catch database failures only, so Ctrl-C or an unrelated bug is not
    # misreported as an input database problem.
    py_logger.error("Input database error")
    # Same exit status as exit(1), without depending on the site-provided
    # `exit` helper being installed.
    raise SystemExit(1)

# Load every (id, date, output, mongo_id) row into memory.
rows = c.fetchall()

for row in rows:
    phrases_output=[]
    date=date_formatter(row[1])
    doc_id=row[3]
    corenlpJsonData=json.loads(row[2])
    sentences=corenlpJsonData['sentences']
    for sentence in sentences:
        sen_phrases_dict = {}
        sentenceJson= json.loads(json.dumps(sentence))
        sentenceId=sentenceJson['sen_id']
        sentenceTree=sentenceJson['tree']
        sentenceData=sentenceJson['sentence']
        parsed=utilities._format_parsed_str(sentenceTree)
        dict = {doc_id: {u'sents': {sentenceId: {u'content': sentenceData, u'parsed': parsed}}, u'meta': {u'date': date.encode()}}}
        try:
            return_dict = petrarch2.do_coding(dict, None)
            n = return_dict[doc_id]['meta']['verbs']['nouns']
            nouns = [i[0] for i in n]