def get_phrases(doc):
    """Extract noun and verb phrases for every sentence of one document.

    Each sentence is coded on its own by ``petrarch2.do_coding`` and the
    phrases found in the coder's ``meta['verbs']`` output are collected.

    Args:
        doc: Indexable record read as ``doc[0]`` = article id,
            ``doc[1]`` = date (passed to ``date_formatter``),
            ``doc[2]`` = JSON string holding a ``'sentences'`` list whose
            items carry ``'sen_id'``, ``'tree'`` and ``'sentence'``, and
            ``doc[3]`` = document id.
            NOTE(review): presumably a row of
            ``(id, date, output, mongo_id)`` -- confirm against the caller.

    Returns:
        A tuple ``(article_id, phrases_json, doc_id)`` where
        ``phrases_json`` is a JSON-encoded list with one single-key object
        per sentence: ``{sentence_text: {"nouns", "noun_coding", "verbs",
        "verb_coding"}}``.

    Raises:
        KeyError, IndexError, ValueError: if ``doc`` or its JSON payload
            does not have the layout described above.
    """
    article_id = doc[0]
    date = date_formatter(doc[1])
    doc_id = doc[3]
    sentences = json.loads(doc[2])['sentences']

    phrases_output = []
    for sentence in sentences:
        # `sentence` already comes out of json.loads, so no dumps/loads
        # round trip is needed to normalise it.
        sentence_id = sentence['sen_id']
        sentence_tree = sentence['tree']
        sentence_text = sentence['sentence']
        parsed = utilities._format_parsed_str(sentence_tree)
        coding_input = {
            doc_id: {
                u'sents': {
                    sentence_id: {u'content': sentence_text, u'parsed': parsed},
                },
                # NOTE(review): assumes date_formatter returns a string.
                u'meta': {u'date': date.encode()},
            }
        }

        # Reset the results for every sentence so that a failure here never
        # reports phrases or codes left over from the previous sentence.
        nouns = []
        noun_coding = []
        verbs = ""
        verb_coding = ""
        coded = {}

        # Best effort: a sentence the coder cannot handle yields empty
        # results instead of aborting the whole document.  `Exception`
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        try:
            coded = petrarch2.do_coding(coding_input)
            verb_meta = coded[doc_id]['meta']['verbs']
            noun_pairs = verb_meta['nouns']
            nouns = [pair[0] for pair in noun_pairs]
            noun_coding = [pair[1] for pair in noun_pairs]
            # list(...) keeps the "first entry" lookup working whether
            # values() returns a list or a view.
            verbs = list(verb_meta['eventtext'].values())[0]
        except Exception:
            print("No eventtext")

        # The verb code is the third element of the first eventtext key.
        # An empty eventtext raises IndexError, not KeyError, so both are
        # handled.
        try:
            event_keys = list(coded[doc_id]['meta']['verbs']['eventtext'].keys())
            verb_coding = event_keys[0][2]
        except (KeyError, IndexError) as e:
            print(e)

        phrases_output.append({
            sentence_text: {
                "nouns": nouns,
                "noun_coding": noun_coding,
                "verbs": verbs,
                "verb_coding": verb_coding,
            }
        })

    return (article_id, json.dumps(phrases_output), doc_id)
db_phrases = db.phrases #Sqlite connections conn = sqlite3.connect(input_db) c = conn.cursor() try: c.execute("SELECT id,date,output,mongo_id FROM json_test_table") except: py_logger.error("Input database error"); exit(1); rows= c.fetchall() for row in rows: phrases_output=[] date=date_formatter(row[1]) doc_id=row[3] corenlpJsonData=json.loads(row[2]) sentences=corenlpJsonData['sentences'] for sentence in sentences: sen_phrases_dict = {} sentenceJson= json.loads(json.dumps(sentence)) sentenceId=sentenceJson['sen_id'] sentenceTree=sentenceJson['tree'] sentenceData=sentenceJson['sentence'] parsed=utilities._format_parsed_str(sentenceTree) dict = {doc_id: {u'sents': {sentenceId: {u'content': sentenceData, u'parsed': parsed}}, u'meta': {u'date': date.encode()}}} try: return_dict = petrarch2.do_coding(dict, None) n = return_dict[doc_id]['meta']['verbs']['nouns'] nouns = [i[0] for i in n]