# Collect every name variant of every region into one flat list; this is the
# vocabulary handed to find_phrases() below.
regions = []
for r in region_dict:
    # extend() appends in place instead of rebuilding the list on every pass.
    regions.extend(r["variants"])

# Handle for the error log.  It is opened lazily on the first failing row, so
# a clean run creates no file.  The handle itself is tracked (rather than
# testing whether error_file exists on disk) because a log left behind by an
# earlier run would otherwise skip the open() and leave the name unbound.
efiley = None

try:
    # Process each row of the input query.
    for line in lines:
        try:
            # Expected row layout, as implied by the parsing below:
            #   <sentence_id>,{<word>,<word>,...}
            # Double quotes are dropped and any leading/trailing "}" is
            # stripped before splitting on "{".  A row that does not yield
            # exactly two pieces raises ValueError and is logged below.
            sentence_id, words_str = line.strip().replace('"', '').strip('}').split('{')
            sentence_id = sentence_id.strip(",")
            # Parentheses are removed from each token before matching.
            words = [w.replace(")", "").replace("(", "") for w in words_str.split(",")]
            # NOTE(review): find_phrases is defined elsewhere; it is consumed
            # here as an iterable of (start_position, length, text) matches
            # against the region names -- confirm against its definition.
            phrases = find_phrases(words, regions)

            # One CSV row per mention; mention_id is "<sentence_id>_<start>".
            for start_position, length, text in phrases:
                mention_id = '%s_%d' % (sentence_id, start_position)
                insert_statement = "%s,%s,%s,%s,%s\n" % (
                    sentence_id, start_position, length, " ".join(text), mention_id)
                # write(), not writelines(): the argument is a single string.
                filey.write(insert_statement)
        except Exception:
            # Best effort: record the offending row and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # propagate instead of being logged as a bad row.
            if efiley is None:
                efiley = open(error_file, "w")
            efiley.write("%s\n" % (line))
            print("Error with line %s" % line)
finally:
    # Release both handles even if the loop is interrupted.
    filey.close()
    if efiley is not None:
        efiley.close()
# PARSE SENTENCES HERE.
# Drop the newline from every input row.
lines = [s.strip("\n") for s in sentences]

# Not used by the visible code; kept in case later code references it.
insert_statements = []

# Handle for the error log.  It is opened lazily on the first failing row, so
# a clean run creates no file.  The handle itself is tracked (rather than
# testing whether error_file exists on disk) because a log left behind by an
# earlier run would otherwise skip the open() and leave the name unbound.
efiley = None

try:
    # Process each row of the input query.
    for line in lines:
        try:
            # Expected row layout, as implied by the parsing below:
            #   <sentence_id>,{<word>,<word>,...}
            # Double quotes are dropped and any leading/trailing "}" is
            # stripped before splitting on "{".  A row that does not yield
            # exactly two pieces raises ValueError and is logged below.
            sentence_id, words_str = line.strip().replace('"', '').strip('}').split('{')
            sentence_id = sentence_id.strip(",")
            # Parentheses are removed from each token before matching.
            words = [w.replace(")", "").replace("(", "") for w in words_str.split(",")]
            # NOTE(review): find_phrases is defined elsewhere; it is consumed
            # here as an iterable of (start_position, length, text) matches
            # against concept_names -- confirm against its definition.
            phrases = find_phrases(words, concept_names)

            # One CSV row per mention; mention_id is "<sentence_id>_<start>".
            for start_position, length, text in phrases:
                mention_id = '%s_%d' % (sentence_id, start_position)
                insert_statement = "%s,%s,%s,%s,%s\n" % (
                    sentence_id, start_position, length, " ".join(text), mention_id)
                # write(), not writelines(): the argument is a single string.
                filey.write(insert_statement)
        except Exception:
            # Best effort: record the offending row and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # propagate instead of being logged as a bad row.
            if efiley is None:
                efiley = open(error_file, "w")
            efiley.write("%s\n" % (line))
            print("Error with line %s" % line)
finally:
    # Release both handles even if the loop is interrupted; the error log
    # was previously left open by this fragment.
    filey.close()
    if efiley is not None:
        efiley.close()
# The rows have already been read into `sentences`; the input handle is done.
sentences_file.close()

# PARSE SENTENCES HERE.
# Drop the newline from every input row.
lines = [s.strip("\n") for s in sentences]

# Not used by the visible code; kept in case later code references it.
insert_statements = []

# Handle for the error log.  It is opened lazily on the first failing row, so
# a clean run creates no file.  The handle itself is tracked (rather than
# testing whether error_file exists on disk) because a log left behind by an
# earlier run would otherwise skip the open() and leave the name unbound,
# both in the handler and at the final close.
efiley = None

try:
    # Process each row of the input query.
    for line in lines:
        try:
            # Expected row layout, as implied by the parsing below:
            #   <sentence_id>,{<word>,<word>,...}
            # Double quotes are dropped and any leading/trailing "}" is
            # stripped before splitting on "{".  A row that does not yield
            # exactly two pieces raises ValueError and is logged below.
            sentence_id, words_str = line.strip().replace('"', '').strip('}').split('{')
            sentence_id = sentence_id.strip(",")
            # Parentheses are removed from each token before matching.
            words = [w.replace(")", "").replace("(", "") for w in words_str.split(",")]
            # NOTE(review): find_phrases is defined elsewhere; it is consumed
            # here as an iterable of (start_position, length, text) matches
            # against concept_names -- confirm against its definition.
            phrases = find_phrases(words, concept_names)

            # One CSV row per mention; mention_id is "<sentence_id>_<start>".
            for start_position, length, text in phrases:
                mention_id = '%s_%d' % (sentence_id, start_position)
                insert_statement = "%s,%s,%s,%s,%s\n" % (
                    sentence_id, start_position, length, " ".join(text), mention_id)
                # write(), not writelines(): the argument is a single string.
                filey.write(insert_statement)
        except Exception:
            # Best effort: record the offending row and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # propagate instead of being logged as a bad row.
            if efiley is None:
                efiley = open(error_file, "w")
            efiley.write("%s\n" % (line))
            print("Error with line %s" % line)
finally:
    # Release both handles even if the loop is interrupted.
    filey.close()
    if efiley is not None:
        efiley.close()
# Collect every name variant of every region into one flat list; this is the
# vocabulary handed to find_phrases() below.
regions = []
for r in region_dict:
    # extend() appends in place instead of rebuilding the list on every pass.
    regions.extend(r["variants"])

# Handle for the error log.  It is opened lazily on the first failing row, so
# a clean run creates no file.  The handle itself is tracked (rather than
# testing whether error_file exists on disk) because a log left behind by an
# earlier run would otherwise skip the open() and leave the name unbound,
# both in the handler and at the final close.
efiley = None

try:
    # Process each row of the input query.
    for line in lines:
        try:
            # Expected row layout, as implied by the parsing below:
            #   <sentence_id>,{<word>,<word>,...}
            # Double quotes are dropped and any leading/trailing "}" is
            # stripped before splitting on "{".  A row that does not yield
            # exactly two pieces raises ValueError and is logged below.
            sentence_id, words_str = line.strip().replace('"', "").strip("}").split("{")
            sentence_id = sentence_id.strip(",")
            # Parentheses are removed from each token before matching.
            words = [w.replace(")", "").replace("(", "") for w in words_str.split(",")]
            # NOTE(review): find_phrases is defined elsewhere; it is consumed
            # here as an iterable of (start_position, length, text) matches
            # against the region names -- confirm against its definition.
            phrases = find_phrases(words, regions)

            # One CSV row per mention; mention_id is "<sentence_id>_<start>".
            for start_position, length, text in phrases:
                mention_id = "%s_%d" % (sentence_id, start_position)
                insert_statement = "%s,%s,%s,%s,%s\n" % (
                    sentence_id, start_position, length, " ".join(text), mention_id)
                # write(), not writelines(): the argument is a single string.
                filey.write(insert_statement)
        except Exception:
            # Best effort: record the offending row and carry on.  Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # propagate instead of being logged as a bad row.
            if efiley is None:
                efiley = open(error_file, "w")
            efiley.write("%s\n" % (line))
            print("Error with line %s" % line)
finally:
    # Release both handles even if the loop is interrupted.
    filey.close()
    if efiley is not None:
        efiley.close()