示例#1
0
# Flatten the name variants of every region into one list to match against.
regions = []
for r in region_dict:
    # extend() grows the list in place; `regions = regions + ...` re-copied
    # the whole accumulated list on every iteration.
    regions.extend(r["variants"])

# Handle of the error log. It is opened lazily on the first failing row, so
# the file is only created when at least one row could not be processed.
efiley = None

# Process each row of the input query.
for line in lines:
    try:
        # Drop double quotes and the trailing "}", then split on "{" into the
        # sentence id and the comma-separated word list.
        sentence_id, words_str = line.strip().replace('"',
                                                      '').strip('}').split('{')
        sentence_id = sentence_id.strip(",")
        words = words_str.split(",")
        words = [w.replace(")", "").replace("(", "") for w in words]
        # find_phrases is defined outside this snippet; its result is consumed
        # below as an iterable of (start_position, length, text) tuples.
        phrases = find_phrases(words, regions)
        # Write one comma-separated mention row per phrase.
        for start_position, length, text in phrases:
            mention_id = '%s_%d' % (sentence_id, start_position)
            insert_statement = "%s,%s,%s,%s,%s\n" % (
                sentence_id, start_position, length, " ".join(text),
                mention_id)
            filey.write(insert_statement)
    except Exception:
        # Best effort: log the offending row and continue with the next one.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        if efiley is None:
            # Append mode creates the file when it is missing and keeps rows
            # logged by an earlier run. The previous "open only if the file
            # does not exist" check left `efiley` unbound (NameError) whenever
            # the file was already on disk.
            efiley = open(error_file, "a")
        efiley.write("%s\n" % (line))
        print("Error with line %s" % line)

filey.close()
if os.path.exists(error_file):
示例#2
0
# Strip the trailing newline from every input sentence row.
lines = [s.strip("\n") for s in sentences]

# Not populated in this snippet; kept because it is a module-level name that
# code outside this view may still read.
insert_statements = []

# Handle of the error log. It is opened lazily on the first failing row, so
# the file is only created when at least one row could not be processed.
efiley = None

# Process each row of the input query.
for line in lines:
    try:
        # Drop double quotes and the trailing "}", then split on "{" into the
        # sentence id and the comma-separated word list.
        sentence_id, words_str = line.strip().replace('"',
                                                      '').strip('}').split('{')
        sentence_id = sentence_id.strip(",")
        words = words_str.split(",")
        words = [w.replace(")", "").replace("(", "") for w in words]
        # find_phrases is defined outside this snippet; its result is consumed
        # below as an iterable of (start_position, length, text) tuples.
        phrases = find_phrases(words, concept_names)
        # Write one comma-separated mention row per phrase.
        for start_position, length, text in phrases:
            mention_id = '%s_%d' % (sentence_id, start_position)
            insert_statement = "%s,%s,%s,%s,%s\n" % (
                sentence_id, start_position, length, " ".join(text),
                mention_id)
            filey.write(insert_statement)
    except Exception:
        # Best effort: log the offending row and continue with the next one.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        if efiley is None:
            # Append mode creates the file when it is missing and keeps rows
            # logged by an earlier run. The previous "open only if the file
            # does not exist" check left `efiley` unbound (NameError) whenever
            # the file was already on disk.
            efiley = open(error_file, "a")
        efiley.write("%s\n" % (line))
        print("Error with line %s" % line)

filey.close()
示例#3
0
sentences_file.close()

# Strip the trailing newline from every input sentence row.
lines = [s.strip("\n") for s in sentences]

# Not populated in this snippet; kept because it is a module-level name that
# code outside this view may still read.
insert_statements = []

# Handle of the error log. It is opened lazily on the first failing row, so
# the file is only created when at least one row could not be processed.
efiley = None

# Process each row of the input query.
for line in lines:
    try:
        # Drop double quotes and the trailing "}", then split on "{" into the
        # sentence id and the comma-separated word list.
        sentence_id, words_str = line.strip().replace('"', '').strip('}').split('{')
        sentence_id = sentence_id.strip(",")
        words = words_str.split(",")
        words = [w.replace(")", "").replace("(", "") for w in words]
        # find_phrases is defined outside this snippet; its result is consumed
        # below as an iterable of (start_position, length, text) tuples.
        phrases = find_phrases(words, concept_names)
        # Write one comma-separated mention row per phrase.
        for start_position, length, text in phrases:
            mention_id = '%s_%d' % (sentence_id, start_position)
            insert_statement = "%s,%s,%s,%s,%s\n" % (
                sentence_id, start_position, length, " ".join(text), mention_id)
            filey.write(insert_statement)
    except Exception:
        # Best effort: log the offending row and continue with the next one.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        if efiley is None:
            # Append mode creates the file when it is missing and keeps rows
            # logged by an earlier run. The previous "open only if the file
            # does not exist" check left `efiley` unbound (NameError) whenever
            # the file was already on disk.
            efiley = open(error_file, "a")
        efiley.write("%s\n" % (line))
        print("Error with line %s" % line)

filey.close()

# Close the error log only if this run actually opened it. Checking for the
# file on disk instead raised NameError when the file was left over from an
# earlier run and no row failed this time.
if efiley is not None:
    efiley.close()
示例#4
0
# Flatten the name variants of every region into one list to match against.
regions = []
for r in region_dict:
    # extend() grows the list in place; `regions = regions + ...` re-copied
    # the whole accumulated list on every iteration.
    regions.extend(r["variants"])

# Handle of the error log. It is opened lazily on the first failing row, so
# the file is only created when at least one row could not be processed.
efiley = None

# Process each row of the input query.
for line in lines:
    try:
        # Drop double quotes and the trailing "}", then split on "{" into the
        # sentence id and the comma-separated word list.
        sentence_id, words_str = line.strip().replace('"', "").strip("}").split("{")
        sentence_id = sentence_id.strip(",")
        words = words_str.split(",")
        words = [w.replace(")", "").replace("(", "") for w in words]
        # find_phrases is defined outside this snippet; its result is consumed
        # below as an iterable of (start_position, length, text) tuples.
        phrases = find_phrases(words, regions)
        # Write one comma-separated mention row per phrase.
        for start_position, length, text in phrases:
            mention_id = "%s_%d" % (sentence_id, start_position)
            insert_statement = "%s,%s,%s,%s,%s\n" % (
                sentence_id, start_position, length, " ".join(text), mention_id)
            filey.write(insert_statement)
    except Exception:
        # Best effort: log the offending row and continue with the next one.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        if efiley is None:
            # Append mode creates the file when it is missing and keeps rows
            # logged by an earlier run. The previous "open only if the file
            # does not exist" check left `efiley` unbound (NameError) whenever
            # the file was already on disk.
            efiley = open(error_file, "a")
        efiley.write("%s\n" % (line))
        print("Error with line %s" % line)

filey.close()
# Close the error log only if this run actually opened it. Checking for the
# file on disk instead raised NameError when the file was left over from an
# earlier run and no row failed this time.
if efiley is not None:
    efiley.close()