def load_data(input_file):
    """Parse a whitespace-separated pair file into a list of Documents.

    Every non-blank line is split on whitespace. Field 0 is the document
    id, fields 1-5 are passed to ``Entity`` for the first entity and
    fields 6-10 for the second entity (fields 2-3 and 7-8 are handed over
    as a 2-tuple). When a line has exactly 12 fields, field 11 is set as
    the pair's label.

    Consecutive lines with the same document id are collected into one
    ``Document``; a new ``Document`` is started whenever the id differs
    from the current one.

    NOTE(review): another ``load_data`` is defined later in this file and
    rebinds the name -- confirm whether this version is still needed.

    :param input_file: path of the text file to read.
    :returns: list of ``Document`` objects in file order. The placeholder
        ``Document()`` created before reading is also part of the list
        (first element once a line with a different id is seen, or the
        only element when the file has no data lines).
    :raises IndexError: if a non-blank line has fewer than 11 fields.
    """
    doc_list = []
    doc = Document()
    # The context manager closes the file even if a malformed line raises.
    with open(input_file, 'r') as raw_data:
        for rline in raw_data:
            if not rline.strip():
                continue  # skip blank lines
            entry = rline.split()
            docID = entry[0]
            if docID != doc.docID:
                # Document boundary: store the finished document and start a
                # new one. On the first data line this stores the initial
                # placeholder Document (presumably its docID never matches a
                # real id -- TODO confirm); kept for backward compatibility.
                doc_list.append(doc)
                doc = Document(docID)
            first = Entity(entry[1], (entry[2], entry[3]), entry[4], entry[5])
            second = Entity(entry[6], (entry[7], entry[8]), entry[9], entry[10])
            pair = Pair(first, second)
            if len(entry) == 12:
                # A 12th field, when present, is the label of the pair.
                pair.set_label(entry[11])
            doc.add_pair(pair)
    # Store the document that was still being filled at end of file.
    doc_list.append(doc)
    return doc_list
def load_data(input_file):
    """Parse a whitespace-separated pair file into a list of Documents.

    Every non-blank line is split on whitespace. A line with exactly 14
    fields carries a label in field 0 and all other fields are shifted by
    one; any other line is read without a label and with no shift. With
    ``i`` being that shift (0 or 1):

    * field ``i`` is the document id,
    * fields ``i+1`` .. ``i+6`` are passed to ``Entity`` for the first
      entity and fields ``i+7`` .. ``i+12`` for the second one (fields
      ``i+2, i+3`` and ``i+8, i+9`` are handed over as a 2-tuple).

    For each document a dictionary is built that maps field ``i+5``
    (resp. ``i+11``) to the tuple of fields ``(i+1, i+2)`` (resp.
    ``(i+7, i+8)``); a later line with the same key overwrites the
    earlier value. The dictionary is attached with ``set_ne_dict`` when
    the document is finished.

    Consecutive lines with the same document id are collected into one
    ``Document``; a new ``Document`` is started whenever the id differs
    from the current one.

    :param input_file: path of the text file to read.
    :returns: list of ``Document`` objects in file order. The placeholder
        ``Document()`` created before reading is also part of the list
        (first element once a line with a different id is seen, or the
        only element when the file has no data lines).
    :raises IndexError: if a non-blank line has fewer fields than the
        layout above requires.
    """
    doc_list = []
    doc = Document()
    ne_dict = {}
    # The context manager closes the file even if a malformed line raises.
    with open(input_file, 'r') as raw_data:
        for rline in raw_data:
            if not rline.strip():
                continue  # skip blank lines
            entry = rline.split()
            # Labelled lines have 14 fields: the label comes first and
            # shifts every other field by one position.
            i = 1 if len(entry) == 14 else 0
            docID = entry[i]
            if docID != doc.docID:
                # Document boundary: attach the dictionary collected so far,
                # store the finished document and start afresh. On the first
                # data line this stores the initial placeholder Document
                # with an empty dictionary; kept for backward compatibility.
                doc.set_ne_dict(ne_dict)
                doc_list.append(doc)
                ne_dict = {}
                doc = Document(docID)
            first = Entity(entry[i + 1], (entry[i + 2], entry[i + 3]),
                           entry[i + 4], entry[i + 5], entry[i + 6])
            ne_dict[entry[i + 5]] = (entry[i + 1], entry[i + 2])
            second = Entity(entry[i + 7], (entry[i + 8], entry[i + 9]),
                            entry[i + 10], entry[i + 11], entry[i + 12])
            ne_dict[entry[i + 11]] = (entry[i + 7], entry[i + 8])
            pair = Pair(first, second)
            if i:
                # Only labelled (14-field) lines provide a label.
                pair.set_label(entry[0])
            doc.add_pair(pair)
    # Finish the document that was still being filled at end of file.
    doc.set_ne_dict(ne_dict)
    doc_list.append(doc)
    return doc_list