예제 #1
0
def load_data(input_file):
    """Parse a whitespace-separated pair file into a list of Documents.

    Each non-blank line is split on whitespace.  Field 0 is the document
    ID; fields 1-5 and 6-10 are passed to ``Entity`` as
    ``Entity(f[1], (f[2], f[3]), f[4], f[5])`` for the first and second
    entity respectively.  When a line has exactly 12 fields, field 11 is
    set as the pair's label.  Consecutive lines sharing a document ID are
    collected into the same ``Document``.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A list of ``Document`` objects in file order.  The list starts
        with the placeholder ``Document()`` created before reading
        whenever the first line's ID differs from that placeholder's
        ``docID`` (presumably always, assuming the default ``docID`` is
        never a real ID -- confirm against ``Document``).  This is kept
        as-is so existing callers that skip the first element keep working.

    Raises:
        IndexError: If a non-blank line has fewer than 11 fields.
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part-way through parsing.
    with open(input_file, 'r') as raw_data:
        doc_list = []
        doc = Document()
        # Iterate the file lazily instead of materialising every line.
        for rline in raw_data:
            if not rline.strip():
                continue  # skip blank lines

            entry = rline.split()
            docID = entry[0]

            # A change of ID closes the current document and opens a new one.
            if docID != doc.docID:
                doc_list.append(doc)
                doc = Document(docID)

            first = Entity(entry[1], (entry[2], entry[3]), entry[4], entry[5])
            second = Entity(entry[6], (entry[7], entry[8]), entry[9], entry[10])
            pair = Pair(first, second)
            # The label column is optional; it is present only on 12-field lines.
            if len(entry) == 12:
                pair.set_label(entry[11])
            doc.add_pair(pair)

        # Flush the document that was still being filled at end of file.
        doc_list.append(doc)
    return doc_list
예제 #2
0
def load_data(input_file):
    """Parse a whitespace-separated pair file into a list of Documents.

    Each non-blank line is split on whitespace.  A line with exactly 14
    fields carries a label in field 0 and every other field is shifted
    right by one; any other line is read without a label (it needs at
    least 13 fields).  With ``i`` as that shift (0 or 1):

    * field ``i`` is the document ID;
    * the first entity is
      ``Entity(f[i+1], (f[i+2], f[i+3]), f[i+4], f[i+5], f[i+6])``;
    * the second entity is
      ``Entity(f[i+7], (f[i+8], f[i+9]), f[i+10], f[i+11], f[i+12])``.

    For every entity, a per-document dictionary records
    ``f[i+5] -> (f[i+1], f[i+2])`` (respectively
    ``f[i+11] -> (f[i+7], f[i+8])``); later lines overwrite earlier
    entries with the same key.  The dictionary is handed to the document
    via ``set_ne_dict`` when the document is finished.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A list of ``Document`` objects in file order.  The list starts
        with the placeholder ``Document()`` created before reading (given
        an empty dictionary) whenever the first line's ID differs from
        that placeholder's ``docID`` -- presumably always; confirm against
        ``Document``.  Kept as-is for backward compatibility.

    Raises:
        IndexError: If a non-blank line has too few fields.
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part-way through parsing.
    with open(input_file, 'r') as raw_data:
        doc_list = []
        doc = Document()
        ne_dict = {}
        # Iterate the file lazily instead of materialising every line.
        for rline in raw_data:
            if not rline.strip():
                continue  # skip blank lines

            entry = rline.split()
            # Labelled lines have one extra leading column.
            i = 1 if len(entry) == 14 else 0
            docID = entry[i]

            # New document: store the dictionary built so far on the
            # finished document, then start fresh.
            if docID != doc.docID:
                doc.set_ne_dict(ne_dict)
                doc_list.append(doc)
                ne_dict = {}
                doc = Document(docID)

            first = Entity(entry[i+1], (entry[i+2], entry[i+3]),
                           entry[i+4], entry[i+5], entry[i+6])
            ne_dict[entry[i+5]] = (entry[i+1], entry[i+2])
            second = Entity(entry[i+7], (entry[i+8], entry[i+9]),
                            entry[i+10], entry[i+11], entry[i+12])
            ne_dict[entry[i+11]] = (entry[i+7], entry[i+8])

            pair = Pair(first, second)
            if i:
                # The label lives in the extra leading column.
                pair.set_label(entry[0])
            doc.add_pair(pair)

        # Flush the document that was still being filled at end of file.
        doc.set_ne_dict(ne_dict)
        doc_list.append(doc)
    return doc_list