# Switch for type-based features.
# NOTE(review): nothing in this part of the script reads the flag -- the type
# maps further down are loaded unconditionally; confirm it is used later.
FLAGS_USE_TYPE = True

# Despite its name, cur_dir ends up holding the PARENT of the working
# directory (dirname of getcwd), so the script expects to be launched from a
# sub-directory that is a sibling of data/.
working_dir = os.getcwd()
cur_dir = os.path.dirname(working_dir)

dataset = "wiki2"
print('dataset:%s' % dataset)

# The trailing slash is required: file names are appended to `folder` by
# plain string concatenation below.
intermediate_subdir = '/data/{}/intermediate/'.format(dataset)
folder = cur_dir + intermediate_subdir

# Wall-clock timing of the whole loading phase starts here.
start = time.time()
print('data folder: {}'.format(folder))

# Entity id <-> entity name maps.
print('loading eid and name maps')
entity_map_path = folder + 'entity2id.txt'
eid2ename, ename2eid = util.loadEidToEntityMap(entity_map_path)

# Entity id <-> skipgram pattern maps (reduced counts file).
print('loading eid and skipgram maps')
skipgram_counts_path = folder + 'reduced_eidSkipgramCounts.txt'
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(skipgram_counts_path)

# Strength keyed by (eid, skipgram).
# NOTE(review): the meaning of idx=-1 is defined by the util helper -- confirm.
print('loading skipgram strength maps')
skipgram_strength_path = folder + 'setexpan_eidSkipgram2TFIDFStrength.txt'
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    skipgram_strength_path, idx=-1)

# Entity id <-> type maps.
print('loading eid and type maps')
type_counts_path = folder + 'eidTypeCounts.txt'
eid2types, type2eids = util.loadFeaturesAndEidMap(type_counts_path)

# Strength keyed by (eid, type); same loader and idx as the skipgram strengths.
print('loading type strength maps')
type_strength_path = folder + 'eidType2TFIDFStrength.txt'
eidAndType2strength = util.loadWeightByEidAndFeatureMap(
    type_strength_path, idx=-1)

end = time.time()
elapsed_seconds = end - start
print("Finish loading all dataset, using %s seconds" % elapsed_seconds)

good_gold_set = {}
for filename in os.listdir('../data/eval/cleaned_set/'):
    with open('../data/eval/cleaned_set/' + filename, 'r') as fin:
        setname = filename.split('.')[0]
        data = fin.readlines()
        ents = []
        for line in data:
예제 #2
0
# When True, the entity-type maps and their strengths are loaded in addition
# to the skipgram features.
FLAGS_USE_TYPE = True

## Loading Corpus
data = "bc5"
print('dataset:%s' % data)

# Relative path: resolved against the process working directory. The trailing
# slash is required because file names are appended by string concatenation.
data_root = '../../data/'
folder = data_root + data + '/'

# Wall-clock timing of the whole loading phase starts here.
start = time.time()

# Entity id <-> entity name maps.
print('loading eid and name maps')
entity_map_path = folder + 'entity2id.txt'
eid2ename, ename2eid = util.loadEidToEntityMap(entity_map_path)

# Entity id <-> skipgram pattern maps.
print('loading eid and skipgram maps')
skipgram_counts_path = folder + 'eidSkipgramCounts.txt'
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(skipgram_counts_path)

# Strengths read from an (eid, feature, weight) file.
# NOTE(review): the meaning of idx=-1 is defined by the util helper -- confirm.
print('loading skipgram strength map')
skipgram_strength_path = folder + 'eidSkipgram2TFIDFStrength.txt'
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    skipgram_strength_path, idx=-1)

if FLAGS_USE_TYPE:
    # Entity id <-> type maps, loaded only when type features are enabled.
    print('loading eid and type maps')
    type_counts_path = folder + 'eidTypeCounts.txt'
    eid2types, type2eids = util.loadFeaturesAndEidMap(type_counts_path)

    # Type strengths, also an (eid, feature, weight) file.
    print('loading type strength map')
    type_strength_path = folder + 'eidType2TFIDFStrength.txt'
    eidAndType2strength = util.loadWeightByEidAndFeatureMap(
        type_strength_path, idx=-1)

end = time.time()
elapsed_seconds = end - start
print("Finish loading all dataset, using %s seconds" % elapsed_seconds)

## Start set expansion
# Entity type labels used by the expansion step that follows.
enttypes = ['CHEMICAL', 'DISEASE']