rel += 1
            ap += rel / idx
    return ap / len(truth)


# ---- Run configuration ----
FLAGS_USE_TYPE = True
dataset = "wiki2"
# Data is looked up under the *parent* of the current working directory, so
# the resolved paths depend on where the script is launched from.
cur_dir = os.path.dirname(os.getcwd())
print('dataset:%s' % dataset)
folder = cur_dir + '/data/{}/intermediate/'.format(dataset)

# ---- Load every lookup table, timing the whole phase ----
start = time.time()
print('data folder: {}'.format(folder))

# `folder` already ends with '/', so os.path.join yields exactly
# folder + <file name> for each of the paths below.
print('loading eid and name maps')
eid2ename, ename2eid = util.loadEidToEntityMap(
    os.path.join(folder, 'entity2id.txt'))

print('loading eid and skipgram maps')
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(
    os.path.join(folder, 'reduced_eidSkipgramCounts.txt'))

print('loading skipgram strength maps')
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    os.path.join(folder, 'setexpan_eidSkipgram2TFIDFStrength.txt'),
    idx=-1)

# NOTE(review): the type maps are loaded unconditionally here;
# FLAGS_USE_TYPE is not consulted in this section.
print('loading eid and type maps')
eid2types, type2eids = util.loadFeaturesAndEidMap(
    os.path.join(folder, 'eidTypeCounts.txt'))

print('loading type strength maps')
eidAndType2strength = util.loadWeightByEidAndFeatureMap(
    os.path.join(folder, 'eidType2TFIDFStrength.txt'),
    idx=-1)

end = time.time()
print("Finish loading all dataset, using %s seconds" % (end - start))

# Container for the gold-standard evaluation sets.
# NOTE(review): presumably maps set name -> gold entities and is populated
# inside the loop below — TODO confirm against the rest of the loop body.
good_gold_set = {}
# Visit every file in the cleaned evaluation-set directory (the path is
# relative to the current working directory).
for filename in os.listdir('../data/eval/cleaned_set/'):
    with open('../data/eval/cleaned_set/' + filename, 'r') as fin:
        # The set name is the part of the file name before the first '.'.
        setname = filename.split('.')[0]
Exemplo n.º 2
0
import random

# Directory holding this script; every data path below is anchored to it.
curr_dir = os.path.dirname(os.path.realpath(__file__))

# ---- Global switches ----
# When False, the entity-type maps are not loaded at all.
FLAGS_USE_TYPE = False

# ---- Corpus loading ----
data = "ap89"
print('dataset:%s' % data)
folder = '/../../data/' + data + '/intermediate/'
# Common prefix of every intermediate file (script dir + relative folder).
intermediate_dir = curr_dir + folder
start = time.time()

print('loading eid and name maps')
eid2ename, ename2eid = util.loadEidToEntityMap(
    intermediate_dir + 'entity2id.txt')

print('loading eid and skipgram maps')
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(
    intermediate_dir + 'eidSkipgramCounts.txt')

print('loading skipgram strength map')
# File of (eid, feature, weight) records.
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    intermediate_dir + 'eidSkipgram2TFIDFStrength.txt', idx=-1)

if FLAGS_USE_TYPE:
    print('loading eid and type maps')
    eid2types, type2eids = util.loadFeaturesAndEidMap(
        intermediate_dir + 'eidTypeCounts.txt')

    print('loading type strength map')
    # File of (eid, feature, weight) records.
    eidAndType2strength = util.loadWeightByEidAndFeatureMap(
        intermediate_dir + 'eidType2TFIDFStrength.txt', idx=-1)

end = time.time()
print("Finish loading all dataset, using %s seconds" % (end - start))

## Start set expansion

# Read good_gold_set.json (located relative to this script's directory).
# The context manager closes the file handle as soon as parsing finishes,
# instead of leaving it open for the rest of the run; `fin` stays bound
# (closed) afterwards, and `gold_sets` holds the parsed JSON content.
with open(curr_dir + "/../../data/queries/good_gold_set.json") as fin:
    gold_sets = json.load(fin)