def import_infobox_properties(self):
    """Import infobox property histograms and their sample instances.

    Reads tab-separated rows from the HDFS file
    ``swoct/dbpedia/infobox_properties_histogram``. The first four columns
    of every row are parsed as integers and used, in order, as: the
    ``InfoboxProperty`` md5, the ``YagoClass`` md5, the histogram count and
    the md5 of a sample ``Instance``.

    Consecutive rows sharing the same first three columns form one group
    (the input is presumably sorted on those columns -- confirm against
    the job producing the file). For each group, inside its own
    transaction, an ``InfoboxProperty``, a ``YagoClass`` and an
    ``InfoboxPropertyHistogram`` are saved, then every sample instance of
    the group is looked up by md5 and attached to the histogram.

    The zero-based index of each group is printed before it is processed.
    """
    def to_row(raw_line):
        # Only the first four tab-separated columns are of interest.
        columns = raw_line.split('\t', 4)[:4]
        return tuple(long(column) for column in columns)

    def histogram_key(row):
        # (property md5, yago class md5, count) identifies one histogram.
        return row[:3]

    source = hdfs_open('swoct/dbpedia/infobox_properties_histogram')
    rows = (to_row(raw_line) for raw_line in source)
    grouped = groupby(rows, key=histogram_key)
    for index, (group_key, samples) in enumerate(grouped):
        print(index)
        with transaction.commit_on_success():
            infobox_property = InfoboxProperty(md5=group_key[0])
            infobox_property.save()
            yago_class = YagoClass(md5=group_key[1])
            yago_class.save()
            histogram = InfoboxPropertyHistogram(
                infobox_property=infobox_property,
                count=group_key[2],
                yago_class=yago_class)
            histogram.save()
            # The fourth column of each grouped row is a sample instance md5.
            for row in samples:
                histogram.sample.add(Instance.objects.get(md5=row[3]))
def import_infobox_properties_histogram(self):
    """Import infobox property histograms from the bundled TSV file.

    Reads ``data/infobox_properties_histogram.tsv`` from the
    ``djity_cowst`` package directory. The first three tab-separated
    columns of each line are the ``InfoboxProperty`` md5, the
    ``YagoClass`` md5 and the count. One ``InfoboxPropertyHistogram`` is
    saved per line.

    Lines are processed in chunks of 10000 (as produced by ``chunkify``),
    each chunk in its own transaction. After each chunk the running
    number of imported rows is printed.

    Raises:
        ValueError: if a line has fewer than three columns or a column is
            not a valid integer.
    """
    path = djity_cowst.__path__[0] + '/data/infobox_properties_histogram.tsv'
    imported = 0
    # The context manager guarantees the file is closed, even on error.
    with open(path) as f:
        for chunk in chunkify(f, 10000):
            with transaction.commit_on_success():
                for line in chunk:
                    props, yagos, count = line.split('\t', 3)[:3]
                    prop_md5 = long(props)
                    yago_md5 = long(yagos)
                    count = int(count)
                    # NOTE(review): the property and class objects are not
                    # saved here; presumably their rows already exist and
                    # md5 is the key the histogram references -- confirm.
                    prop = InfoboxProperty(md5=prop_md5)
                    yago = YagoClass(md5=yago_md5)
                    histogram = InfoboxPropertyHistogram(
                        infobox_property=prop,
                        count=count,
                        yago_class=yago)
                    histogram.save()
                    imported += 1
            # Report rows actually saved; a chunk may hold fewer than 10000
            # lines, so a multiple of the chunk size would overstate it.
            print("%d infobox properties relations imported" % imported)