Exemplo n.º 1
0
    def import_infobox_properties(self):
        """Import infobox property histograms together with their samples.

        Streams ``swoct/dbpedia/infobox_properties_histogram`` through
        ``hdfs_open``, converts the first four tab-separated fields of every
        line to longs, and groups consecutive rows that share the same first
        three values.  For each group, inside its own
        ``transaction.commit_on_success()`` block, an ``InfoboxProperty``, a
        ``YagoClass`` and an ``InfoboxPropertyHistogram`` are saved, and the
        ``Instance`` whose md5 equals the fourth field of each row in the
        group is added to the histogram's ``sample`` relation.  The
        zero-based group index is printed before each group is processed.
        """
        def to_longs(raw_line):
            # Only the first four tab-separated fields are used.
            fields = raw_line.split('\t', 4)[:4]
            return tuple(long(field) for field in fields)

        source = hdfs_open('swoct/dbpedia/infobox_properties_histogram')
        rows = imap(to_longs, source)
        # groupby only merges *adjacent* rows whose first three values match.
        groups = groupby(rows, key=lambda row: row[:3])

        for index, (head, members) in enumerate(groups):
            print(index)
            with transaction.commit_on_success():
                infobox_property = InfoboxProperty(md5=head[0])
                infobox_property.save()
                yago_class = YagoClass(md5=head[1])
                yago_class.save()
                histogram = InfoboxPropertyHistogram(
                    infobox_property=infobox_property,
                    count=head[2],
                    yago_class=yago_class)
                histogram.save()
                for member in members:
                    instance = Instance.objects.get(md5=member[3])
                    histogram.sample.add(instance)
Exemplo n.º 2
0
    def import_infobox_properties_histogram(self):
        """Load infobox property histogram rows from the bundled TSV file.

        Reads ``data/infobox_properties_histogram.tsv`` located under the
        ``djity_cowst`` package directory.  The first three tab-separated
        fields of each line are parsed as property md5 (long), Yago class
        md5 (long) and count (int), and one ``InfoboxPropertyHistogram`` is
        saved per line.  Lines are consumed through ``chunkify`` in chunks
        of 10000, each chunk inside its own
        ``transaction.commit_on_success()`` block, and the running total of
        saved rows is printed after every chunk.

        Raises:
            ValueError: if a line has fewer than three tab-separated fields
                or one of the fields is not a valid integer.
        """
        chunk_size = 10000
        imported = 0
        path = djity_cowst.__path__[0] + '/data/infobox_properties_histogram.tsv'
        # ``with`` closes the file even when a row fails to parse or save.
        with open(path) as f:
            for chunk in chunkify(f, chunk_size):
                with transaction.commit_on_success():
                    for line in chunk:
                        props, yagos, count = line.split('\t', 3)[:3]
                        prop_md5 = long(props)
                        yago_md5 = long(yagos)
                        count = int(count)
                        # The property and class objects are only built as
                        # references here; just the histogram row is saved.
                        prop = InfoboxProperty(md5=prop_md5)
                        yago = YagoClass(md5=yago_md5)
                        h = InfoboxPropertyHistogram(
                            infobox_property=prop,
                            count=count,
                            yago_class=yago)
                        h.save()
                        imported += 1

                # Report rows actually saved rather than assuming every
                # chunk was full.
                print("%d infobox properties relations imported" % imported)