Exemplo n.º 1
0
    def ccScores(self):
        """Compute 'cc' scores for every column and push them to Neo4j.

        Works in three passes:

        1. For each column, every cell value is looked up with
           ``sparqlQuerypy.findBottomUp``; the first field of each returned
           row (``r[0]``, presumably a class identifier -- confirm against
           the query helper) gets its ``incoming`` counter bumped and its
           ``cc`` value refreshed to ``incoming / totalSize``.
        2. Each (column, class) pair is written to Neo4j as a 'cc'
           relationship whose ``fk`` property carries that ``cc`` value.
        3. Each class node receives ``ccs``, the mean ``fk`` over the links
           returned by ``Neo4jDrive.findIncomingCCLinks``.

        Reads ``self.data``, ``self.columnNames`` and ``self.totalSize``.
        ``self.relationships`` is mutated in place: the entry of every
        processed column is reset and rebuilt.
        """
        data = self.data
        columnNames = self.columnNames
        totalSize = self.totalSize
        relationships = self.relationships

        # Pass 1: count, per column, how often each class is returned.
        for i, column in enumerate(columnNames):
            columnRelations = relationships[column] = {}
            for element in data:
                item = element[i].strip()
                rlist = sparqlQuerypy.findBottomUp(item)
                # Single pre-formatted string: prints the same text on
                # Python 2 and Python 3.
                print('number of nodes for %s  is  %s' % (item, len(rlist)))
                for r in rlist:
                    entry = columnRelations.setdefault(r[0], {})
                    entry['name'] = 'cc'
                    entry['incoming'] = entry.get('incoming', 0) + 1
                    # * 1.0 forces float division even for an int totalSize.
                    entry['cc'] = entry['incoming'] * 1.0 / totalSize

        # Pass 2: push the relations and nodes to Neo4j, remembering every
        # class seen so that pass 3 visits each one exactly once.
        classSet = set()
        for column in columnNames:
            for className, entry in relationships[column].items():
                classSet.add(className)
                rel_data = Neo4jDrive.insertNodeAndRelationship(column, 'cc', className)[0]
                rel_data.properties['rel_class'] = 'cc'
                rel_data.properties['fk'] = entry['cc']
                rel_data.push()

        # Pass 3: CCS = sum(fk) / count(fk) over the incoming cc edges.
        # Must run after pass 2 so that all edges of a class already exist.
        for className in classSet:
            print(className)
            cumulative = 0
            linkCount = 0
            for link in Neo4jDrive.findIncomingCCLinks(className):
                cumulative += link[0].properties['fk']
                linkCount += 1
            node = Neo4jDrive.findNodeByName(className)
            # Guard the mean: a class without incoming links gets 0.0
            # instead of raising ZeroDivisionError.
            node.properties['ccs'] = cumulative * 1.0 / linkCount if linkCount else 0.0
            node.properties['type'] = 'cc'
            node.push()
Exemplo n.º 2
0
    def run(self):
        """Link this item's column to every class found for the item.

        ``self.item`` is looked up with ``sparqlQuerypy.findBottomUp``; for
        each returned row the third field (``r[2]``, presumably a class
        URI -- confirm against the query helper) is connected to the column
        ``self.columnNames[self.column]`` by a 'cc' relationship.

        Per relationship:
          * ``incoming`` counts occurrences; a relationship with no
            ``incoming`` yet starts at 1, and at most ONE already-counted
            relationship is incremented per call.
          * ``ccs`` is ``incoming / self.totalNumberOfValues``.

        Per class node, ``ccs`` is the mean ``ccs`` over the links returned
        by ``Neo4jDrive.findIncomingCCLinks`` (0 when there are none).
        """
        totalNumberOfValues = self.totalNumberOfValues * 1.0  # force float division
        column = self.column
        columnNames = self.columnNames
        item = self.item.strip()
        rlist = sparqlQuerypy.findBottomUp(item)

        # Single pre-formatted strings: same output on Python 2 and 3.
        print('number of nodes for %s  is  %s' % (item, len(rlist)))
        log.write('number of nodes for' + str(item) + " is " + str(len(rlist)) + '\n')

        incremented = False  # becomes True once one existing edge was bumped
        for r in rlist:
            className = r[2]
            rel_data = Neo4jDrive.insertNodeAndRelationship(columnNames[column], "cc", className)[0]
            node = Neo4jDrive.findNodeByName(className)
            if className == 'http://dbpedia.org/ontology/PopulatedPlace':
                # Debug trace kept from the original implementation.
                print('%s Happening' % (columnNames[column],))
                print('potato %s' % (rel_data,))

            if rel_data.properties['incoming'] is None:
                # No counter on this relationship yet: first occurrence.
                rel_data.properties['incoming'] = 1
                rel_data.properties['ccs'] = 1 / totalNumberOfValues
                rel_data.push()
            elif not incremented:
                rel_data.properties['incoming'] += 1
                # The counter lives on the relationship, so the score is
                # derived from rel_data rather than from the node.
                rel_data.properties['ccs'] = rel_data.properties['incoming'] / totalNumberOfValues
                # Push only after 'ccs' is updated, so the aggregation below
                # does not pick up a stale edge score (assumes
                # findIncomingCCLinks reads from the database -- confirm).
                rel_data.push()
                incremented = True

            # Node score: mean of the 'ccs' values of its incoming cc edges.
            ccsTotal = 0
            numberOfLinks = 0
            for link in Neo4jDrive.findIncomingCCLinks(className):
                ccsTotal += link[0].properties['ccs']
                numberOfLinks += 1
            if numberOfLinks > 0:
                ccsTotal /= numberOfLinks
            node.properties['type'] = 'cc'
            node.properties['ccs'] = ccsTotal
            node.push()

            rel_data.properties['rel_class'] = 'cc'
            rel_data.push()