def main():
    """Register the file ``nameOfFile`` and its columns, then process each column's items.

    Uses the module-level ``nameOfFile``. A node is inserted for the file,
    the first row is read through ``CSVRead.readCSV`` (``choice`` 0-4) and
    every returned name is attached to the file with a "Column"
    relationship. For each column index, up to the number of "Column"
    relationships found on the file node, the column's support is fetched
    and one ``itemThread`` is run per key of that support.
    """
    Neo4jDrive.insertNode(nameOfFile)

    headers = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for header in headers:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", header)

    # The column count comes from the graph, not from len(headers).
    column_total = 0
    for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"):
        column_total += 1

    for column_index in range(column_total):
        column_support = CSVRead.getSupport(nameOfFile, column_index)
        value_total = CSVRead.numberOfItems(column_support)

        # Snapshot the keys before any worker is handed the mapping.
        for entry in list(column_support):
            worker = itemThread(
                entry, headers, column_index, column_support, value_total
            )
            worker.start()
            # NOTE(review): the flattened source does not show whether join()
            # sat inside this loop; it is kept inside, so workers run one at
            # a time -- confirm against the original layout.
            worker.join()
def run(data, tables, size):
    """Register the columns of every table, then run the three worker phases.

    Args:
        data: iterable of pieces where ``piece[0]`` is a row (indexable by
            column position) and ``piece[1]`` is the index into ``tables``
            of the table the row belongs to.
        tables: sequence of file names; each is read with
            ``CSVRead.readCSV(..., firstRow=True, choice=[0, 1])``.
        size: sequence indexed by table index; the entry is handed
            unchanged to every worker created for that table.

    Phases, in order:
        1. one ``ccThread`` per (row, column);
        2. one ``dmsThread`` per (row, ordered pair of distinct columns);
        3. one ``topDownThread`` per column name, sharing two locks and the
           set returned by ``Neo4jDrive.findAllCCNodes()``.
    """
    column_names = []  # column_names[i] -> stripped header names of tables[i]
    support = []       # support[i][j]   -> CSVRead.getSupport(tables[i], j)

    for table_name in tables:
        names = [
            c.strip()
            for c in CSVRead.readCSV(table_name, firstRow=True, choice=[0, 1])
        ]
        column_names.append(names)

        table_support = []
        for j, name in enumerate(names):
            relationship = Neo4jDrive.insertNodeAndRelationship(
                table_name, "Column", name
            )[0]
            # Tag both the column node and the relationship leading to it.
            node = Neo4jDrive.findNodeByName(name)
            node.properties['type'] = 'Column'
            node.push()
            relationship.properties['type'] = "Column"
            relationship.push()
            table_support.append(CSVRead.getSupport(table_name, j))
        support.append(table_support)

    # The original also computed an unused ``totalNumberOfValues`` from the
    # last table here; that read the loop variable after the loop and failed
    # when ``tables`` was empty, so it has been removed.

    hyplock = Lock()
    stypelock = Lock()

    # NOTE(review): the flattened source does not show whether join() sat
    # inside the loops below. Each worker is joined right after it starts,
    # which guarantees a phase is finished before the next begins -- confirm.

    # Phase 1: one ccThread per cell.
    for piece in data:
        row, table_index = piece[0], piece[1]
        names = column_names[table_index]
        for column in range(len(names)):
            worker = ccThread(
                row[column], names, column,
                support[table_index], size[table_index],
            )
            worker.start()
            worker.join()

    # Phase 2: one dmsThread per ordered pair of distinct columns in a row.
    for piece in data:
        row, table_index = piece[0], piece[1]
        names = column_names[table_index]
        for column in range(len(names)):
            for perm_column in range(len(names)):
                if perm_column == column:
                    continue
                worker = dmsThread(
                    row[column], row[perm_column], size[table_index],
                    names, column, perm_column,
                )
                worker.start()
                worker.join()

    # Phase 3: one topDownThread per column name, over the nodes reported
    # once phases 1 and 2 are complete.
    all_cc = set(Neo4jDrive.findAllCCNodes())
    for table_index, names in enumerate(column_names):
        for column_name in names:
            worker = topDownThread(
                column_name, hyplock, stypelock, all_cc, size[table_index]
            )
            worker.start()
            worker.join()
def main():
    """Register the file ``nameOfFile`` and its columns, then process each column's items.

    Uses the module-level ``nameOfFile``. A node is inserted for the file,
    the first row is read through ``CSVRead.readCSV`` (``choice`` 0-4) and
    every returned name is attached to the file with a "Column"
    relationship. For each column index, up to the number of "Column"
    relationships found on the file node, the column's support is fetched
    and one ``itemThread`` is run per key of that support.
    """
    Neo4jDrive.insertNode(nameOfFile)

    headers = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for header in headers:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", header)

    # The column count comes from the graph, not from len(headers).
    column_total = 0
    for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column"):
        column_total += 1

    for column_index in range(column_total):
        column_support = CSVRead.getSupport(nameOfFile, column_index)
        value_total = CSVRead.numberOfItems(column_support)

        # Snapshot the keys before any worker is handed the mapping.
        for entry in list(column_support):
            worker = itemThread(
                entry, headers, column_index, column_support, value_total
            )
            worker.start()
            # NOTE(review): the flattened source does not show whether join()
            # sat inside this loop; it is kept inside, so workers run one at
            # a time -- confirm against the original layout.
            worker.join()
def main():
    """Load the file ``nameOfFile`` into Neo4j and link its items to SPARQL results.

    Uses the module-level ``nameOfFile``. Steps:

    1. Insert a node for the file and attach every name returned by
       ``CSVRead.readCSV`` (first row, ``choice`` 0-4) with a "Column"
       relationship.
    2. For each column index (counted from the "Column" relationships on
       the file node) fetch the column's support and its total from
       ``CSVRead.numberOfItems``.
    3. For each item of the support: make sure an item node exists, store
       ``support[item]`` in its ``fvalue`` property, then for every row
       ``r`` returned by ``sparqlQuerypy.findBottomUp(item)`` create
       ``item -cc-> r[0]`` and ``r[0] -dd-> r[2]``, bump the ``incoming``
       counter of the ``r[2]`` node, and store ``support[item] / total`` on
       the "cc" relationship.

    Failures while linking one row are reported on stdout and do not stop
    the remaining rows (best effort, as in the original).
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_count = sum(
        1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    )
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        for item in support:
            # Get-or-create the item node, then record its support value.
            # NOTE(review): the flattened source does not show where the
            # ``if`` body ended; this reading updates ``fvalue`` for existing
            # nodes too -- confirm.
            node = Neo4jDrive.findNodeByName(item)
            if node is None:
                Neo4jDrive.insertNodeAndRelationship(
                    columnNames[column], 'dataItems', item
                )
                node = Neo4jDrive.findNodeByName(item)
            node.properties['fvalue'] = support[item]
            node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                except Exception:
                    # Without the "cc" relationship there is nothing to
                    # annotate. The original fell through and reused the
                    # previous row's result (or hit an unbound name on the
                    # first row).
                    print("%s %s %s" % (columnNames[column], 'cc', r[0]))
                    continue

                try:
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    type_node = Neo4jDrive.findNodeByName(r[2])
                    if type_node.properties['incoming'] is None:
                        type_node.properties['incoming'] = 1
                    else:
                        type_node.properties['incoming'] += 1
                    type_node.properties['type'] = 'type'
                    type_node.push()
                except Exception:
                    # Best effort: report it and still annotate the "cc"
                    # relationship created above.
                    print("%s %s %s" % (columnNames[column], 'cc', r[0]))

                cc_relationship = rel_data[0]
                cc_relationship.properties['rel_class'] = 'cc'
                # * 1.0 forces float division under Python 2.
                cc_relationship.properties['support'] = (
                    support[item] / (totalNumberOfValues * 1.0)
                )
                cc_relationship.push()
def main():
    """Load the file ``nameOfFile`` into Neo4j and link its items to SPARQL results.

    Uses the module-level ``nameOfFile``. Steps:

    1. Insert a node for the file and attach every name returned by
       ``CSVRead.readCSV`` (first row, ``choice`` 0-4) with a "Column"
       relationship.
    2. For each column index (counted from the "Column" relationships on
       the file node) fetch the column's support and its total from
       ``CSVRead.numberOfItems``.
    3. For each item of the support: make sure an item node exists, store
       ``support[item]`` in its ``fvalue`` property, then for every row
       ``r`` returned by ``sparqlQuerypy.findBottomUp(item)`` create
       ``item -cc-> r[0]`` and ``r[0] -dd-> r[2]``, bump the ``incoming``
       counter of the ``r[2]`` node, and store ``support[item] / total`` on
       the "cc" relationship.

    Failures while linking one row are reported on stdout and do not stop
    the remaining rows (best effort, as in the original).
    """
    Neo4jDrive.insertNode(nameOfFile)
    columnNames = CSVRead.readCSV(nameOfFile, firstRow=True, choice=[0, 1, 2, 3, 4])
    for name in columnNames:
        Neo4jDrive.insertNodeAndRelationship(nameOfFile, "Column", name)

    column_count = sum(
        1 for _ in Neo4jDrive.findRelationshipsOfNode(nameOfFile, "Column")
    )
    for column in range(column_count):
        support = CSVRead.getSupport(nameOfFile, column)
        totalNumberOfValues = CSVRead.numberOfItems(support)

        for item in support:
            # Get-or-create the item node, then record its support value.
            # NOTE(review): the flattened source does not show where the
            # ``if`` body ended; this reading updates ``fvalue`` for existing
            # nodes too -- confirm.
            node = Neo4jDrive.findNodeByName(item)
            if node is None:
                Neo4jDrive.insertNodeAndRelationship(
                    columnNames[column], "dataItems", item
                )
                node = Neo4jDrive.findNodeByName(item)
            node.properties["fvalue"] = support[item]
            node.push()

            for r in sparqlQuerypy.findBottomUp(item):
                try:
                    rel_data = Neo4jDrive.insertNodeAndRelationship(item, "cc", r[0])
                except Exception:
                    # Without the "cc" relationship there is nothing to
                    # annotate. The original fell through and reused the
                    # previous row's result (or hit an unbound name on the
                    # first row).
                    print("%s %s %s" % (columnNames[column], "cc", r[0]))
                    continue

                try:
                    Neo4jDrive.insertNodeAndRelationship(r[0], "dd", r[2])
                    type_node = Neo4jDrive.findNodeByName(r[2])
                    if type_node.properties["incoming"] is None:
                        type_node.properties["incoming"] = 1
                    else:
                        type_node.properties["incoming"] += 1
                    type_node.properties["type"] = "type"
                    type_node.push()
                except Exception:
                    # Best effort: report it and still annotate the "cc"
                    # relationship created above.
                    print("%s %s %s" % (columnNames[column], "cc", r[0]))

                cc_relationship = rel_data[0]
                cc_relationship.properties["rel_class"] = "cc"
                # * 1.0 forces float division under Python 2.
                cc_relationship.properties["support"] = (
                    support[item] / (totalNumberOfValues * 1.0)
                )
                cc_relationship.push()