Example #1
    def getYear_DBIS_filetered(self,paperDict,PaperRawdict,filterDict):
        outputlist=[]
        outputlist_null=[]
        counter=0
        counter_total=0

        for key in paperDict:
            if key in filterDict: continue
            papername=PaperRawdict[key]
            if papername[0]=='"' and papername[-1]=='"':
                papername=papername[1:-1]
            papername= self.connect.escape(papername)

            self.cursor.execute(self.selectStr_easy % (papername))
            resultlist=self.cursor.fetchall()
            if len(resultlist)>0:
                print resultlist
                year=resultlist[0][1]
                outputlist.append([key,year])
                counter+=1
            else:
                outputlist_null.append([key,papername])
            counter_total+=1
            print str(counter_total)+'/'+str(len(paperDict)-len(filterDict))+' '+papername

        print counter
        util.write_csv_inlist('paperwithyear_dbis.csv',outputlist)
        util.write_csv_inlist('paperwithnoresults.csv',outputlist_null)
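Every one of these examples ends by handing a list of rows to util.write_csv_inlist. The helper's own source is not shown on this page; the sketch below is only an assumption about what it likely does (plain csv output, Python 2 style to match the surrounding code, and an optional header argument as seen in Example #10).

import csv

def write_csv_inlist(filename, inlist, header=None):
    # Hypothetical sketch: each element of `inlist` becomes one CSV row,
    # with an optional header row written first. 'wb' matches the
    # Python 2 idiom used throughout these examples.
    with open(filename, 'wb') as f:
        writer = csv.writer(f)
        if header is not None:
            writer.writerow(header)
        for row in inlist:
            # Example #11 passes a flat list of keys, so wrap scalars into rows
            writer.writerow(row if isinstance(row, (list, tuple)) else [row])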
Example #2
    def output(self):
        outputlist = []
        for year in self.yearDict:
            confDict_oneyear = self.yearDict[year]
            for conf in confDict_oneyear:
                outputlist.append([year, conf, self.confDict[conf], confDict_oneyear[conf]])

        util.write_csv_inlist('statistic_conf_byyear.csv', outputlist)
Example #3
    def output(self, keyList, labels, name):
        outputList = []
        length = len(keyList)
        for i in range(length):
            clu = labels[i]
            theKey = keyList[i]
            outputList.append([theKey, self.authorObjDict[theKey].name, clu])

        util.write_csv_inlist('./authorCluster_' + name + '.csv', outputList)
Example #4
    def selectData_onekey(self, outputfile, key):
        selectStr = """SELECT * FROM paper WHERE conference like %s """
        key = self.connect.escape(key)

        self.cursor.execute(selectStr % (key))
        outputlist = []
        for row in self.cursor.fetchall():
            outputlist.append([row[0].encode('utf-8'), row[1], row[2].encode('utf-8')])
        util.write_csv_inlist(outputfile, outputlist)
Example #5
    def findDBIS_paper_withfilter(self, paper_confdict, paperdict, DBIS_cleaned, myfilter=None):
        DBIS_paperlist = list()
        counter = 0
        for key in paperdict:
            confkey = paper_confdict[key]
            if confkey in DBIS_cleaned:
                counter += 1
                # guard against the default myfilter=None before the membership test
                if myfilter is None or key not in myfilter:
                    DBIS_paperlist.append([key, paperdict[key]])
        print counter
        util.write_csv_inlist('notmatched.csv', DBIS_paperlist)
Example #6
    def countAuthors(self):
        focusedAuthors = dict()
        print len(self.paperDict_obj)
        for key in self.paperDict_obj:
            paper = self.paperDict_obj[key]
            authors = paper.authors
            for author in authors:
                if author not in focusedAuthors:
                    focusedAuthors[author] = 1
                else:
                    focusedAuthors[author] = focusedAuthors[author] + 1

        util.write_csv_inlist('authorCount.csv',
                              util.dict2list(focusedAuthors))
        return focusedAuthors
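Example #6 also calls util.dict2list to turn the author counter into rows. Its implementation is not shown either; a plausible version, assuming it simply flattens the dict into [key, value] pairs, is:

def dict2list(d):
    # Assumed behavior: one [key, value] row per dictionary entry,
    # in whatever order the dict yields its keys.
    return [[key, d[key]] for key in d]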
Example #7
    def clustering(self, k):
        kmeans = KMeans(n_clusters=k, random_state=0).fit_predict(self.F)
        globalIndex = 0
        outputList = []
        for key in self.nodeIndexDict:
            keyList = self.nodeIndexDict[key]
            length = len(keyList)
            for i in range(length):
                clu = kmeans[globalIndex + i]
                theKey = keyList[i]
                outputList.append(
                    [theKey, self.authorObjDict[theKey].name, key, clu])
            globalIndex += length

        util.write_csv_inlist('./temporal/authorCluster.csv', outputList)
Example #8
    def filterFeatureName(self, folder):
        featureNameSet = set()
        for year in range(self.start, self.end + 1):
            # collect every feature name that appears in any yearly file
            with open(folder + str(year) + '-featureNames.csv', 'r') as fp:
                for line in fp:
                    content = line.strip().split(',')
                    for val in content:
                        featureNameSet.add(val)

        output = ''
        for key in featureNameSet:
            output += key + ','
        print len(featureNameSet)
        util.write_csv_inlist(folder + 'out-featureNames.csv', [output])
Example #9
    def getYear_DBIS_notmatched(self,paperDict,notmatchDict):
        outputlist=[]
        counter=0
        for key in notmatchDict:
            papername=paperDict[key]
            if papername[0]=='"' and papername[-1]=='"':
                papername=papername[1:-1]
            papername= self.connect.escape(papername)

            # print papername
            self.cursor.execute(self.selectStr_easy % (papername))
            resultlist=self.cursor.fetchall()
            if len(resultlist)>0:
                print resultlist
                year=resultlist[0][1]
                outputlist.append([key,year])
                counter+=1

        print counter
        util.write_csv_inlist('paperwithyear_dbis.csv',outputlist)
Example #10
    def matchAminerAndDBLP(self, focusedPaperDict, filterDict=None):
        haveCounter = 0
        havelist = []
        nolist = []
        html_parser = HTMLParser.HTMLParser()
        paperTitleDict = self.reverseDict()
        for key in focusedPaperDict:
            originalTitle = html_parser.unescape(focusedPaperDict[key].title)
            originalTitle = util.simplifyStr(originalTitle)
            # print originalTitle
            if originalTitle in paperTitleDict:
                havelist.append([paperTitleDict[originalTitle], key, focusedPaperDict[key].title])
                haveCounter += 1
                print haveCounter
            else:
                nolist.append([key, focusedPaperDict[key].title])

        util.write_csv_inlist('havelist.csv', havelist, ['aminerKey', 'dblpKey', 'title'])
        util.write_csv_inlist('nolist.csv', nolist, ['aminerKey', 'title'])
        print haveCounter
        print len(focusedPaperDict)
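Example #10 normalizes titles with util.simplifyStr before matching Aminer records against DBLP. The real function is not shown; a minimal sketch, assuming it lowercases the title and strips everything but letters and digits so near-identical titles compare equal, is:

import re

def simplifyStr(title):
    # Assumed normalization: lowercase and drop non-alphanumeric characters,
    # so punctuation and spacing differences between sources disappear.
    return re.sub(r'[^a-z0-9]', '', title.lower())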
Example #11
    def generateEdge(self):
        edgeDict = collections.OrderedDict()
        edgeIndexDict = collections.OrderedDict()
        for yearkey in self.author_nodes:
            author_node = self.author_nodes[yearkey]
            key_list = []
            for key in author_node:
                key_list.append(key)
            mlength = len(key_list)
            ajmatrix = np.zeros((mlength, mlength))

            for i in range(mlength):
                ikey = key_list[i]
                iauthor = self.authorObjDict[ikey]
                for pkey in iauthor.papers:
                    thePaper = self.paperObjDict[pkey]
                    theYear = thePaper.year
                    if theYear > yearkey: continue
                    theAuthors = thePaper.authors
                    for akey in theAuthors:
                        if akey == ikey: continue
                        if akey in key_list:
                            akey_index = key_list.index(akey)
                            val = math.exp(-self.rou * (yearkey - theYear))
                            ajmatrix[i][akey_index] += val
                            # ajmatrix[akey_index][i]=ajmatrix[i][akey_index]
            # b = np.nonzero(ajmatrix)
            # print(np.array(b).ndim)
            # the exponentially decayed weights are fractional, so write floats
            np.savetxt('./proces/temporal/year_' + str(yearkey) + '.csv',
                       ajmatrix,
                       fmt='%f',
                       delimiter=',')
            util.write_csv_inlist(
                './proces/temporal/nodeslist_' + str(yearkey) + '.csv',
                key_list)
            edgeDict[yearkey] = ajmatrix
            edgeIndexDict[yearkey] = key_list
        return edgeDict, edgeIndexDict
Example #12
    def toCSV(self, filename):
        util.write_csv_inlist(filename, self.outputList)
Example #13
    def buildNetwork(self):
        edgeDict_new = collections.OrderedDict()
        edgeIndexDict_new = collections.OrderedDict()

        for key in self.nodeIndexDict:

            myGraph = snap.TNEANet.New()
            keyList = self.nodeIndexDict[key]

            length = len(keyList)
            for i in range(length):
                theKey = keyList[i]
                nid = myGraph.AddNode(i)
                myGraph.AddStrAttrDatN(nid, theKey, 'key')

            # only the first-order adjacency matrix A is used here
            A = self.edgeDict[key]
            # B=np.dot(A,A)
            # B=np.dot(A,np.dot(A,A))
            # B_sim=cosine_similarity(A)
            C = A
            outputList = []
            outputList_line = []
            outputList_dw = []
            for i in range(length):
                for j in range(i + 1, length):
                    if C[i, j] > 0:
                        eid = myGraph.AddEdge(i, j)
                        myGraph.AddFltAttrDatE(eid, A[i, j], 'weigth')
                        eid = myGraph.AddEdge(j, i)
                        myGraph.AddFltAttrDatE(eid, A[j, i], 'weigth')
                        outputList.append([keyList[i], keyList[j], A[i, j]])
                        outputList_line.append(
                            [keyList[j], keyList[i], A[j, i]])
                        outputList_line.append(
                            [keyList[i], keyList[j], A[i, j]])
                        outputList_dw.append([keyList[i], keyList[j]])
                        outputList_dw.append([keyList[j], keyList[i]])

            util.write_csv_inlist(str(key) + '.csv', outputList)
            util.write_csv_inlist(str(key) + '_line.txt', outputList_line)
            util.write_csv_inlist(str(key) + '_dw.txt', outputList_dw)

            print str(key) + '-original: ' + str(
                myGraph.GetEdges()) + ' ' + str(myGraph.GetNodes())
            MxWcc = snap.GetMxWcc(myGraph)
            print str(key) + '-mxWcc: ' + str(MxWcc.GetEdges()) + ' ' + str(
                MxWcc.GetNodes())

            # labels = snap.TIntStrH()
            # for NI in MxWcc.Nodes():
            #     labels[NI.GetId()] = str(NI.GetId())
            # snap.DrawGViz(MxWcc, snap.gvlSfdp, './graph/'+str(key)+".gif", " ", labels)

            keyList_new = []

            for node in MxWcc.Nodes():
                keyList_new.append(keyList[int(node.GetId())])

            ajmatrix = np.zeros((MxWcc.GetNodes(), MxWcc.GetNodes()))

            counter_out = 0
            for node_out in MxWcc.Nodes():
                counter_in = 0
                for node_in in MxWcc.Nodes():
                    ajmatrix[counter_out,
                             counter_in] = A[int(node_out.GetId()),
                                             int(node_in.GetId())]
                    counter_in += 1
                counter_out += 1

            edgeDict_new[key] = ajmatrix
            edgeIndexDict_new[key] = keyList_new
            self.graphList.append(MxWcc)

        return edgeDict_new, edgeIndexDict_new
Example #14
    def getNodeAttributes(self):
        attributeslist = []
        outputList = []

        for UGraph in self.graphList:

            attriList = []
            for index in range(UGraph.GetNodes()):
                nodelist = []
                attriList.append(nodelist)

            #page rank
            PRankH = snap.TIntFltH()
            snap.GetPageRank(UGraph, PRankH)
            counter = 0
            for item in PRankH:
                attriList[counter].append(PRankH[item])
                counter += 1
            #HIN
            counter = 0
            NIdHubH = snap.TIntFltH()
            NIdAuthH = snap.TIntFltH()
            snap.GetHits(UGraph, NIdHubH, NIdAuthH)
            for item in NIdHubH:
                attriList[counter].append(NIdHubH[item])
                attriList[counter].append(NIdAuthH[item])
                counter += 1

            # Betweenness Centrality
            counter = 0
            Nodes = snap.TIntFltH()
            Edges = snap.TIntPrFltH()
            snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
            for node in Nodes:
                attriList[counter].append(Nodes[node])
                counter += 1

            # closeness centrality
            counter = 0
            for NI in UGraph.Nodes():
                CloseCentr = snap.GetClosenessCentr(UGraph, NI.GetId())
                attriList[counter].append(CloseCentr)
                counter += 1

            # farness centrality
            counter = 0
            for NI in UGraph.Nodes():
                FarCentr = snap.GetFarnessCentr(UGraph, NI.GetId())
                attriList[counter].append(FarCentr)
                counter += 1

            # node eccentricity
            counter = 0
            for NI in UGraph.Nodes():
                attriList[counter].append(
                    snap.GetNodeEcc(UGraph, NI.GetId(), True))
                counter += 1

            attriMatrix = np.array(attriList)
            attributeslist.append(attriMatrix)
            outputList.append(attriList)
            # convert to undirected graph
            # GOut = snap.ConvertGraph(snap.PUNGraph, UGraph)

            # for NI in UGraph.Nodes():
            #     DegCentr = snap.GetDegreeCentr(UGraph, NI.GetId())
            #     print "node: %d centrality: %f" % (NI.GetId(), DegCentr)
            util.write_csv_inlist('attributeslist.csv', outputList)
        return attributeslist