示例#1
0
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Compute the WCNFI/WAAFI scores of every candidate pair and persist them.

    For each pair in ``nodesnotLinked`` the partial weights returned by
    ``get_partOfWeightCalculating`` are summed over the pair's common
    neighbours, and one record per pair is inserted into the database
    created at ``calculatingFile``.

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized iterable of two-element pairs of node ids.
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: path given to ``Base`` for the output database.

    Returns:
        The committed ``Base`` instance with the fields ``node1``, ``node2``,
        ``WCNFI`` and ``WAAFI``.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'WCNFI', 'WAAFI')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        common_neighbors = neighbors_node1.intersection(neighbors_node2)

        WCNFI = 0
        WAAFI = 0
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            WCNFI = WCNFI + item['WCN']
            WAAFI = WAAFI + item['WAA']
        pdb.insert(str(pair[0]), str(pair[1]), WCNFI, WAAFI)
    pdb.commit()
    return pdb
 def __init__(self, preparedParameters, filePathResults, filePathAnalyseResult, topRank):
     print "Starting Analysing the results", datetime.today()
     
     absFilePath = filePathResults
     absfilePathAnalyseResult = filePathAnalyseResult #FormatingDataSets.get_abs_file_path(filePathAnalyseResult)
     fResult = open(absFilePath, 'r')
     with open(absfilePathAnalyseResult, 'w') as fnodes:
         self.success = 0
         element = 0
         for line in fResult:
             element = element+1
             FormatingDataSets.printProgressofEvents(element, topRank, "Analysing the results: ")
             cols = line.strip().replace('\n','').split('\t')
             if len(list(networkx.common_neighbors(preparedParameters.testGraph, cols[len(cols)-2] ,  cols[len(cols)-1] ))) != 0:
                 self.success = self.success + 1
                 fnodes.write(cols[len(cols)-2]  + '\t' + cols[len(cols)-1] + '\t' +  'SUCCESS \r\n')
             else:
                 fnodes.write(cols[len(cols)-2]  + '\t' + cols[len(cols)-1] + '\t' +  'FAILED \r\n')
             
             
             
             if element == topRank:
                 break 
         
         result =  float(self.success) / float(topRank) *100
         strResult = 'Final Result: \t' + str(result) + '%'
         fnodes.write(strResult)
         fnodes.write('\n#\t'+str(self.success))
         fnodes.close()
     print "Analysing the results finished", datetime.today()
def analise(calcDb, topRank, TestGraph, util, method):
    """Return the results for ``method``, building the cached base file if needed.

    The cache lives at ``util.calculated_file + '.' + method + '.base.pdl'``
    (resolved through ``FormatingDataSets.get_abs_file_path``).  When it
    already exists it is read back with ``reading_Database``; otherwise the
    records of ``calcDb`` are ranked by their ``method`` field, highest
    first, and handed to ``generate_finalResult`` together with the path.

    Args:
        calcDb: iterable of records indexable by ``'node1'``, ``'node2'``
            and ``method``.
        topRank: forwarded to ``generate_finalResult``.
        TestGraph: forwarded to ``generate_finalResult``.
        util: object exposing ``calculated_file``.
        method: name of the score field to rank by.

    Returns:
        Whatever ``get_results`` returns for the loaded or generated base.
    """
    base_path = FormatingDataSets.get_abs_file_path(util.calculated_file + '.' + method + '.base.pdl')

    if os.path.exists(base_path):
        BD = reading_Database(base_path)
    else:
        # The ranking is only needed when the base has to be generated, so
        # the (potentially large) sort is skipped on a cache hit.
        order = sorted(
            ({'node1': r['node1'], 'node2': r['node2'], 'value': r[method]} for r in calcDb),
            key=lambda value: value['value'],
            reverse=True)
        BD = generate_finalResult(order, topRank, TestGraph, base_path)

    return get_results(BD, method)
    def get_pair_nodes_not_linked(self, graph, file, min_papers):
        print "Starting getting pair of nodes that is not liked", datetime.today(
        )
        results = []
        nodesinGraph = set(n for n, d in graph.nodes(data=True)
                           if d['node_type'] == 'N')
        currentNodes = set()
        for n in nodesinGraph:

            papers = set(networkx.all_neighbors(graph, n))
            print papers
            if (len(papers) >= min_papers):
                currentNodes.add(n)

        print 'qty of authors: ', len(currentNodes)
        nodesOrdered = sorted(currentNodes)
        element = 0
        totalnodesOrdered = len(nodesOrdered)
        for node1 in nodesOrdered:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, totalnodesOrdered, "Checking Node not liked: ")

            others = set(n for n in nodesOrdered if n > node1)
            notLinked = set()
            for other_node in others:
                if len(set(networkx.common_neighbors(graph, node1,
                                                     other_node))) == 0:
                    #notLinked.add(other_node) # como estava antes
                    # esse if abaixo verifica se estao perto
                    if networkx.has_path(graph, node1, other_node):
                        tamanho_caminho = len(
                            networkx.shortest_path(graph, node1,
                                                   other_node)) - 2
                        #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho)
                        #print repr(networkx.shortest_path(graph, node1, other_node));
                        if (tamanho_caminho >
                                0) and (tamanho_caminho <=
                                        self.MAX_NUMBER_OF_PEOPLE_BETWEEN * 2 +
                                        1):  # -2 porque inclui o inicio e fim
                            print "adicionando %s - %s" % (node1, other_node)
                            notLinked.add(other_node)
            if len(notLinked) > 0:
                results.append([node1, notLinked])
            if element % 2000 == 0:
                for item in results:
                    file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
                results = []

        for item in results:
            file.write(str(item[0]) + '\t' + repr(item[1]) + '\n')
        results = []

        print "getting pair of nodes that is not liked finished", datetime.today(
        )
class SFrame:
    """Evaluator whose configuration and data tables are loaded when the class is defined.

    Class attributes:
        util: parsed parameter file.
        myparams: ``Parameterization`` built from ``util``.
        metrics: table read from ``util.calculated_file + '_normalizated.csv'``.
        results: table read from ``util.result_random_file``.
        top: number of best-scored rows considered by ``evaluate``.
    """

    # Alternative parameter files kept for reference:
    #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999/AllExecutionScores/configToAG.txt')
    #util = ParameterUtil(parameter_file = 'data/configuration/arxiv/condmat_1994_1999/CombinationLinear/configToAG.txt')
    util = ParameterUtil(parameter_file='data/configuration/arxiv/exemplo_1994_1999/CombinationLinear/configToAG.txt')

    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        linear_combination=util.linear_combination,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None,
        result_random_file=util.result_random_file,
    )

    metrics = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(util.calculated_file + '_normalizated.csv'))
    results = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(util.result_random_file))

    top = 20

    def __init__(self):
        pass

    @classmethod
    def evaluate(cls, individual):
        """Score ``individual`` as a weighting of the chosen metric columns.

        Column ``i`` of the chosen scores is multiplied by ``individual[i]``
        and the products are summed into a ``new_metric`` column.  The
        ``cls.top`` rows with the highest ``new_metric`` are joined with
        ``cls.results`` and the rows whose ``result`` equals 0 are counted.

        Returns:
            A one-element tuple holding that count divided by ``cls.top``
            (presumably the fitness shape the optimiser expects; confirm
            against the caller).
        """
        combined = float(0)
        for position, score in enumerate(cls.myparams.ScoresChoiced):
            column = cls.metrics[score[0].getName()]
            weight = individual[position]
            combined = combined + (column * weight)

        ranked = cls.metrics.copy()
        ranked.add_column(combined, name='new_metric')
        ranked = ranked.topk('new_metric', k=cls.top)

        outcomes = cls.results.copy()
        ranked = ranked.join(outcomes)
        ranked = ranked.sort('new_metric', ascending=False)

        zero = ranked.filter_by([0], 'result').num_rows()
        return (float(zero) / cls.top,)
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        with open(self.OriginalDataSet) as f:
            self.OrignalContent = f.readlines()
            f.close()

        articleid = 0
        articles = []
        authornames = []
        authorofArticles = []
        authors = []
        article = None
        element = 0
        for line in self.OrignalContent:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, len(self.OrignalContent), "Reading File Content to Generate Graph: "
            )
            line = line.strip()
            if line.startswith("#*"):
                articleid = articleid + 1
                article = Article("p_" + str(articleid))
                article.articlename = line.replace("#*", "").replace("\r\n", "")
            if line.startswith("#t"):
                article.time = line.replace("#t", "").replace("\r\n", "")

            if line.startswith("#@"):
                authorsofArticle = line.replace("#@", "").replace("\r\n", "").split(",")
                for author in authorsofArticle:
                    author = author.strip()
                    if not author in authornames:
                        authornames.append(author)
                    articleauthor = AuthorInArticle(article.articleid, authornames.index(author) + 1)
                    authorofArticles.append(articleauthor)
            if line.startswith("#!"):
                articles.append(article)
        for index in range(len(authornames)):
            author = Author(index + 1, authornames[index])
            authors.append(author)
        self.Graph = networkx.Graph()
        for item_article in articles:
            self.Graph.add_node(
                item_article.articleid,
                {"node_type": "E", "title": item_article.articlename.decode("latin_1"), "time": int(item_article.time)},
            )
        for item_author in authors:
            self.Graph.add_node(
                int(item_author.authorid), {"node_type": "N", "name": item_author.name.decode("latin_1")}
            )
        for item_edge in authorofArticles:
            self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))

        print "Reading Original Dataset finished", datetime.today()
 def readingOrginalDataset(self):
     print "Starting Reading Original Dataset", datetime.today()
     con = None
     try:
         con = psycopg2.connect(database='projetomestrado', user='******', password='******')
         
         curPublicacao = con.cursor()
         curPublicacao.execute("select distinct p.idpublicacao, p.titulo, p.ano from projetomestrado.publicacao p inner join projetomestrado.autorpublicacao a on a.idpublicacao = p.idpublicacao where a.idautor in (select idautor from projetomestrado.autor where afiliacao = 'Instituto Militar de Engenharia')")
         curPublicacaoData = curPublicacao.fetchall()
         element = 0
         qty = len(curPublicacaoData)
         print qty
         for linha in curPublicacaoData:
             element = element+1
             FormatingDataSets.printProgressofEvents(element, qty, "Adding paper to new graph: ")
         
             idpublicacao = linha[0]
             curPublicacaoPalavras = con.cursor()
             curPublicacaoPalavras.execute("select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao =" + str(idpublicacao))
             palavras = []
             for palavra in curPublicacaoPalavras.fetchall():
                 palavras.append(palavra[0].strip())
             curAutores = con.cursor()
             curAutores.execute("select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = "+ str(idpublicacao))
             autores = []
             for autor in curAutores.fetchall():
                 autores.append([autor[0], autor[1] + "," + autor[2]])
         
                 
             self.Publications.append([idpublicacao, linha[1], linha[2], palavras, autores ])
         
         self.Graph = networkx.Graph()
         
         for item_article in self.Publications:
             self.Graph.add_node('P_' + str(item_article[0]), {'node_type' : 'E', 'title' : item_article[1].decode("latin_1"), 'time' : int(item_article[2]), 'keywords': str(item_article[3]) })
             for item_autor in item_article[4]:
                 self.Graph.add_node(int(item_autor[0]), {'node_type' : 'N', 'name' : item_autor[1].decode("latin_1") })
                 self.Graph.add_edge('P_' + str(item_article[0]), int(item_autor[0]) )
         
         print "Reading Original Dataset finished", datetime.today()
         
         
 
         
         
         
         
     except psycopg2.DatabaseError, e:
         print 'Error %s' % e
def calculatingInputToFuzzy(graph, nodesnotLinked,  params):
    """Build the fuzzy-system inputs for every candidate pair with common neighbours.

    For each pair, and for each neighbour ``cn`` shared by both nodes, the
    edges joining each node to ``cn`` are collected.  Their number feeds the
    node's intensity, their largest ``time`` feeds the node's age and their
    ``keywords`` are compared through ``get_jacard_domain``.

    Args:
        graph: graph whose edges carry ``time`` and ``keywords`` data.
        nodesnotLinked: sized iterable of two-element pairs of node ids.
        params: object exposing ``t0_``; ages are reported as the absolute
            distance between ``t0_`` and the latest edge time.

    Returns:
        A list with one dict per pair that has at least one common
        neighbour, holding the keys ``no1``, ``no2``, ``intensityno1``,
        ``intensityno2``, ``similarity`` (mean similarity times 100),
        ``ageno1`` and ``ageno2``.  Pairs without common neighbours are
        omitted.
    """
    result = []

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)
        IntensityNode1 = 0
        IntensityNode2 = 0
        Similarities = 0
        AgesNode1 = 0
        AgesNode2 = 0

        for cn in CommonNeigbors:
            infoNode1 = _edge_data_between(graph, pair[0], cn)
            infoNode2 = _edge_data_between(graph, pair[1], cn)

            IntensityNode1 = IntensityNode1 + len(infoNode1)
            IntensityNode2 = IntensityNode2 + len(infoNode2)

            MaxTimeNode1 = max(info['time'] for info in infoNode1)
            MaxTimeNode2 = max(info['time'] for info in infoNode2)

            AgesNode1 = max(AgesNode1, MaxTimeNode1)
            # Fixed: node 2's age was previously updated from node 1's
            # latest time (copy-paste slip), so both ages were always equal.
            AgesNode2 = max(AgesNode2, MaxTimeNode2)

            bagofWordsNode1 = [info['keywords'] for info in infoNode1]
            bagofWordsNode2 = [info['keywords'] for info in infoNode2]

            Similarities = Similarities + get_jacard_domain(bagofWordsNode1, bagofWordsNode2)

        AgesNode1 = abs(params.t0_ - AgesNode1)
        AgesNode2 = abs(params.t0_ - AgesNode2)
        if len(CommonNeigbors) > 0:
            Similarity = (Similarities / len(CommonNeigbors)) * 100
            result.append({
                'no1': str(pair[0]),
                'no2': str(pair[1]),
                'intensityno1': IntensityNode1,
                'intensityno2': IntensityNode2,
                'similarity': Similarity,
                'ageno1': AgesNode1,
                'ageno2': AgesNode2,
            })
    return result


def _edge_data_between(graph, node, neighbor):
    """Return the data dict of every edge that joins ``node`` and ``neighbor``."""
    return [edge for n1, n2, edge in graph.edges([node, neighbor], data=True)
            if (n1 == node and n2 == neighbor) or (n1 == neighbor and n2 == node)]
示例#9
0
 def getTopRank(relativeFilePathRandomAnalised):
     """Return the integer in the second tab-separated field of the file's last line.

     Args:
         relativeFilePathRandomAnalised: path resolved through
             ``FormatingDataSets.get_abs_file_path``.

     Raises:
         ValueError: if the file is empty or the field is not an integer.
         IndexError: if the last line has no second field.
     """
     absFile = FormatingDataSets.get_abs_file_path(
         relativeFilePathRandomAnalised)
     last = None
     # ``with`` closes the handle, which used to be leaked.
     with open(absFile, 'r') as f:
         for last in f:
             pass
     if last is None:
         # Previously surfaced as an UnboundLocalError on ``last``.
         raise ValueError("no lines found in %s" % absFile)
     return int(last.split('\t')[1])
示例#10
0
def step05(paramFile):
    """Build the test graph and analyse the calculated scores for ``paramFile``.

    The parameter file is parsed, a ``Parameterization`` and a ``Calculate``
    object are created from it, the test graph is generated and an
    ``Analyse`` object is constructed over the calculated file, writing to
    the analysed file path suffixed with ``'.random.analised.txt'``.

    Args:
        paramFile: parameter file path, e.g.
            'data/formatado/arxiv/nowell_example_1994_1999.txt'.
    """
    settings = ParameterUtil(parameter_file=paramFile)

    myparams = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calc = Calculate(
        myparams,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    myparams.generating_Test_Graph()

    calculated_path = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    analysed_path = FormatingDataSets.get_abs_file_path(settings.analysed_file) + '.random.analised.txt'
    # Constructed for its side effects only; the instance is not used further.
    Analyse(myparams, calculated_path, analysed_path, calc.qtyDataCalculated)
def execution(configFile):
    """Run the scoring experiment described by ``configFile`` and write a report.

    In order: the training and test graphs are generated, the core nodes,
    collaborations, old edges, new edges and not-yet-linked pairs are
    obtained from ``NodeSelection``, the pairs are scored in memory, the
    results are ordered (``len(eNew)`` is passed as the cut-off), saved and
    checked against the test graph.  One analysis file per chosen score is
    saved and the success totals plus a summary line are written to the
    report at ``configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt'``.

    Args:
        configFile: parameter file path; also the prefix of the report name.
    """
    report_path = FormatingDataSets.get_abs_file_path(configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt')
    # ``with`` guarantees the report is flushed and closed even when one of
    # the pipeline stages raises.
    with open(report_path, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the configuration and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0,
            t0_=util.t0_,
            t1=util.t1,
            t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay,
            domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file,
        )

        # Training graph is based on t0/t0_, test graph on t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodesSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        # Authors with at least MIN_EDGES papers in both training and test.
        nodes = nodesSelection.get_NowellAuthorsCore()
        # Author pairs with at least one article in training and test; only
        # used for the summary line below.
        collaborations = nodesSelection.get_NowellColaboration()
        # Edges among the core nodes in the training graph.
        eOld = nodesSelection.get_NowellE(nodes, myparams.trainnigGraph)
        # Edges among the core nodes in the test graph that are not in eOld.
        eNew = nodesSelection.get_NowellE2(nodes, eOld, myparams.testGraph)
        # Pairs of core nodes that are not linked yet.
        nodesNotLinked = nodesSelection.get_PairsofNodesNotinEold(nodes)

        calc = CalculateInMemory(myparams, nodesNotLinked)
        resultsofCalculation = calc.executingCalculate()
        # Order the results, keeping the top len(eNew).
        orderingResults = calc.ordering(len(eNew), resultsofCalculation)
        calc.saving_orderedResult(util.ordered_file, orderingResults)
        # Check the ordered results against what happened in the test graph.
        ScoresResults = Analyse.AnalyseNodesWithScoresInFuture(orderingResults, myparams.testGraph)

        for index, scoreResult in enumerate(ScoresResults):
            scoreName = str(myparams.ScoresChoiced[index][0])
            Analyse.saving_analyseResult(scoreResult, util.analysed_file + scoreName + '.txt')
            resultFile.write("TOTAL OF SUCESSS USING METRIC " + scoreName + " = " + str(Analyse.get_TotalSucess(scoreResult)))
            resultFile.write("\n")
            resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(str(myparams.get_nodes(myparams.trainnigGraph)) + "\t" + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t" + str(len(collaborations) * 2) + "\t\t" + str(len(nodes)) + "\t" + str(len(eOld)) + "\t" + str(len(eNew)))

        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
示例#12
0
def step05(paramFile):
    """Generate the test graph and run ``Analyse`` over the calculated scores.

    Args:
        paramFile: parameter file path handed to ``ParameterUtil``, e.g.
            'data/formatado/arxiv/nowell_example_1994_1999.txt'.
    """
    util = ParameterUtil(parameter_file=paramFile)

    parameterization_args = (
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced,
        util.graph_file, util.trainnig_graph_file, util.test_graph_file,
        util.decay,
    )
    myparams = Parameterization(*parameterization_args)

    calc = Calculate(
        myparams,
        util.nodes_notlinked_file,
        util.calculated_file,
        util.ordered_file,
        util.maxmincalculated_file,
    )
    myparams.generating_Test_Graph()

    source_path = FormatingDataSets.get_abs_file_path(util.calculated_file)
    report_path = FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt'
    # Only the constructor's side effects matter; the object is discarded.
    Analyse(myparams, source_path, report_path, calc.qtyDataCalculated)
def execution(configFile):
    
    #DEFINE THE FILE THAT WILL KEEP THE RESULT DATA
    resultFile = open(FormatingDataSets.get_abs_file_path(configFile + 'core03.txt'), 'w')
    
    resultFile.write("Inicio da operacao\n")
    resultFile.write(str(datetime.datetime.now()))
    resultFile.write("\n")

    
    #READING THE CONFIG FILE
    util = ParameterUtil(parameter_file = configFile)
    #CREATING PARAMETRIZATION OBJECT WITH THE INFORMATIONS OF THE CONFIG FILE.
    myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, linear_combination=util.linear_combination,
                                filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None, result_random_file=util.result_random_file)

    #GENERATING TRAINNING GRAPH BASED ON CONFIG FILE T0 AND T0_
    myparams.generating_Training_Graph()
      
    #GENERATING TEST GRAPH BASED ON CONcvb FIG FILE T1 AND T1_
    myparams.generating_Test_Graph()
    
    nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
    #if not os.path.exists(FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.fuzzyinputy.txt')):
    data = calculatingInputToFuzzy(myparams.trainnigGraph,nodeSelection.nodesNotLinked,  myparams)
    saving_files_calculting_input(FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.inputFuzzy.txt'), data)
    
    for item in data:
        calc = FuzzyCalculation(item['intensityno1'], item['intensityno2'], item['similarity'], item['ageno1'], item['ageno2'])
        print item['no1'], item['no2'], calc.potencial_ligacao, calc.grau_potencial_ligacao
        
        
       
    
    resultFile.write("\n")
#        
    resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
    resultFile.write( str(myparams.get_nodes(myparams.trainnigGraph))+ "\t" + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t" + str(len(nodeSelection.get_NowellColaboration())*2)+ "\t\t" + str(len(nodeSelection.nodes)) + "\t" + str(len(nodeSelection.eOld))+"\t" + str(len(nodeSelection.eNeW)))
     
 
    resultFile.write("\n")

    resultFile.write("Fim da Operacao\n")
    resultFile.write(str(datetime.datetime.now()))
    
    resultFile.close()
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Compute six weighted features per candidate pair and persist them.

    For each pair in ``nodesnotLinked`` the ``cnWts02/05/08`` and
    ``aaWts02/05/08`` values returned by ``get_partOfWeightCalculating`` are
    summed over the pair's common neighbours, and one record per pair is
    inserted into the database created at ``calculatingFile``.

    Args:
        graph: graph handed to ``all_neighbors`` and
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized iterable of two-element pairs of node ids.
        database: passed through unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: path given to ``Base`` for the output database.

    Returns:
        The committed ``Base`` instance with the fields ``node1``, ``node2``,
        ``cnWTS02``, ``cnWTS05``, ``cnWTS08``, ``aaWTS02``, ``aaWTS05`` and
        ``aaWTS08``.
    """
    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02', 'cnWTS05', 'cnWTS08', 'aaWTS02',
               'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)

        CNWts02Feature = 0
        CNWts05Feature = 0
        CNWts08Feature = 0
        AAWts02Feature = 0
        AAWts05Feature = 0
        AAWts08Feature = 0

        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            CNWts02Feature = CNWts02Feature + item['cnWts02']
            CNWts05Feature = CNWts05Feature + item['cnWts05']
            CNWts08Feature = CNWts08Feature + item['cnWts08']
            AAWts02Feature = AAWts02Feature + item['aaWts02']
            AAWts05Feature = AAWts05Feature + item['aaWts05']
            AAWts08Feature = AAWts08Feature + item['aaWts08']

        pdb.insert(str(pair[0]), str(pair[1]), CNWts02Feature, CNWts05Feature,
                   CNWts08Feature, AAWts02Feature, AAWts05Feature,
                   AAWts08Feature)
    pdb.commit()
    return pdb
示例#15
0
    def adding_normalize_values_tograph(self, graph, weighted_graph_file):
        """Add one weighted edge per line of ``self.filepathResult`` and save the graph.

        Each line is parsed with ``Calculate.reading_calculateLine``; fields
        1 and 2 of the parsed result are used as integer node ids and field
        0, converted to ``str``, as the edge ``weight``.  The graph is then
        written as GraphML.

        Args:
            graph: NetworkX graph that receives the edges (modified in place).
            weighted_graph_file: output path, resolved through
                ``FormatingDataSets.get_abs_file_path``.
        """
        # ``with`` closes the results file, which used to be leaked.
        with open(self.filepathResult, 'r') as arquivo:
            for line in arquivo:
                result = Calculate.reading_calculateLine(line)
                graph.add_edge(int(result[1]),
                               int(result[2]),
                               weight=str(result[0]))
        networkx.write_graphml(
            graph, FormatingDataSets.get_abs_file_path(weighted_graph_file))
示例#16
0
def execution(configFile, metricas):
    """Score the not-yet-linked pairs with unit weights and save scores and outcomes.

    The training and test graphs are generated, every pair in
    ``nodeSelection.nodesNotLinked`` is scored by
    ``CalculatingCombinationOnlyNowell`` with all weights set to 1, the
    scores are saved to ``util.calculated_file`` and the analysis of the same
    pairs against the test graph is saved to
    ``util.analysed_file + '.allNodes.csv'``.  A summary report is written
    to ``configFile + 'core03.txt'``.

    Args:
        configFile: parameter file path; also the prefix of the report name.
        metricas: forwarded to ``saving_files_calculting``.
    """
    report_path = FormatingDataSets.get_abs_file_path(configFile + 'core03.txt')
    # ``with`` guarantees the report is flushed and closed even when one of
    # the pipeline stages raises.
    with open(report_path, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the configuration and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0,
            t0_=util.t0_,
            t1=util.t1,
            t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay,
            domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file,
        )

        # Training graph is based on t0/t0_, test graph on t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)

        weights = {'cn': 1, 'aas': 1, 'pa': 1, 'jc': 1, 'ts08': 1, 'ts05': 1, 'ts02': 1}
        calc = CalculatingCombinationOnlyNowell(myparams, nodeSelection.nodesNotLinked, weights, False)

        saving_files_calculting(FormatingDataSets.get_abs_file_path(util.calculated_file), calc.results, metricas)

        Analise = nodeSelection.AnalyseAllNodesNotLinkedInFuture(nodeSelection.nodesNotLinked, myparams.testGraph)
        salvar_analise(FormatingDataSets.get_abs_file_path(util.analysed_file) + '.allNodes.csv', Analise)

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(str(myparams.get_nodes(myparams.trainnigGraph)) + "\t" + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t" + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t" + str(len(nodeSelection.nodes)) + "\t" + str(len(nodeSelection.eOld)) + "\t" + str(len(nodeSelection.eNeW)))

        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
示例#17
0
def execution(configFile):
    """Rank the not-yet-linked pairs by their ``result`` and check the best ones.

    The training and test graphs are generated, the entries returned by
    ``calculatingInputToFuzzy`` are sorted by ``result`` (highest first), the
    first ``len(nodeSelection.eNeW)`` of them are analysed against the test
    graph with ``AnalyseNodesInFuture``, and the total of successes plus a
    summary line are written to ``configFile + 'core03.txt'``.

    Args:
        configFile: parameter file path; also the prefix of the report name.
    """
    report_path = FormatingDataSets.get_abs_file_path(configFile + 'core03.txt')
    # ``with`` guarantees the report is flushed and closed even when one of
    # the pipeline stages raises.
    with open(report_path, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the configuration and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0,
            t0_=util.t0_,
            t1=util.t1,
            t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay,
            domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file,
        )

        # Training graph is based on t0/t0_, test graph on t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        data = calculatingInputToFuzzy(myparams.trainnigGraph, nodeSelection.nodesNotLinked, myparams)
        # NOTE(review): every entry must carry a 'result' key for this sort;
        # confirm that the calculatingInputToFuzzy in scope provides it.
        dataSorted = sorted(data, key=lambda value: value['result'], reverse=True)

        topRank = len(nodeSelection.eNeW)
        # A slice already stops at the end of the list, so a single
        # expression covers both "fewer entries than topRank" and "more".
        dataToAnalysed = [
            {'no1': entry['no1'], 'no2': entry['no2'], 'result': entry['result']}
            for entry in dataSorted[:topRank]
        ]

        analise = AnalyseNodesInFuture(dataToAnalysed, myparams.testGraph)

        resultFile.write(repr(get_TotalSucess(analise)))

        resultFile.write("\n")
        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(str(myparams.get_nodes(myparams.trainnigGraph)) + "\t" + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t" + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t" + str(len(nodeSelection.nodes)) + "\t" + str(len(nodeSelection.eOld)) + "\t" + str(len(nodeSelection.eNeW)))

        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'core03.txt'`` (resolved through
    ``FormatingDataSets.get_abs_file_path``). It contains a start timestamp,
    the ``repr`` of ``get_TotalSucess`` for the analysed pairs, one line of
    graph statistics and an end timestamp.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'core03.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)
        # Build the parameterization object from the values of the config file.
        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # Generate the training graph (config values T0 and T0_).
        myparams.generating_Training_Graph()
        # Generate the test graph (config values T1 and T1_).
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        data = calculatingInputToFuzzy(myparams.trainnigGraph, nodeSelection.nodesNotLinked, myparams)
        dataSorted = sorted(data, key=lambda value: value['result'], reverse=True)

        # Keep the best ``topRank`` entries; slicing already copes with the
        # case where fewer entries than ``topRank`` were calculated.
        topRank = len(nodeSelection.eNeW)
        dataToAnalysed = [
            {'no1': entry['no1'], 'no2': entry['no2'], 'result': entry['result']}
            for entry in dataSorted[:topRank]
        ]

        analise = AnalyseNodesInFuture(dataToAnalysed, myparams.testGraph)
        resultFile.write(repr(get_TotalSucess(analise)))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Accumulate six weighted features for every unlinked pair and store them.

    For each pair, the per-common-neighbour values returned by
    ``get_partOfWeightCalculating`` are summed and inserted as one record
    into a new ``Base`` created at ``calculatingFile``.

    :param graph: graph passed to ``all_neighbors`` and the weight helper.
    :param nodesnotLinked: sized iterable of pairs; ``pair[0]`` and
        ``pair[1]`` are the two nodes.
    :param database: passed through to ``get_partOfWeightCalculating``.
    :param calculatingFile: path given to ``Base``.
    :return: the committed ``Base`` instance.
    """
    # Keys read from each partial result, in the order of the value columns
    # created below.
    feature_keys = ('cnWts02', 'cnWts05', 'cnWts08',
                    'aaWts02', 'aaWts05', 'aaWts08')

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', 'cnWTS02','cnWTS05','cnWTS08', 'aaWTS02', 'aaWTS05', 'aaWTS08')
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        common_neighbors = all_neighbors(graph, pair[0]).intersection(all_neighbors(graph, pair[1]))

        # One running total per feature, all starting at zero.
        totals = [0] * len(feature_keys)
        for cn in common_neighbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for position, key in enumerate(feature_keys):
                totals[position] = totals[position] + item[key]

        pdb.insert(str(pair[0]), str(pair[1]), *totals)
    pdb.commit()
    return pdb
示例#20
0
def analise(calcDb, topRank, TestGraph, util, method):
    """Rank the records of ``calcDb`` by ``method`` and return its results.

    The final-result database for ``method`` is read from
    ``util.calculated_file + '.' + method + '.base.pdl'`` when that file
    exists, and generated with ``generate_finalResult`` otherwise.

    :return: whatever ``get_results`` returns for that database and method.
    """
    def by_value(entry):
        return entry['value']

    ranked = [{'node1': r['node1'], 'node2': r['node2'], 'value': r[method]}
              for r in calcDb]
    # Highest value first.
    ranked.sort(key=by_value, reverse=True)

    relative_name = util.calculated_file + '.' + method + '.base.pdl'
    if os.path.exists(FormatingDataSets.get_abs_file_path(relative_name)):
        BD = reading_Database(
            FormatingDataSets.get_abs_file_path(relative_name))
    else:
        BD = generate_finalResult(
            ranked, topRank, TestGraph,
            FormatingDataSets.get_abs_file_path(relative_name))

    return get_results(BD, method)
示例#21
0
    def __init__(self, preparedParameters, filePathResults,
                 filePathAnalyseResult, topRank):
        print "Starting Analysing the results", datetime.today()

        absFilePath = filePathResults
        absfilePathAnalyseResult = filePathAnalyseResult  #FormatingDataSets.get_abs_file_path(filePathAnalyseResult)
        fResult = open(absFilePath, 'r')
        with open(absfilePathAnalyseResult, 'w') as fnodes:
            self.success = 0
            element = 0
            for line in fResult:
                element = element + 1
                FormatingDataSets.printProgressofEvents(
                    element, topRank, "Analysing the results: ")
                cols = line.strip().replace('\n', '').split('\t')
                if len(
                        list(
                            networkx.common_neighbors(
                                preparedParameters.testGraph,
                                cols[len(cols) - 2],
                                cols[len(cols) - 1]))) != 0:
                    self.success = self.success + 1
                    fnodes.write(cols[len(cols) - 2] + '\t' +
                                 cols[len(cols) - 1] + '\t' + 'SUCCESS \r\n')
                else:
                    fnodes.write(cols[len(cols) - 2] + '\t' +
                                 cols[len(cols) - 1] + '\t' + 'FAILED \r\n')

                if element == topRank:
                    break

            result = float(self.success) / float(topRank) * 100
            strResult = 'Final Result: \t' + str(result) + '%'
            fnodes.write(strResult)
            fnodes.write('\n#\t' + str(self.success))
            fnodes.close()
        print "Analysing the results finished", datetime.today()
示例#22
0
 def saving_analyseResult(AnalysedNodesnotLinkedInFuture, filepath):
     """Write the analysed pairs to a UTF-8, comma-separated text file.

     The file starts with the header ``no1,no2,result``. Each item yields
     one line: its first two elements verbatim, followed by the ``repr`` of
     every remaining element.

     :param AnalysedNodesnotLinkedInFuture: iterable of sequences whose
         first two elements are strings.
     :param filepath: path resolved via ``FormatingDataSets.get_abs_file_path``.
     """
     # ``with`` closes the file even if writing one of the rows fails.
     with codecs.open(FormatingDataSets.get_abs_file_path(filepath), 'w', encoding='utf-8') as f:
         f.write('no1,no2,result\n')
         for item in AnalysedNodesnotLinkedInFuture:
             fields = [item[0], item[1]]
             fields.extend(repr(extra) for extra in item[2:])
             # join() never leaves a trailing comma, so no clean-up is needed.
             f.write(','.join(fields) + '\n')
示例#23
0
class SFrame:
    
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999/AllExecutionScores/configToAG.txt')
    
    myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, linear_combination=util.linear_combination,
                                filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None, result_random_file=util.result_random_file)
     
    metrics = sframe.SFrame.read_csv(FormatingDataSets.get_abs_file_path(util.calculated_file+'_normalizated.csv'))
    results = sframe.SFrame.read_csv(FormatingDataSets.get_abs_file_path(util.result_random_file))

    top = 20
    
    def __init__(self):
        pass
        

    @classmethod
    def evaluate(cls, individual):
        new_metric = float(0)
        
        for index_score in  range(len(cls.myparams.ScoresChoiced)):
            new_metric = new_metric + (cls.metrics[ cls.myparams.ScoresChoiced[index_score][0].getName() ] * individual[index_score] )
               
        print new_metric
        copy_metrics = cls.metrics.copy()
        copy_metrics.add_column(new_metric, name='new_metric')
        copy_metrics = copy_metrics.topk('new_metric', k=cls.top)
        copy_results = cls.results.copy()
        copy_metrics = copy_metrics.join(copy_results)
        copy_metrics = copy_metrics.sort('new_metric', ascending=False)
        aux = [0]
        copy_metrics = copy_metrics.filter_by(aux,'result')
        zero = copy_metrics.num_rows()
        del copy_metrics
        del copy_results
        return float(zero) / cls.top,
 def get_pair_nodes_not_linked(self, graph, file, min_papers):
     print "Starting getting pair of nodes that is not liked", datetime.today()
     results = []
     nodesinGraph =set(n for n,d in graph.nodes(data=True) if d['node_type'] == 'N')
     currentNodes = set()
     for n in nodesinGraph:
         
         papers = set(networkx.all_neighbors(graph, n))
         print papers
         if (len(papers) >= min_papers):
             currentNodes.add(n)
     
     print 'qty of authors: ', len(currentNodes)
     nodesOrdered = sorted(currentNodes)
     element = 0
     totalnodesOrdered = len(nodesOrdered)
     for node1 in nodesOrdered:
         element = element+1
         FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ")
         
         others =  set(n for n in nodesOrdered if n > node1)
         notLinked = set()
         for other_node in others:
             if len(set(networkx.common_neighbors(graph, node1, other_node))) == 0:
                 notLinked.add(other_node)
         results.append([node1, notLinked])
         if element % 2000 == 0:
             for item in results:
                 file.write(str(item[0]) + '\t' +  repr(item[1]) + '\n')
             results = []
             
     for item in results:
         file.write(str(item[0]) + '\t' +  repr(item[1]) + '\n')
     results = []
         
     print "getting pair of nodes that is not liked finished", datetime.today()
示例#25
0
 def get_pair_nodes_not_linked(self):
     print "Starting getting pair of nodes that is not liked", datetime.today()
     results = []
     nodesinGraph =self.graph.nodes()
     nodesOrdered = sorted(nodesinGraph)
     totalnodesOrdered = len(nodesOrdered)
     element = 0
     
     for node in nodesOrdered:
         element = element+1
         FormatingDataSets.printProgressofEvents(element, totalnodesOrdered, "Checking Node not liked: ")
         publicacoes = self.graph.edges(node,data=False)
         qtdepublicacoes = len(publicacoes)
         #print "O autor e seus papers ",node,qtdepublicacoes ,publicacoes 
         if (qtdepublicacoes >= self.min_papers):
             others =  set(n for n in nodesOrdered if n > node)
             for otherNode in others:
                 other_publicacoes = self.graph.edges(otherNode,data=False)
                 other_qtdepublicacoes = len(other_publicacoes)
                 if (other_qtdepublicacoes >= self.min_papers):
                     if (not self.graph.has_edge(node, otherNode)):
                         if self.USE_MAX_NUMBER_OF_PEOPLE_BETWEEN == True:
                             if networkx.has_path(self.graph, node, otherNode):
                                 shortestPathResult = networkx.shortest_path(self.graph, node, otherNode)
                                 #print shortestPathResult
                                 tamanho_caminho = len(shortestPathResult) - 1
                                 #print "%s ate %s: %s" %(node1, other_node,tamanho_caminho)
                                 #print repr(networkx.shortest_path(graph, node1, other_node));
                                 if ( tamanho_caminho > 0 ) and (tamanho_caminho <= self.MAX_NUMBER_OF_PEOPLE_BETWEEN ): # -2 porque inclui o inicio e fim
                                     #print "adicionando %s - %s" %(node, otherNode)
                                     results.append([node, otherNode])
                         else:
                             results.append([node, otherNode])
             
     print "getting pair of nodes that is not liked finished", datetime.today()
     return results
示例#26
0
 def reading_analyseResult(filepath):
     """Read a comma-separated result file back into a list of rows.

     The first line (header) is skipped. Every other line is split on
     commas. Each column is evaluated as a Python expression, and kept as
     the raw string when evaluation fails.

     :param filepath: path resolved via ``FormatingDataSets.get_abs_file_path``.
     :return: list of rows, each a list of column values.
     """
     result = []
     # ``with`` releases the file handle, which was previously never closed.
     with open(FormatingDataSets.get_abs_file_path(filepath), 'r') as f:
         for line_number, line in enumerate(f):
             if line_number == 0:
                 # Header row.
                 continue
             item_result = []
             for col in line.strip().split(','):
                 try:
                     # SECURITY NOTE(review): eval() executes whatever
                     # expression the file contains; only use this on
                     # files produced by this program.
                     item_result.append(eval(col))
                 except Exception:
                     item_result.append(str(col))
             result.append(item_result)
     return result
def execution(configFile):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'core03_onlyinteraction.txt'``
    (resolved through ``FormatingDataSets.get_abs_file_path``). The weight
    database and the calculated-feature database are each read from their
    ``.base.pdl`` file when it exists, and generated otherwise.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'core03_onlyinteraction.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)
        # Build the parameterization object from the values of the config file.
        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # Generate the training graph (config values T0 and T0_).
        myparams.generating_Training_Graph()
        # Generate the test graph (config values T1 and T1_).
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)

        # Weight database: reuse the file on disk when present.
        weights_path = FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.base.pdl')
        if not os.path.exists(weights_path):
            db = generateWeights(myparams.trainnigGraph, weights_path, myparams)
        else:
            db = reading_Database(weights_path)

        # Calculated-feature database: reuse the file on disk when present.
        calculated_path = FormatingDataSets.get_abs_file_path(util.calculated_file + '.base.pdl')
        if not os.path.exists(calculated_path):
            # NOTE(review): the creation path appends the suffix AFTER
            # resolving the absolute path, unlike the existence check above;
            # kept as-is - confirm both expressions name the same file.
            calcDb = calculatingWeights(myparams.trainnigGraph, nodeSelection.nodesNotLinked, db, FormatingDataSets.get_abs_file_path(util.calculated_file) + '.base.pdl')
        else:
            calcDb = reading_Database(calculated_path)

        ordering = get_ordering(calcDb, len(nodeSelection.eNeW))
        result = get_analyseNodesInFuture(ordering, myparams.testGraph)

        resultFile.write(repr(result))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'wTScore03_010304.txt'`` (resolved
    through ``FormatingDataSets.get_abs_file_path``). The weight database
    and the calculated-feature database are each read from their
    ``.base.pdl`` file when it exists, and generated otherwise.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'wTScore03_010304.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)
        # Build the parameterization object from the values of the config file.
        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # Generate the training graph (config values T0 and T0_).
        myparams.generating_Training_Graph()
        # Generate the test graph (config values T1 and T1_).
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)

        # Weight database: reuse the file on disk when present.
        weights_path = FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.base.pdl')
        if not os.path.exists(weights_path):
            db = generateWeights(myparams.trainnigGraph, weights_path, myparams)
        else:
            db = reading_Database(weights_path)

        # Calculated-feature database: reuse the file on disk when present.
        calculated_path = FormatingDataSets.get_abs_file_path(util.calculated_file + '.base.pdl')
        if not os.path.exists(calculated_path):
            # NOTE(review): the creation path appends the suffix AFTER
            # resolving the absolute path, unlike the existence check above;
            # kept as-is - confirm both expressions name the same file.
            calcDb = calculatingWeights(myparams.trainnigGraph, nodeSelection.nodesNotLinked, db, FormatingDataSets.get_abs_file_path(util.calculated_file) + '.base.pdl')
        else:
            calcDb = reading_Database(calculated_path)

        ordering = get_ordering(calcDb, len(nodeSelection.eNeW))
        result = get_analyseNodesInFuture(ordering, myparams.testGraph)

        resultFile.write(repr(result))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'core03_execucaoFinal_cstT02.txt'``
    (resolved through ``FormatingDataSets.get_abs_file_path``). Scores are
    produced by a ``CalculatingTogether`` object. The report holds the
    ``repr`` of its ``get_TotalSucess()`` plus one line of graph statistics.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'core03_execucaoFinal_cstT02.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)
        # Build the parameterization object from the values of the config file.
        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # Generate the training graph (config values T0 and T0_).
        myparams.generating_Training_Graph()
        # Generate the test graph (config values T1 and T1_).
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        # Create the calculation object.
        calc = CalculatingTogether(myparams, nodeSelection.nodesNotLinked)

        ordering = calc.ordering(len(nodeSelection.eNeW))
        calc.AnalyseNodesInFuture(ordering, myparams.testGraph)

        resultFile.write(repr(calc.get_TotalSucess()))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'core03.txt'`` (resolved through
    ``FormatingDataSets.get_abs_file_path``). Scores are produced by a
    ``CalculatingTogether`` object. The report holds the ``repr`` of its
    ``get_TotalSucess()`` plus one line of graph statistics.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'core03.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)
        # Build the parameterization object from the values of the config file.
        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # Generate the training graph (config values T0 and T0_).
        myparams.generating_Training_Graph()
        # Generate the test graph (config values T1 and T1_).
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        # Create the calculation object.
        calc = CalculatingTogether(myparams, nodeSelection.nodesNotLinked)

        ordering = calc.ordering(len(nodeSelection.eNeW))
        calc.AnalyseNodesInFuture(ordering, myparams.testGraph)

        resultFile.write(repr(calc.get_TotalSucess()))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
示例#31
0
def get_analyseNodesInFuture(calcDb, topRank, TestGraph, util):
    """Return the results for the ``WCNFI`` and ``WAAFI`` metrics, in that order.

    For each metric the records of ``calcDb`` are ranked by that metric,
    highest first. The metric's final-result database is read from
    ``util.calculated_file + '.<metric>.base.pdl'`` when that file exists,
    and generated with ``generate_finalResult`` otherwise.

    :param calcDb: re-iterable collection of records with ``node1``,
        ``node2``, ``WCNFI`` and ``WAAFI`` keys.
    :return: list holding ``get_results(...)`` for each metric.
    """
    result = []
    # One pass per metric replaces the two copy-pasted pipelines.
    for method in ('WCNFI', 'WAAFI'):
        ordered = sorted(
            ({'node1': r['node1'], 'node2': r['node2'], 'value': r[method]}
             for r in calcDb),
            key=lambda entry: entry['value'],
            reverse=True)

        cache_path = FormatingDataSets.get_abs_file_path(
            util.calculated_file + '.' + method + '.base.pdl')
        if not os.path.exists(cache_path):
            database = generate_finalResult(ordered, topRank, TestGraph,
                                            cache_path)
        else:
            database = reading_Database(cache_path)

        result.append(get_results(database, method))
    return result
示例#32
0
def execution(configFile, weights):
    """Run the pipeline described by ``configFile`` and write a text report.

    The report goes to ``configFile + 'core03.txt'`` (resolved through
    ``FormatingDataSets.get_abs_file_path``). Scores are produced by a
    ``CalculatingCombinationOnlyNowell`` object built with ``weights``. The
    report holds the ``repr`` of its ``get_TotalSucess()`` plus one line of
    graph statistics.

    :param configFile: path of the parameter file handed to ``ParameterUtil``.
    :param weights: passed through to ``CalculatingCombinationOnlyNowell``.
    """
    # ``with`` guarantees the report is flushed and closed even when one of
    # the steps below raises.
    with open(FormatingDataSets.get_abs_file_path(configFile + 'core03.txt'), 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.now()))
        resultFile.write("\n")

        # Read the config file.
        util = ParameterUtil(parameter_file = configFile)

        myparams = Parameterization(
            t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
            linear_combination = util.linear_combination,
            filePathGraph = util.graph_file,
            filePathTrainingGraph = util.trainnig_graph_file,
            filePathTestGraph = util.test_graph_file,
            decay = util.decay, domain_decay = util.domain_decay,
            min_edges = util.min_edges,
            scoreChoiced = util.ScoresChoiced,
            weightsChoiced = util.WeightsChoiced,
            weightedScoresChoiced = util.WeightedScoresChoiced,
            FullGraph = None,
            result_random_file = util.result_random_file)

        # The test graph is generated before the training graph here.
        myparams.generating_Test_Graph()
        myparams.generating_Training_Graph()

        nodeSelection = NodeSelection(myparams.trainnigGraph, myparams.testGraph, util)
        # Create the calculation object.
        calc = CalculatingCombinationOnlyNowell(myparams, nodeSelection.nodesNotLinked, weights, True)

        ordering = calc.ordering(len(nodeSelection.eNeW))
        calc.AnalyseNodesInFuture(ordering, myparams.testGraph)

        resultFile.write(repr(calc.get_TotalSucess()))
        resultFile.write("\n")

        resultFile.write("Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        summary = (str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
                   + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
                   + str(len(nodeSelection.get_NowellColaboration())*2) + "\t\t"
                   + str(len(nodeSelection.nodes)) + "\t"
                   + str(len(nodeSelection.eOld)) + "\t"
                   + str(len(nodeSelection.eNeW)))
        resultFile.write(summary)
        resultFile.write("\n")

        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.now()))
示例#33
0
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Sum nine WCN and nine WAA partial weights for every unlinked pair.

    For each pair in ``nodesnotLinked`` the common neighbours of its two
    nodes are taken from ``graph`` (via ``all_neighbors``). For every common
    neighbour ``get_partOfWeightCalculating`` is called and the nine values
    under ``'WCN'`` and the nine under ``'WAA'`` are added to running totals.
    One record per pair is inserted into a ``Base`` created at
    ``calculatingFile`` with the fields ``node1``, ``node2``,
    ``WCNFTI01``..``WCNFTI09`` and ``WAAFTI01``..``WAAFTI09``.

    Parameters:
        graph: graph handed to ``all_neighbors`` and to
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized iterable of node pairs (indexable with 0 and 1).
        database: passed unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: location handed to ``Base``.

    Returns:
        The ``Base`` instance after ``commit()``.
    """
    featureCount = 9
    wcnFields = ['WCNFTI%02d' % number for number in range(1, featureCount + 1)]
    waaFields = ['WAAFTI%02d' % number for number in range(1, featureCount + 1)]

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', *(wcnFields + waaFields))
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(
            element, qtyofNodesToProcess,
            "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)

        wcnTotals = [0] * featureCount
        waaTotals = [0] * featureCount
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for position in range(featureCount):
                wcnTotals[position] = wcnTotals[position] + item['WCN'][position]
            for position in range(featureCount):
                waaTotals[position] = waaTotals[position] + item['WAA'][position]

        # Exactly one value per created field. The previous hand-written
        # argument list passed the second WCN and the second WAA total twice,
        # which shifted every following column by one (and the WAA columns by
        # two) and silently dropped the last WAA totals.
        pdb.insert(str(pair[0]), str(pair[1]), *(wcnTotals + waaTotals))
    pdb.commit()
    return pdb
示例#34
0
    def __init__(self, preparedParameter, filepathNodesNotLinked,
                 filepathResult, filePathOrdered, filepathMaxMinCalculated):
        print "Starting Calculating Nodes not linked", datetime.today()

        self.preparedParameter = preparedParameter
        self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
        self.filepathMaxMinCalculated = Formating.get_abs_file_path(
            filepathMaxMinCalculated)
        self.filepathResult = Formating.get_abs_file_path(filepathResult)
        self.filepathNodesNotLinked = Formating.get_abs_file_path(
            filepathNodesNotLinked)
        #for each links that is not linked all the calculates is done.
        element = 0
        qtyofResults = FormatingDataSets.getTotalLineNumbers(
            self.filepathNodesNotLinked)
        fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
        if os.path.exists(self.filepathResult):
            print "Calculate already done for this file, please delete if you want a new one.", datetime.today(
            )
            self.reading_Max_min_file()
            return

        fcontentCalcResult = open(self.filepathResult, 'w')

        self.minValueCalculated = list(
            99999 for x in self.preparedParameter.featuresChoice)
        self.maxValueCalculated = list(
            0 for x in self.preparedParameter.featuresChoice)

        qtyFeatures = len(self.preparedParameter.featuresChoice)
        qtyNodesCalculated = 0
        partialResults = []
        for lineofFile in fcontentNodesNotLinked:
            element = element + 1
            item = VariableSelection.getItemFromLine(lineofFile)
            qtyothernodenotlinked = len(item[1])
            newelement = 0
            for neighbor_node in item[1]:
                newelement = newelement + 1
                qtyNodesCalculated = qtyNodesCalculated + 1
                self.printProgressofEvents(
                    element, qtyofResults,
                    "Calculating features for nodes not liked: ")
                self.printProgressofEventsWihoutPercent(
                    newelement, qtyothernodenotlinked, "Calculating nodes: " +
                    str(item[0]) + ":" + str(neighbor_node))

                item_result = []
                #executing the calculation for each features chosen at parameter
                for index_features in range(qtyFeatures):
                    self.preparedParameter.featuresChoice[index_features][
                        0].parameter = preparedParameter
                    valueCalculated = self.preparedParameter.featuresChoice[
                        index_features][0].execute(
                            item[0], neighbor_node
                        ) * self.preparedParameter.featuresChoice[
                            index_features][1]
                    if valueCalculated < self.minValueCalculated[
                            index_features]:
                        self.minValueCalculated[
                            index_features] = valueCalculated
                    if valueCalculated > self.maxValueCalculated[
                            index_features]:
                        self.maxValueCalculated[
                            index_features] = valueCalculated

                    item_result.append(valueCalculated)

                lineContent = []
                #generating a vetor with the name of the feature and the result of the calculate
                for indice in range(qtyFeatures):
                    lineContent.append(
                        str({
                            str(self.preparedParameter.featuresChoice[indice]):
                            item_result[indice]
                        }))
                partialResults.append([lineContent, item[0], neighbor_node])

            if element % 10 == 0:
                for item in partialResults:
                    for calc in item[0]:
                        fcontentCalcResult.write(calc + '\t')
                    fcontentCalcResult.write(
                        str(item[1]) + '\t' + str(item[2]) + '\n')
                partialResults = []

        for item in partialResults:
            for calc in item[0]:
                fcontentCalcResult.write(calc + '\t')
            fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2]) + '\n')

        fcontentCalcResult.flush()
        fcontentCalcResult.close()
        fcontentNodesNotLinked.close()
        fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
        fcontentMaxMin.write(
            str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) +
            '\t' + repr(self.maxValueCalculated))
        fcontentMaxMin.close()
        print "Calculating Nodes not linked finished", datetime.today()
    
    
    for line in calculatedFile:
        if texto in line:
            result = line
            break
        elif textov2 in line:
            result = line
            break
    calculatedFile.seek(0)
    return result
    

# Script entry point. NOTE(review): this snippet is truncated (the final loop
# has only its first statement) and must be completed before it can be used.
if __name__ == '__main__':
    # Load the experiment configuration from a fixed parameter file.
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    # Parse every line of the calculated file with Calculate.reading_calculateLine.
    # NOTE(review): `x` is not defined anywhere in the visible snippet, so the
    # append below would raise NameError unless `x` is created elsewhere.
    calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        x.append(Calculate.reading_calculateLine(linha))
    calculatedFile.close()
    # Build the parameter object (positional arguments) and the training graph.
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    Nodes_notLinked = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges)
    nodes_notlinkedFile = open(FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r')
    qtyLine = 0
    qtyCalculated = 0
    # Output file: the calculated-file path with '.weight.txt' appended.
    # NOTE(review): neither `f` nor `nodes_notlinkedFile` is closed in the
    # visible part of the snippet.
    f = open(FormatingDataSets.get_abs_file_path(util.calculated_file )+ '.weight.txt', 'w')
    # Per-feature running minimum (seeded with 99999) and maximum (seeded with 0).
    minValueCalculated = list(99999 for x in myparams.featuresChoice)
    maxValueCalculated = list(0 for x in myparams.featuresChoice)
    qtyFeatures = len(myparams.featuresChoice)
    # Count the lines of the nodes-not-linked file (rest of the loop body is
    # not part of this snippet).
    for line in nodes_notlinkedFile:
        qtyLine = qtyLine + 1
示例#36
0
if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/exemplomenor/config/config.txt')
    myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, 
                                filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None)

    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()
    
    selection = VariableSelection(myparams.trainnigGraph, util.min_edges)
    nodesNotLinked = selection.get_pair_nodes_not_linked()
    calc = CalculateInMemory(myparams, nodesNotLinked)
    resultsCalculate = calc.executingCalculate()
    
    
    calc.Separating_calculateFile()
    analise = Analyse(myparams, FormatingDataSets.get_abs_file_path(util.calculated_file), FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt', calc.qtyDataCalculated)
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    calc.Ordering_separating_File(topRank)
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
    
    
    print "Trainning Period:", myparams.t0, " - ", myparams.t0_
    print "Test Period:", myparams.t1, " - ", myparams.t1_
    
    print "# Papers in Trainning: ",  myparams.get_edges(myparams.trainnigGraph)
    print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph)
    print "# Papers in Test: ",  myparams.get_edges(myparams.testGraph)
    print "# Authors in Test", myparams.get_nodes(myparams.testGraph)
    
    print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated  #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file))
示例#37
0
    for line in calculatedFile:
        if texto in line:
            result = line
            break
        elif textov2 in line:
            result = line
            break
    calculatedFile.seek(0)
    return result


# Script entry point. NOTE(review): this snippet is truncated right after
# `qtyLine = 0`; the files opened here are not closed in the visible part.
if __name__ == '__main__':
    # Load the experiment configuration from a fixed parameter file.
    util = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    # Parse every line of the calculated file with Calculate.reading_calculateLine.
    # NOTE(review): `x` is not defined anywhere in the visible snippet, so the
    # append below would raise NameError unless `x` is created elsewhere.
    calculatedFile = open(
        FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        x.append(Calculate.reading_calculateLine(linha))
    calculatedFile.close()
    # Build the parameter object (positional arguments) and the training graph.
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    myparams.generating_Training_Graph()
    Nodes_notLinked = VariableSelection(myparams.trainnigGraph,
                                        util.nodes_notlinked_file,
                                        util.min_edges)
    # Input with the unlinked pairs; read by code that is not part of this snippet.
    nodes_notlinkedFile = open(
        FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r')
    qtyLine = 0
'''
Created on Aug 22, 2015

@author: cptullio
Analysing the results
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    selection = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges, True)
    calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    wg = calc.adding_normalize_values_tograph(myparams.trainnigGraph)
    networkx.write_graphml(wg, FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.weighted.txt'))
    node993 =set(n for n,d in wg.edges(data=True) if n == 993 and d == 994)
    print node993
    
    
	def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated):
		print "Starting Calculating Nodes not linked", datetime.today()
		
		self.preparedParameter = preparedParameter
		self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
		self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated)
		self.filepathResult = Formating.get_abs_file_path(filepathResult)
		self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked)
		#for each links that is not linked all the calculates is done.
		element = 0
		qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked)
		fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
		if os.path.exists(self.filepathResult):
			print "Calculate already done for this file, please delete if you want a new one.", datetime.today()
			return
		
		fcontentCalcResult = open(self.filepathResult, 'w')
		
		self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice)
		self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice)
		
		qtyFeatures = len(self.preparedParameter.featuresChoice)
		self.qtyDataCalculated = 0
		
		out_q = multiprocessing.Queue()
		procs = []
		nprocs = 100
		for lineofFile in fcontentNodesNotLinked:
			element = element+1
			
			p = multiprocessing.Process(target=self.calculating_features, args=(lineofFile,element,qtyofResults  , preparedParameter, qtyFeatures , self.minValueCalculated, self.maxValueCalculated,  out_q))
			procs.append(p)
			p.start()
			
			
			if len(procs) >= nprocs:
				for i in range(len(procs)):
					result  = out_q.get()
					result = result.split('|')
					
					mini = eval(result[0])
					maxi = eval(result[1])
					
					self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])
					fcontentCalcResult.write(result[3])
					for index_features in range(qtyFeatures):
						if   mini[index_features] < self.minValueCalculated[index_features]:
							self.minValueCalculated[index_features] = mini[index_features]
						if maxi[index_features] > self.maxValueCalculated[index_features]:
							self.maxValueCalculated[index_features] = maxi[index_features]
							
				for p in procs:
					p.join()
				procs = []
		
		for i in range(len(procs)):
			result  = out_q.get()
			result = result.split('|')
					
			mini = eval(result[0])
			maxi = eval(result[1])
			self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])
			
			fcontentCalcResult.write(result[3])
			
			for index_features in range(qtyFeatures):
				if   mini[index_features] < self.minValueCalculated[index_features]:
					self.minValueCalculated[index_features] = mini[index_features]
				if maxi[index_features] > self.maxValueCalculated[index_features]:
					self.maxValueCalculated[index_features] = maxi[index_features]
			
		for p in procs:
			p.join()
				
		fcontentCalcResult.flush()
		fcontentCalcResult.close()
		fcontentNodesNotLinked.close()
		fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
		fcontentMaxMin.write(str(self.qtyDataCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) )
		fcontentMaxMin.close()
		print "Calculating Nodes not linked finished", datetime.today()
		
		
def calculatingWeights(graph, nodesnotLinked, database, calculatingFile):
    """Sum nine WCN and nine WAA partial weights for every unlinked pair.

    For each pair in ``nodesnotLinked`` the common neighbours of its two
    nodes are taken from ``graph`` (via ``all_neighbors``). For every common
    neighbour ``get_partOfWeightCalculating`` is called and the nine values
    under ``'WCN'`` and the nine under ``'WAA'`` are added to running totals.
    One record per pair is inserted into a ``Base`` created at
    ``calculatingFile`` with the fields ``node1``, ``node2``,
    ``WCNFTI01``..``WCNFTI09`` and ``WAAFTI01``..``WAAFTI09``.

    Parameters:
        graph: graph handed to ``all_neighbors`` and to
            ``get_partOfWeightCalculating``.
        nodesnotLinked: sized iterable of node pairs (indexable with 0 and 1).
        database: passed unchanged to ``get_partOfWeightCalculating``.
        calculatingFile: location handed to ``Base``.

    Returns:
        The ``Base`` instance after ``commit()``.
    """
    featureCount = 9
    wcnFields = ['WCNFTI%02d' % number for number in range(1, featureCount + 1)]
    waaFields = ['WAAFTI%02d' % number for number in range(1, featureCount + 1)]

    pdb = Base(calculatingFile)
    pdb.create('node1', 'node2', *(wcnFields + waaFields))
    pdb.create_index('node1', 'node2')

    qtyofNodesToProcess = len(nodesnotLinked)
    for element, pair in enumerate(nodesnotLinked, 1):
        FormatingDataSets.printProgressofEvents(element, qtyofNodesToProcess, "Calculating features for nodes not liked: ")
        neighbors_node1 = all_neighbors(graph, pair[0])
        neighbors_node2 = all_neighbors(graph, pair[1])
        CommonNeigbors = neighbors_node1.intersection(neighbors_node2)

        wcnTotals = [0] * featureCount
        waaTotals = [0] * featureCount
        for cn in CommonNeigbors:
            item = get_partOfWeightCalculating(graph, database, pair, cn)
            for position in range(featureCount):
                wcnTotals[position] = wcnTotals[position] + item['WCN'][position]
            for position in range(featureCount):
                waaTotals[position] = waaTotals[position] + item['WAA'][position]

        # Exactly one value per created field. The previous hand-written
        # argument list passed the second WCN and the second WAA total twice,
        # which shifted every following column by one (and the WAA columns by
        # two) and silently dropped the last WAA totals.
        pdb.insert(str(pair[0]), str(pair[1]), *(wcnTotals + waaTotals))
    pdb.commit()
    return pdb
 def __init__(self, preparedParameter, filepathNodesNotLinked, filepathResult, filePathOrdered, filepathMaxMinCalculated):
     print "Starting Calculating Nodes not linked", datetime.today()
     
     self.preparedParameter = preparedParameter
     self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
     self.filepathMaxMinCalculated = Formating.get_abs_file_path(filepathMaxMinCalculated)
     self.filepathResult = Formating.get_abs_file_path(filepathResult)
     self.filepathNodesNotLinked = Formating.get_abs_file_path(filepathNodesNotLinked)
     #for each links that is not linked all the calculates is done.
     element = 0
     qtyofResults = FormatingDataSets.getTotalLineNumbers(self.filepathNodesNotLinked)
     fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
     if os.path.exists(self.filepathResult):
         print "Calculate already done for this file, please delete if you want a new one.", datetime.today()
         self.reading_Max_min_file()
         return
     
     fcontentCalcResult = open(self.filepathResult, 'w')
     
     self.minValueCalculated = list(99999 for x in self.preparedParameter.featuresChoice)
     self.maxValueCalculated = list(0 for x in self.preparedParameter.featuresChoice)
     
     qtyFeatures = len(self.preparedParameter.featuresChoice)
     qtyNodesCalculated = 0
     partialResults = []
     for lineofFile in fcontentNodesNotLinked:
         element = element+1
         item = VariableSelection.getItemFromLine(lineofFile)
         qtyothernodenotlinked = len(item[1])
         newelement = 0
         for neighbor_node in item[1]:
             newelement = newelement +1
             qtyNodesCalculated = qtyNodesCalculated + 1
             self.printProgressofEvents(element, qtyofResults, "Calculating features for nodes not liked: ")
             self.printProgressofEventsWihoutPercent(newelement, qtyothernodenotlinked, "Calculating nodes: " + str(item[0])  + ":" +  str(neighbor_node) )
         
             item_result = []
             #executing the calculation for each features chosen at parameter
             for index_features in range(qtyFeatures):
                 self.preparedParameter.featuresChoice[index_features][0].parameter = preparedParameter
                 valueCalculated = self.preparedParameter.featuresChoice[index_features][0].execute(item[0],neighbor_node) * self.preparedParameter.featuresChoice[index_features][1]
                 if valueCalculated < self.minValueCalculated[index_features]:
                     self.minValueCalculated[index_features] = valueCalculated
                 if valueCalculated > self.maxValueCalculated[index_features]:
                     self.maxValueCalculated[index_features] = valueCalculated
                     
                 item_result.append(valueCalculated)
             
             lineContent = []    
             #generating a vetor with the name of the feature and the result of the calculate
             for indice in range(qtyFeatures):
                 lineContent.append(str({str(self.preparedParameter.featuresChoice[indice]):item_result[indice]}) )
             partialResults.append([lineContent, item[0], neighbor_node])
             
         if element % 10 == 0:
             for item in partialResults:
                 for calc in item[0]:
                     fcontentCalcResult.write(calc + '\t')
                 fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2])  + '\n'  )
             partialResults = []
     
     for item in partialResults:
         for calc in item[0]:
             fcontentCalcResult.write(calc + '\t')
         fcontentCalcResult.write(str(item[1]) + '\t' + str(item[2])  + '\n'  )
             
     
     fcontentCalcResult.flush()
     fcontentCalcResult.close()
     fcontentNodesNotLinked.close()
     fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
     fcontentMaxMin.write(str(qtyNodesCalculated) + '\t' + repr(self.minValueCalculated) + '\t' + repr(self.maxValueCalculated) )
     fcontentMaxMin.close()
     print "Calculating Nodes not linked finished", datetime.today()
     
     
示例#42
0
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0,
                                util.t0_, util.t1, util.t1_,
                                util.FeaturesChoiced, util.graph_file,
                                util.trainnig_graph_file, util.test_graph_file,
                                util.decay)
    myparams.generating_Training_Graph()
    AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file,
                                 util.min_edges, True)
    calc = Calculate(myparams, util.nodes_file, util.calculated_file,
                     util.ordered_file, util.maxmincalculated_file)
    print 'armazenando resultados'
    cnx = mysql.connector.connect(user='******',
                                  password='******',
                                  host='127.0.0.1',
                                  database='calculos')
    add_result = ("INSERT INTO resultadopesos "
                  "(no1, no2, resultados) "
                  "VALUES (%s, %s, %s)")
    cursor = cnx.cursor()
    calculatedFile = open(
        FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        dado = Calculate.reading_calculateLine(linha)
        data_result = (dado[1], dado[2].replace('\n', ''), str(dado[0]))
        cursor.execute(add_result, data_result)
    calculatedFile.close()
    cnx.commit()
    cursor.close()
    cnx.close()
示例#43
0
    def __init__(self, preparedParameter, filepathNodesNotLinked,
                 filepathResult, filePathOrdered, filepathMaxMinCalculated):
        print "Starting Calculating Nodes not linked", datetime.today()

        self.preparedParameter = preparedParameter
        self.filePathOrdered = Formating.get_abs_file_path(filePathOrdered)
        self.filepathMaxMinCalculated = Formating.get_abs_file_path(
            filepathMaxMinCalculated)
        self.filepathResult = Formating.get_abs_file_path(filepathResult)
        self.filepathNodesNotLinked = Formating.get_abs_file_path(
            filepathNodesNotLinked)
        #for each links that is not linked all the calculates is done.
        element = 0
        qtyofResults = FormatingDataSets.getTotalLineNumbers(
            self.filepathNodesNotLinked)
        fcontentNodesNotLinked = open(self.filepathNodesNotLinked, 'r')
        if os.path.exists(self.filepathResult):
            print "Calculate already done for this file, please delete if you want a new one.", datetime.today(
            )
            return

        fcontentCalcResult = open(self.filepathResult, 'w')

        self.minValueCalculated = list(
            99999 for x in self.preparedParameter.featuresChoice)
        self.maxValueCalculated = list(
            0 for x in self.preparedParameter.featuresChoice)

        qtyFeatures = len(self.preparedParameter.featuresChoice)
        self.qtyDataCalculated = 0

        out_q = multiprocessing.Queue()
        procs = []
        nprocs = 100
        for lineofFile in fcontentNodesNotLinked:
            element = element + 1

            p = multiprocessing.Process(
                target=self.calculating_features,
                args=(lineofFile, element, qtyofResults, preparedParameter,
                      qtyFeatures, self.minValueCalculated,
                      self.maxValueCalculated, out_q))
            procs.append(p)
            p.start()

            if len(procs) >= nprocs:
                for i in range(len(procs)):
                    result = out_q.get()
                    result = result.split('|')

                    mini = eval(result[0])
                    maxi = eval(result[1])

                    self.qtyDataCalculated = self.qtyDataCalculated + int(
                        result[2])
                    fcontentCalcResult.write(result[3])
                    for index_features in range(qtyFeatures):
                        if mini[index_features] < self.minValueCalculated[
                                index_features]:
                            self.minValueCalculated[index_features] = mini[
                                index_features]
                        if maxi[index_features] > self.maxValueCalculated[
                                index_features]:
                            self.maxValueCalculated[index_features] = maxi[
                                index_features]

                for p in procs:
                    p.join()
                procs = []

        for i in range(len(procs)):
            result = out_q.get()
            result = result.split('|')

            mini = eval(result[0])
            maxi = eval(result[1])
            self.qtyDataCalculated = self.qtyDataCalculated + int(result[2])

            fcontentCalcResult.write(result[3])

            for index_features in range(qtyFeatures):
                if mini[index_features] < self.minValueCalculated[
                        index_features]:
                    self.minValueCalculated[index_features] = mini[
                        index_features]
                if maxi[index_features] > self.maxValueCalculated[
                        index_features]:
                    self.maxValueCalculated[index_features] = maxi[
                        index_features]

        for p in procs:
            p.join()

        fcontentCalcResult.flush()
        fcontentCalcResult.close()
        fcontentNodesNotLinked.close()
        fcontentMaxMin = open(self.filepathMaxMinCalculated, 'w')
        fcontentMaxMin.write(
            str(self.qtyDataCalculated) + '\t' +
            repr(self.minValueCalculated) + '\t' +
            repr(self.maxValueCalculated))
        fcontentMaxMin.close()
        print "Calculating Nodes not linked finished", datetime.today()
示例#44
0
 def getTopRank(relativeFilePathRandomAnalised):
     """Return the integer in the second tab-separated column of the last line.

     The path is resolved with ``FormatingDataSets.get_abs_file_path`` and the
     file is read to its end; only the final line is used.

     Parameters:
         relativeFilePathRandomAnalised: path of the analysed file, as
             accepted by ``FormatingDataSets.get_abs_file_path``.

     Returns:
         ``int`` parsed from column index 1 of the last line.

     Raises:
         ValueError: if the file is empty (previously an UnboundLocalError)
             or the column is not an integer.
         IndexError: if the last line has no second column.
     """
     absFile = FormatingDataSets.get_abs_file_path(relativeFilePathRandomAnalised)
     last = None
     # 'with' closes the handle; it used to be left open.
     with open(absFile, 'r') as f:
         for last in f:
             pass
     if last is None:
         raise ValueError("Cannot read the top rank from an empty file: " + str(absFile))
     return int(last.split('\t')[1])
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx
import mysql.connector

if __name__ == '__main__':
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt')
    myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file,util.min_edges, True)
    calc = Calculate(myparams, util.nodes_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file)
    print 'armazenando resultados'
    cnx = mysql.connector.connect(user='******', password='******',
                              host='127.0.0.1',
                              database='calculos')
    add_result = ("INSERT INTO resultadopesos "
               "(no1, no2, resultados) "
               "VALUES (%s, %s, %s)")
    cursor = cnx.cursor()
    calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r')
    for linha in calculatedFile:
        dado = Calculate.reading_calculateLine(linha)
        data_result = (dado[1], dado[2].replace('\n',''),str(dado[0]))
        cursor.execute(add_result, data_result)
    calculatedFile.close()
    cnx.commit()
    cursor.close()
    cnx.close()
    
    
示例#46
0
'''
Created on Aug 22, 2015

@author: cptullio
Generating TopRank
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from formating.FormatingDataSets import FormatingDataSets

if __name__ == '__main__':
    # Script entry point: build the parameter objects for the Duarte
    # parameter file, generate the test graph and hand the calculated file
    # to Analyse.
    util = ParameterUtil(parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    myparams = Parameterization(
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced,
        util.graph_file, util.trainnig_graph_file, util.test_graph_file,
        util.decay)
    calc = Calculate(
        myparams,
        util.nodes_notlinked_file,
        util.calculated_file,
        util.ordered_file,
        util.maxmincalculated_file)
    myparams.generating_Test_Graph()

    # Resolve both paths up front, in the same order the original call
    # evaluated them as arguments.
    calculated_path = FormatingDataSets.get_abs_file_path(util.calculated_file)
    analysed_path = (FormatingDataSets.get_abs_file_path(util.analysed_file)
                     + '.random.analised.txt')
    analise = Analyse(myparams, calculated_path, analysed_path,
                      calc.qtyDataCalculated)
示例#47
0
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        con = None
        try:
            con = psycopg2.connect(database='projetomestrado',
                                   user='******',
                                   password='******')

            curPublicacao = con.cursor()
            curPublicacao.execute(
                "select idpublicacao, titulo, ano from projetomestrado.publicacao  where ano >= 1993 and ano <= 2000"
            )
            curPublicacaoData = curPublicacao.fetchall()
            element = 0
            for linha in curPublicacaoData:
                element = element + 1
                FormatingDataSets.printProgressofEvents(
                    element, len(curPublicacaoData),
                    "Adding paper to new graph: ")

                idpublicacao = linha[0]
                curPublicacaoPalavras = con.cursor()
                curPublicacaoPalavras.execute(
                    "select k.keyword from projetomestrado.keyword k inner join projetomestrado.publicacaokeyword pk on pk.idkeyword = k.idkeyword where pk.idpublicacao ="
                    + str(idpublicacao))
                palavras = []
                for palavra in curPublicacaoPalavras.fetchall():
                    palavras.append(palavra[0].strip())
                curAutores = con.cursor()
                curAutores.execute(
                    "select a.idautor, a.primeironome, a.ultimonome from projetomestrado.autorpublicacao ap inner join projetomestrado.autor a on a.idautor = ap.idautor where ap.idpublicacao = "
                    + str(idpublicacao))
                autores = []
                for autor in curAutores.fetchall():
                    autores.append([autor[0], autor[1] + "," + autor[2]])

                self.Publications.append(
                    [idpublicacao, linha[1], linha[2], palavras, autores])

            self.Graph = networkx.Graph()

            for item_article in self.Publications:
                self.Graph.add_node(
                    'P_' + str(item_article[0]), {
                        'node_type': 'E',
                        'title': item_article[1].decode("latin_1"),
                        'time': int(item_article[2]),
                        'keywords': str(item_article[3])
                    })
                for item_autor in item_article[4]:
                    self.Graph.add_node(int(item_autor[0]), {
                        'node_type': 'N',
                        'name': item_autor[1].decode("latin_1")
                    })
                    self.Graph.add_edge('P_' + str(item_article[0]),
                                        int(item_autor[0]))

            print "Reading Original Dataset finished", datetime.today()

        except psycopg2.DatabaseError, e:
            print 'Error %s' % e
    def readingOrginalDataset(self):
        print "Starting Reading Original Dataset", datetime.today()
        with open(self.OriginalDataSet) as f:
            self.OrignalContent = f.readlines()
            f.close()

        articleid = 0
        articles = []
        authornames = []
        authorofArticles = []
        authors = []
        article = None
        element = 0
        for line in self.OrignalContent:
            element = element + 1
            FormatingDataSets.printProgressofEvents(
                element, len(self.OrignalContent),
                "Reading File Content to Generate Graph: ")
            line = line.strip()
            if line.startswith('#*'):
                articleid = articleid + 1
                article = Article('p_' + str(articleid))
                article.articlename = line.replace('#*',
                                                   '').replace('\r\n', '')
            if line.startswith('#t'):
                article.time = line.replace('#t', '').replace('\r\n', '')

            if line.startswith('#@'):
                authorsofArticle = line.replace('#@',
                                                '').replace('\r\n',
                                                            '').split(',')
                for author in authorsofArticle:
                    author = author.strip()
                    if not author in authornames:
                        authornames.append(author)
                    articleauthor = AuthorInArticle(
                        article.articleid,
                        authornames.index(author) + 1)
                    authorofArticles.append(articleauthor)
            if line.startswith('#!'):
                articles.append(article)
        for index in range(len(authornames)):
            author = Author(index + 1, authornames[index])
            authors.append(author)
        self.Graph = networkx.Graph()
        for item_article in articles:
            self.Graph.add_node(
                item_article.articleid, {
                    'node_type': 'E',
                    'title': item_article.articlename.decode("latin_1"),
                    'time': int(item_article.time)
                })
        for item_author in authors:
            self.Graph.add_node(int(item_author.authorid), {
                'node_type': 'N',
                'name': item_author.name.decode("latin_1")
            })
        for item_edge in authorofArticles:
            self.Graph.add_edge(item_edge.articleid, int(item_edge.authorid))

        print "Reading Original Dataset finished", datetime.today()