def step08(paramFile):
    """Print a text report for the experiment described by ``paramFile``.

    The report lists the training/test periods, the edge and node counts of
    both graphs, the values loaded by ``calc.reading_Max_min_file()`` and,
    for every path returned by ``calc.getfilePathOrdered_separeted()``, the
    figures ``Analyse`` reads from the matching ``.analised.txt`` file.

    Each ``print`` call receives a single pre-formatted string, so the text
    written is the same as with the former ``print a, b`` statements while
    the function also parses under Python 3.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    util = ParameterUtil(parameter_file=paramFile)
    myparams = Parameterization(
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced, util.graph_file,
        util.trainnig_graph_file, util.test_graph_file, util.decay)
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()

    # The doubled spaces in the format strings reproduce the separator the
    # print statement inserted after a literal that already ended in a space.
    print("Trainning Period: %s  -  %s" % (myparams.t0, myparams.t0_))
    print("Test Period: %s  -  %s" % (myparams.t1, myparams.t1_))
    print("# Papers in Trainning:  %s"
          % (myparams.get_edges(myparams.trainnigGraph),))
    print("# Authors in Training:  %s"
          % (myparams.get_nodes(myparams.trainnigGraph),))
    print("# Papers in Test:  %s"
          % (myparams.get_edges(myparams.testGraph),))
    print("# Authors in Test %s"
          % (myparams.get_nodes(myparams.testGraph),))

    calc = Calculate(myparams, util.nodes_notlinked_file,
                     util.calculated_file, util.ordered_file,
                     util.maxmincalculated_file)
    calc.reading_Max_min_file()
    print("# pair of Authors with at least 3 articles Calculated:  %s"
          % (calc.qtyDataCalculated,))

    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print("# pair of Authors with at least 3 articles that is connected "
          "in Test Graph in a random way:  %s" % (topRank,))
    print("Max values found in calculations:  %s"
          % (str(calc.maxValueCalculated),))
    print("Min Values found in calculations:  %s"
          % (str(calc.minValueCalculated),))

    # NOTE(review): the source was collapsed onto one line; every statement
    # below is assumed to belong to the loop body -- confirm.
    for pathFile in calc.getfilePathOrdered_separeted():
        analysedPath = pathFile + '.analised.txt'
        print("File Analised:  %s" % (analysedPath,))
        number_connected = Analyse.getTopRankABSPathFiles(analysedPath)
        print("# pair of Authors that is connected in Test Graph:  %s"
              % (number_connected,))
        print("%%:  %s" % (
            Analyse.getLastInfosofResultsABSPathFiles(analysedPath, topRank),))
        print("---------------------------------")
def execution(configFile):
    """Run the in-memory scoring experiment and write its report file.

    The report is written to ``configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt'``
    (resolved through ``FormatingDataSets.get_abs_file_path``).  It holds the
    start/end timestamps, one success total per entry of
    ``myparams.ScoresChoiced`` and a tab-separated summary line.

    The file is handled by a ``with`` block so it is closed even when one of
    the calculation steps raises; previously the handle leaked in that case.

    :param configFile: path of the configuration file for ``ParameterUtil``.
    """
    resultPath = FormatingDataSets.get_abs_file_path(
        configFile + 'T.EXPERIMENTO_ATUAL_CORE03.txt')
    with open(resultPath, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay, domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file)

        # Training graph from t0/t0_, test graph from t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodesSelection = NodeSelection(
            myparams.trainnigGraph, myparams.testGraph, util)
        # Authors publishing in both periods (threshold: min_edges in config).
        nodes = nodesSelection.get_NowellAuthorsCore()
        # Only used for the "Collaborations" column of the summary line.
        collaborations = nodesSelection.get_NowellColaboration()
        # Edges among `nodes` in the training graph.
        eOld = nodesSelection.get_NowellE(nodes, myparams.trainnigGraph)
        # Edges among `nodes` in the test graph that are not in eOld.
        eNew = nodesSelection.get_NowellE2(nodes, eOld, myparams.testGraph)
        # Candidate pairs: combinations of `nodes` that are not in eOld.
        nodesNotLinked = nodesSelection.get_PairsofNodesNotinEold(nodes)

        calc = CalculateInMemory(myparams, nodesNotLinked)
        resultsofCalculation = calc.executingCalculate()
        # Keep the top len(eNew) results.
        orderingResults = calc.ordering(len(eNew), resultsofCalculation)
        calc.saving_orderedResult(util.ordered_file, orderingResults)
        ScoresResults = Analyse.AnalyseNodesWithScoresInFuture(
            orderingResults, myparams.testGraph)

        # NOTE(review): the source was collapsed onto one line; the first
        # newline write is assumed to end each metric line (inside the loop)
        # and the second to add one blank line after the loop -- confirm.
        for index, scoreResult in enumerate(ScoresResults):
            metricName = str(myparams.ScoresChoiced[index][0])
            Analyse.saving_analyseResult(
                scoreResult, util.analysed_file + metricName + '.txt')
            resultFile.write(
                "TOTAL OF SUCESSS USING METRIC " + metricName + " = "
                + str(Analyse.get_TotalSucess(scoreResult)))
            resultFile.write("\n")
        resultFile.write("\n")

        resultFile.write(
            "Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(collaborations) * 2) + "\t\t"
            + str(len(nodes)) + "\t"
            + str(len(eOld)) + "\t"
            + str(len(eNew)))
        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def step01(paramFile):
    """Read the original dataset for one subject and save the graph.

    A ``Formating`` object is created for ``util.graph_file``, restricted to
    subject ``'cond-mat'`` and the years 1994 through 1999, asked to read the
    original dataset and then to save the graph.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    formatter = Formating(settings.graph_file)
    formatter.subject = 'cond-mat'
    # Same six years as the literal list 1994, 1995, ..., 1999.
    formatter.yearstoRescue = list(range(1994, 2000))
    formatter.readingOrginalDataset()
    # formatter.generating_graph() is left disabled, as in the original.
    formatter.saveGraph()
def step04(paramFile):
    """Generate the training graph and call ``Separating_calculateFile``.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    params.generating_Training_Graph()

    calculator = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    calculator.Separating_calculateFile()
def execution(configFile):
    """Rank unlinked node pairs by the fuzzy-input score and report hits.

    ``calculatingInputToFuzzy`` yields one dict per candidate pair (keys
    ``'no1'``, ``'no2'``, ``'result'`` are read here).  The pairs are sorted
    by ``'result'`` in descending order, the first ``len(nodeSelection.eNeW)``
    of them are passed to ``AnalyseNodesInFuture`` together with the test
    graph, and ``repr(get_TotalSucess(...))`` is written to
    ``configFile + 'core03.txt'`` followed by a tab-separated summary line.

    The report file is handled by a ``with`` block so it is closed even if a
    step raises, and the two duplicated copy loops are replaced by a slice
    (which already stops at the end of the list when fewer pairs exist).

    :param configFile: path of the configuration file for ``ParameterUtil``.
    """
    resultPath = FormatingDataSets.get_abs_file_path(configFile + 'core03.txt')
    with open(resultPath, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay, domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file)

        # Training graph from t0/t0_, test graph from t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(
            myparams.trainnigGraph, myparams.testGraph, util)

        # (A disabled os.path.exists check on '.fuzzyinputy.txt' used to
        # guard this call; the input is always recalculated.)
        data = calculatingInputToFuzzy(
            myparams.trainnigGraph, nodeSelection.nodesNotLinked, myparams)
        dataSorted = sorted(
            data, key=lambda value: value['result'], reverse=True)

        topRank = len(nodeSelection.eNeW)
        # Copy only the three keys used downstream, for the best `topRank`
        # entries (or all of them when fewer were calculated).
        dataToAnalysed = [
            {'no1': item['no1'], 'no2': item['no2'], 'result': item['result']}
            for item in dataSorted[:topRank]
        ]

        analise = AnalyseNodesInFuture(dataToAnalysed, myparams.testGraph)
        resultFile.write(repr(get_TotalSucess(analise)))
        resultFile.write("\n")

        # Column order of the summary line:
        # Authors, Articles, Collaborations, Authors, Eold, Enew
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t"
            + str(len(nodeSelection.nodes)) + "\t"
            + str(len(nodeSelection.eOld)) + "\t"
            + str(len(nodeSelection.eNeW)))
        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile):
    """Run the weight-database experiment and write its report file.

    Two ``.base.pdl`` databases are used: one derived from the training
    graph path and one from the calculated-file path.  Each is generated when
    its file does not exist yet and read back otherwise.  The ordering taken
    from the second database (limited to ``len(nodeSelection.eNeW)``) is
    analysed against the test graph and ``repr`` of that result is written
    to ``configFile + 'core03_onlyinteraction.txt'`` with a summary line.

    The report file is handled by a ``with`` block so it is closed even if a
    step raises; previously the handle leaked in that case.

    :param configFile: path of the configuration file for ``ParameterUtil``.
    """
    resultPath = FormatingDataSets.get_abs_file_path(
        configFile + 'core03_onlyinteraction.txt')
    with open(resultPath, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay, domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file)

        # Training graph from t0/t0_, test graph from t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(
            myparams.trainnigGraph, myparams.testGraph, util)

        # Weights database: generate on first run, reuse afterwards.
        weightsDbPath = FormatingDataSets.get_abs_file_path(
            util.trainnig_graph_file + '.base.pdl')
        if not os.path.exists(weightsDbPath):
            db = generateWeights(
                myparams.trainnigGraph, weightsDbPath, myparams)
        else:
            db = reading_Database(weightsDbPath)

        # Calculation database: generate on first run, reuse afterwards.
        calcDbPath = FormatingDataSets.get_abs_file_path(
            util.calculated_file + '.base.pdl')
        if not os.path.exists(calcDbPath):
            # NOTE(review): the output path is built by appending the suffix
            # AFTER get_abs_file_path, unlike the check/read path above.
            # Kept as written; confirm both spellings resolve to the same
            # file, otherwise the database is regenerated on every run.
            calcDb = calculatingWeights(
                myparams.trainnigGraph, nodeSelection.nodesNotLinked, db,
                FormatingDataSets.get_abs_file_path(util.calculated_file)
                + '.base.pdl')
        else:
            calcDb = reading_Database(calcDbPath)

        ordering = get_ordering(calcDb, len(nodeSelection.eNeW))
        result = get_analyseNodesInFuture(ordering, myparams.testGraph)
        resultFile.write(repr(result))
        resultFile.write("\n")

        # Column order of the summary line:
        # Authors, Articles, Collaborations, Authors, Eold, Enew
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t"
            + str(len(nodeSelection.nodes)) + "\t"
            + str(len(nodeSelection.eOld)) + "\t"
            + str(len(nodeSelection.eNeW)))
        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
class SFrame:
    """Holds the experiment tables and scores candidate weight vectors.

    Everything below the docstring runs once, when the class body executes:
    the configuration is parsed, a ``Parameterization`` is built from it and
    two CSV files are loaded with ``sframe.SFrame.read_csv``.
    """

    # Alternative configurations that were switched off in the original:
    #   data/formatado/arxiv/nowell_astroph_1994_1999/AllExecutionScores/configToAG.txt
    #   data/configuration/arxiv/condmat_1994_1999/CombinationLinear/configToAG.txt
    util = ParameterUtil(
        parameter_file='data/configuration/arxiv/exemplo_1994_1999/CombinationLinear/configToAG.txt')

    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        linear_combination=util.linear_combination,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None,
        result_random_file=util.result_random_file,
    )

    # Table read from '<calculated_file>_normalizated.csv'.
    metrics = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(
            util.calculated_file + '_normalizated.csv'))
    # Table read from the result_random_file path.
    results = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(util.result_random_file))
    # Number of best-ranked rows kept by evaluate().
    top = 20

    def __init__(self):
        pass

    @classmethod
    def evaluate(cls, individual):
        """Score one weight vector against the loaded tables.

        The metric columns named by ``cls.myparams.ScoresChoiced`` are
        combined into a weighted sum using the entries of ``individual``.
        The ``cls.top`` rows with the largest sum are joined with
        ``cls.results`` and the rows whose ``'result'`` column is 0 are
        counted.

        :param individual: indexable sequence with one weight per entry of
            ``cls.myparams.ScoresChoiced``.
        :returns: one-element tuple ``(count / cls.top,)``.
        """
        combined = float(0)
        for position, score in enumerate(cls.myparams.ScoresChoiced):
            column = cls.metrics[score[0].getName()]
            weight = individual[position]
            combined = combined + (column * weight)

        ranked = cls.metrics.copy()
        ranked.add_column(combined, name='new_metric')
        ranked = ranked.topk('new_metric', k=cls.top)
        ranked = ranked.join(cls.results.copy())
        ranked = ranked.sort('new_metric', ascending=False)

        zero_rows = ranked.filter_by([0], 'result').num_rows()
        # The original returned through a trailing comma: a 1-tuple.
        return (float(zero_rows) / cls.top,)
def step06(paramFile):
    """Call ``Ordering_separating_File`` with the random-analysis top rank.

    The top rank is read by ``Analyse.getTopRank`` from
    ``util.analysed_file + '.random.analised.txt'``.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    calculator = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    random_rank_path = settings.analysed_file + '.random.analised.txt'
    calculator.Ordering_separating_File(Analyse.getTopRank(random_rank_path))
def step03(paramFile, num_people):
    """Generate the training graph and construct a ``VariableSelection``.

    The selection object is not used afterwards; it is built only for
    whatever its constructor does.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    :param num_people: forwarded as the last ``VariableSelection`` argument.
    :returns: ``None``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    params.generating_Training_Graph()
    VariableSelection(
        params.trainnigGraph,
        settings.nodes_notlinked_file,
        settings.min_edges,
        False,
        num_people,
    )
def step05(paramFile):
    """Generate the test graph and construct the random-order ``Analyse``.

    ``Analyse`` receives the absolute calculated-file path, the absolute
    analysed-file path with ``'.random.analised.txt'`` appended, and
    ``calc.qtyDataCalculated``.  The object itself is not used afterwards.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    # The Calculate object is created before the test graph is generated,
    # exactly as in the original statement order.
    calculator = Calculate(
        params,
        settings.nodes_notlinked_file,
        settings.calculated_file,
        settings.ordered_file,
        settings.maxmincalculated_file,
    )
    params.generating_Test_Graph()

    calculated_path = FormatingDataSets.get_abs_file_path(
        settings.calculated_file)
    analysed_path = (
        FormatingDataSets.get_abs_file_path(settings.analysed_file)
        + '.random.analised.txt')
    Analyse(params, calculated_path, analysed_path,
            calculator.qtyDataCalculated)
def execution(configFile):
    """Build both graphs and run ``calculatingInputToFuzzy`` on them.

    The return value of ``calculatingInputToFuzzy`` is discarded.

    :param configFile: path of the configuration file for ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=configFile)
    params = Parameterization(
        t0=settings.t0,
        t0_=settings.t0_,
        t1=settings.t1,
        t1_=settings.t1_,
        linear_combination=settings.linear_combination,
        filePathGraph=settings.graph_file,
        filePathTrainingGraph=settings.trainnig_graph_file,
        filePathTestGraph=settings.test_graph_file,
        decay=settings.decay,
        domain_decay=settings.domain_decay,
        min_edges=settings.min_edges,
        scoreChoiced=settings.ScoresChoiced,
        weightsChoiced=settings.WeightsChoiced,
        weightedScoresChoiced=settings.WeightedScoresChoiced,
        FullGraph=None,
        result_random_file=settings.result_random_file,
    )
    # Training graph from t0/t0_, then test graph from t1/t1_.
    params.generating_Training_Graph()
    params.generating_Test_Graph()

    selection = NodeSelection(params.trainnigGraph, params.testGraph, settings)
    # (A disabled os.path.exists check on '.fuzzyinputy.txt' used to guard
    # this call; the input is always recalculated.)
    calculatingInputToFuzzy(
        params.trainnigGraph, selection.nodesNotLinked, params)
def step07(paramFile):
    """Construct an ``Analyse`` for every separated ordered file.

    The top rank is read by ``Analyse.getTopRank`` from
    ``util.analysed_file + '.random.analised.txt'``.  For each path returned
    by ``calc.getfilePathOrdered_separeted()`` an ``Analyse`` is constructed
    with that path, the same path plus ``'.analised.txt'`` and the top rank.

    The ``print`` call receives one pre-built string, so the text written is
    the same as with the former print statement while the function also
    parses under Python 3.  The never-read ``analise`` binding was dropped.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    util = ParameterUtil(parameter_file=paramFile)
    myparams = Parameterization(
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced, util.graph_file,
        util.trainnig_graph_file, util.test_graph_file, util.decay)
    calc = Calculate(myparams, util.nodes_notlinked_file,
                     util.calculated_file, util.ordered_file,
                     util.maxmincalculated_file)
    myparams.generating_Test_Graph()

    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    print('Analising Files with TopRank %s' % (str(topRank),))

    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        # Constructed only for whatever the Analyse constructor does.
        Analyse(myparams, OrderingFilePath,
                OrderingFilePath + '.analised.txt', topRank)
def execution(configFile):
    """Run the ``CalculatingTogether`` experiment and write its report file.

    The ordering limited to ``len(nodeSelection.eNeW)`` is analysed against
    the test graph; ``repr(calc.get_TotalSucess())`` and a tab-separated
    summary line are written to ``configFile + 'core03.txt'`` between the
    start and end timestamps.

    The report file is handled by a ``with`` block so it is closed even if a
    step raises; previously the handle leaked in that case.

    :param configFile: path of the configuration file for ``ParameterUtil``.
    """
    resultPath = FormatingDataSets.get_abs_file_path(configFile + 'core03.txt')
    with open(resultPath, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay, domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file)

        # Training graph from t0/t0_, test graph from t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(
            myparams.trainnigGraph, myparams.testGraph, util)

        calc = CalculatingTogether(myparams, nodeSelection.nodesNotLinked)
        ordering = calc.ordering(len(nodeSelection.eNeW))
        # calc.saving_orderedResult(util.ordered_file, ordering) is disabled.
        calc.AnalyseNodesInFuture(ordering, myparams.testGraph)
        resultFile.write(repr(calc.get_TotalSucess()))
        resultFile.write("\n")

        # Column order of the summary line:
        # Authors, Articles, Collaborations, Authors, Eold, Enew
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t"
            + str(len(nodeSelection.nodes)) + "\t"
            + str(len(nodeSelection.eOld)) + "\t"
            + str(len(nodeSelection.eNeW)))
        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
def execution(configFile, metricas):
    """Calculate all scores, save them and the analysis of every pair.

    ``CalculatingCombinationOnlyNowell`` is run with every weight set to 1;
    its ``results`` are saved through ``saving_files_calculting`` and the
    outcome of ``AnalyseAllNodesNotLinkedInFuture`` through
    ``salvar_analise`` (to the analysed-file path plus ``'.allNodes.csv'``).
    A header and a tab-separated summary line are written to
    ``configFile + 'core03.txt'`` between the start and end timestamps.

    The report file is handled by a ``with`` block so it is closed even if a
    step raises; previously the handle leaked in that case.

    :param configFile: path of the configuration file for ``ParameterUtil``.
    :param metricas: forwarded unchanged to ``saving_files_calculting``.
    """
    resultPath = FormatingDataSets.get_abs_file_path(configFile + 'core03.txt')
    with open(resultPath, 'w') as resultFile:
        resultFile.write("Inicio da operacao\n")
        resultFile.write(str(datetime.datetime.now()))
        resultFile.write("\n")

        # Read the config file and build the parameterization from it.
        util = ParameterUtil(parameter_file=configFile)
        myparams = Parameterization(
            t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_,
            linear_combination=util.linear_combination,
            filePathGraph=util.graph_file,
            filePathTrainingGraph=util.trainnig_graph_file,
            filePathTestGraph=util.test_graph_file,
            decay=util.decay, domain_decay=util.domain_decay,
            min_edges=util.min_edges,
            scoreChoiced=util.ScoresChoiced,
            weightsChoiced=util.WeightsChoiced,
            weightedScoresChoiced=util.WeightedScoresChoiced,
            FullGraph=None,
            result_random_file=util.result_random_file)

        # Training graph from t0/t0_, test graph from t1/t1_.
        myparams.generating_Training_Graph()
        myparams.generating_Test_Graph()

        nodeSelection = NodeSelection(
            myparams.trainnigGraph, myparams.testGraph, util)

        # Every metric gets the same weight of 1.
        weights = {'cn': 1, 'aas': 1, 'pa': 1, 'jc': 1,
                   'ts08': 1, 'ts05': 1, 'ts02': 1}
        calc = CalculatingCombinationOnlyNowell(
            myparams, nodeSelection.nodesNotLinked, weights, False)
        saving_files_calculting(
            FormatingDataSets.get_abs_file_path(util.calculated_file),
            calc.results, metricas)

        analysis = nodeSelection.AnalyseAllNodesNotLinkedInFuture(
            nodeSelection.nodesNotLinked, myparams.testGraph)
        salvar_analise(
            FormatingDataSets.get_abs_file_path(util.analysed_file)
            + '.allNodes.csv',
            analysis)

        resultFile.write(
            "Authors\tArticles\tCollaborations\tAuthors\tEold\tEnew\n")
        resultFile.write(
            str(myparams.get_nodes(myparams.trainnigGraph)) + "\t"
            + str(myparams.get_edges(myparams.trainnigGraph)) + "\t\t"
            + str(len(nodeSelection.get_NowellColaboration()) * 2) + "\t\t"
            + str(len(nodeSelection.nodes)) + "\t"
            + str(len(nodeSelection.eOld)) + "\t"
            + str(len(nodeSelection.eNeW)))
        resultFile.write("\n")
        resultFile.write("Fim da Operacao\n")
        resultFile.write(str(datetime.datetime.now()))
class SFrame:
    """Holds the experiment tables and scores candidate weight vectors.

    Everything below the docstring runs once, when the class body executes:
    the configuration is parsed, a ``Parameterization`` is built from it and
    two CSV files are loaded with ``sframe.SFrame.read_csv``.
    """

    util = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999/AllExecutionScores/configToAG.txt')

    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        linear_combination=util.linear_combination,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None,
        result_random_file=util.result_random_file,
    )

    # Table read from '<calculated_file>_normalizated.csv'.
    metrics = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(
            util.calculated_file + '_normalizated.csv'))
    # Table read from the result_random_file path.
    results = sframe.SFrame.read_csv(
        FormatingDataSets.get_abs_file_path(util.result_random_file))
    # Number of best-ranked rows kept by evaluate().
    top = 20

    def __init__(self):
        pass

    @classmethod
    def evaluate(cls, individual):
        """Score one weight vector against the loaded tables.

        The metric columns named by ``cls.myparams.ScoresChoiced`` are
        combined into a weighted sum using the entries of ``individual``;
        the sum is printed.  The ``cls.top`` rows with the largest sum are
        joined with ``cls.results`` and the rows whose ``'result'`` column
        is 0 are counted.

        :param individual: indexable sequence with one weight per entry of
            ``cls.myparams.ScoresChoiced``.
        :returns: one-element tuple ``(count / cls.top,)``.
        """
        weighted_sum = float(0)
        for slot, chosen in enumerate(cls.myparams.ScoresChoiced):
            weighted_sum = weighted_sum + (
                cls.metrics[chosen[0].getName()] * individual[slot])
        # NOTE(review): the source was collapsed onto one line, so it is not
        # visible whether this print sat inside the loop; it is assumed to
        # run once, after the loop -- confirm.
        print(weighted_sum)

        table = cls.metrics.copy()
        table.add_column(weighted_sum, name='new_metric')
        table = table.topk('new_metric', k=cls.top)
        outcome = cls.results.copy()
        table = table.join(outcome)
        table = table.sort('new_metric', ascending=False)
        table = table.filter_by([0], 'result')

        # The original returned through a trailing comma: a 1-tuple.
        return (float(table.num_rows()) / cls.top,)
'''
Created on Aug 22, 2015

@author: cptullio

Analysing the results
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx
from calculating.CalculateInMemory import CalculateInMemory

if __name__ == '__main__':
    # Load the configuration and build the parameterization from it.
    util = ParameterUtil(
        parameter_file='data/formatado/exemplomenor/config/config.txt')
    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None,
    )
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()

    # Candidate pairs come from the training graph.
    selection = VariableSelection(myparams.trainnigGraph, util.min_edges)
    nodesNotLinked = selection.get_pair_nodes_not_linked()

    calc = CalculateInMemory(myparams, nodesNotLinked)
    resultsCalculate = calc.executingCalculate()
    calc.Separating_calculateFile()

    # Analysis over the calculated file, then the top rank it produced.
    analise = Analyse(
        myparams,
        FormatingDataSets.get_abs_file_path(util.calculated_file),
        FormatingDataSets.get_abs_file_path(util.analysed_file)
        + '.random.analised.txt',
        calc.qtyDataCalculated,
    )
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
Created on Aug 22, 2015 @author: cptullio Selecting all Nodes that will be calculated. ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate import networkx from matplotlib import pyplot if __name__ == '__main__': #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt') util = ParameterUtil( parameter_file= 'data/formatado/duarte/1994_1999/config/configuration.txt') myparams = Parameterization( t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_, filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced, weightedScoresChoiced=util.WeightedScoresChoiced,
'''
Created on Aug 22, 2015

@author: cptullio

Generating TopRank
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from formating.FormatingDataSets import FormatingDataSets

if __name__ == '__main__':
    # Load the configuration and build the parameterization from it.
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    myparams = Parameterization(
        util.keyword_decay,
        util.lengthVertex,
        util.t0,
        util.t0_,
        util.t1,
        util.t1_,
        util.FeaturesChoiced,
        util.graph_file,
        util.trainnig_graph_file,
        util.test_graph_file,
        util.decay,
    )

    # Calculate is constructed before the test graph is generated.
    calc = Calculate(
        myparams,
        util.nodes_notlinked_file,
        util.calculated_file,
        util.ordered_file,
        util.maxmincalculated_file,
    )
    myparams.generating_Test_Graph()

    # Analysis over the calculated file, written next to the analysed file.
    analise = Analyse(
        myparams,
        FormatingDataSets.get_abs_file_path(util.calculated_file),
        FormatingDataSets.get_abs_file_path(util.analysed_file)
        + '.random.analised.txt',
        calc.qtyDataCalculated,
    )
@author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx import matplotlib if __name__ == '__main__': util = ParameterUtil( parameter_file= 'data/formatado/arxiv/nowell_astroph_1994_1999/onlyDomainLinkScore/config.txt' ) myparams = Parameterization( t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_, filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced, weightedScoresChoiced=util.WeightedScoresChoiced,
def step02(paramFile):
    """Generate the training graph and then the test graph.

    :param paramFile: path of the parameter file handed to ``ParameterUtil``.
    """
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay,
        settings.lengthVertex,
        settings.t0,
        settings.t0_,
        settings.t1,
        settings.t1_,
        settings.FeaturesChoiced,
        settings.graph_file,
        settings.trainnig_graph_file,
        settings.test_graph_file,
        settings.decay,
    )
    params.generating_Training_Graph()
    params.generating_Test_Graph()
Created on Aug 22, 2015 @author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx import mysql.connector if __name__ == '__main__': util = ParameterUtil( parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file, util.min_edges, True) calc = Calculate(myparams, util.nodes_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) print 'armazenando resultados' cnx = mysql.connector.connect(user='******', password='******', host='127.0.0.1', database='calculos')
'''
Created on Aug 28, 2015

@author: CarlosPM

For statistics results
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from formating.FormatingDataSets import FormatingDataSets
from analysing.Analyse import Analyse
from calculating.Calculate import Calculate

if __name__ == '__main__':
    # Load the configuration and build the parameterization from it.
    util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt')
    myparams = Parameterization(
        t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_,
        filePathGraph = util.graph_file,
        filePathTrainingGraph = util.trainnig_graph_file,
        filePathTestGraph = util.test_graph_file,
        decay = util.decay, domain_decay = util.domain_decay,
        min_edges = util.min_edges,
        scoreChoiced = util.ScoresChoiced,
        weightsChoiced = util.WeightsChoiced,
        weightedScoresChoiced = util.WeightedScoresChoiced,
        FullGraph = None)
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()

    # Each print call gets one pre-formatted string: the text written is the
    # same as with the former "print a, b" statements, and the script also
    # parses under Python 3.  Doubled spaces reproduce the separator the
    # print statement added after a literal that already ended in a space.
    print("Trainning Period: %s  -  %s" % (myparams.t0, myparams.t0_))
    print("Test Period: %s  -  %s" % (myparams.t1, myparams.t1_))
    print("# Papers in Trainning:  %s"
          % (myparams.get_edges(myparams.trainnigGraph),))
    print("# Authors in Training:  %s"
          % (myparams.get_nodes(myparams.trainnigGraph),))
    print("# Papers in Test:  %s"
          % (myparams.get_edges(myparams.testGraph),))
    print("# Authors in Test %s"
          % (myparams.get_nodes(myparams.testGraph),))
'''
Created on Aug 22, 2015

@author: cptullio

First Step is the generation of the graph from the database informations.
We will need the file of parameter to indicate the place where the graph
will be saved
'''
from parametering.ParameterUtil import ParameterUtil
from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # The parameter file supplies the path where the graph is saved.
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_2004_2010.txt')

    astroPh = Formating(util.graph_file)
    # Reading the original dataset is switched off in this script:
    #astroPh.readingOrginalDataset()
    astroPh.generating_graph()
    astroPh.saveGraph()
Created on Aug 22, 2015 @author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx if __name__ == '__main__': util = ParameterUtil( parameter_file= 'data/formatado/arxiv/nowell_condmat_2004_2009/AllExecutionScores/config.txt' ) myparams = Parameterization( t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_, filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced, weightedScoresChoiced=util.WeightedScoresChoiced,
Created on 28 de ago de 2015 @author: CarlosPM For statistics results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from formating.FormatingDataSets import FormatingDataSets from analysing.Analyse import Analyse from calculating.Calculate import Calculate from calculating.CalculateWeights import CalculateWeights if __name__ == '__main__': util = ParameterUtil( parameter_file= 'data/formatado/arxiv/nowell_condmat_1994_1999/config/configuration_weights.txt' ) myparams = Parameterization( t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_, filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced,
'''
Created on Aug 22, 2015

@author: cptullio

Analysing the results.

Driver script: builds the training and test graphs from a configuration
file, separates the calculated file, analyses the random baseline, orders
the separated files and analyses each ordered file.
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from calculating.VariableSelection import VariableSelection
from formating.FormatingDataSets import FormatingDataSets
import networkx
import matplotlib

if __name__ == '__main__':
    # Configuration file that drives this run.
    util = ParameterUtil(
        parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999/AllExecutionScores/config.txt')

    # Every value comes from the parsed configuration; FullGraph is fixed
    # to None.
    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None)

    # The graphs must exist before anything below reads them.
    myparams.generating_Training_Graph()
    myparams.generating_Test_Graph()

    # NOTE(review): `selection` is never used afterwards; presumably the
    # constructor does the work as a side effect - confirm before removing.
    selection = VariableSelection(myparams.trainnigGraph,
                                  util.nodes_notlinked_file,
                                  util.min_edges)

    calc = Calculate(myparams,
                     util.nodes_notlinked_file,
                     util.calculated_file,
                     util.ordered_file,
                     util.maxmincalculated_file)
    calc.Separating_calculateFile()

    # Random baseline: analyse the calculated file into
    # '<analysed_file>.random.analised.txt'.
    # NOTE(review): the Analyse instance itself is not used; presumably the
    # constructor performs the analysis - confirm.
    analise = Analyse(myparams,
                      FormatingDataSets.get_abs_file_path(util.calculated_file),
                      FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt',
                      calc.qtyDataCalculated)

    # The top rank read back from the random analysis feeds both the
    # ordering step and each per-file analysis.
    topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt')
    calc.Ordering_separating_File(topRank)

    # Analyse every ordered file into '<ordered file>.analised.txt'.
    for OrderingFilePath in calc.getfilePathOrdered_separeted():
        analise = Analyse(myparams,
                          OrderingFilePath,
                          OrderingFilePath + '.analised.txt',
                          topRank)
'''
Created on Aug 22, 2015

@author: cptullio

First step: generate the graph from the database information.
The parameter file indicates where the graph will be saved.
'''
from parametering.ParameterUtil import ParameterUtil
from formating.arxiv.Formating import Formating

if __name__ == '__main__':
    # Parameter file for this run. A disabled alternative in the original
    # script pointed at
    # 'data/configuration/arxiv/condmat_1994_1999/MetricaTemporal/config.txt'.
    config_path = 'data/configuration/arxiv/exemplo_1994_1999/CombinationLinear/configToAG.txt'
    settings = ParameterUtil(parameter_file=config_path)

    graph_builder = Formating(settings.graph_file)

    # Presumably selects the subject and publication years to include -
    # confirm against Formating. Disabled alternatives in the original
    # script were [1993] and 2004 through 2012.
    graph_builder.subject = 'cond-mat'
    graph_builder.yearstoRescue = list(range(1994, 2000))

    # NOTE: readingOrginalDataset() is deliberately not called here; the
    # original script had that call disabled.
    graph_builder.generating_graph()
    graph_builder.saveGraph()
@author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx import matplotlib if __name__ == '__main__': util = ParameterUtil( parameter_file= 'data/formatado/exemplomenor/config/configApenasLinkScore.txt') myparams = Parameterization( t0=util.t0, t0_=util.t0_, t1=util.t1, t1_=util.t1_, filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced, weightedScoresChoiced=util.WeightedScoresChoiced,