def step08(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file = paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank) print "---------------------------------"
def step08(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file=paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", Analyse.getLastInfosofResultsABSPathFiles( pathFile + '.analised.txt', topRank) print "---------------------------------"
def step04(paramFile):
    '''Generate the training graph and call Calculate.Separating_calculateFile().

    paramFile is passed to ParameterUtil as ``parameter_file`` (a path
    used previously was
    'data/formatado/arxiv/nowell_example_1994_1999.txt').
    '''
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    params.generating_Training_Graph()

    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    calculator.Separating_calculateFile()
def step07(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file = paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
def step06(paramFile):
    '''Call Calculate.Ordering_separating_File() with the stored top rank.

    paramFile is passed to ParameterUtil as ``parameter_file``. The top
    rank is obtained from ``<analysed_file>.random.analised.txt`` via
    ``Analyse.getTopRank``.
    '''
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)

    randomAnalysedFile = settings.analysed_file + '.random.analised.txt'
    topRank = Analyse.getTopRank(randomAnalysedFile)
    calculator.Ordering_separating_File(topRank)
def step07(paramFile): #util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') util = ParameterUtil(parameter_file=paramFile) myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank)
def step05(paramFile):
    '''Run Analyse on the calculated file, writing the '.random' result.

    paramFile is passed to ParameterUtil as ``parameter_file``. Input and
    output paths are resolved with
    ``FormatingDataSets.get_abs_file_path``; the output is
    ``<analysed_file>.random.analised.txt`` and the last Analyse argument
    is ``calc.qtyDataCalculated``.
    '''
    settings = ParameterUtil(parameter_file=paramFile)
    params = Parameterization(
        settings.keyword_decay, settings.lengthVertex,
        settings.t0, settings.t0_, settings.t1, settings.t1_,
        settings.FeaturesChoiced, settings.graph_file,
        settings.trainnig_graph_file, settings.test_graph_file,
        settings.decay)
    calculator = Calculate(
        params, settings.nodes_notlinked_file, settings.calculated_file,
        settings.ordered_file, settings.maxmincalculated_file)
    params.generating_Test_Graph()

    calculatedPath = FormatingDataSets.get_abs_file_path(settings.calculated_file)
    randomAnalysedPath = (
        FormatingDataSets.get_abs_file_path(settings.analysed_file)
        + '.random.analised.txt')
    # The Analyse instance is not used again in this function.
    analise = Analyse(params, calculatedPath, randomAnalysedPath,
                      calculator.qtyDataCalculated)
''' Created on Aug 22, 2015 @author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_example_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() selection = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges, True) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) wg = calc.adding_normalize_values_tograph(myparams.trainnigGraph) networkx.write_graphml(wg, FormatingDataSets.get_abs_file_path(util.trainnig_graph_file + '.weighted.txt')) node993 =set(n for n,d in wg.edges(data=True) if n == 993 and d == 994) print node993
from analysing.Analyse import Analyse from calculating.VariableSelection import VariableSelection from formating.FormatingDataSets import FormatingDataSets import networkx import mysql.connector if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file,util.min_edges, True) calc = Calculate(myparams, util.nodes_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) print 'armazenando resultados' cnx = mysql.connector.connect(user='******', password='******', host='127.0.0.1', database='calculos') add_result = ("INSERT INTO resultadopesos " "(no1, no2, resultados) " "VALUES (%s, %s, %s)") cursor = cnx.cursor() calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r') for linha in calculatedFile: dado = Calculate.reading_calculateLine(linha) data_result = (dado[1], dado[2].replace('\n',''),str(dado[0])) cursor.execute(add_result, data_result) calculatedFile.close() cnx.commit() cursor.close() cnx.close()
'''
Created on Aug 22, 2015

@author: cptullio

Generating TopRank
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse
from formating.FormatingDataSets import FormatingDataSets

if __name__ == '__main__':
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    myparams = Parameterization(
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced, util.graph_file,
        util.trainnig_graph_file, util.test_graph_file, util.decay)
    calc = Calculate(
        myparams, util.nodes_notlinked_file, util.calculated_file,
        util.ordered_file, util.maxmincalculated_file)
    myparams.generating_Test_Graph()

    # Resolve input and output locations first, then run the analysis
    # of the calculated file into the '.random' result file.
    calculatedPath = FormatingDataSets.get_abs_file_path(util.calculated_file)
    randomAnalysedPath = (
        FormatingDataSets.get_abs_file_path(util.analysed_file)
        + '.random.analised.txt')
    analise = Analyse(myparams, calculatedPath, randomAnalysedPath,
                      calc.qtyDataCalculated)
''' Created on Aug 22, 2015 @author: cptullio Analysing the results ''' from parametering.ParameterUtil import ParameterUtil from parametering.Parameterization import Parameterization from calculating.Calculate import Calculate from analysing.Analyse import Analyse if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/duarte/nowell_duarte_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) myparams.generating_Test_Graph() topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') print 'Analising Files with TopRank', str(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank )
filePathGraph=util.graph_file, filePathTrainingGraph=util.trainnig_graph_file, filePathTestGraph=util.test_graph_file, decay=util.decay, domain_decay=util.domain_decay, min_edges=util.min_edges, scoreChoiced=util.ScoresChoiced, weightsChoiced=util.WeightsChoiced, weightedScoresChoiced=util.WeightedScoresChoiced, FullGraph=None) myparams.generating_Training_Graph() myparams.generating_Test_Graph() selection = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file, util.min_edges) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.Separating_calculateFile() analise = Analyse( myparams, FormatingDataSets.get_abs_file_path(util.calculated_file), FormatingDataSets.get_abs_file_path(util.analysed_file) + '.random.analised.txt', calc.qtyDataCalculated) topRank = Analyse.getTopRank(util.analysed_file + '.random.analised.txt') calc.Ordering_separating_File(topRank) for OrderingFilePath in calc.getfilePathOrdered_separeted(): analise = Analyse(myparams, OrderingFilePath, OrderingFilePath + '.analised.txt', topRank) print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph)
result = line break elif textov2 in line: result = line break calculatedFile.seek(0) return result if __name__ == '__main__': util = ParameterUtil( parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt') calculatedFile = open( FormatingDataSets.get_abs_file_path(util.calculated_file), 'r') for linha in calculatedFile: x.append(Calculate.reading_calculateLine(linha)) calculatedFile.close() myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() Nodes_notLinked = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file, util.min_edges) nodes_notlinkedFile = open( FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r') qtyLine = 0 qtyCalculated = 0 f = open(
if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt') myparams = Parameterization(t0 = util.t0, t0_ = util.t0_, t1 = util.t1, t1_ = util.t1_, filePathGraph = util.graph_file, filePathTrainingGraph = util.trainnig_graph_file, filePathTestGraph = util.test_graph_file, decay = util.decay, domain_decay = util.domain_decay, min_edges = util.min_edges, scoreChoiced = util.ScoresChoiced, weightsChoiced = util.WeightsChoiced, weightedScoresChoiced = util.WeightedScoresChoiced, FullGraph = None) myparams.generating_Training_Graph() myparams.generating_Test_Graph() print "Trainning Period:", myparams.t0, " - ", myparams.t0_ print "Test Period:", myparams.t1, " - ", myparams.t1_ print "# Papers in Trainning: ", myparams.get_edges(myparams.trainnigGraph) print "# Authors in Training: ", myparams.get_nodes(myparams.trainnigGraph) print "# Papers in Test: ", myparams.get_edges(myparams.testGraph) print "# Authors in Test", myparams.get_nodes(myparams.testGraph) calc = Calculate(myparams, util.nodes_notlinked_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) calc.reading_Max_min_file() print "# pair of Authors with at least 3 articles Calculated: ", calc.qtyDataCalculated #FormatingDataSets.getTotalLineNumbers(FormatingDataSets.get_abs_file_path(util.calculated_file)) topRank = Analyse.getTopRank(util.analysed_file+ '.random.analised.txt') print "# pair of Authors with at least 3 articles that is connected in Test Graph in a random way: ", topRank print "Max values found in calculations: ", str(calc.maxValueCalculated) print "Min Values found in calculations: ", str(calc.minValueCalculated) for pathFile in calc.getfilePathOrdered_separeted(): print "File Analised: ", pathFile + '.analised.txt' number_connected = Analyse.getTopRankABSPathFiles(pathFile + '.analised.txt') print "# pair of Authors that is connected in Test Graph: ", number_connected print "%: ", 
Analyse.getLastInfosofResultsABSPathFiles(pathFile + '.analised.txt', topRank) print "---------------------------------"
'''
Created on Aug 22, 2015

@author: cptullio

Ordering Calculation
'''
from parametering.ParameterUtil import ParameterUtil
from parametering.Parameterization import Parameterization
from calculating.Calculate import Calculate
from analysing.Analyse import Analyse

if __name__ == '__main__':
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/nowell_duarte_1994_1999.txt')
    myparams = Parameterization(
        util.keyword_decay, util.lengthVertex,
        util.t0, util.t0_, util.t1, util.t1_,
        util.FeaturesChoiced, util.graph_file,
        util.trainnig_graph_file, util.test_graph_file, util.decay)
    calc = Calculate(
        myparams, util.nodes_notlinked_file, util.calculated_file,
        util.ordered_file, util.maxmincalculated_file)

    # The top rank stored by the '.random' analysis is handed to the
    # ordering step.
    randomAnalysedFile = util.analysed_file + '.random.analised.txt'
    topRank = Analyse.getTopRank(randomAnalysedFile)
    calc.Ordering_separating_File(topRank)
from formating.FormatingDataSets import FormatingDataSets import networkx import mysql.connector if __name__ == '__main__': util = ParameterUtil( parameter_file='data/formatado/arxiv/nowell_astroph_1994_1999.txt') myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() AllNodes = VariableSelection(myparams.trainnigGraph, util.nodes_file, util.min_edges, True) calc = Calculate(myparams, util.nodes_file, util.calculated_file, util.ordered_file, util.maxmincalculated_file) print 'armazenando resultados' cnx = mysql.connector.connect(user='******', password='******', host='127.0.0.1', database='calculos') add_result = ("INSERT INTO resultadopesos " "(no1, no2, resultados) " "VALUES (%s, %s, %s)") cursor = cnx.cursor() calculatedFile = open( FormatingDataSets.get_abs_file_path(util.calculated_file), 'r') for linha in calculatedFile: dado = Calculate.reading_calculateLine(linha) data_result = (dado[1], dado[2].replace('\n', ''), str(dado[0])) cursor.execute(add_result, data_result)
from matplotlib import pyplot

if __name__ == '__main__':
    # Alternative configuration used previously:
    # 'data/formatado/arxiv/exemplomenor/config/configuration_weights.txt'
    util = ParameterUtil(
        parameter_file='data/formatado/duarte/1994_1999/config/configuration.txt')
    myparams = Parameterization(
        t0=util.t0,
        t0_=util.t0_,
        t1=util.t1,
        t1_=util.t1_,
        filePathGraph=util.graph_file,
        filePathTrainingGraph=util.trainnig_graph_file,
        filePathTestGraph=util.test_graph_file,
        decay=util.decay,
        domain_decay=util.domain_decay,
        min_edges=util.min_edges,
        scoreChoiced=util.ScoresChoiced,
        weightsChoiced=util.WeightsChoiced,
        weightedScoresChoiced=util.WeightedScoresChoiced,
        FullGraph=None)
    myparams.generating_Training_Graph()

    calc = Calculate(
        myparams, util.nodes_notlinked_file, util.calculated_file,
        util.ordered_file, util.maxmincalculated_file)
    calc.Separating_calculateFile()

    # Plotting of the training graph is currently disabled:
    # networkx.networkx.draw_networkx(myparams.trainnigGraph)  # networkx draw()
    # pyplot.draw()  # pyplot draw()
    # pyplot.show()
for line in calculatedFile: if texto in line: result = line break elif textov2 in line: result = line break calculatedFile.seek(0) return result if __name__ == '__main__': util = ParameterUtil(parameter_file = 'data/formatado/arxiv/nowell_astroph_1994_1999.txt') calculatedFile = open(FormatingDataSets.get_abs_file_path(util.calculated_file), 'r') for linha in calculatedFile: x.append(Calculate.reading_calculateLine(linha)) calculatedFile.close() myparams = Parameterization(util.keyword_decay, util.lengthVertex, util.t0, util.t0_, util.t1, util.t1_, util.FeaturesChoiced, util.graph_file, util.trainnig_graph_file, util.test_graph_file, util.decay) myparams.generating_Training_Graph() Nodes_notLinked = VariableSelection(myparams.trainnigGraph, util.nodes_notlinked_file,util.min_edges) nodes_notlinkedFile = open(FormatingDataSets.get_abs_file_path(util.nodes_notlinked_file), 'r') qtyLine = 0 qtyCalculated = 0 f = open(FormatingDataSets.get_abs_file_path(util.calculated_file )+ '.weight.txt', 'w') minValueCalculated = list(99999 for x in myparams.featuresChoice) maxValueCalculated = list(0 for x in myparams.featuresChoice) qtyFeatures = len(myparams.featuresChoice) for line in nodes_notlinkedFile: qtyLine = qtyLine + 1 item = VariableSelection.getItemFromLine(line)