# Example: create non-overlapping sets
s = data3.shuffle()
s1 = data3[0:50]
s2 = data3[50:100]
s3 = data3[100:]
rawDat_s1, rawLabels_s1 = [s1.data, s1.labels]
rawDat_s2, rawLabels_s2 = [s2.data, s2.labels]

# GRAPH
# Download from the TUDataset repository.
# TODO: will be changed to datasets.get_dataset("COIL-RAG")
gdata1 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG",
                         "Letter-h", readergraph, pre_transform=gTransfItem())
gdata2 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG",
                         "Letter-h", readergraph, transform=gTransfItem())
gdata3 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/COIL-RAG",
                         "Letter-h", readergraph)

# transform is applied on item access, pre_transform once at load time:
print("original data get item: ", gdata3[0].x.nodes[0]['attributes'])
print("transform data get item: ", gdata2[0].x.nodes[0]['attributes'])
print("pre_transformed data get item:", gdata1[0].x.nodes[0]['attributes'])
print("original data stored value: ", gdata3.data[0].nodes[0]['attributes'])
print("transform data stored value: ", gdata2.data[0].nodes[0]['attributes'])
print("pre_transformed data stored value: ", gdata1.data[0].nodes[0]['attributes'])

# ####
# data1_perm = data1.shuffle()
# data0_key = data1_perm.indices[0]
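# Sketch: verify that the three splits above are in fact non-overlapping.
# This assumes sliced datasets keep the per-sample `indices` attribute used
# elsewhere in this file; it is an illustrative check, not part of the pipeline.
assert set(s1.indices).isdisjoint(s2.indices)
assert set(s2.indices).isdisjoint(s3.indices)
assert set(s1.indices).isdisjoint(s3.indices)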
def edgeDissimilarity(a, b):
    # Binary edge dissimilarity: 1 if the edge labels differ, 0 otherwise.
    D = 0
    if (a['labels'] != b['labels']):
        D = 1
    return D

def readergraph(path):
    graphs_nx = reader.tud_to_networkx("Mutagenicity")
    classes = [g.graph['classes'] for g in graphs_nx]
    return graphs_nx, classes

print("Loading...")
data1 = graph_nxDataset(
    "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Mutagenicity",
    "Mutagenicity", readergraph)

# Keep only connected graphs
cleanData = [(g, idx, label)
             for g, idx, label in zip(data1.data, data1.indices, data1.labels)
             if nx.is_connected(g)]
cleanData = np.asarray(cleanData, dtype=object)
data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Mutagenicity")
data1 = data1[0:10]

graphDist = BMF(nodeDissimilarity, edgeDissimilarity)
x1 = graphDist.pdist(data1.data, forceSym=True)
x2 = graphDist.pdist(data1.data, forceSym=False)
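# Sketch: sanity-check the two dissimilarity matrices computed above. This
# assumes pdist returns a square numpy array; with forceSym=True the result
# should be symmetric, while forceSym=False may keep any asymmetry of the
# BMF matcher.
x1 = np.asarray(x1)
x2 = np.asarray(x2)
print("forceSym=True symmetric:", np.allclose(x1, x1.T))
print("max |x1 - x2|:", np.abs(x1 - x2).max())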
# @jitclass
def nodeDissimilarity(a, b):
    # Euclidean distance between node attribute vectors, normalized by
    # sqrt(2) so that coordinates in [0, 1]^2 yield dissimilarities in [0, 1].
    return np.linalg.norm(np.asarray(a['attributes']) - np.asarray(b['attributes'])) / np.sqrt(2)

# @jitclass
def edgeDissimilarity(a, b):
    return 0.0

def readergraph(path):
    graphs_nx = reader.tud_to_networkx("Letter-high")
    classes = [g.graph['classes'] for g in graphs_nx]
    return graphs_nx, classes

print("Loading...")
data1 = graph_nxDataset("/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Letter-high",
                        "LetterH", reader=readergraph)

# Remove null graphs and graphs that are not connected!
cleanData = []
for g, idx, label in zip(data1.data, data1.indices, data1.labels):
    if not nx.is_empty(g):
        if nx.is_connected(g):
            cleanData.append((g, idx, label))
# cleanData = [(g,idx,label) for g,idx,label in zip(data1.data,data1.indices,data1.labels) if not nx.is_empty(g)]
cleanData = np.asarray(cleanData, dtype=object)
data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Letter")
data1 = data1[0:100]

# Test extracted indices
data1 = data1.shuffle()
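# Sketch: quick check of the dissimilarities on two toy nodes. Letter-high
# node attributes are 2D coordinates, so after the sqrt(2) normalization the
# opposite corners of the unit square sit at dissimilarity 1.
a = {'attributes': [0.0, 0.0]}
b = {'attributes': [1.0, 1.0]}
print(nodeDissimilarity(a, b))  # -> 1.0
print(edgeDissimilarity(a, b))  # -> 0.0 (edges carry no information here)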
def main():
    # Generational evolution following DEAP's eaSimple:
    # def eaSimple(population, toolbox, cxpb, mutpb, ngen, stats=None,
    #              halloffame=None, verbose=__debug__):
    ####################
    random.seed(64)
    verbose = True

    population = toolbox.population(n=POP_SIZE)
    cxpb = CXPROB
    mutpb = MUTPROB
    ngen = N_GEN

    halloffame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    ###################

    print("Loading...")
    data1 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/tudataset/Mutagenicity",
        "Mutagenicity", readergraph)
    # Keep only connected graphs
    cleanData = [
        (g, idx, label)
        for g, idx, label in zip(data1.data, data1.indices, data1.labels)
        if nx.is_connected(g)
    ]
    cleanData = np.asarray(cleanData, dtype=object)
    data1 = graph_nxDataset([cleanData[:, 0], cleanData[:, 2]], "Mutagenicity")

    extract_func = randomwalk_restart.extr_strategy(max_order=6)
    subgraph_extr = Extractor(extract_func)

    # Evaluate the individuals with an invalid fitness
    print("Initializing population...")
    subgraphs = [
        subgraph_extr.randomExtractDataset(data1, 1000) for _ in population
    ]
    # mapWithConst = partial(toolbox.evaluate, granulationBucket=subgraphs)
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, zip(invalid_ind, subgraphs))
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)
    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    print("Start evolution")
    for gen in range(1, ngen + 1):
        # Select the next generation individuals
        offspring = toolbox.select(population, len(population))

        # Vary the pool of individuals
        offspring = varAnd(offspring, toolbox, cxpb, mutpb)

        # Evaluate the individuals with an invalid fitness
        subgraphs = [
            subgraph_extr.randomExtractDataset(data1, 1000) for _ in population
        ]
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        # mapWithConst = partial(toolbox.evaluate, granulationBucket=subgraphs)
        fitnesses = toolbox.map(toolbox.evaluate, zip(invalid_ind, subgraphs))
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Replace the current population by the offspring
        population[:] = offspring

        # Append the current generation statistics to the logbook
        record = stats.compile(population) if stats else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)

    return population, logbook
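# main() above assumes a module-level DEAP toolbox registered elsewhere. A
# minimal sketch of the expected registrations follows; the individual
# encoding, variation operators, and evaluate function are placeholders for
# illustration, not the actual E-ABC agent genotype or fitness.
import random
from deap import base, creator, tools

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_float", random.random)
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_float, n=4)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("map", map)

def dummyEvaluate(args):
    # Placeholder fitness: the real one scores granulation/embedding quality.
    # Unpacks the (individual, granulation bucket) pairs built via zip() above.
    ind, granulationBucket = args
    return (sum(ind),)

toolbox.register("evaluate", dummyEvaluate)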
def main():
    # """
    # Reproducing GRALG optimization with the (mu + lambda) evolution strategy
    # """
    ####################
    random.seed(64)
    verbose = True

    population = toolbox.population(n=MU)
    cxpb = CXPROB
    mutpb = MUTPROB
    ngen = N_GEN
    mu = MU
    lambda_ = LAMBDA

    halloffame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    logbook = tools.Logbook()
    logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
    ###################

    print("Loading...")
    data1 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/IAM/Letter3/Training/",
        "LetterH", reader=IAMreadergraph)
    data2 = graph_nxDataset(
        "/home/luca/Documenti/Progetti/E-ABC_v2/eabc_v2/Datasets/IAM/Letter3/Validation/",
        "LetterH", reader=IAMreadergraph)
    # data1 = data1.shuffle()
    # data2 = data2.shuffle()

    # Remove null graphs and graphs that are not connected!
    cleanData = []
    for dataset in [data1, data2]:
        for g, idx, label in zip(dataset.data, dataset.indices, dataset.labels):
            if not nx.is_empty(g):
                if nx.is_connected(g):
                    cleanData.append((g, idx, label))
    cleanData = np.asarray(cleanData, dtype=object)
    normalize('coords', cleanData[:750, 0], cleanData[750:, 0])

    # Slightly different from the dataset used in pygralg
    dataTR = graph_nxDataset([cleanData[:750, 0], cleanData[:750, 2]],
                             "LetterH", idx=cleanData[:750, 1])
    dataVS = graph_nxDataset([cleanData[750:, 0], cleanData[750:, 2]],
                             "LetterH", idx=cleanData[750:, 1])
    del data1
    del cleanData

    print("Setup...")
    extract_func = randomwalk_restart.extr_strategy(max_order=6)
    # extract_func = breadthFirstSearch.extr_strategy(max_order=6)
    # subgraph_extr = Extractor(extract_func)
    subgraph_extr = Extractor(extract_func)

    # Expand each graph into ~50 subgraphs of orders 1..5. Note: each pass
    # extracts 5 subgraphs (range(1, 6)) while k advances by 6.
    expTRSet = dataTR.fresh_dpcopy()
    for i, x in enumerate(dataTR):
        k = 0
        while (k < 50):
            for j in range(1, 6):
                subgraph_extr.max_order = j
                expTRSet.add_keyVal(dataTR.to_key(i), subgraph_extr.extract(x))
            k += 6
    expVSSet = dataVS.fresh_dpcopy()
    for i, x in enumerate(dataVS):
        k = 0
        while (k < 50):
            for j in range(1, 6):
                subgraph_extr.max_order = j
                expVSSet.add_keyVal(dataVS.to_key(i), subgraph_extr.extract(x))
            k += 6

    # Evaluate the individuals with an invalid fitness
    print("Initializing population...")
    subgraphs = subgraph_extr.randomExtractDataset(dataTR, 1260)
    invalid_ind = [ind for ind in population if not ind.fitness.valid]
    fitnesses = toolbox.map(
        functools.partial(toolbox.evaluate,
                          granulationBucket=subgraphs,
                          trEmbeddBucket=expTRSet,
                          vsEmbeddBucket=expVSSet,
                          TRindices=dataTR.indices,
                          VSindices=dataVS.indices,
                          TRlabels=dataTR.labels,
                          VSlabels=dataVS.labels),
        invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    if halloffame is not None:
        halloffame.update(population)
    record = stats.compile(population) if stats else {}
    logbook.record(gen=0, nevals=len(invalid_ind), **record)
    if verbose:
        print(logbook.stream)

    # Begin the generational process
    for gen in range(1, ngen + 1):
        # Vary the population
        offspring = varOr(population, toolbox, lambda_, cxpb, mutpb)

        # Evaluate the modified individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(
            functools.partial(toolbox.evaluate,
                              granulationBucket=subgraphs,
                              trEmbeddBucket=expTRSet,
                              vsEmbeddBucket=expVSSet,
                              TRindices=dataTR.indices,
                              VSindices=dataVS.indices,
                              TRlabels=dataTR.labels,
                              VSlabels=dataVS.labels),
            invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit

        # Update the hall of fame with the generated individuals
        if halloffame is not None:
            halloffame.update(offspring)

        # Select the next generation population
        population[:] = toolbox.select(population + offspring, mu)

        # Update the statistics with the new population
        record = stats.compile(population) if stats is not None else {}
        logbook.record(gen=gen, nevals=len(invalid_ind), **record)
        if verbose:
            print(logbook.stream)

    return population, logbook
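# Sketch of a typical entry point for either main() above: run the evolution
# and report the best individual from the final population.
if __name__ == "__main__":
    pop, log = main()
    best = tools.selBest(pop, k=1)[0]
    print("Best individual fitness:", best.fitness.values)
    print(log)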