def main():
    # initialize the pruner
    pruner = Pruner(args)
    # pruner.prune(args.checkpoint)
    pruner.evaluate()

    # Run regularization
    pruner.prune(args.checkpoint, fake_mask=True, perm=args.perm,
                 num_iters=args.num_sort_iters)
    pruner.evaluate()
    pruner.regularize()
    pruner.apply_mask()
    pruner.evaluate()

    logging.debug("Fine-tuning model for {} epochs".format(args.epochs))
    best_acc = pruner.fine_tune(args.epochs)
    logging.debug("Fine-tuned model")
    pruner.evaluate()

    write_summary(args, best_acc=best_acc)
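The main() above reads its settings from a module-level args namespace. A minimal sketch of how those flags could be declared with argparse follows; the flag names are taken from the calls above, while the types, defaults, and help strings are illustrative assumptions, not the original configuration.

import argparse
import logging

def parse_args():
    # Flags referenced by main(); defaults here are placeholders, not the original values.
    parser = argparse.ArgumentParser(description="Prune, regularize, and fine-tune a model")
    parser.add_argument("--checkpoint", type=str, required=True,
                        help="Path to the checkpoint passed to pruner.prune()")
    parser.add_argument("--perm", action="store_true",
                        help="Enable permutation-based sorting during pruning")
    parser.add_argument("--num-sort-iters", dest="num_sort_iters", type=int, default=1,
                        help="Number of sorting iterations passed to prune()")
    parser.add_argument("--epochs", type=int, default=10,
                        help="Number of fine-tuning epochs")
    return parser.parse_args()

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    args = parse_args()   # main() reads this module-level namespace
    main()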
def prune_graph(self, graph_file_name, seed_file_name, oov_list_file,
                output_graph_type, neighbour_prunning_method,
                neighbour_prunning_input):
    # oov_list
    oov_list = []
    with open(oov_list_file) as inp:
        for line in inp:
            if line.strip() in self.phrase_to_id:
                oov_list.append(self.phrase_to_id[line.strip()])

    # Pruning
    graph = {}
    with open(graph_file_name) as inp:
        for line in inp:
            parts = line.strip().split()
            node1 = parts[0]
            node2 = parts[1]
            weight = parts[2]
            if node1 in graph:
                graph[node1].append((node2, weight))
            else:
                graph[node1] = [(node2, weight)]

    pruner = Pruner()
    new_graph = pruner.prune(graph, self.labeled_nodes, oov_list,
                             output_graph_type, neighbour_prunning_method,
                             neighbour_prunning_input)

    # reading graph file
    with open(graph_file_name, 'r') as inp:
        # TODO add pruning details to the file
        with open(graph_file_name + ".pruned", 'w') as inp2:
            for line in inp:
                parts = line.strip().split()
                if parts[0] in new_graph and parts[1] in new_graph:
                    inp2.write(line)

    # reading seeds file
    with open(seed_file_name, 'r') as inp:
        # TODO add pruning details to the file
        with open(seed_file_name + ".pruned", 'w') as inp2:
            for line in inp:
                parts = line.strip().split()
                if parts[0] in new_graph:
                    inp2.write(line)
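prune_graph only requires the returned new_graph to support membership tests on node identifiers. A minimal sketch of a graph Pruner compatible with that call is shown below; the strategy it implements (keeping labeled and OOV nodes plus their top-k neighbours by edge weight) is an illustrative assumption, not the original neighbour-pruning logic, and the 'top_k' method name is hypothetical.

class Pruner:
    """Illustrative graph pruner; the real neighbour-pruning strategies may differ."""

    def prune(self, graph, labeled_nodes, oov_list, output_graph_type,
              neighbour_prunning_method, neighbour_prunning_input):
        # Always retain labeled nodes and in-vocabulary ids mapped from the OOV list.
        keep = set(labeled_nodes) | set(oov_list)
        if neighbour_prunning_method == 'top_k':
            k = int(neighbour_prunning_input)
            for node in list(keep):
                neighbours = graph.get(node, [])
                # Keep the k strongest edges per retained node (weights are stored as strings).
                strongest = sorted(neighbours, key=lambda e: float(e[1]), reverse=True)[:k]
                keep.update(n for n, _ in strongest)
        else:
            # Fallback: keep every node that appears as an edge source.
            keep.update(graph.keys())
        return keep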
class AntMinerSA:

    def __init__(self, no_of_ants, min_case_per_rule, max_uncovered_cases,
                 no_rules_converg):
        self.no_of_ants = no_of_ants
        self.min_case_per_rule = min_case_per_rule
        self.max_uncovered_cases = max_uncovered_cases
        self.no_rules_converg = no_rules_converg
        self.discovered_rule_list = []
        self._Dataset = None
        self._TermsManager = None
        self._Pruner = None
        self._no_of_uncovered_cases = None
        self._iterations = 0

    def _global_stopping_condition(self, converg_list_index):
        if self._no_of_uncovered_cases < self.max_uncovered_cases:
            return True
        if self._iterations >= self.no_of_ants:
            return True
        # if converg_list_index >= self.no_rules_converg:
        #     return True
        return False

    def _local_stopping_condition(self, ant_index, converg_test_index):
        if ant_index >= self.no_of_ants:
            return True
        elif converg_test_index >= self.no_rules_converg:
            return True
        return False

    def read_data(self, data_path=UserInputs.data_path,
                  header_path=UserInputs.header_path,
                  attr_survival_name=UserInputs.attr_survival_name,
                  attr_event_name=UserInputs.attr_event_name,
                  attr_id_name=UserInputs.attr_id_name,
                  attr_to_ignore=UserInputs.attr_to_ignore,
                  discretization=False):
        header = list(pd.read_csv(header_path, delimiter=','))
        data = pd.read_csv(data_path, delimiter=',', header=None,
                           names=header, index_col=False)
        data.reset_index()
        self._Dataset = Dataset(data, attr_survival_name, attr_event_name,
                                attr_id_name, attr_to_ignore, discretization)
        return

    def fit(self):
        # Initialization
        self._TermsManager = TermsManager(self._Dataset, self.min_case_per_rule)
        self._Pruner = Pruner(self._Dataset, self._TermsManager)
        self._no_of_uncovered_cases = self._Dataset.get_no_of_uncovered_cases()

        converg_list_index = 0
        while not self._global_stopping_condition(converg_list_index):

            # local variables
            ant_index = 0
            converg_test_index = 1

            # Initialize rules
            previous_rule = Rule(self._Dataset)
            best_rule = copy.deepcopy(previous_rule)
            best_rule.quality = 1 - UserInputs.alpha

            while not self._local_stopping_condition(ant_index, converg_test_index):

                current_rule = Rule(self._Dataset)
                current_rule.construct(self._TermsManager, self.min_case_per_rule)
                current_rule = self._Pruner.prune(current_rule)

                if current_rule.equals(previous_rule):
                    converg_test_index += 1
                else:
                    converg_test_index = 1

                if current_rule.quality > best_rule.quality:
                    best_rule = copy.deepcopy(current_rule)

                self._TermsManager.pheromone_updating(current_rule.antecedent,
                                                      current_rule.quality)
                previous_rule = copy.deepcopy(current_rule)
                ant_index += 1

            if best_rule.quality == 1 - UserInputs.alpha:  # did not generate any rules
                break
            else:
                if self._can_add_rule(best_rule):  # check if rule already exists on the list
                    self.discovered_rule_list.append(best_rule)
                    self._Dataset.update_covered_cases(best_rule.sub_group_cases)
                    self._no_of_uncovered_cases = self._Dataset.get_no_of_uncovered_cases()
                    converg_list_index = 0
                else:
                    converg_list_index += 1

            self._TermsManager.pheromone_init()
            self._iterations += 1
        # END OF WHILE (AVAILABLE_CASES > MAX_UNCOVERED_CASES)

        # generates the rules representative strings
        for index, rule in enumerate(self.discovered_rule_list):
            rule.set_string_repr(index)

        return

    def save_results(self, log_file):
        f = open(log_file, "a+")
        f.write('\n\n====== ANT-MINER PARAMETERS ======')
        f.write('\nNumber of ants: {}'.format(self.no_of_ants))
        f.write('\nNumber of minimum cases per rule: {}'.format(self.min_case_per_rule))
        f.write('\nNumber of maximum uncovered cases: {}'.format(self.max_uncovered_cases))
        f.write('\nNumber of rules for convergence: {}'.format(self.no_rules_converg))
        f.write('\n\n====== USER INPUTS PARAMETERS ======')
        f.write('\nHeuristic method: {}'.format(UserInputs.heuristic_method))
        f.write('\nAlpha value for KM function confidence interval: {}'.format(UserInputs.kmf_alpha))
        f.write('\nAlpha value for LogRank confidence: {}'.format(UserInputs.alpha))
        f.write('\n\n====== RUN INFO ======')
        f.write('\nDatabase path: {}'.format(UserInputs.data_path))
        f.write('\nInstances: {}'.format(self._Dataset.data.shape[0]))
        f.write('\nAttributes: {}'.format(self._Dataset.data.shape[1]))
        f.write('\nNumber of remaining uncovered cases: {}'.format(self._no_of_uncovered_cases))
        f.write('\nNumber of iterations: {}'.format(self._iterations))
        f.write('\nNumber of discovered rules: {}'.format(len(self.discovered_rule_list)))
        f.write('\n\n====== DISCRETIZATION INFO ======')
        f.write('\nDiscretization method: {}'.format(UserInputs.discretization_method))
        f.write('\nDiscretized attributes: ' + repr(UserInputs.attr_2disc_names))
        f.write('\n\n====== DISCOVERED RULES ======')
        f.write('\n> Average survival on dataset: {}'.format(self._Dataset.average_survival) + '\n')
        f.close()

        # print all rules representatives and plots
        for index, rule in enumerate(self.discovered_rule_list):
            rule.print_rule(log_file)
            rule.plot_km_estimates(index)

        # print rules info
        f = open(log_file, "a+")
        f.write('\n\n====== DISCOVERED RULES INFO ======\n')
        f.close()
        for rule in self.discovered_rule_list:
            rule.print_rule(log_file)
            with open(log_file, "a+") as f:
                f.write('\n> Number of covered cases: {}'.format(rule.no_covered_cases))
                f.write('\n> Covered cases: ' + repr(rule.sub_group_cases))
                f.write('\n> Quality: ' + repr(rule.quality))
                f.write('\n> p-value of LogRank test: ' + repr(rule.logrank_test.p_value))
                f.write('\n')
        return

    def print_discovered_rules(self):
        for rule in self.discovered_rule_list:
            print(rule.string_repr[0] + ': ' + rule.string_repr[1])
        return

    def get_data(self):
        return self._Dataset.get_data()

    def get_train_data(self):
        return self._Dataset.data

    def _can_add_rule(self, new_rule):
        # check if generated rule already exists on the list
        for rule in self.discovered_rule_list:
            if new_rule.equals(rule):
                return False
        return True
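A typical driver for the AntMinerSA class above could look like the following sketch; the hyperparameter values and log file name are placeholders, and read_data() falls back to the UserInputs defaults declared in its signature.

if __name__ == '__main__':
    # Hyperparameter values below are illustrative, not the original experiment settings.
    miner = AntMinerSA(no_of_ants=100,
                       min_case_per_rule=10,
                       max_uncovered_cases=10,
                       no_rules_converg=5)
    miner.read_data()                       # uses the UserInputs defaults from read_data()
    miner.fit()                             # ant-based rule discovery with pruning
    miner.save_results('antminer_log.txt')  # parameters, run info, and discovered rules
    miner.print_discovered_rules()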
class ESMAM:

    def __init__(self, no_of_ants, min_case_per_rule, max_uncovered_cases,
                 no_rules_converg, alpha):
        self.no_of_ants = no_of_ants
        self.min_case_per_rule = min_case_per_rule
        self.max_uncovered_cases = max_uncovered_cases
        self.no_rules_converg = no_rules_converg
        self.alpha = alpha
        self.discovered_rule_list = []
        self._Dataset = None
        self._TermsManager = None
        self._Pruner = None
        self._data_path = None
        self._population_survModel = None
        self._no_of_uncovered_cases = None
        self._iterations = 0
        self._run_time = None

    def _get_population_Survival(self):
        kmf = KaplanMeierFitter()
        kmf.fit(self._Dataset.survival_times[1], self._Dataset.events[1],
                label='KM estimates for population', alpha=self.alpha)
        self._population_survModel = kmf
        return

    def _save_SurvivalFunctions(self, prefix):
        index = self._population_survModel.survival_function_.index.copy()
        columns = ['times', 'population'] + [rule.string_repr[0]
                                             for rule in self.discovered_rule_list]

        df = pd.DataFrame(columns=columns)
        df.times = index.values
        df.population = self._population_survModel.survival_function_.values

        for rule in self.discovered_rule_list:
            survival_fnc = rule.KMmodel['subgroup'].survival_function_.reindex(index)
            survival_fnc.fillna(method='ffill', inplace=True)
            df[rule.string_repr[0]] = survival_fnc.values

        log_file = '{}_KM-Estimates.txt'.format(prefix)
        df.to_csv(log_file, index=False, header=True)
        return

    def _global_stopping_condition(self):
        if self._no_of_uncovered_cases <= self.max_uncovered_cases:
            return True
        if self._iterations >= self.no_of_ants:
            return True
        return False

    def _local_stopping_condition(self, ant_index, converg_test_index):
        if ant_index >= self.no_of_ants:
            return True
        elif converg_test_index >= self.no_rules_converg:
            return True
        return False

    def _can_add_rule(self, new_rule):
        # check if generated rule already exists on the list
        for rule in self.discovered_rule_list:
            if new_rule.equals(rule):
                return False
        return True

    def read_data(self, data_path, dtype_path, attr_survival_name, attr_event_name):
        if dtype_path:
            with open(dtype_path, 'r') as f:
                dtypes = json.load(f)
            data = pd.read_csv(data_path, delimiter=',', header=0,
                               index_col=False, dtype=dtypes)
            data.reset_index(drop=True, inplace=True)
        else:
            data = pd.read_csv(data_path, delimiter=',', header=0, index_col=False)
            data.reset_index(drop=True, inplace=True)

        self._data_path = data_path
        self._Dataset = Dataset(data, attr_survival_name, attr_event_name)
        return

    def fit(self):
        begin = datetime.now()

        # Initialization
        self._TermsManager = TermsManager(self._Dataset, self.min_case_per_rule)
        self._Pruner = Pruner(self._Dataset, self._TermsManager, self.alpha)
        self._no_of_uncovered_cases = self._Dataset.get_no_of_uncovered_cases()
        self._get_population_Survival()

        while not self._global_stopping_condition():

            # local variables
            ant_index = 0
            converg_test_index = 1

            # Initialize rules
            previous_rule = Rule(self._Dataset, self.alpha)
            best_rule = copy.deepcopy(previous_rule)

            # Local search
            while not self._local_stopping_condition(ant_index, converg_test_index):

                current_rule = Rule(self._Dataset, self.alpha)
                current_rule.construct(self._TermsManager, self.min_case_per_rule)
                current_rule = self._Pruner.prune(current_rule)

                if current_rule.equals(previous_rule):
                    converg_test_index += 1
                else:
                    converg_test_index = 1

                if current_rule.fitness > best_rule.fitness:
                    best_rule = copy.deepcopy(current_rule)

                self._TermsManager.pheromone_updating(current_rule.antecedent,
                                                      current_rule.fitness)
                previous_rule = copy.deepcopy(current_rule)
                ant_index += 1

            # case: local search didn't find any exceptional rules
            if best_rule.fitness < 1 - self.alpha:
                break
            # saving local search results
            elif self._can_add_rule(best_rule):  # check if rule already exists on the list
                self.discovered_rule_list.append(best_rule)
                self._Dataset.update_covered_cases(best_rule.sub_group_cases)
                self._no_of_uncovered_cases = self._Dataset.get_no_of_uncovered_cases()

            self._TermsManager.pheromone_init()
            self._iterations += 1

        self._run_time = datetime.now() - begin

        # generates the rules representative strings
        for index, rule in enumerate(self.discovered_rule_list):
            rule.set_string_repr(index)
            rule.set_KMmodel()
        return

    def save_results(self, prefix):
        log_file = '{}_log.txt'.format(prefix)

        # LOG FILE FOR GENERAL INFO:
        f = open(log_file, "a+")
        f.write('\n\n====== ESMAM PARAMETERS ======')
        f.write('\nNumber of ants: {}'.format(self.no_of_ants))
        f.write('\nNumber of minimum cases per rule: {}'.format(self.min_case_per_rule))
        f.write('\nNumber of maximum uncovered cases: {}'.format(self.max_uncovered_cases))
        f.write('\nNumber of rules for convergence: {}'.format(self.no_rules_converg))
        f.write('\nAlpha value for LogRank confidence: {}'.format(self.alpha))
        f.write('\n\n====== RUN INFO ======')
        f.write('\nDatabase path: {}'.format(self._data_path))
        f.write('\nInstances: {}'.format(self._Dataset.data.shape[0]))
        f.write('\nAttributes: {}'.format(self._Dataset.data.shape[1]))
        f.write('\n# discovered rules: {}'.format(len(self.discovered_rule_list)))
        f.write('\nremaining uncovered cases (%): {}'.format(
            self._no_of_uncovered_cases / self._Dataset.data.shape[0]))
        f.write('\n>run-time: {}'.format(self._run_time))
        f.close()

        # RULE-SET FILE (RULE MODEL INFO):
        rules_file = '{}_RuleSet.txt'.format(prefix)
        f = open(rules_file, "a+")
        f.write('> Average survival on dataset: {}'.format(self._Dataset.average_survival))
        f.write('\nDISCOVERED RULES:')
        f.close()
        # print all rules representatives and plots
        for index, rule in enumerate(self.discovered_rule_list):
            rule.print_rule(rules_file)

        # LOG FILE FOR KM ESTIMATES
        self._save_SurvivalFunctions(prefix)
        return
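Driving ESMAM follows the same pattern as AntMinerSA, with an explicit alpha and an output prefix instead of a single log file. A minimal sketch (the paths, column names, and parameter values are placeholders for illustration):

if __name__ == '__main__':
    # Placeholder paths and parameters, not the original experiment configuration.
    esmam = ESMAM(no_of_ants=100, min_case_per_rule=10,
                  max_uncovered_cases=10, no_rules_converg=5, alpha=0.05)
    esmam.read_data(data_path='data/dataset.csv',
                    dtype_path='data/dataset_dtypes.json',
                    attr_survival_name='survival_time',
                    attr_event_name='event')
    esmam.fit()
    # Writes <prefix>_log.txt, <prefix>_RuleSet.txt and <prefix>_KM-Estimates.txt
    esmam.save_results('results/esmam_run')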
print("Inference Time: " + str(e - s)) if args.prune == 1: print("=" * 60) print("PRUNING") print("=" * 60) print("") name = args.data + '_' + args.load[:-4] set_sparsity(network, args.sensitivity, name) rule = get_rules("rules/" + name + ".rule") fname = args.load[:-4] + '_pruned' original_param, o_total = get_num_weights(network, verbose=False) pruner = Pruner(rule=rule) pruner.prune(model=network, stage=0, update_masks=True, verbose=False) if args.init_param == 1: network.apply(weights_init_uniform_rule) print("\nRe-initialised weights...") # prune for i in range(args.prune_iter): print("") print("-" * 60) print("PRUNE ITERATION", i) print("-" * 60) print("") run_training(args, network,