def filterModule(dict_context, controller):
    i = 1
    print("Context Features:")
    text = "Context Features:"
    controller.getAMController().addToConsoleAll(text + "\n")
    for key, value in dict_context.items():
        text = "CF" + str(i) + " - " + str(value)
        print(text)
        i = i + 1
        controller.getAMController().addToConsoleAll(text + "\n")
        controller.getAMController().addToConsoleInput(text + "\n")

    start_time = time.time()

    # Takes the dictionary and converts it to the correct format for Crossing
    # (e.g. ["b5:a", "b5:b"])
    extracted_cross_filters, frequency_count = FILS.extractCrossFilters(dict_context, controller)

    # NOTE: CROSS is the collection of SSFs
    CROSS, frequency_count = FILS.processLVLs(extracted_cross_filters)  # Returns the filter list for each level

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_FilterModule(frequency_count, module_time)

    return CROSS

def crossProcessModule(df_dataset, np_CROSS, depth, controller):
    start_time = time.time()

    dict_significant_results, frequency_count, highest_process_frequency = CMPS.crossProcessOptimized(
        df_dataset, np_CROSS, depth, controller)

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_CrossProcessModule(
        frequency_count, highest_process_frequency, module_time)

    return dict_significant_results

def loaderModule():
    start_time = time.time()

    df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset, frequency_count = LS.loadInput()  # Can add parameters

    # Record run time
    module_time = (time.time() - start_time)

    # Update singleton frequency
    AMVS.getSingleton().updateFrequency_LoaderModule(frequency_count, module_time)

    return df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset

def runAutomatedMining(controller):
    text = "RUNNING Automated Mining\n"  # Show start message in console
    controller.getAMController().addToConsoleAll(text + "\n")

    text = "MAX CROSS: " + str(UICS.MAX_CROSS)  # Show MAX CROSS in console and input
    controller.getAMController().addToConsoleAll(text + "\n")
    controller.getAMController().addToConsoleInput(text + "\n")

    text = "MAX LEVEL: " + str(UICS.MAX_LEVEL) + "\n"  # Show MAX LEVEL in console and input
    controller.getAMController().addToConsoleAll(text + "\n")
    controller.getAMController().addToConsoleInput(text + "\n")

    df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset = loaderModule()

    # Run STATIC depth mining (Loops based on MAX DEPTH)
    # dict_significant_results = runStaticDepthMining(df_raw_dataset, df_dataset, ftr_names, controller)

    # Depth mining that continues on until the p-value stops updating
    dict_significant_results = runMobileDepthMining(df_raw_dataset, df_dataset, ftr_names,
                                                    pd_raw_dataset, controller)

    controller.isAMFinished()  # Enables the Check button (Call on completion of the last iteration)

    print("Automated Mining Finished...")

    str_depths = str(AMVS.getSingleton().getDepths())
    controller.getAMController().addToConsoleAll("\nTotal Depth: " + str_depths)
    print("Total Depth " + str_depths)

    str_run_time = str(AMVS.getSingleton().getTime())
    controller.getAMController().addToConsoleAll("\nAM Run time:\n" + str_run_time + " seconds\n")
    print("Mining Run Time: " + str_run_time + " seconds")

    AMVS.getSingleton().resetSingleton()

    return dict_significant_results

def isConstantSSFs(list_currSSFs):
    singleton = AMVS.getSingleton()
    # Get the list of all parsed SSFs (from all depths) via the Singleton class
    llist_prevSSFs = singleton.getLlSSFs()

    state = False
    for SSFs in llist_prevSSFs:
        # Check if all items in the current SSFs list are contained
        # in any previously parsed SSFs list
        state = isListsMatch(SSFs, list_currSSFs)
        if state:  # If there's a match, stop looping and return 'state'
            break

    return state

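# Assumed helper (sketch only): isListsMatch is referenced above but not defined
# in this section. Based on the comment describing containment of the current
# SSFs in a previously parsed SSFs list, it is assumed to behave roughly like
# the sketch below; the actual implementation may differ.
def _sketch_isListsMatch(list_prevSSFs, list_currSSFs):
    # True when every SSF in the current list already appears in the previous list
    return all(ssf in list_prevSSFs for ssf in list_currSSFs)
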
def crossProcessOptimized(df_dataset, np_CROSS, depth, controller):
    key = UICS.KEY_PRE_CROSS_MODULE  # Key for progress bar
    controller.updateModuleProgress(key, UICS.MODULE_INDICATOR + "Starting CROSS PROCESS MODULE")  # 1
    # time.sleep(0.01)  # Sleep

    # Generate datasets as dictated by filters
    # NOTE:
    # np_dataset_pairs[type] - A list of cross types
    # np_dataset_pairs[type][level] - A list of levels within the list of cross types
    # np_dataset_pairs[type][level][0] - A list of dataset pairs (list) within the list of levels
    # np_dataset_pairs[0][0][0][0] - The contents of the list containing the dataset pairs
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Extracting Datasets by Filter")  # 2
    # time.sleep(0.01)  # Sleep
    np_cross_datasets, np_cross_filters = extractDatasets(df_dataset, np_CROSS)  # TODO (Future) Try to optimize
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Successfully Extracted Datasets")  # 3
    # time.sleep(0.01)  # Sleep

    len_cross_datasets = int(UICS.MAX_CROSS)  # len(np_cross_datasets)
    len_cross_types = int(UICS.MAX_LEVEL)  # UICS.MAX_CROSS # len(cross_type)
    # len_cross_level = UICS.MAX_LEVEL  # len(cross_level)

    list_cross_ssfs = []
    dict_result_table_sig = collections.OrderedDict()

    print("Processing - Please Wait...")
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Starting Cross Process : This might take some time...")  # 4
    # time.sleep(0.01)  # Sleep

    # Prepare to update progress bar with the second half of the CROSS PROCESS MODULE
    key = UICS.KEY_CROSS_MODULE  # Key for progress bar

    # Compute the total process of this section according to the computed cross type and level count
    # Compute for one pass at Level (See line commented with "LVL Pass 1")
    UICS.CROSS_MAX_PROCESS_COUNT = computeMaxCrossLevelCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    # Multiply by 2 since you will record each pass (1) then update for exporting the table (1)
    data_filter_process_count = computeMaxProcessCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    data_filter_process_count = data_filter_process_count  # * 2
    UICS.CROSS_MAX_PROCESS_COUNT = UICS.CROSS_MAX_PROCESS_COUNT + data_filter_process_count

    list_level_ssfs = None

    start_time = time.time()

    # Apply Chi-square on all dataset pairs in the list np_dataset_pairs
    for i_cross_type in range(len_cross_datasets):  # TODO (Future) Find the best way to partition this
        cross_type = np_cross_datasets[i_cross_type]  # Iterate through each CROSS TYPE

        for i_cross_level in range(len_cross_types):
            # The variable cross_level is the list of dataframes
            cross_level = cross_type[i_cross_level]  # Iterate through each LEVEL
            len_cross_level = len(cross_level)

            list_level_ssfs = []
            list_all_ssfs = []
            list_ssfs = []

            str_current_cross = "[" + str(i_cross_type) + "][" + str(i_cross_level + 1) + "]"
            # Title for the current cross process
            str_title = UICS.SUB_MODULE_INDICATOR + "Processing CROSS" + str_current_cross

            # LVL Pass 1
            # Update the progress bar about the current CROSS[type][level]
            controller.updateModuleProgress(key, str_title)  # Pass 1
            # time.sleep(0.01)  # Sleep

            i_process_count = 0  # Process count for current CROSS[type][level]
            # np_level_ssfs = np.array(list_level_ssfs)

            for i_dataset_pairs in range(len_cross_level):
                dataset_pairs = cross_level[i_dataset_pairs]
                len_dataset_pairs = len(dataset_pairs)
                str_cross_level_length = str(len_cross_level)

                # Description for the current cross process
                str_description = " " + str_current_cross + " - " + str(i_dataset_pairs + 1) + " of " + str_cross_level_length
                controller.updateModuleProgress(key, str_description)  # INNER PASS 1

                for i_dataset_pair in range(len_dataset_pairs):
                    dataset_pair = dataset_pairs[i_dataset_pair]

                    dict_chi_square = CHIS.chiSquare(dataset_pair)
                    # if dict_chi_square is None:
                    #     print("dict_chi_square is NONE")

                    # controller.updateModuleProgress(key, "Applying Chi-square")
                    # time.sleep(0.01)
                    df_processed_output, list_ssf, list_sig_output = CHIS.processChiSquareTable(dict_chi_square)
                    # if df_processed_output is None:
                    #     print("df_processed_output is NONE")

                    if df_processed_output is not None:
                        dataset_pair_filter = np_cross_filters[i_cross_type][i_cross_level][i_dataset_pairs]

                        if len(list_ssfs) == 0:
                            list_ssfs = list_ssf
                        else:
                            list_ssfs = mergeAndFilter(list_ssfs, list_ssf)

                        np_dataset_pair_filter = np.array(dataset_pair_filter)
                        # list_chi_square_output.append([df_output, np_dataset_pair_filter])

                        list_index = [i_cross_type, i_cross_level]

                        # controller.updateModuleProgress(key, "Exporting Chi-square Table")
                        # time.sleep(0.01)
                        df_output, str_pair_name = LS.exportChiSquareTable(df_processed_output, np_dataset_pair_filter, list_index)

                        dict_result_table_sig = addToDictionaryResult(dict_result_table_sig, str_pair_name, list_sig_output)
                    # else:
                    #     controller.updateModuleProgress(key, str_description)  # Pass 2
                    #     # Add 1 to make up for the missed processes
                    #     print("DF OUTPUT IS NULL: Skipping Item")

                list_all_ssfs = mergeAndFilter(list_all_ssfs, list_ssfs)

            ssfs_filename = "SSFs - CROSS[" + str(i_cross_type) + "][" + str(i_cross_level + 1) + "].csv"
            LS.exportSSFs(list_ssfs, ssfs_filename, depth)

            # list_level_ssfs.append(list_all_ssfs)  # Store SSF list  # TODO: Commented out, check if still needed
        # list_cross_ssfs.append(list_level_ssfs)  # TODO: Commented out, check if still needed

    run_time = (time.time() - start_time)
    AMVS.getSingleton().updateTime(run_time)  # Update Singleton's run time
    print("--- %s seconds ---" % run_time)

    str_runtime = "\nCross Process Time:\n" + str(run_time) + " seconds"
    controller.getAMController().addToConsoleAll(str_runtime + "\n")

    print("Processing Complete")

    LS.exportOutputModuleResults(dict_result_table_sig, len_cross_datasets, len_cross_types, controller)

    return dict_result_table_sig

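# Assumed helper (sketch only): addToDictionaryResult is not defined in this
# section. Judging from its usage above, it appears to store the significant
# Chi-square rows under the exported pair's name; the real implementation may
# instead merge or append on key collisions.
def _sketch_addToDictionaryResult(dict_result, str_pair_name, list_sig_output):
    dict_result[str_pair_name] = list_sig_output  # Key the significant output by pair name
    return dict_result
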
def crossProcessOptimized(df_dataset, np_CROSS, depth, controller):
    key = UICS.KEY_PRE_CROSS_MODULE  # Key for progress bar
    controller.updateModuleProgress(key, UICS.MODULE_INDICATOR + "Starting CROSS PROCESS MODULE")  # 1
    # time.sleep(0.01)  # Sleep

    # Generate datasets as dictated by filters
    # NOTE:
    # np_dataset_pairs[type] - A list of cross types
    # np_dataset_pairs[type][level] - A list of levels within the list of cross types
    # np_dataset_pairs[type][level][0] - A list of dataset pairs (list) within the list of levels
    # np_dataset_pairs[0][0][0][0] - The contents of the list containing the dataset pairs
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Extracting Datasets by Filter")  # 2
    # time.sleep(0.01)  # Sleep
    np_cross_datasets, np_cross_filters = extractDatasets(df_dataset, np_CROSS)  # TODO (Future) Try to optimize
    controller.updateModuleProgress(key, UICS.SUB_MODULE_INDICATOR + "Successfully Extracted Datasets")  # 3
    # time.sleep(0.01)  # Sleep

    len_cross_datasets = int(UICS.MAX_CROSS)  # len(np_cross_datasets)
    len_cross_types = int(UICS.MAX_LEVEL)  # UICS.MAX_CROSS # len(cross_type)
    # len_cross_level = UICS.MAX_LEVEL  # len(cross_level)

    list_cross_ssfs = []
    dict_result_table_sig = collections.OrderedDict()

    print("Processing - Please Wait... (Average Runtime for ALL Features - 8 minutes)")
    controller.updateModuleProgress(
        key, UICS.SUB_MODULE_INDICATOR + "Starting Cross Process : This might take some time...")  # 4
    # time.sleep(0.01)  # Sleep

    # Prepare to update progress bar with the second half of the CROSS PROCESS MODULE
    key = UICS.KEY_CROSS_MODULE  # Key for progress bar

    # Compute the total process of this section according to the computed cross type and level count
    # Compute for one pass at Level (See line commented with "LVL Pass 1")
    UICS.CROSS_MAX_PROCESS_COUNT = computeMaxCrossLevelCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    # Multiply by 2 since you will record each pass (1) then update for exporting the table (1)
    data_filter_process_count = computeMaxProcessCount(np_cross_datasets, len_cross_datasets, len_cross_types)
    data_filter_process_count = data_filter_process_count  # * 2
    UICS.CROSS_MAX_PROCESS_COUNT = UICS.CROSS_MAX_PROCESS_COUNT + data_filter_process_count

    start_time = time.time()

    pool_size = len_cross_datasets * len_cross_types
    process_params = []  # Iterable that will contain tuples of parameters
    print("Pool Size: " + str(pool_size))

    pool = Pool(processes=pool_size)
    manager = multiprocessing.Manager()  # Instantiate a Manager
    queue_flag = manager.Queue()
    queue_return = manager.Queue()
    queue_frequency = manager.Queue()
    # queue_time = manager.Queue()
    queue_console = manager.Queue()

    # Apply Chi-square on all dataset pairs in the list np_dataset_pairs
    for i_cross_type in range(len_cross_datasets):  # TODO (Future) Find the best way to partition this
        for i_cross_level in range(len_cross_types):
            queue_flag.put("Done")  # Initialize the Flag Queue, queue_flag
            # Instantiate a process tuple (iterable) parameter for every (i) cross type and level
            params = (i_cross_type, i_cross_level)
            process_params.append(params)

    # Declare the target function and the parameters, minus the iterable
    process_func = partial(CPMPP.process, queue_flag, queue_return, queue_frequency, depth,
                           np_cross_filters, np_cross_datasets, queue_console)

    pool.map(process_func, process_params)  # Launch the partial function and iterable asynchronously
    pool.close()
    pool.join()

    # print((queue_console.qsize()))

    run_time = (time.time() - start_time)
    AMVS.getSingleton().updateTime(run_time)  # Update Singleton's run time
    print("--- %s seconds ---" % run_time)

    str_runtime = "\nCross Process Time:\n" + str(run_time) + " seconds"
    controller.getAMController().addToConsoleAll(str_runtime + "\n")

    frequency_count = 0
    highest_frequency = 0
    # longest_run_time = 0
    while not queue_return.empty():
        dict_result_table_sig = queue_return.get()

        frequency_item = queue_frequency.get()
        frequency_count = frequency_count + frequency_item
        if frequency_item > highest_frequency:
            highest_frequency = frequency_item

        # time_item = queue_time.get()
        # if time_item > longest_run_time:
        #     longest_run_time = time_item

    LS.exportOutputModuleResults(dict_result_table_sig, len_cross_datasets, len_cross_types, depth, controller)

    # module_time = longest_run_time
    controller.updateModuleProgress(100, UICS.SUB_MODULE_INDICATOR + "Finished Depth " + str(depth))  # 1

    print("Processing Complete")

    return dict_result_table_sig, frequency_count, highest_frequency

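# Assumed worker shape (sketch only, not the original CPMPP.process): pool.map
# above passes each (i_cross_type, i_cross_level) tuple as the final positional
# argument of the partial, so the worker is assumed to receive the shared queues
# and datasets first and the index pair last, process its CROSS[type][level]
# slice, and report back through queue_return / queue_frequency / queue_console.
# Names and body below are illustrative only.
def _sketch_cross_worker(queue_flag, queue_return, queue_frequency, depth,
                         np_cross_filters, np_cross_datasets, queue_console, params):
    i_cross_type, i_cross_level = params  # Indices assigned to this worker
    dict_result_table_sig = collections.OrderedDict()  # Placeholder results for this slice
    frequency_count = 0  # Placeholder frequency tally for this slice
    # ... Chi-square processing of np_cross_datasets[i_cross_type][i_cross_level] would go here ...
    queue_return.put(dict_result_table_sig)  # Drained by the while-loop in the caller
    queue_frequency.put(frequency_count)  # Summed and maxed by the caller
    queue_console.put("Finished CROSS[" + str(i_cross_type) + "][" + str(i_cross_level + 1) + "]")
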
def runMobileDepthMining(df_raw_dataset, df_dataset, ftr_names, pd_raw_dataset, controller):
    singleton = AMVS.getSingleton()  # A Singleton class is used

    dict_significant_results = None
    isUpdating = True
    hasPrevSSFs = True
    i_depth = 0

    while isUpdating:  # Keep looping until the stop criteria are met
        curr_depth = i_depth + 1
        singleton.resetCtrAccepted()
        print("Starting DEPTH: " + str(curr_depth))

        # Select SSFs; on the first iteration, use RFE, else load the generated SSFs of the previous depth
        if i_depth == 0:
            print("Loading SEED SSFs...")
            # dict_ranked_features = rfeModule(df_raw_dataset, ftr_names, controller)
            dict_ranked_features = UICS.SEED_SSFS
            AMVS.getSingleton().updateDictSSFs(dict_ranked_features, curr_depth)
            print("-- Successfully Loaded SEED SSFs --")

            print("Extracting RFE Features")
            # rfe_features = rfeModule(df_raw_dataset, ftr_names, pd_raw_dataset, controller)
            # print("-- Successfully Determined RFE Features --")
            # print(rfe_features)
            print("")
        else:
            print("Extracting SSFs from Previous Depth [" + str(i_depth) + "]...")
            # Load the previous SSFs and consolidate. The current depth
            # indicates the PREVIOUS SSF folder.
            df_SSFs = DS.loadPreviousSSFs(i_depth)
            print("df_SSFs")
            print(df_SSFs)

            if df_SSFs is None:
                # If there were no previously loaded SSFs, stop updating
                # TODO: check if this can be determined earlier
                hasPrevSSFs = False
                isUpdating = False
                dict_ranked_features = None
                print("-- Failed to Locate Previous SSFs --")
            else:
                # Partition the extracted SSFs into 3 Ranks
                dict_new_ranked_features = DS.rankSSFs(df_SSFs)
                # Merge the new SSFs with the old SSFs
                AMVS.getSingleton().updateDictSSFs(dict_new_ranked_features, curr_depth)
                print("RANK")
                dict_ranked_features = AMVS.getSingleton().getDictSSFs()
                print(dict_ranked_features)
                print("-- Successfully Extracted Previous SSFs --")

        if hasPrevSSFs:
            print("Starting Filtering...")
            np_cross = filterModule(dict_ranked_features, controller)
            print("-- Filtering Finished --")
            print("")

            print("Starting Cross Process...")
            dict_significant_results = crossProcessModule(df_dataset, np_cross, curr_depth, controller)
            print("-- Cross Process Finished --")

            list_SSFs = getSSFsList(dict_ranked_features)
            print(list_SSFs)

            # if isConstantSSFs(list_SSFs):
            if singleton.isConstantSSFs(list_SSFs):
                # Stop mining if the current list of SSFs has been parsed before
                isUpdating = False
            elif singleton.getCtrAccepted() == 0:
                # Mark mining as finished when there are no more accepted values
                isUpdating = False
            print(singleton.getCtrAccepted())

        i_depth = i_depth + 1
        singleton.updateFrequencyCountsText(curr_depth)

    singleton.setDepths(i_depth - 1)  # Log total number of depths
    singleton.printAllTextData()

    return dict_significant_results

def crossFilters(filters, level):
    singleton = AMVS.getSingleton()

    # Get possible combinations of options (in filters parameter)
    combination = list(itertools.combinations(filters, level))
    set_combination = set(combination)

    list_combination = []
    for item in set_combination:
        list_item = np.asarray(item)
        list_combination.append(list_item)
    # list_combination = [val for sublist in list_combination for val in sublist]
    list_combination = np.array(list_combination)

    len_list_combination = len(list_combination)
    cross_filters = []
    end_index = len_list_combination - 1

    # TODO [PRINT: Amount of reduced values for paper]
    ctr_Raw = 0
    ctr_Valid = 0
    ctr_Purged = 0
    ctr_Filtered = 0

    for i in range(end_index):
        item_1 = list_combination[i]
        for j in range(end_index):
            counter = i + (j + 1)
            if counter <= end_index:
                item_2 = list_combination[counter]
                # Sample content: [array(['b1:a', 'p11:b'], dtype='<U5'), array(['b1:a', 'p11:a'], dtype='<U5')]
                cross = [item_1, item_2]

                if validComparison(cross):  # Only proceed if cross is VALID; FMI check notes above function
                    if updateChecklist(cross, level):
                        if not purgedCross(cross):  # Remove repeating pairs
                            # cross_filters.append(cross)
                            # print(cross)
                            ctr_Purged = ctr_Purged + 1
                            if not singleton.isFeaturePairParsed(cross):
                                # Don't include previously parsed pairs (from previous depths)
                                cross_filters.append(cross)  # Append a filter to cross_filters
                                singleton.updateFeaturePairs(cross)
                                singleton.addCtrAccepted()
                                # print("Added:")
                                # print(cross)
                                # print("Singleton contents:")
                                # print(singleton.getFeaturePairs())
                                ctr_Filtered = ctr_Filtered + 1
                    ctr_Valid = ctr_Valid + 1
                ctr_Raw = ctr_Raw + 1

    # Remove the extra details from the array, i.e. "dtype"
    list_cross_filters = []
    for item in cross_filters:
        item = [list(i) for i in item]
        list_cross_filters.append(item)
    np_list_cross_filters = np.array(list_cross_filters)
    # print(np_list_cross_filters)

    print("")
    print("RAW " + str(ctr_Raw))
    print("VALID " + str(ctr_Valid))
    print("PURGED " + str(ctr_Purged))
    print("ACCEPTED " + str(ctr_Filtered))
    print("")

    frequency_count = end_index * end_index

    return np_list_cross_filters, frequency_count

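# Illustrative helper (sketch, not part of the original module): reproduces the
# i / counter indexing used in crossFilters above to show that it walks every
# unordered pair of combinations exactly once (an upper-triangle enumeration),
# while frequency_count above counts the (end_index * end_index) raw inner-loop
# passes rather than the number of accepted pairs.
def _demoPairEnumeration(items):
    end_index = len(items) - 1
    pairs = []
    for i in range(end_index):
        for j in range(end_index):
            counter = i + (j + 1)
            if counter <= end_index:  # Same guard as in crossFilters
                pairs.append((items[i], items[counter]))
    return pairs

# Example: _demoPairEnumeration(["c0", "c1", "c2"]) -> [("c0", "c1"), ("c0", "c2"), ("c1", "c2")]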