def __init__(self, values, base_values=None, data=None, display_data=None,
             instance_names=None, feature_names=None, output_names=None,
             output_indexes=None, lower_bounds=None, upper_bounds=None,
             main_effects=None, hierarchical_values=None, clustering=None):
    self.op_history = []

    # cloning. TODO: better cloning :)
    if issubclass(type(values), Explanation):
        e = values
        values = e.values
        base_values = e.base_values
        data = e.data

    output_dims = compute_output_dims(values, base_values, data)

    if len(_compute_shape(feature_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
        values_shape = _compute_shape(values)
        if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
            feature_names = Alias(list(feature_names), 0)
        elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
            feature_names = Alias(list(feature_names), 1)

    if len(_compute_shape(output_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
        values_shape = _compute_shape(values)
        if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
            output_names = Alias(list(output_names), 0)
        elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
            output_names = Alias(list(output_names), 1)

    self._s = Slicer(
        values=values,
        base_values=None if base_values is None else Obj(base_values, [0] + list(output_dims)),
        data=data,
        display_data=display_data,
        instance_names=None if instance_names is None else Alias(instance_names, 0),
        feature_names=feature_names,
        output_names=output_names,  # None if output_names is None else Alias(output_names, output_dims),
        output_indexes=None if output_indexes is None else (output_dims, output_indexes),
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,  # each bound array is forwarded under its own name
        main_effects=main_effects,
        hierarchical_values=hierarchical_values,
        clustering=None if clustering is None else Obj(clustering, [0])
    )
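# A minimal usage sketch for the constructor above, assuming the rest of the
# shap.Explanation class this __init__ belongs to. The array shapes and
# feature names are hypothetical; only the signature comes from the code above.
import numpy as np

exp = Explanation(
    values=np.random.randn(100, 4),            # (instances, features)
    base_values=np.zeros(100),
    data=np.random.randn(100, 4),
    feature_names=["age", "income", "tenure", "score"],
)
first_ten = exp[:10]                           # indexing is delegated to Slicer
print(first_ten.values.shape)                  # -> (10, 4)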
def register(fixed_volume_path, moving_volume_path, output_folder, params_path):
    output_image_path = r"{}\new_vol.hdf5".format(output_folder)
    moving_image = read_volume(moving_volume_path)
    fixed_image = read_volume(fixed_volume_path)
    # moving_image = normalize_ct_volume(moving_image)
    # fixed_image = normalize_ct_volume(fixed_image)
    # show_histogram(fixed_image, moving_image)
    # run_slicer_functionality({'moving_image': moving_image, 'fixed_image': fixed_image},
    #                          int(moving_image.shape[1] / 2))
    # exit()
    # rotated_moving_image = rotate_and_save(moving_image)
    # unrotated = moving_image.copy()
    moving_image = transform_moving_image(moving_image)
    moving_image, fixed_image = crop_volumes(moving_image, fixed_image)
    res_image_array = move_moving_image_to_fixed(moving_image, fixed_image, params_path, output_folder)
    copy_and_add_volume(fixed_volume_path, output_image_path, res_image_array)
    all_slicer_images = OrderedDict([
        # ('moving', unrotated),
        ('moving_rotated', moving_image),
        ('moving_result', res_image_array),
        ('fixed', fixed_image)
    ])
    Slicer(all_slicer_images, int(fixed_image.shape[1] / 2)).show()
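# Hypothetical invocation of register(); the file paths are placeholders and
# assume Windows-style paths, matching the raw-string template above.
# register(
#     fixed_volume_path=r"C:\data\fixed_ct.hdf5",
#     moving_volume_path=r"C:\data\moving_ct.hdf5",
#     output_folder=r"C:\data\out",
#     params_path=r"C:\data\registration_params.txt",
# )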
def runSlicer(self):
    """
    Launch an instance of Slicer with the current state of vars.
    Function that runs when the user presses the run button
    """
    # DEBUG: self.printSlicerVars()
    num_iters = self.num_slices_entry.get()
    if not num_iters:
        num_iters = self.num_imgs
    else:
        num_iters = int(num_iters)

    slicer = Slicer(
        in_dir=self.curr_dir_lbl.get(),
        out_dir=self.out_dir_lbl.get(),
        img_ext=self.img_ext,
        mode=self.mode.get(),
        reverse=self.reverse.get(),
        curve_depth=self.curve_depth,
        num_slices=num_iters
    )

    self.progress["value"] = 0
    self.progress["maximum"] = num_iters

    slice_thread = threading.Thread(target=slicer.slice)
    slice_thread.daemon = True
    slice_thread.start()
    self.slicer_running = True

    prog_thread = threading.Thread(target=self.watchProgress, args=(slicer, num_iters))
    prog_thread.daemon = True
    prog_thread.start()
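# A standalone sketch of the threading pattern used in runSlicer above: a
# daemon worker plus a watcher that polls its progress. FakeSlicer and its
# num_sliced attribute are illustrative assumptions, not the real Slicer API.
import threading
import time

class FakeSlicer:
    def __init__(self, n):
        self.n = n
        self.num_sliced = 0

    def slice(self):
        for _ in range(self.n):
            time.sleep(0.01)        # stand-in for slicing one image
            self.num_sliced += 1

def watch_progress(slicer, total):
    # poll until the worker reports completion
    while slicer.num_sliced < total:
        print(f"{slicer.num_sliced}/{total}")
        time.sleep(0.05)

fake = FakeSlicer(20)
worker = threading.Thread(target=fake.slice, daemon=True)
worker.start()
watch_progress(fake, 20)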
def slice(self):
    print("Taints: %s" % scanf_taint.taints)
    taints = []
    for _, v in self._hooks.items():
        taints.extend(v.taints)

    target_tmps, target_regs, target_addrs = self._slice_from_last_condition()

    try:
        slicer = Slicer(self._project, self._path,
                        target_tmps, target_regs, target_addrs,
                        self._mem_reads, self._mem_writes, taints)
        slicer.slice()
    except SlicerError:
        raise TracerError("Slicer failed")

    sources = self.insts_to_source(sorted(slicer.instructions))
    for line in sources:
        print(line)
def __init__(self, expected_value, values, data=None, output_shape=tuple(),
             interaction_order=0, instance_names=None, input_names=None,
             output_names=None, output_indexes=None, feature_types=None,
             lower_bounds=None, upper_bounds=None, main_effects=None,
             hierarchical_values=None, partition_tree=None):
    input_shape = _compute_shape(data)
    values_dims = list(range(len(input_shape) + interaction_order + len(output_shape)))
    output_dims = range(len(input_shape) + interaction_order, values_dims[-1])
    #main_effects_inds = values_dims[0:len(input_shape)] + values_dims[len(input_shape) + interaction_order:]
    self.output_names = output_names  # TODO: needs to be tracked after slicing still

    kwargs_dict = {}
    if lower_bounds is not None:
        kwargs_dict["lower_bounds"] = (values_dims, Slicer(lower_bounds))
    if upper_bounds is not None:
        kwargs_dict["upper_bounds"] = (values_dims, Slicer(upper_bounds))
    if main_effects is not None:
        kwargs_dict["main_effects"] = (values_dims, Slicer(main_effects))
    if output_indexes is not None:
        kwargs_dict["output_indexes"] = (output_dims, Slicer(output_indexes))
    if output_names is not None:
        kwargs_dict["output_names"] = (output_dims, Slicer(output_names))
    if hierarchical_values is not None:
        # the dims list (not the array itself) belongs in the first tuple slot,
        # matching every other kwargs entry above
        kwargs_dict["hierarchical_values"] = (values_dims, Slicer(hierarchical_values))
    if partition_tree is not None:
        # the partition tree is assumed to span the instance dimension
        kwargs_dict["partition_tree"] = ([0], Slicer(partition_tree))

    super().__init__(data, values, input_shape, output_shape, expected_value,
                     interaction_order, instance_names, input_names,
                     feature_types, **kwargs_dict)
from slicer import Slicer
import time
import datetime

dir_name = "C:\\Users\\absch\\Desktop\\slicer-test-large\\"
img_ext = ".jpg"

test_timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
f = open("timelog.txt", "a")

begin = time.perf_counter()  # time.clock() was removed in Python 3.8
slicer = Slicer(dir_name, img_ext, "simple", False)
slicer.slice()
f.write(test_timestamp + "\t\tSimple\t\t" + str(round(time.perf_counter() - begin, 4)) + "\n")

begin = time.perf_counter()
convex_slicer = Slicer(dir_name, img_ext, "convex", False, 10)
convex_slicer.slice()
f.write(test_timestamp + "\t\tConvex\t\t" + str(round(time.perf_counter() - begin, 4)) + "\n")

begin = time.perf_counter()
concave_slicer = Slicer(dir_name, img_ext, "concave", False, 10)
concave_slicer.slice()
f.write(test_timestamp + "\t\tConcave\t\t" + str(round(time.perf_counter() - begin, 4)) + "\n")

f.close()
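# The three timing stanzas above repeat the same begin/measure/write pattern;
# a small context manager keeps that in one place. This is a refactoring
# sketch layered on the script above, not part of the original benchmark.
from contextlib import contextmanager

@contextmanager
def timed(log, label, stamp):
    begin = time.perf_counter()
    yield
    log.write(stamp + "\t\t" + label + "\t\t" + str(round(time.perf_counter() - begin, 4)) + "\n")

# usage:
# with timed(f, "Simple", test_timestamp):
#     Slicer(dir_name, img_ext, "simple", False).slice()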
# fragment of do_charts(slicer, pdfpages): the enclosing loop header below is
# reconstructed from context
for ws in window_sizes:
    slicer.extract_rolling_median(seriesname='raw', window_size=ws)
    rm = slicer.series['raw_rolling_median_' + str(ws)][start:end]
    rm_x = [int(j.microseconds / 1000) for j in [i - rm.index[0] for i in rm.index]]
    rm_y = [i for i in rm]
    #rm.plot(xticks=rm.index)
    plt.plot(rm_x, rm_y)

plt.legend(['512Hz EEG'] + ['Window size: %d' % ws for ws in window_sizes], loc='best')
plt.ylabel(r"Potential ($\mu$V)")
plt.xlabel(r"Time after stimulus (ms)")
plt.grid()
#plt.title('10 Hz rolling median, compared to 512Hz signal')
ax.set_ylim(ax.get_ylim()[::-1])
pdfpages.savefig()
#plt.show() #debug

if __name__ == "__main__":
    slicer = Slicer()
    print('loading raw from list of csvfiles')
    slicer.load_series_from_csv('raw', sys.argv[1:])
    pp = PdfPages('rolling_median.pdf')
    do_charts(slicer, pp)
    pp.close()
#==============================================================================
# Process Raw Data
#==============================================================================
if args.intype[0] == 'raw':
    if args.interpolate:
        process_series_files.process_all_in_dir(args.indir[0], join(out_dir, 'data'))
        data_dir = join(out_dir, 'data')
    """
    else: #just copy the files
        print("Copying data files to ", data_dir)
        for csvf in glob.iglob(join(args.indir[0], "*.csv")):
            shutil.copyfile(csvf, join(data_dir, os.path.basename(csvf)))
    """

    print("Instantiating Slicer and loading series")
    slicer = Slicer(taskfile=join(data_dir, 'task.xls'))
    filelist = [join(data_dir, f) for f in os.listdir(data_dir)
                if re.compile(r".*\.csv").match(f)]
    num_subjects = len(filelist)
    slicer.load_series_from_csv('raw', filelist)

    if args.stats:
        pp = PdfPages(join(report_dir, 'stats.pdf'))
        stats.plot_all(slicer, pp)
        fig, ax = plt.subplots()
        ax.plot(range(1, num_subjects + 1))
        plt.title("Number of subjects")
        pp.savefig(fig)
        pp.close()
# fragment: the opening of get_picture() was cut off; the header and loop
# below are reconstructed from the call in __main__
def get_picture(template):
    picture = []
    for row_index in range(0, len(template)):
        row = []
        for _ in range(0, len(template[row_index])):
            row.append(" ")
        picture.append(row)
    return picture

def add_piece(picture, slices):
    print_picture(picture)
    for i in slices:
        for j in i:
            picture[j[2]][j[1]] = j[0]
        print_picture(picture)

def print_picture(picture):
    os.system('cls' if os.name == 'nt' else 'clear')
    for i in range(0, len(picture)):
        output = ""
        for j in range(0, len(picture[i])):
            output += picture[i][j]
        print(output)

if __name__ == "__main__":
    lib = Library(input("Type your special character: "))
    template = lib.assemble_line(input("Type your phrase: "))
    slicer = Slicer(template)
    slices = slicer.get_pattern()
    picture = get_picture(template)
    add_piece(picture, slices)
from slicer import Slicer

#dir_name = "C:\\Users\\absch\\Desktop\\Slicer-test\\"
dir_name = "/mnt/c/Users/absch/Desktop/Slicer-test/"
img_ext = ".jpg"

slicer = Slicer(dir_name, img_ext, "simple", reverse=False, num_slices=20)
slicer.slice()

# convex_slicer = Slicer(dir_name, img_ext, "convex", False, 10)
# convex_slicer.slice()
#
# concave_slicer = Slicer(dir_name, img_ext, "concave", False, 10)
# concave_slicer.slice()
# fragment: body of do_charts(slicer, pdfpages)
start = pd.to_datetime('2010-12-13 13:54:10.5-05:00')
end = pd.to_datetime('2010-12-13 13:54:11.5-05:00')
window_sizes = [32, 64, 128]

raw = slicer.series['raw'][start:end]
raw.plot()
for ws in window_sizes:
    slicer.extract_rolling_median(seriesname='raw', window_size=ws)
    rm = slicer.series['raw_rolling_median_' + str(ws)][start:end]
    rm.plot(xticks=[i for i in rm.index])

plt.legend(['512Hz EEG'] + ['Rolling Median %d window size' % ws for ws in window_sizes], loc='best')
plt.ylabel(r"Potential ($\mu$V)")
plt.xlabel(r"Time ($\mu$Sec)")
#plt.title('10 Hz rolling median, compared to 512Hz signal')
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%S.%f'))
ax.set_ylim(ax.get_ylim()[::-1])
pdfpages.savefig()
#plt.show()

if __name__ == "__main__":
    slicer = Slicer()
    print('loading raw from list of csvfiles')
    slicer.load_series_from_csv('raw', sys.argv[1:])
    pp = PdfPages('rolling_median.pdf')
    do_charts(slicer, pp)
    pp.close()
from typing import List, Union

import numpy as np
import torch
# getNetwork, loadSubject, Slicer, CenterCropTensor, and CenterCropTensor3d
# come from the surrounding project

def Compute3DDice(PID: Union[int, List[int]],
                  netparams: str,
                  patchsize: int,
                  batch: int = 10,
                  bydim: int = 1,
                  doeval: bool = True,
                  dev: str = 'cpu',
                  step: int = 0,
                  saveout: bool = False,
                  savename: str = 'x') -> List[float]:
    #OBS: in case of deepmed, patchsize means the size of the output patch!
    #(i.e. if patchsize=9, the input to the network will be 25x25) <- but this is done in the code
    #step = in what steps you take patches. if ==0, you take nonoverlapping ones. if K, a patch starts
    #   at prev_patch_start+K.
    #saveout = whether we save the full subject output (for viewing and debugging)

    # GET NET:
    net, in1, in2, in3D = getNetwork(netparams, dev)
    if doeval:
        net.eval()
    else:
        net.train()
    device = torch.device(dev)
    print('Net loaded.')

    # CUT AND EVAL: loop through cutting smaller pieces, moving to torch and eval
    if isinstance(PID, int):
        PID = [PID]
    segmented = torch.zeros((1, 7), device=dev)
    existing = torch.zeros((1, 7), device=dev)
    intersec = torch.zeros((1, 7), device=dev)  #these three needed to gather results, for post dice compute
    Dices = torch.zeros((len(PID), 7), device=dev)
    axes = [0, 2, 3] + ([4] if in3D else [])

    #set the right function to use
    TensorCropping = CenterCropTensor3d
    padding = [(0, 0), (0, patchsize), (0, patchsize), (0, patchsize)]  #(16,patchsize+16)
    paddingall = [(0, 0), (0, patchsize), (0, patchsize), (0, patchsize)]  #(16,patchsize+16)
    if in2:  #deep med, we need to pad the input on all sides to be able to cut pieces as wanted
        paddingall[1:] = [(16 + 8, patchsize + 16 + 8)] * 3
        patchsize = patchsize - 16  #since patchsize, as it goes into slicer, means the size of network output
    if not in3D:
        padding[bydim + 1] = (0, 0)
        paddingall[bydim + 1] = (0, 0)
        TensorCropping = CenterCropTensor

    # LOAD DATA:
    for idx, pid in enumerate(PID):
        #set accumulators to 0:
        segmented.zero_()
        existing.zero_()
        intersec.zero_()

        allin, gt, mask = loadSubject(pid, patchsize // 2)
        size_full = allin[0].shape  #shape of 3d img, one channel
        mask = np.pad(mask, padding[1:], mode='constant')
        gt = np.pad(gt, padding, mode='constant')
        allin = np.pad(allin, paddingall, mode='constant')
        # print((size_full, gt.shape))
        empty_subj = torch.zeros(gt.shape[1:])  #allin.shape[1:]) #cause we dont need channels

        slicer = Slicer(size_full, patchsize, in1, in2, in3D, bydim, step)  #return string slice, include all channels
        # for cutting out the middle part based on step:
        #slice((sf-step)//2, sf-np.ceil((sf-step)/2))
        slicing = "".join([
            f'.narrow({idx}, {(patchsize-step)//2}, {step})'
            for idx in range(2, (4 + in3D))
        ]) if step > 0 else ""
        paddingup = [0, patchsize - step] * 3
        if not in3D:
            paddingup[-1 - bydim * 2] = 0

        print(f'Eval on subj{pid}...')
        with torch.no_grad():
            while slicer.todo > 0:
                gtslices, in1slices, in2slices = slicer.get_batch(batch)  #multiple slices
                gts = np.stack(list(map(eval, [f'gt[{slajs}]' for slajs in gtslices])), axis=0)
                in1s = np.stack(list(map(eval, [f'allin[{slajs}]' for slajs in in1slices])), axis=0)
                #maske = np.stack([eval(f'mask[{slajs[2:]}]') for slajs in gtslices], axis=0)
                maske = np.stack(list(map(eval, [f'mask[{slajs[2:]}]' for slajs in gtslices])), axis=0)

                # move to torch:
                target_oh = torch.from_numpy(gts).squeeze().to(device)
                data = [torch.from_numpy(in1s).squeeze().float().to(device)]  #input 1
                if in2:
                    #in2s = np.stack([eval(f'allin[{slajs}]') for slajs in in2slices], axis=0)
                    in2s = np.stack(list(map(eval, [f'allin[{slajs}]' for slajs in in2slices])), axis=0)
                    data.append(torch.from_numpy(in2s).squeeze().float().to(device))  #input 2

                #run net on data, get output, save sums in dice gather lists
                out = net(*data).exp()
                target_oh, out = TensorCropping(target_oh, out)  #in case of PSP net, might be that output is bigger than input/GT
                #dices = AllDices(out, target_oh)
                maske = torch.from_numpy(maske).squeeze().unsqueeze(1).float().to(device)

                #cut only the middle part of OUT, MASKE and TARGET_OH for eval (depending on the step size)
                maske = eval('maske' + slicing)
                target_oh = eval('target_oh' + slicing)
                out = eval('out' + slicing)

                #when summing up, use only the middle of the patches, depending on how big 'step' was.
                segmented += torch.sum(out * maske, axis=axes)
                existing += torch.sum(target_oh * maske, axis=axes)
                intersec += torch.sum(target_oh * maske * out, axis=axes)

                #save output if required
                if saveout:  #whats faster, simply saving to an existing tensor, or iffing every loop??
                    for idd, slajs in enumerate(gtslices):
                        tmp = torch.argmax(out[idd, ...], dim=0)
                        if not in3D:
                            tmp = tmp.unsqueeze(bydim)
                        # print(tmp.shape)
                        tmp = torch.nn.functional.pad(tmp, paddingup)
                        eval(f'empty_subj[{slajs[2:]}].copy_(tmp)')

        #all saved, now calc actual dices:
        Dices[idx, :] = 2 * intersec / (existing + segmented)  #calc dice from the gathering lists
        if saveout:  #save img as npy.
            np.save(f'out{pid}_{savename}.npy', empty_subj.cpu().numpy())

    print('Done.')
    #pidis = [int(p) for p in PIDS]
    dices = np.concatenate((np.array(PID)[:, None], Dices.cpu().numpy()), axis=1)
    np.save(f'dices_{savename}.npy', dices)
    return dices
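# Hypothetical call to Compute3DDice; the checkpoint path, subject IDs, and
# savename are placeholders, and the project's data loaders are assumed.
# dices = Compute3DDice([101, 102], 'checkpoints/net_best.pt', patchsize=25,
#                       batch=8, dev='cuda:0', step=9, saveout=True, savename='run1')
# print(dices[:, 0])   # first column holds the subject IDs
# print(dices[:, 1:])  # per-class Dice scores (7 classes)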
def __init__(self,  # pylint: disable=too-many-arguments
             values,
             base_values=None,
             data=None,
             display_data=None,
             instance_names=None,
             feature_names=None,
             output_names=None,
             output_indexes=None,
             lower_bounds=None,
             upper_bounds=None,
             error_std=None,
             main_effects=None,
             hierarchical_values=None,
             clustering=None,
             compute_time=None):
    self.op_history = []
    self.compute_time = compute_time

    # cloning. TODOsomeday: better cloning :)
    if issubclass(type(values), Explanation):
        e = values
        values = e.values
        base_values = e.base_values
        data = e.data

    self.output_dims = compute_output_dims(values, base_values, data, output_names)
    values_shape = _compute_shape(values)

    if output_names is None and len(self.output_dims) == 1:
        output_names = [f"Output {i}" for i in range(values_shape[self.output_dims[0]])]

    if len(_compute_shape(feature_names)) == 1:  # TODOsomeday: should always be an alias once slicer supports per-row aliases
        if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
            feature_names = Alias(list(feature_names), 0)
        elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
            feature_names = Alias(list(feature_names), 1)

    if len(_compute_shape(output_names)) == 1:  # TODOsomeday: should always be an alias once slicer supports per-row aliases
        output_names = Alias(list(output_names), self.output_dims[0])
        # if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
        #     output_names = Alias(list(output_names), 0)
        # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
        #     output_names = Alias(list(output_names), 1)

    if output_names is not None and not isinstance(output_names, Alias):
        l = len(_compute_shape(output_names))
        if l == 0:
            pass
        elif l == 1:
            output_names = Obj(output_names, self.output_dims)
        elif l == 2:
            output_names = Obj(output_names, [0] + list(self.output_dims))
        else:
            raise ValueError("shap.Explanation does not yet support output_names of order greater than 3!")

    if not hasattr(base_values, "__len__") or len(base_values) == 0:
        pass
    elif len(_compute_shape(base_values)) == len(self.output_dims):
        base_values = Obj(base_values, list(self.output_dims))
    else:
        base_values = Obj(base_values, [0] + list(self.output_dims))

    self._s = Slicer(
        values=values,
        base_values=base_values,
        data=list_wrap(data),
        display_data=list_wrap(display_data),
        instance_names=None if instance_names is None else Alias(instance_names, 0),
        feature_names=feature_names,
        output_names=output_names,
        output_indexes=None if output_indexes is None else (self.output_dims, output_indexes),
        lower_bounds=list_wrap(lower_bounds),
        upper_bounds=list_wrap(upper_bounds),
        error_std=list_wrap(error_std),
        main_effects=list_wrap(main_effects),
        hierarchical_values=list_wrap(hierarchical_values),
        clustering=None if clustering is None else Obj(clustering, [0])
    )
import os
import glob
import math
from collections import OrderedDict
from xml.dom.minidom import parseString

import cv2
import dicttoxml
import numpy as np
from matplotlib import pyplot as plt
# Slicer comes from the project's own slicing module

class Cowc:
    def __init__(self, size=(3, 3)):
        """
        placeholder
        """
        # list of train and test directories
        self._annotation_suffix = '_Annotated_Cars.png'

        # 15cm resolution
        self._GSD = 0.15
        self._size = (int(round((size[0] / self._GSD) / 2)),
                      int(round((size[1] / self._GSD) / 2)))

        # xml conversion tweak
        self._custom_item_func = lambda x: 'object'

        # create image slicer
        self._slicer = Slicer()
        return

    def process(self, data_path, out_path):
        """
        create images and annotations for train and validation
        """
        # for each subset
        for subset in ['train', 'test']:

            # locate all images in data path
            path = os.path.join(data_path, subset)
            files = glob.glob(os.path.join(os.path.join(path, '**'), '*.png'), recursive=True)
            files = [x for x in files if 'Annotated' not in x]

            # slice up images
            for f in files:
                slices = self._slicer.process(f, os.path.join(out_path, '{}/images'.format(subset)))

                # check annotation image exists
                pathname = os.path.join(f.replace('.png', self._annotation_suffix))
                if os.path.exists(pathname):

                    # create PASCAL VOC schema for each image slice
                    annotation_image = cv2.imread(pathname)
                    for s in slices:
                        self.getAnnotation(s, annotation_image,
                                           os.path.join(out_path, '{}/annotations'.format(subset)))
        return

    def getAnnotation(self, s, annotation_image, out_path, writeback=False, overwrite=True):
        """
        create annotation xml files encoding bounding box locations
        """
        # create label pathname
        filename = os.path.splitext(os.path.basename(s['pathname']))[0] + '.xml'
        annotation_pathname = os.path.join(out_path, filename)
        if not os.path.exists(annotation_pathname) or overwrite:

            # get bounding boxes for cars in aoi
            results, label_locs = self.getBoundingBoxes(s, annotation_image)
            schema = self.getSchema(s, results)

            # create output dir if necessary
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            # write annotation to xml file
            with open(os.path.join(out_path, filename), "w+") as outfile:

                # parse xml into string
                xml = dicttoxml.dicttoxml(schema,
                                          attr_type=False,
                                          item_func=self._custom_item_func,
                                          custom_root='annotation') \
                    .replace(b'<annotation>', b'<annotation verified="yes">') \
                    .replace(b'<items>', b'').replace(b'</items>', b'')
                dom = parseString(xml)

                # write xml string to file
                outfile.write(dom.toprettyxml())

            # plot writeback
            if writeback:
                self.drawBoundingBoxes(s['pathname'], results)
        return

    def getBoundingBoxes(self, s, annotation_image, heading='fixed'):
        """
        extract bounding boxes around car locations from annotation image
        """
        # process each slice
        records = []

        # extract window from annotation image
        x0 = s['x0']
        y0 = s['y0']
        window = annotation_image[y0:y0 + s['height'], x0:x0 + s['width']]

        # find locations of non-zero pixels - add zero rotation column
        label_locs = np.where(window > 0)
        label_locs = np.transpose(np.vstack([label_locs[0], label_locs[1],
                                             np.zeros(len(label_locs[0]))]))
        if label_locs.size > 0:

            # create bounding box for annotated car locations
            for loc in label_locs:
                record = self.getBoundingBox(loc, window.shape)

                # ignore annotated objects close to image edge
                if record:
                    records.append(record)

        return records, label_locs

    def getBoundingBox(self, loc, dims):
        """
        placeholder
        """
        # extrapolate bbox from centroid coords
        record = {}
        yc, xc, angle = loc

        # compute pts along vertical line rotated at mid point
        x0_r, y0_r = self.rotatePoint(xc, yc + self._size[1], xc, yc, math.radians(angle))
        x1_r, y1_r = self.rotatePoint(xc, yc - self._size[1], xc, yc, math.radians(angle))

        # compute corner pts orthogonal to rotated line end points
        corner = np.empty((4, 2), float)
        corner[0] = self.rotatePoint(x0_r, y0_r + self._size[0], x0_r, y0_r, math.radians(angle + 90.0))
        corner[1] = self.rotatePoint(x0_r, y0_r - self._size[0], x0_r, y0_r, math.radians(angle + 90.0))
        corner[2] = self.rotatePoint(x1_r, y1_r + self._size[0], x1_r, y1_r, math.radians(angle + 90.0))
        corner[3] = self.rotatePoint(x1_r, y1_r - self._size[0], x1_r, y1_r, math.radians(angle + 90.0))

        # get min and max coordinates for bbox
        x_min = np.amin(corner[:, 0])
        x_max = np.amax(corner[:, 0])
        y_min = np.amin(corner[:, 1])
        y_max = np.amax(corner[:, 1])

        # check limits
        x_min_c = max(0, x_min)
        y_min_c = max(0, y_min)
        x_max_c = min(x_max, dims[1] - 1)
        y_max_c = min(y_max, dims[0] - 1)

        area = (x_max - x_min) * (y_max - y_min)
        area_c = (x_max_c - x_min_c) * (y_max_c - y_min_c)

        # only retain bboxes not constrained by image edges
        if area_c / area > 0.95:
            record['bbox'] = [x_min_c, y_min_c, x_max_c, y_max_c]

            # readjust perimeter points
            corner[:, 0] = np.where(corner[:, 0] < 0.0, 0.0, corner[:, 0])
            corner[:, 0] = np.where(corner[:, 0] > dims[1] - 1, dims[1] - 1, corner[:, 0])
            corner[:, 1] = np.where(corner[:, 1] < 0.0, 0.0, corner[:, 1])
            corner[:, 1] = np.where(corner[:, 1] > dims[0] - 1, dims[0] - 1, corner[:, 1])

            # minimise distance between points
            d1 = np.linalg.norm(corner[1] - corner[2])
            d2 = np.linalg.norm(corner[1] - corner[3])
            if d1 > d2:
                corner[[2, 3]] = corner[[3, 2]]

            record['corner'] = list(corner.flatten())

        return record

    def rotatePoint(self, x, y, xc, yc, angle):
        """
        compute rotation of point around origin
        """
        # Rotate point counterclockwise by a given angle around a given origin.
        qx = xc + math.cos(angle) * (x - xc) - math.sin(angle) * (y - yc)
        qy = yc + math.sin(angle) * (x - xc) + math.cos(angle) * (y - yc)
        return qx, qy

    def getSchema(self, s, records):
        """
        convert annotation into ordered list for conversion into PASCAL VOC schema
        """
        # convert to PASCAL VOC annotation schema
        object_list = []
        for record in records:
            bbox = record['bbox']
            #corner = record[ 'corner' ]
            object_list.append(OrderedDict({
                'name': 'car',
                'pose': 'Topdown',
                'truncated': 0,
                'difficult': 0,
                'bndbox': {
                    'xmin': bbox[0],
                    'ymin': bbox[1],
                    'xmax': bbox[2],
                    'ymax': bbox[3]
                }
                #'segmentation' : ','.join( (str(pt) for pt in corner ) )
            }))

        # return full schema as dictionary
        return OrderedDict({
            'folder': 'images',
            'filename': os.path.basename(s['pathname']),
            'path': os.path.dirname(s['pathname']),
            'source': {
                'database': 'cowc'
            },
            'size': {
                'width': s['width'],
                'height': s['height'],
                'depth': 3
            },
            'segmented': 0,
            'items': object_list
        })

    def drawBoundingBoxes(self, pathname, records):
        """
        placeholder
        """
        # no action if no bboxes
        if len(records) > 0:

            # load image
            img = cv2.imread(pathname)
            height = img.shape[0]
            width = img.shape[1]

            # show image
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax = plt.gca()
            fig = plt.gcf()
            fig.canvas.set_window_title(os.path.basename(pathname))
            print(pathname)

            # draw bbox lines
            colors = ['r', 'g', 'y', 'b', 'm', 'c']
            idx = 0
            for record in records:
                x0, y0, x1, y1 = record['bbox']
                color = colors[idx] + '-'
                idx = idx + 1 if idx + 1 < len(colors) else 0
                ax.plot([x0, x1], [y0, y0], color)
                ax.plot([x0, x1], [y1, y1], color)
                ax.plot([x0, x0], [y0, y1], color)
                ax.plot([x1, x1], [y0, y1], color)
                """
                # get run length encoding from perimeter points string
                rl_encoding = mask.frPyObjects( [ record[ 'corner' ] ], height, width )
                binary_mask = mask.decode( rl_encoding )
                binary_mask = np.amax(binary_mask, axis=2)
                masked = np.ma.masked_where(binary_mask == 0, binary_mask )
                ax.imshow( masked, 'jet', interpolation='None', alpha=0.5 )
                """
            plt.show()
        return
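# Hypothetical driver for the Cowc pipeline above; the directory layout
# (train/ and test/ subfolders of PNG tiles plus *_Annotated_Cars.png masks)
# is assumed, and the paths are placeholders.
# cowc = Cowc(size=(3, 3))                 # ~3m x 3m car footprint at 15cm GSD
# cowc.process('/data/cowc/raw', '/data/cowc/voc')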
def __init__(self, expected_value, values, data=None, output_shape=tuple(),
             interaction_order=0, instance_names=None, input_names=None,
             output_names=None, output_indexes=None, feature_types=None,
             lower_bounds=None, upper_bounds=None, main_effects=None,
             hierarchical_values=None, original_rows=None, clustering=None):
    self.transform_history = []

    input_shape = _compute_shape(data)

    # trim any trailing None shapes since we don't want slicer to try and use those
    if len(input_shape) > 0 and input_shape[-1] is None:
        input_shape = input_shape[:-1]

    values_dims = list(range(len(input_shape) + interaction_order + len(output_shape)))
    output_dims = range(len(input_shape) + interaction_order, values_dims[-1])
    #main_effects_inds = values_dims[0:len(input_shape)] + values_dims[len(input_shape) + interaction_order:]
    self.output_names = output_names  # TODO: needs to be tracked after slicing still

    kwargs_dict = {}
    if lower_bounds is not None:
        kwargs_dict["lower_bounds"] = (values_dims, Slicer(lower_bounds))
    if upper_bounds is not None:
        kwargs_dict["upper_bounds"] = (values_dims, Slicer(upper_bounds))
    if main_effects is not None:
        kwargs_dict["main_effects"] = (values_dims, Slicer(main_effects))
    if output_indexes is not None:
        kwargs_dict["output_indexes"] = (output_dims, Slicer(output_indexes))
    if output_names is not None:
        kwargs_dict["output_names"] = (output_dims, Slicer(output_names))
    if hierarchical_values is not None:
        kwargs_dict["hierarchical_values"] = (values_dims, Slicer(hierarchical_values))
    if input_names is not None:
        if not is_1d(input_names):
            input_name_dims = values_dims
        else:
            input_name_dims = values_dims[1:]
        kwargs_dict["input_names"] = (input_name_dims, Slicer(input_names))
    if original_rows is not None:
        kwargs_dict["original_rows"] = (values_dims[1:], Slicer(original_rows))
    if clustering is not None:
        kwargs_dict["clustering"] = ([0], Slicer(clustering))
    if expected_value is not None:
        ndims = len(getattr(expected_value, "shape", []))
        if ndims == len(values_dims):
            kwargs_dict["expected_value"] = (values_dims, Slicer(expected_value))
        elif ndims == len(values_dims) - 1:
            kwargs_dict["expected_value"] = (values_dims[1:], Slicer(expected_value))
        else:
            raise Exception("The shape of the passed expected_value does not match the shape of the passed values!")
    # if clustering is not None:
    #     self.clustering = clustering

    super().__init__(data, values, input_shape, output_shape, expected_value,
                     interaction_order, instance_names, input_names,
                     feature_types, **kwargs_dict)
import copy

import numpy as np
import scipy as sp
import scipy.cluster.hierarchy
import scipy.spatial.distance
from slicer import Slicer, Alias
# OpChain, MetaExplanation, compute_output_dims, _compute_shape, and is_1d
# come from the surrounding shap module

class Explanation(object, metaclass=MetaExplanation):
    """ This is currently an experimental feature, don't depend on this object yet! :)
    """
    def __init__(self, values, base_values=None, data=None, display_data=None,
                 instance_names=None, feature_names=None, output_names=None,
                 output_indexes=None, lower_bounds=None, upper_bounds=None,
                 main_effects=None, hierarchical_values=None, clustering=None):
        self.transform_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        output_dims = compute_output_dims(values, base_values, data)

        if len(_compute_shape(feature_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(feature_names, 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(feature_names, 1)

        self._s = Slicer(
            values=values,
            base_values=base_values,
            data=data,
            display_data=display_data,
            instance_names=None if instance_names is None else Alias(instance_names, 0),
            feature_names=feature_names,
            output_names=None if output_names is None else Alias(output_names, output_dims),
            output_indexes=None if output_indexes is None else (output_dims, output_indexes),
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,  # each bound array is forwarded under its own name
            main_effects=main_effects,
            hierarchical_values=hierarchical_values,  #Obj(hierarchical_values, (0,None)),
            clustering=clustering)

    @property
    def shape(self):
        return _compute_shape(self._s.values)

    @property
    def values(self):
        return self._s.values

    @values.setter
    def values(self, new_values):
        self._s.values = new_values

    @property
    def base_values(self):
        return self._s.base_values

    @base_values.setter
    def base_values(self, new_base_values):
        self._s.base_values = new_base_values

    @property
    def data(self):
        return self._s.data

    @data.setter
    def data(self, new_data):
        self._s.data = new_data

    @property
    def display_data(self):
        return self._s.display_data

    @display_data.setter
    def display_data(self, new_display_data):
        self._s.display_data = new_display_data

    @property
    def instance_names(self):
        return self._s.instance_names

    @property
    def output_names(self):
        return self._s.output_names

    @property
    def output_indexes(self):
        return self._s.output_indexes

    @property
    def feature_names(self):
        return self._s.feature_names

    @feature_names.setter
    def feature_names(self, new_feature_names):
        self._s.feature_names = new_feature_names

    @property
    def lower_bounds(self):
        return self._s.lower_bounds

    @property
    def upper_bounds(self):
        return self._s.upper_bounds

    @property
    def main_effects(self):
        return self._s.main_effects

    @main_effects.setter
    def main_effects(self, new_main_effects):
        self._s.main_effects = new_main_effects

    @property
    def hierarchical_values(self):
        return self._s.hierarchical_values

    @hierarchical_values.setter
    def hierarchical_values(self, new_hierarchical_values):
        self._s.hierarchical_values = new_hierarchical_values

    @property
    def clustering(self):
        return self._s.clustering

    @clustering.setter
    def clustering(self, new_clustering):
        self._s.clustering = new_clustering

    def __repr__(self):
        out = ".values =\n" + self.values.__repr__()
        if self.base_values is not None:
            out += "\n\n.base_values =\n" + self.base_values.__repr__()
        if self.data is not None:
            out += "\n\n.data =\n" + self.data.__repr__()
        return out

    def __getitem__(self, item):
        """ This adds support for magic string indexes like "rank(0)".
        """
        if not isinstance(item, tuple):
            item = (item,)

        # convert any OpChains or magic strings
        for i, t in enumerate(item):
            orig_t = t
            if issubclass(type(t), OpChain):
                t = t.apply(self)
                if issubclass(type(t), (np.int64, np.int32)):  # because slicer does not like numpy indexes
                    t = int(t)
                elif issubclass(type(t), np.ndarray):
                    t = [int(v) for v in t]  # slicer wants lists not numpy arrays for indexing
            elif issubclass(type(t), Explanation):
                t = t.values
            elif type(t) is str:
                if is_1d(self.feature_names):
                    ind = np.where(np.array(self.feature_names) == t)[0][0]
                    t = int(ind)
                else:
                    new_values = []
                    new_data = []
                    for j in range(len(self.values)):
                        for s, v, d in zip(self.feature_names[j], self.values[j], self.data[j]):
                            if s == t:
                                new_values.append(v)
                                new_data.append(d)
                    new_self = copy.deepcopy(self)
                    new_self.values = new_values
                    new_self.data = new_data
                    new_self.feature_names = t
                    new_self.clustering = None
                    return new_self
            if issubclass(type(t), np.ndarray):
                t = [int(j) for j in t]
            elif issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)):
                t = int(t)

            if t is not orig_t:
                tmp = list(item)
                tmp[i] = t
                item = tuple(tmp)

        # call slicer for the real work
        new_self = copy.copy(self)
        new_self.transform_history.append(("__getitem__", (item,)))
        new_self._s = self._s.__getitem__(item)

        return new_self

    def __len__(self):
        return self.shape[0]

    def __copy__(self):
        return Explanation(self.values, self.base_values, self.data,
                           self.display_data, self.instance_names,
                           self.feature_names, self.output_names,
                           self.output_indexes, self.lower_bounds,
                           self.upper_bounds, self.main_effects,
                           self.hierarchical_values, self.clustering)

    def _numpy_func(self, fname, **kwargs):
        new_self = copy.copy(self)
        axis = kwargs.get("axis", None)

        # collapse the slicer to the right shape
        if axis == 0:
            new_self = new_self[0]
        elif axis == 1:
            new_self = new_self[1]
        elif axis == 2:
            new_self = new_self[2]

        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array([getattr(np, fname)(v) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = getattr(np, fname)(np.array(self.values), **kwargs)
            if new_self.data is not None:
                try:
                    new_self.data = getattr(np, fname)(np.array(self.data), **kwargs)
                except Exception:
                    new_self.data = None
            if new_self.base_values is not None and issubclass(type(axis), int) and len(self.base_values.shape) > axis:
                new_self.base_values = getattr(np, fname)(self.base_values, **kwargs)
            elif issubclass(type(axis), int):
                new_self.base_values = None

        if axis == 0 and self.clustering is not None and len(self.clustering.shape) == 3:
            if self.clustering.std(0).sum() < 1e-8:
                new_self.clustering = self.clustering[0]
            else:
                new_self.clustering = None

        new_self.transform_history.append((fname, kwargs))

        return new_self

    def mean(self, axis):
        return self._numpy_func("mean", axis=axis)

    def max(self, axis):
        return self._numpy_func("max", axis=axis)

    def min(self, axis):
        return self._numpy_func("min", axis=axis)

    def sum(self, axis):
        return self._numpy_func("sum", axis=axis)

    @property
    def abs(self):
        return self._numpy_func("abs")

    @property
    def argsort(self):
        return self._numpy_func("argsort")

    @property
    def flip(self):
        return self._numpy_func("flip")

    def hclust(self, metric="sqeuclidean", axis=0):
        """ Computes an optimal leaf ordering sort order using hierarchical clustering.

        hclust(metric="sqeuclidean")

        Parameters
        ----------
        metric : string
            A metric supported by scipy clustering.

        axis : int
            The axis to cluster along.
        """
        values = self.values

        if len(values.shape) != 2:
            raise Exception("The hclust order only supports 2D arrays right now!")

        if axis == 1:
            values = values.T

        # compute a hierarchical clustering and return the optimal leaf ordering
        D = sp.spatial.distance.pdist(values, metric)
        cluster_matrix = sp.cluster.hierarchy.complete(D)
        inds = sp.cluster.hierarchy.leaves_list(
            sp.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
        return inds

    def sample(self, max_samples, replace=False, random_state=0):
        """ Randomly samples the instances (rows) of the Explanation object.

        Parameters
        ----------
        max_samples : int
            The number of rows to sample. Note that if replace=False then fewer
            than max_samples will be drawn if explanation.shape[0] < max_samples.

        replace : bool
            Sample with or without replacement.
        """
        # preserve the global random state while using a fixed seed for sampling
        prev_state = np.random.get_state()
        np.random.seed(random_state)
        inds = np.random.choice(self.shape[0], min(max_samples, self.shape[0]), replace=replace)
        np.random.set_state(prev_state)
        return self[list(inds)]

    def _flatten_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.feature_names[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def _use_data_as_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.data[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def percentile(self, q, axis=None):
        new_self = copy.deepcopy(self)
        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array([np.percentile(v, q) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = np.percentile(new_self.values, q, axis)
            new_self.data = np.percentile(new_self.data, q, axis)
        #new_self.data = None
        new_self.transform_history.append(("percentile", (axis,)))
        return new_self
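# A short usage sketch for the Explanation API above, assuming the rest of the
# shap module around the class (compute_output_dims etc.). The arrays and
# feature names are hypothetical; the .abs / .mean / string-indexing chain is
# exactly what the class defines.
shap_values = Explanation(
    values=np.random.randn(50, 3),
    base_values=np.zeros(50),
    data=np.random.randn(50, 3),
    feature_names=["f0", "f1", "f2"],
)
global_importance = shap_values.abs.mean(0)   # mean |SHAP value| per feature
print(global_importance.values)
print(shap_values[:, "f1"].values.shape)      # magic string indexing by feature name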
def __init__(
        self,
        values,
        base_values=None,
        data=None,
        display_data=None,
        instance_names=None,
        feature_names=None,
        output_names=None,
        output_indexes=None,
        lower_bounds=None,
        upper_bounds=None,
        main_effects=None,
        hierarchical_values=None,
        clustering=None,
        interactions=None,
        feature_groups=None,
):
    self.op_history = []

    # cloning. TODO: better cloning :)
    if issubclass(type(values), Explanation):
        e = values
        values = e.values
        base_values = e.base_values
        data = e.data

    output_dims = compute_output_dims(values, base_values, data)

    if len(_compute_shape(feature_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
        values_shape = _compute_shape(values)
        if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
            feature_names = Alias(list(feature_names), 0)
        elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
            feature_names = Alias(list(feature_names), 1)

    if len(_compute_shape(output_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
        values_shape = _compute_shape(values)
        output_names = Alias(list(output_names), output_dims[0])
        # if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
        #     output_names = Alias(list(output_names), 0)
        # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
        #     output_names = Alias(list(output_names), 1)

    if output_names is not None and not isinstance(output_names, Alias):
        l = len(_compute_shape(output_names))
        if l == 0:
            pass
        elif l == 1:
            output_names = Obj(output_names, output_dims)
        elif l == 2:
            output_names = Obj(output_names, [0] + list(output_dims))
        else:
            raise ValueError("shap.Explanation does not yet support output_names of order greater than 3!")

    self._s = Slicer(
        values=values,
        base_values=None if base_values is None else Obj(base_values, [0] + list(output_dims)),
        data=data,
        display_data=display_data,
        instance_names=None if instance_names is None else Alias(instance_names, 0),
        feature_names=feature_names,
        output_names=output_names,
        output_indexes=None if output_indexes is None else (output_dims, output_indexes),
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        main_effects=main_effects,
        hierarchical_values=hierarchical_values,
        clustering=None if clustering is None else Obj(clustering, [0]),
        interactions=interactions,
        feature_groups=feature_groups)
class SliceCompare: def create_csurf_map(self, map_file): file_reader = open(map_file, 'r') lines = file_reader.readlines() csurf_map = defaultdict() for line in lines: path, sep, build_name = line.partition('\t') csurf_map[path] = build_name.split('\n')[0] self.csurf_map = csurf_map self.slicer = Slicer() def merge_data_control_slices(self, benchmark_folder): benchmarks = [] for root, dir, files in os.walk(benchmark_folder): #print(dir) for item in dir: benchmarks.append(root + item) break f_build_rate = open('assert_build_rate_ds.csv', 'w') f_slice_prop = open('assert_slice_property_ds.csv', 'w') f_build_rate.write( 'Benchmark,Slices,Size of smallest slice built,Size of largest slice built,Build Rate\n' ) f_slice_prop.write( 'Benchmark,Slices,Smallest slice size,Largest slice size,Average Slice size,Min procedure count, Max procedure count, Avg procedure count,Inter procedural slices, Inter file slices\n' ) f_result_csv = open('assert_result_ds.csv', 'w') f_result_csv.write( 'benchmark,slices,avg-data-slice-size,avg-full-slice-size,avg-slice-size,inter-procedural-slices,inter-file-slices,build_rate\n' ) build_rate = defaultdict(int) for benchmark in benchmarks: statements = 0 bench_list = [ 'tj-histo', 'json-c-json-c', 'jonas-tig', 'Cyan4973-zstd', 'Phildo-pixQL', 'kr-beanstalkd', 'joyent-http-parser', 'yrutschle-sslh', 'rui314-8cc', 'udp-json-parser', 'cisco-thor' ] bench_list += [ 'libuv-libuv', 'patjak-bcwc_pcie', 'douban-beansdb', 'droe-sslsplit', 'orangeduck-mpc', 'machinezone-tcpkali', 'wg-wrk', 'karthick18-inception', 'vmg-houdini', 'antirez-disque' ] if benchmark.split('/')[-1] in bench_list: self.inter_procedural_slices = 0 self.slice_size = 0 self.inter_file_slices = 0 self.min_slice_size = 0 self.max_slice_size = 0 self.min_slice_procedures = 0 self.max_slice_procedures = 0 self.avg_slice_procedures = 0 self.min_built_slice_size = 0 self.max_built_slice_size = 0 result_data_files = [] result_control_files = [] result_data_files, result_control_files = self.get_slice_files( benchmark) statements = len(result_data_files) #logger.warn('Number of data files = '+str(len(result_data_files))) matching_sets = 0 self.inter_procedural_slices = 0 self.inter_file_slices = 0 self.build_rate = 0 avg_slice_size = 0 avg_data_slice_size = 0 avg_control_slice_size = 0 if len(result_data_files) > 0 and len( result_control_files) > 0: for data_file in result_data_files: f_data_file = open(data_file, 'r') data_slice_lines = f_data_file.readlines() f_data_file.close() data_line_set = set() files_in_slice = set() for line in data_slice_lines: if '.h' not in line: if line not in data_line_set and line.strip( ) != '': if line.split( '\t')[0] not in files_in_slice: files_in_slice.add(line.split('\t')[0]) data_line_set.add(line) #self.get_wrapper_function(line) control_file = data_file.replace( 'result_assert', 'result_assert_control') f_control_file = open(control_file, 'r') control_slice_lines = f_control_file.readlines() f_control_file.close() control_line_set = set() for line in control_slice_lines: if '.h' not in line: if line not in control_line_set and line.strip( ) != '': control_line_set.add(line) logger.info(str(len(data_line_set))) logger.info(str(len(control_line_set))) if data_line_set.issubset(control_line_set): matching_sets += 1 merged_slices = self.merge_slices( list(data_line_set), defaultdict(list), list(control_line_set), defaultdict(list), 1) self.slice_size += len(merged_slices) avg_slice_size += len(merged_slices) if self.min_slice_size == 0: self.min_slice_size = 
len(merged_slices) if len(merged_slices) < self.min_slice_size: self.min_slice_size = len(merged_slices) if len(merged_slices) > self.max_slice_size: self.max_slice_size = len(merged_slices) avg_data_slice_size += len(data_line_set) avg_control_slice_size += len(control_line_set) if len(files_in_slice) > 1: self.inter_file_slices += 1 logger.critical('Slice size for ' + data_file + ' :' + str(len(merged_slices))) slice_file_location = self.slicer.get_file_path( data_slice_lines[0]) slice_code = self.slicer.get_slice_code( merged_slices) self.slicer.generate_slice_file(slice_code) if self.slicer.build_slice_file( slice_file_location) == True: self.build_rate += 1 build_rate[benchmark.split('/')[-1]] += 1 if self.min_built_slice_size == 0: self.min_built_slice_size = len(slice_code) if len(slice_code) < self.min_built_slice_size: self.min_built_slice_size = len(slice_code) if len(slice_code) > self.max_built_slice_size: self.max_built_slice_size = len(slice_code) else: logger.warn('set mismatch!!') return 0 logger.warn('Data slice is subset of control slice in ' + benchmark) f_result_csv.write( benchmark.split('/')[-1] + ',' + str(statements) + ',' + str(avg_data_slice_size / 100) + ',' + str(avg_control_slice_size / 100) + ',' + str(avg_slice_size / 100) + ',' + str(self.inter_procedural_slices) + ',' + str(self.inter_file_slices) + ',' + str(build_rate[benchmark.split('/')[-1]]) + '\n') f_build_rate.write( benchmark.split('/')[-1] + ',' + str(statements) + ',' + str(self.min_built_slice_size) + ',' + str(self.max_built_slice_size) + ',' + str(build_rate[benchmark.split('/')[-1]]) + '\n') f_slice_prop.write( benchmark.split('/')[-1] + ',' + str(statements) + ',' + str(self.min_slice_size) + ',' + str(self.max_slice_size) + ',' + str(self.slice_size / 100) + ',' + str(self.min_slice_procedures) + ',' + str(self.max_slice_procedures) + ',' + str(self.avg_slice_procedures / 100) + ',' + str(self.inter_procedural_slices) + ',' + str(self.inter_file_slices) + '\n') f_result_csv.close() f_build_rate.close() f_slice_prop.close() def merge_slices(self, data_slice_list, data_slice_fns, control_slice_list, control_slice_fns, depth): logger.info('data_slice_fns -' + str(data_slice_fns)) merged_slices = [] for data_slice in data_slice_list: if data_slice.strip() != '' and len(data_slice.split('\t')) == 2: function_decl, start_index, end_index = self.get_wrapper_function( data_slice) function_name = function_decl.split('(')[0].split(' ')[-1] if function_decl != '': data_slice_fns[function_decl] = [ function_name, start_index, end_index ] else: merged_slices.append(data_slice) resolve_call_sites = False for control_slice in control_slice_list: if control_slice.strip() != '' and len( control_slice.split('\t')) == 2: function_decl, start_index, end_index = self.get_wrapper_function( control_slice) function_name = function_decl.split('(')[0].split(' ')[-1] control_slice_fns[function_decl] = [ function_name, start_index, end_index ] if function_decl in data_slice_fns: has_new_call_sites, data_slice_fns = self.get_new_call_sites( control_slice, data_slice_fns, control_slice_fns) if has_new_call_sites == True: resolve_call_sites = True logger.info('call site found - ' + control_slice) merged_slices.append(control_slice) if resolve_call_sites == True and depth < 5: return self.merge_slices(merged_slices, data_slice_fns, control_slice_list, control_slice_fns, depth + 1) else: self.avg_slice_procedures += len(data_slice_fns) if self.min_slice_procedures == 0: self.min_slice_proceures = len(data_slice_fns) if 
            if len(data_slice_fns) < self.min_slice_procedures:
                self.min_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > self.max_slice_procedures:
                self.max_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > 1:
                self.inter_procedural_slices += 1
            logger.info('control slice fns - ' + str(control_slice_fns))
            return merged_slices

    def get_new_call_sites(self, slice_line, data_slice_fns, control_slice_fns):
        keywords = ['if', 'switch', 'while']
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        with open(file_name, 'r') as f_cfile:
            lines = f_cfile.readlines()
        is_call_site = False
        line = lines[line_number - 1]
        fns_called = []
        if re.search(r'[a-zA-Z]+\([^\)]*\)(\.[^\)]*\))?', line):
            fn_names = line.split('(')
            index = 0
            for fn_name in fn_names:
                if index == len(fn_names) - 1:
                    break
                temp = ''
                logger.info('splitting ' + fn_name)
                # walk backwards from the '(' to recover the identifier
                for i in range(len(fn_name) - 1, 0, -1):
                    if fn_name[i].isalnum() or fn_name[i] == '_':
                        temp = fn_name[i] + temp
                    else:
                        break
                index += 1
                if not self.has_any_item(temp, keywords):
                    fns_called.append(temp)
                    is_call_site = True
                    logger.info('call site -' + line)
        has_new_call_sites = False
        if is_call_site:
            for fn_called in fns_called:
                for key, value in control_slice_fns.items():
                    if value[0] == fn_called:
                        if key not in data_slice_fns:
                            data_slice_fns[key] = value
                            has_new_call_sites = True
        return has_new_call_sites, data_slice_fns

    def get_slice_files(self, benchmark):
        result_data_files = []
        result_control_files = []
        print('Entering benchmark: ' + benchmark)
        for root, dirs, files in os.walk(benchmark):
            for f in files:
                if f.startswith('result_assert') and 'result_assert_control' not in f:
                    result_data_files.append(root + '/' + f)
                if f.startswith('result_assert_control'):
                    result_control_files.append(root + '/' + f)
        return result_data_files, result_control_files

    def get_wrapper_function(self, slice_line):
        keywords = ['if', 'switch', 'while']
        special_char = [';', '=', '"']
        if len(slice_line.split('\t')) < 2:
            return '', 0, 0
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        with open(file_name, 'r') as f_cfile:
            lines = f_cfile.readlines()
        line_num = 1
        for line in lines:
            if re.search(r'\w\(', line) and line.count('(') < 2 and line.count(')') < 2:
                if not self.has_any_item(line, keywords) and not self.has_any_item(line, special_char):
                    # decl_end_index is computed but currently unused
                    decl_end_index = self.find_decl_end_index(lines, line_num - 1)
                    start, end = self.find_block_limits(lines, line_num - 1)
                    if start <= line_number <= end + 1:
                        logger.info('Slice - ' + slice_line)
                        logger.info('Slice found in function - ' + line)
                        logger.info('limits - ' + str(start) + ' to ' + str(end))
                        return line, start, end
            line_num += 1
        logger.info('Slice ' + slice_line + ' not found in any function')
        return '', 0, 0

    def has_any_item(self, line, item_list):
        for item in item_list:
            if item in line:
                return True
        return False

    def find_block_limits(self, lines, line_index):
        open_brace_count = 0
        close_brace_count = 0
        start_index = line_index
        end_index = line_index
        begin_found = False
        for i in range(line_index, len(lines)):
            open_brace_count += lines[i].count('{')
            close_brace_count += lines[i].count('}')
            if open_brace_count == 1 and not begin_found:
                start_index = i
                begin_found = True
            if open_brace_count > 0 and open_brace_count == close_brace_count:
                end_index = i
                break
        return start_index, end_index

    def find_decl_end_index(self, lines, line_number):
        for i in range(line_number, len(lines)):
            if ')' in lines[i]:
                return i
        return None  # no closing parenthesis found
    def create_control_slice(self, benchmark_folder):
        logger.warning(self.csurf_map)
        benchmarks = []
        for root, dirs, files in os.walk(benchmark_folder):
            for item in dirs:
                benchmarks.append(os.path.join(root, item))
            break
        for benchmark in benchmarks:
            with open(benchmark + '/used_input.txt', 'r') as f_used_in:
                for line in f_used_in.readlines():
                    with open(benchmark + '/input.txt', 'w') as f_csurf_in:
                        f_csurf_in.write(line)
                    logger.warning(line)
                    cfile_name = line.split(':')[0].split('/')[-1]
                    line_num = int(line.split(':')[1])
                    response = ''
                    p = None
                    try:
                        command = ('csurf -nogui -l /home/nishanth/Workspace/PyHelium/csurf/plugin '
                                   + benchmark + '/myproj')
                        #print('Running cmd - '+command)
                        p = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
                        response, _ = p.communicate(input=None)
                        response = response.decode('utf8')
                        print(response)
                        #p.kill()
                    except Exception as e:
                        # only kill the child if Popen itself succeeded
                        if p is not None:
                            p.kill()
                        print(e)
                    response_lines = response.split('\n')
                    for i in range(0, len(response_lines)):
                        if 'Slice set size' in response_lines[i]:
                            slice_set_size = int(response_lines[i].split(':')[1].strip())
                            if slice_set_size > 0:
                                # write the slice set that follows the size line
                                # to the control-slice result file
                                result_path = (benchmark + '/result_assert_control'
                                               + cfile_name + str(line_num) + '.txt')
                                with open(result_path, 'w') as f_result_in:
                                    for j in range(i + 1, len(response_lines)):
                                        f_result_in.write(response_lines[j] + '\n')
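# --- Usage sketch (not part of the original source) ---
# A minimal, hypothetical driver for SliceCompare. The map file and benchmark
# folder paths below are placeholders; real paths depend on the local csurf
# installation used by create_control_slice above.
if __name__ == '__main__':
    comparer = SliceCompare()
    comparer.create_csurf_map('csurf_map.tsv')         # tab-separated <path, build name> pairs
    comparer.create_control_slice('benchmarks/')       # runs csurf once per recorded input line
    comparer.merge_data_control_slices('benchmarks/')  # writes the three assert_*.csv reports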
# NOTE: imports assumed by this class (the original module header is not shown);
# helpers such as MetaExplanation, OpChain, Alias, Obj, Slicer, Cohorts,
# compute_output_dims, _compute_shape, is_1d, _auto_cohorts, and group_features
# are presumed to come from the surrounding module.
import copy
import operator
import numpy as np
import pandas as pd
import scipy as sp
import scipy.cluster   # makes sp.cluster.hierarchy available
import scipy.spatial   # makes sp.spatial.distance available


class Explanation(object, metaclass=MetaExplanation):
    """ A slicable set of parallel arrays representing a SHAP explanation.
    """

    def __init__(self, values, base_values=None, data=None, display_data=None,
                 instance_names=None, feature_names=None, output_names=None,
                 output_indexes=None, lower_bounds=None, upper_bounds=None,
                 main_effects=None, hierarchical_values=None, clustering=None):
        self.op_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        output_dims = compute_output_dims(values, base_values, data)

        if len(_compute_shape(feature_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(list(feature_names), 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(list(feature_names), 1)

        if len(_compute_shape(output_names)) == 1:  # TODO: should always be an alias once slicer supports per-row aliases
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
                output_names = Alias(list(output_names), 0)
            elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
                output_names = Alias(list(output_names), 1)

        self._s = Slicer(
            values=values,
            base_values=None if base_values is None else Obj(base_values, [0] + list(output_dims)),
            data=data,
            display_data=display_data,
            instance_names=None if instance_names is None else Alias(instance_names, 0),
            feature_names=feature_names,
            output_names=output_names,  # None if output_names is None else Alias(output_names, output_dims),
            output_indexes=None if output_indexes is None else (output_dims, output_indexes),
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            main_effects=main_effects,
            hierarchical_values=hierarchical_values,
            clustering=None if clustering is None else Obj(clustering, [0]))

    @property
    def shape(self):
        return _compute_shape(self._s.values)

    @property
    def values(self):
        return self._s.values
    @values.setter
    def values(self, new_values):
        self._s.values = new_values

    @property
    def base_values(self):
        return self._s.base_values
    @base_values.setter
    def base_values(self, new_base_values):
        self._s.base_values = new_base_values

    @property
    def data(self):
        return self._s.data
    @data.setter
    def data(self, new_data):
        self._s.data = new_data

    @property
    def display_data(self):
        return self._s.display_data
    @display_data.setter
    def display_data(self, new_display_data):
        if issubclass(type(new_display_data), pd.DataFrame):
            new_display_data = new_display_data.values
        self._s.display_data = new_display_data

    @property
    def instance_names(self):
        return self._s.instance_names

    @property
    def output_names(self):
        return self._s.output_names
    @output_names.setter
    def output_names(self, new_output_names):
        self._s.output_names = new_output_names

    @property
    def output_indexes(self):
        return self._s.output_indexes

    @property
    def feature_names(self):
        return self._s.feature_names
    @feature_names.setter
    def feature_names(self, new_feature_names):
        self._s.feature_names = new_feature_names

    @property
    def lower_bounds(self):
        return self._s.lower_bounds

    @property
    def upper_bounds(self):
        return self._s.upper_bounds

    @property
    def main_effects(self):
        return self._s.main_effects
    @main_effects.setter
    def main_effects(self, new_main_effects):
        self._s.main_effects = new_main_effects

    @property
    def hierarchical_values(self):
        return self._s.hierarchical_values
    @hierarchical_values.setter
    def hierarchical_values(self, new_hierarchical_values):
        self._s.hierarchical_values = new_hierarchical_values

    @property
    def clustering(self):
        return self._s.clustering
    @clustering.setter
    def clustering(self, new_clustering):
        self._s.clustering = new_clustering

    def cohorts(self, cohorts):
        """ Split this explanation into several cohorts.

        Parameters
        ----------
        cohorts : int or array
            If this is an integer then we auto build that many cohorts using a
            decision tree. If this is an array then we treat that as an array
            of cohort names/ids for each instance.
        """
        if isinstance(cohorts, int):
            return _auto_cohorts(self, max_cohorts=cohorts)
        elif isinstance(cohorts, (list, tuple, np.ndarray)):
            cohorts = np.array(cohorts)
            return Cohorts(**{name: self[cohorts == name] for name in np.unique(cohorts)})
        else:
            raise Exception("The given set of cohort indicators is not recognized! Please give an array or int.")

    def __repr__(self):
        out = ".values =\n" + self.values.__repr__()
        if self.base_values is not None:
            out += "\n\n.base_values =\n" + self.base_values.__repr__()
        if self.data is not None:
            out += "\n\n.data =\n" + self.data.__repr__()
        return out

    def __getitem__(self, item):
        """ This adds support for magic string indexes like "rank(0)".
        """
        if not isinstance(item, tuple):
            item = (item,)

        # convert any OpChains or magic strings; `pos` avoids being shadowed
        # by the inner loops below
        for pos, t in enumerate(item):
            orig_t = t
            if issubclass(type(t), OpChain):
                t = t.apply(self)
                if issubclass(type(t), (np.int64, np.int32)):  # because slicer does not like numpy indexes
                    t = int(t)
                elif issubclass(type(t), np.ndarray):
                    t = [int(v) for v in t]  # slicer wants lists not numpy arrays for indexing
            elif issubclass(type(t), Explanation):
                t = t.values
            elif type(t) is str:
                if is_1d(self.feature_names):
                    ind = np.where(np.array(self.feature_names) == t)[0][0]
                    t = int(ind)
                else:
                    new_values = []
                    new_base_values = []
                    new_data = []
                    if self.output_names is not None and (self.output_names.ndim >= 2 or self.output_names.shape[0] >= 2):
                        for i in range(len(self.values)):
                            for j in range(len(self.output_names[i])):
                                s = self.output_names[i][j]
                                if s == t:
                                    new_values.append(np.array(self.values[i][:, j]))
                                    new_data.append(np.array(self.data[i]))
                                    new_base_values.append(self.base_values[i][j])
                        new_self = copy.deepcopy(self)
                        new_self.values = np.array(new_values)
                        new_self.base_values = np.array(new_base_values)
                        new_self.data = np.array(new_data)
                        new_self.output_names = t
                        new_self.feature_names = np.array(new_data)
                        new_self.clustering = None
                    else:
                        for i in range(len(self.values)):
                            for s, v, d in zip(self.feature_names[i], self.values[i], self.data[i]):
                                if s == t:
                                    new_values.append(v)
                                    new_data.append(d)
                        new_self = copy.deepcopy(self)
                        new_self.values = new_values
                        new_self.data = new_data
                        new_self.feature_names = t
                        new_self.clustering = None
                    return new_self
            if issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)):
                t = int(t)

            if t is not orig_t:
                tmp = list(item)
                tmp[pos] = t
                item = tuple(tmp)

        # call slicer for the real work
        new_self = copy.copy(self)
        new_self._s = self._s.__getitem__(item)
        new_self.op_history.append({
            "name": "__getitem__",
            "args": (item,),
            "prev_shape": self.shape
        })

        return new_self

    def __len__(self):
        return self.shape[0]

    def __copy__(self):
        new_exp = Explanation(self.values, self.base_values, self.data,
                              self.display_data, self.instance_names,
                              self.feature_names, self.output_names,
                              self.output_indexes, self.lower_bounds,
                              self.upper_bounds, self.main_effects,
                              self.hierarchical_values, self.clustering)
        new_exp.op_history = copy.copy(self.op_history)
        return new_exp

    def _apply_binary_operator(self, other, binary_op, op_name):
        new_exp = self.__copy__()
        new_exp.op_history = copy.copy(self.op_history)
        new_exp.op_history.append({
            "name": op_name,
            "args": (other,),
            "prev_shape": self.shape
        })
        if isinstance(other, Explanation):
            new_exp.values = binary_op(new_exp.values, other.values)
            if new_exp.data is not None:
                new_exp.data = binary_op(new_exp.data, other.data)
            if new_exp.base_values is not None:
                new_exp.base_values = binary_op(new_exp.base_values, other.base_values)
        else:
            new_exp.values = binary_op(new_exp.values, other)
            if new_exp.data is not None:
                new_exp.data = binary_op(new_exp.data, other)
            if new_exp.base_values is not None:
                new_exp.base_values = binary_op(new_exp.base_values, other)
        return new_exp

    def __add__(self, other):
        return self._apply_binary_operator(other, operator.add, "__add__")

    def __radd__(self, other):
        # addition commutes, so the operand order does not matter here
        return self._apply_binary_operator(other, operator.add, "__radd__")

    def __sub__(self, other):
        return self._apply_binary_operator(other, operator.sub, "__sub__")

    def __rsub__(self, other):
        # reflected subtraction must compute other - self, so the operands
        # passed to the helper are swapped
        return self._apply_binary_operator(other, lambda a, b: b - a, "__rsub__")

    def __mul__(self, other):
        return self._apply_binary_operator(other, operator.mul, "__mul__")

    def __rmul__(self, other):
        return self._apply_binary_operator(other, operator.mul, "__rmul__")

    def __truediv__(self, other):
        return self._apply_binary_operator(other, operator.truediv, "__truediv__")

    def _numpy_func(self, fname, **kwargs):
        new_self = copy.copy(self)
        axis = kwargs.get("axis", None)

        # collapse the slicer to the right shape
        if axis == 0:
            new_self = new_self[0]
        elif axis == 1:
            new_self = new_self[1]
        elif axis == 2:
            new_self = new_self[2]
        if axis in [0, 1, 2]:
            new_self.op_history = new_self.op_history[:-1]  # pop off the slicing operation we just used

        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array([getattr(np, fname)(v, 0) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = getattr(np, fname)(np.array(self.values), **kwargs)
            if new_self.data is not None:
                try:
                    new_self.data = getattr(np, fname)(np.array(self.data), **kwargs)
                except Exception:
                    new_self.data = None
            if new_self.base_values is not None and issubclass(type(axis), int) and len(self.base_values.shape) > axis:
                new_self.base_values = getattr(np, fname)(self.base_values, **kwargs)
            elif issubclass(type(axis), int):
                new_self.base_values = None

        if axis == 0 and self.clustering is not None and len(self.clustering.shape) == 3:
            if self.clustering.std(0).sum() < 1e-8:
                new_self.clustering = self.clustering[0]
            else:
                new_self.clustering = None

        new_self.op_history.append({
            "name": fname,
            "kwargs": kwargs,
            "prev_shape": self.shape,
            "collapsed_instances": axis == 0
        })

        return new_self

    def mean(self, axis):
        return self._numpy_func("mean", axis=axis)

    def max(self, axis):
        return self._numpy_func("max", axis=axis)

    def min(self, axis):
        return self._numpy_func("min", axis=axis)

    def sum(self, axis=None, grouping=None):
        if grouping is None:
            return self._numpy_func("sum", axis=axis)
        elif axis == 1 or len(self.shape) == 1:
            return group_features(self, grouping)
        else:
            raise Exception("Only axis = 1 is supported for grouping right now...")
    # def reshape(self, *args):
    #     return self._numpy_func("reshape", newshape=args)

    @property
    def abs(self):
        return self._numpy_func("abs")

    @property
    def identity(self):
        return self

    @property
    def argsort(self):
        return self._numpy_func("argsort")

    @property
    def flip(self):
        return self._numpy_func("flip")

    def hclust(self, metric="sqeuclidean", axis=0):
        """ Computes an optimal leaf ordering sort order using hierarchical clustering.

        hclust(metric="sqeuclidean")

        Parameters
        ----------
        metric : string
            A metric supported by scipy clustering.

        axis : int
            The axis to cluster along.
        """
        values = self.values

        if len(values.shape) != 2:
            raise Exception("The hclust order only supports 2D arrays right now!")

        if axis == 1:
            values = values.T

        # compute a hierarchical clustering and return the optimal leaf ordering
        D = sp.spatial.distance.pdist(values, metric)
        cluster_matrix = sp.cluster.hierarchy.complete(D)
        inds = sp.cluster.hierarchy.leaves_list(
            sp.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
        return inds

    def sample(self, max_samples, replace=False, random_state=0):
        """ Randomly samples the instances (rows) of the Explanation object.

        Parameters
        ----------
        max_samples : int
            The number of rows to sample. Note that if replace=False then fewer
            than max_samples will be drawn if explanation.shape[0] < max_samples.

        replace : bool
            Sample with or without replacement.
        """
        # save and restore the global RNG state so sampling is reproducible
        # without permanently reseeding numpy (np.random.seed returns None,
        # so its return value cannot be used to restore the previous state)
        prev_state = np.random.get_state()
        np.random.seed(random_state)
        inds = np.random.choice(self.shape[0], min(max_samples, self.shape[0]), replace=replace)
        np.random.set_state(prev_state)
        return self[list(inds)]

    def _flatten_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.feature_names[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def _use_data_as_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.data[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def percentile(self, q, axis=None):
        new_self = copy.deepcopy(self)
        if self.feature_names is not None and not is_1d(self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array([np.percentile(v, q) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = np.percentile(new_self.values, q, axis)
            if new_self.data is not None:  # guard: data may be absent
                new_self.data = np.percentile(new_self.data, q, axis)
        new_self.op_history.append({
            "name": "percentile",
            "args": (axis,),
            "prev_shape": self.shape,
            "collapsed_instances": axis == 0
        })
        return new_self
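# --- Usage sketch (not part of the original source) ---
# A minimal illustration of the Explanation API above, assuming this module's
# helpers (compute_output_dims, _compute_shape, Alias, Obj, Slicer, ...) are
# available. The array shapes and feature names are made up.
if __name__ == '__main__':
    exp = Explanation(
        values=np.random.randn(10, 3),       # 10 instances x 3 features
        base_values=np.zeros(10),
        data=np.random.randn(10, 3),
        feature_names=["age", "income", "height"])
    print(exp.shape)               # (10, 3)
    print(exp.abs.mean(0).values)  # mean |value| per feature
    print(exp[:, "age"].shape)     # string index resolved in __getitem__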
# Process Raw Data
#==============================================================================
# (imports assumed earlier in this script: os, re, glob, shutil, `join` from
#  os.path, matplotlib's pyplot as plt, PdfPages, plus process_series_files,
#  stats, and Slicer; out_dir and report_dir are also defined earlier)
if args.intype[0] == 'raw':
    data_dir = join(out_dir, 'data')  # defined up front so both branches can use it
    if args.interpolate:
        process_series_files.process_all_in_dir(args.indir[0], data_dir)
    # else:  # just copy the files (disabled in the original)
    #     print("Copying data files to", data_dir)
    #     for csvf in glob.iglob(join(args.indir[0], "*.csv")):
    #         shutil.copyfile(csvf, join(data_dir, os.path.basename(csvf)))

    print("Instantiating Slicer and loading series")
    slicer = Slicer(taskfile=join(data_dir, 'task.xls'))
    filelist = [join(data_dir, f) for f in os.listdir(data_dir)
                if re.compile(r".*\.csv").match(f)]
    num_subjects = len(filelist)
    slicer.load_series_from_csv('raw', filelist)

    if args.stats:
        pp = PdfPages(join(report_dir, 'stats.pdf'))
        stats.plot_all(slicer, pp)
        fig, ax = plt.subplots()
        ax.plot(range(1, num_subjects + 1))
        plt.title("Number of subjects")
        pp.savefig(fig)
        pp.close()
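# --- Argument-parser sketch (an assumption, not the original parser) ---
# The real parser is defined earlier in this script and is not shown here;
# this sketch only reconstructs the flags the block above actually reads
# (args.intype, args.indir, args.interpolate, args.stats), using nargs=1
# because the code indexes args.intype[0] and args.indir[0].
import argparse
parser = argparse.ArgumentParser(description="Slice raw series data and report stats")
parser.add_argument('--intype', nargs=1, default=['raw'])
parser.add_argument('--indir', nargs=1, required=True)
parser.add_argument('--interpolate', action='store_true')
parser.add_argument('--stats', action='store_true')
args = parser.parse_args()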