def create_csurf_map(self, map_file):
    """Build a path -> build_name map from a tab-separated map file."""
    # defaultdict() with no factory behaves like a plain dict
    csurf_map = defaultdict()
    # context manager guarantees the file handle is closed
    with open(map_file, 'r') as file_reader:
        for line in file_reader:
            path, sep, build_name = line.partition('\t')
            csurf_map[path] = build_name.rstrip('\n')
    self.csurf_map = csurf_map
    self.slicer = Slicer()
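For context, a sketch of the tab-separated map file this method expects: one path and one build name per line, joined by a tab. The file name, entries, and the SliceCompare instantiation below are illustrative only (the class appears in Example #14):

# hypothetical map file: one "<path>\t<build_name>" entry per line
with open('example.map', 'w') as f:
    f.write('src/parser.c\tparser_build\n')
    f.write('src/lexer.c\tlexer_build\n')

sc = SliceCompare()                    # class from Example #14
sc.create_csurf_map('example.map')
print(sc.csurf_map['src/parser.c'])    # -> 'parser_build'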
Example #2
    def __init__(
        self,
        values,
        base_values = None,
        data = None,
        display_data = None,
        instance_names = None,
        feature_names = None,
        output_names = None,
        output_indexes = None,
        lower_bounds = None,
        upper_bounds = None,
        main_effects = None,
        hierarchical_values = None,
        clustering = None
    ):
        self.op_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data
            
        output_dims = compute_output_dims(values, base_values, data)

        if len(_compute_shape(feature_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(list(feature_names), 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(list(feature_names), 1)
        
        if len(_compute_shape(output_names)) == 1: # TODO: should always be an alias once slicer supports per-row aliases
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
                output_names = Alias(list(output_names), 0)
            elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
                output_names = Alias(list(output_names), 1)
                
        self._s = Slicer(
            values = values,
            base_values = None if base_values is None else Obj(base_values, [0] + list(output_dims)),
            data = data,
            display_data = display_data,
            instance_names = None if instance_names is None else Alias(instance_names, 0),
            feature_names = feature_names, 
            output_names =  output_names, # None if output_names is None else Alias(output_names, output_dims),
            output_indexes = None if output_indexes is None else (output_dims, output_indexes),
            lower_bounds = lower_bounds,
            upper_bounds = upper_bounds,
            main_effects = main_effects,
            hierarchical_values = hierarchical_values,
            clustering = None if clustering is None else Obj(clustering, [0])
        )
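These Explanation constructors build on the slicer package: a Slicer bundles differently-shaped objects and slices them all with a single indexing expression, and Alias marks a list as labels bound to one dimension rather than as data. A minimal sketch of that behavior (the array and names are illustrative, not from the project above):

import numpy as np
from slicer import Slicer, Alias

values = np.arange(6).reshape(2, 3)
S = Slicer(
    values=values,
    feature_names=Alias(['f0', 'f1', 'f2'], 1),  # labels bound to dim 1
)
row = S[0]                  # indexing the bundle slices every member
print(row.values)           # -> [0 1 2]
print(row.feature_names)    # -> ['f0', 'f1', 'f2'] (dim 1 untouched)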
Example #3
def register(fixed_volume_path, moving_volume_path, output_folder,
             params_path):
    output_image_path = r"{}\new_vol.hdf5".format(output_folder)

    moving_image = read_volume(moving_volume_path)
    fixed_image = read_volume(fixed_volume_path)

    # moving_image = normalize_ct_volume(moving_image)
    # fixed_image = normalize_ct_volume(fixed_image)

    # show_histogram(fixed_image, moving_image)

    # run_slicer_functionality({'moving_image': moving_image, 'fixed_image': fixed_image}, int(moving_image.shape[1] / 2))
    # exit()

    # rotated_moving_image = rotate_and_save(moving_image)
    # unrotated = moving_image.copy()
    moving_image = transform_moving_image(moving_image)

    moving_image, fixed_image = crop_volumes(moving_image, fixed_image)

    res_image_array = move_moving_image_to_fixed(moving_image, fixed_image,
                                                 params_path, output_folder)

    copy_and_add_volume(fixed_volume_path, output_image_path, res_image_array)

    all_slicer_images = OrderedDict([
        # ('moving', unrotated),
        ('moving_rotated', moving_image),
        ('moving_result', res_image_array),
        ('fixed', fixed_image)
    ])

    Slicer(all_slicer_images, int(fixed_image.shape[1] / 2)).show()
Example #4
    def runSlicer(self):
        """ Launch an instance of Slicer with the current state of vars.

            Function that runs when the user presses the run button """
        # DEBUG:
        self.printSlicerVars()

        num_iters = self.num_slices_entry.get()
        if not num_iters:
            num_iters = self.num_imgs
        else:
            num_iters = int(num_iters)

        slicer = Slicer(
            in_dir = self.curr_dir_lbl.get(),
            out_dir = self.out_dir_lbl.get(),
            img_ext = self.img_ext,
            mode = self.mode.get(),
            reverse = self.reverse.get(),
            curve_depth = self.curve_depth,
            num_slices = num_iters
        )
        self.progress["value"] = 0
        self.progress["maximum"] = num_iters

        slice_thread = threading.Thread(target=slicer.slice)
        slice_thread.daemon = True
        slice_thread.start()

        self.slicer_running = True

        prog_thread = threading.Thread(target=self.watchProgress, args=(slicer, num_iters))
        prog_thread.daemon = True
        prog_thread.start()
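The run/watch split above is a common Tkinter pattern: a daemon worker thread does the slow slicing while a second daemon thread polls its progress, so the UI thread stays responsive. A self-contained sketch of the same pattern (the Worker class and timings are illustrative):

import threading
import time

class Worker:
    def __init__(self, total):
        self.total = total
        self.done = 0

    def run(self):
        for _ in range(self.total):
            time.sleep(0.01)        # stand-in for slicing one image
            self.done += 1

def watch(worker):
    # poll the worker's counter, as watchProgress does with the progress bar
    while worker.done < worker.total:
        print('progress: %d/%d' % (worker.done, worker.total))
        time.sleep(0.05)

worker = Worker(total=20)
work_thread = threading.Thread(target=worker.run, daemon=True)
watch_thread = threading.Thread(target=watch, args=(worker,), daemon=True)
work_thread.start()
watch_thread.start()
work_thread.join()                  # a GUI would run its main loop here instead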
Example #6
    def slice(self):
        print "Taints: %s" % scanf_taint.taints
        taints = []
        for _, v in self._hooks.iteritems():
            taints.extend(v.taints)

        target_tmps, target_regs, target_addrs = self._slice_from_last_condition()

        try:
            slicer = Slicer(self._project, self._path, \
                            target_tmps, target_regs, target_addrs, \
                            self._mem_reads, self._mem_writes, taints)
            slicer.slice()
        except SlicerError:
            raise TracerError("Slicer failed")

        sources = self.insts_to_source(sorted(slicer.instructions))
        for line in sources:
            print(line)
Example #7
File: cowc.py  Project: chris010970/cowc
    def __init__(self, size=(3, 3)):
        """
        placeholder
        """

        # list of train and test directories
        self._annotation_suffix = '_Annotated_Cars.png'

        # 15cm resolution
        self._GSD = 0.15
        self._size = (int(round(
            (size[0] / self._GSD) / 2)), int(round((size[1] / self._GSD) / 2)))

        # xml conversion tweak
        self._custom_item_func = lambda x: 'object'

        # create image slicer
        self._slicer = Slicer()

        return
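To make the size arithmetic concrete: at a GSD of 0.15 m per pixel, a 3 m extent covers 3 / 0.15 = 20 pixels, so the stored per-axis half-size is 10. A quick standalone check of the same computation:

GSD = 0.15                                   # metres per pixel
size = (3, 3)                                # bbox size in metres
half = tuple(int(round((s / GSD) / 2)) for s in size)
print(half)                                  # -> (10, 10)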
Example #8
    def __init__(self,
                 expected_value,
                 values,
                 data=None,
                 output_shape=tuple(),
                 interaction_order=0,
                 instance_names=None,
                 input_names=None,
                 output_names=None,
                 output_indexes=None,
                 feature_types=None,
                 lower_bounds=None,
                 upper_bounds=None,
                 main_effects=None,
                 hierarchical_values=None,
                 partition_tree=None):

        input_shape = _compute_shape(data)
        values_dims = list(
            range(len(input_shape) + interaction_order + len(output_shape)))
        output_dims = range(
            len(input_shape) + interaction_order, values_dims[-1])

        #main_effects_inds = values_dims[0:len(input_shape)] + values_dims[len(input_shape) + interaction_order:]
        self.output_names = output_names  # TODO: needs to be tracked after slicing still

        kwargs_dict = {}
        if lower_bounds is not None:
            kwargs_dict["lower_bounds"] = (values_dims, Slicer(lower_bounds))
        if upper_bounds is not None:
            kwargs_dict["upper_bounds"] = (values_dims, Slicer(upper_bounds))
        if main_effects is not None:
            kwargs_dict["main_effects"] = (values_dims, Slicer(main_effects))
        if output_indexes is not None:
            kwargs_dict["output_indexes"] = (output_dims,
                                             Slicer(output_indexes))
        if output_names is not None:
            kwargs_dict["output_names"] = (output_dims, Slicer(output_names))
        if hierarchical_values is not None:
            # dims tuple first, matching the entries above (was the array itself)
            kwargs_dict["hierarchical_values"] = (values_dims,
                                                  Slicer(hierarchical_values))
        if partition_tree is not None:
            # spans rows only, like clustering in later versions of this class
            kwargs_dict["partition_tree"] = ([0], Slicer(partition_tree))

        super().__init__(data, values, input_shape, output_shape,
                         expected_value, interaction_order, instance_names,
                         input_names, feature_types, **kwargs_dict)
Example #9
from slicer import Slicer
import time
import datetime

dir_name = "C:\\Users\\absch\\Desktop\\slicer-test-large\\"
img_ext = ".jpg"

test_timestamp = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y-%m-%d %H:%M:%S')
f = open("timelog.txt", "a")

begin = time.perf_counter()  # time.clock() was removed in Python 3.8
slicer = Slicer(dir_name, img_ext, "simple", False)
slicer.slice()
f.write(test_timestamp + "\t\tSimple\t\t" +
        str(round(time.perf_counter() - begin, 4)) + "\n")

begin = time.perf_counter()
convex_slicer = Slicer(dir_name, img_ext, "convex", False, 10)
convex_slicer.slice()
f.write(test_timestamp + "\t\tConvex\t\t" +
        str(round(time.perf_counter() - begin, 4)) + "\n")

begin = time.perf_counter()
concave_slicer = Slicer(dir_name, img_ext, "concave", False, 10)
concave_slicer.slice()
f.write(test_timestamp + "\t\tConcave\t\t" +
        str(round(time.perf_counter() - begin, 4)) + "\n")
f.close()
Example #10
        slicer.extract_rolling_median(seriesname='raw', window_size=ws)
        rm = slicer.series['raw_rolling_median_' + str(ws)][start:end]
        rm_x = [
            int(j.microseconds / 1000)
            for j in [i - rm.index[0] for i in rm.index]
        ]
        rm_y = [i for i in rm]
        #rm.plot(xticks=rm.index)
        plt.plot(rm_x, rm_y)

    plt.legend(['512Hz EEG'] +
               ['Window size: %d' % ws for ws in window_sizes],
               loc='best')
    plt.ylabel(r"Potential ($\mu$V)")
    plt.xlabel(r"Time after stimulus (ms)")
    plt.grid()
    #plt.title('10 Hz rolling median, compared to 512Hz signal')
    ax.set_ylim(ax.get_ylim()[::-1])

    pdfpages.savefig()
    #plt.show() #debug


if __name__ == "__main__":
    slicer = Slicer()
    print('loading raw from list of csvfiles')
    slicer.load_series_from_csv('raw', sys.argv[1:])
    pp = PdfPages('rolling_median.pdf')
    do_charts(slicer, pp)
    pp.close()
Example #11
    #==============================================================================
    #     Process Raw Data
    #==============================================================================
    if args.intype[0] == 'raw':
        if args.interpolate:
            process_series_files.process_all_in_dir(args.indir[0],
                                                    join(out_dir, 'data'))
            data_dir = join(out_dir, 'data')
        """
        else: #just copy the files
            print "Copying data files to ", data_dir
            for csvf in glob.iglob(join(args.indir[0],"*.csv")):
                shutil.copyfile(csvf, join(data_dir, os.path.basename(csvf)))
        """
        print "Instantiating Slicer and loading series"
        slicer = Slicer(taskfile=join(data_dir, 'task.xls'))
        filelist = [join(data_dir, f) for f in os.listdir(data_dir)
                    if re.compile(r".*\.csv").match(f)]
        num_subjects = len(filelist)
        slicer.load_series_from_csv('raw', filelist)

        if args.stats:
            pp = PdfPages(join(report_dir, 'stats.pdf'))
            stats.plot_all(slicer, pp)

            fig, ax = plt.subplots()
            ax.plot(range(1, num_subjects + 1))
            plt.title("Number of subjects")
            pp.savefig(fig)
            pp.close()
Example #12
        row = []
        for _ in range(0, len(template[row_index])):
            row.append(" ")
        picture.append(row)
    return picture


def add_piece(picture, slices):
    print_picture(picture)
    for i in slices:
        for j in i:
            picture[j[2]][j[1]] = j[0]
            print_picture(picture)


def print_picture(picture):
    os.system('cls' if os.name == 'nt' else 'clear')
    for i in range(0, len(picture)):
        output = ""
        for j in range(0, len(picture[i])):
            output += picture[i][j]
        print(output)


if __name__ == "__main__":
    lib = Library(input("Type your special character: "))
    template = lib.assemble_line(input("Type your phrase: "))
    slicer = Slicer(template)
    slices = slicer.get_pattern()
    picture = get_picture(template)
    add_piece(picture, slices)
Example #13
from slicer import Slicer

#dir_name = "C:\\Users\\absch\\Desktop\\Slicer-test\\"
dir_name = "/mnt/c/Users/absch/Desktop/Slicer-test/"
img_ext = ".jpg"

slicer = Slicer(dir_name, img_ext, "simple", reverse=False, num_slices=20)
slicer.slice()

# convex_slicer = Slicer(dir_name, img_ext, "convex", False, 10)
# convex_slicer.slice()
#
# concave_slicer = Slicer(dir_name, img_ext, "concave", False, 10)
# concave_slicer.slice()
Example #14
class SliceCompare:
    def create_csurf_map(self, map_file):
        """Build a path -> build_name map from a tab-separated map file."""
        csurf_map = defaultdict()
        # context manager guarantees the file handle is closed
        with open(map_file, 'r') as file_reader:
            for line in file_reader:
                path, sep, build_name = line.partition('\t')
                csurf_map[path] = build_name.rstrip('\n')
        self.csurf_map = csurf_map
        self.slicer = Slicer()
    def merge_data_control_slices(self,benchmark_folder):
        benchmarks = []
        for root, dirs, files in os.walk(benchmark_folder):
            #print(dirs)
            for item in dirs:
                benchmarks.append(root+item)
            break
        f_build_rate = open('assert_build_rate_ds.csv','w')
        f_slice_prop = open('assert_slice_property_ds.csv','w')
        f_build_rate.write('Benchmark,Slices,Size of smallest slice built,Size of largest slice built,Build Rate\n')
        f_slice_prop.write('Benchmark,Slices,Smallest slice size,Largest slice size,Average Slice size,Min procedure count, Max procedure count, Avg procedure count,Inter procedural slices, Inter file slices\n')
        f_result_csv = open('assert_result_ds.csv','w')
        f_result_csv.write('benchmark,slices,avg-data-slice-size,avg-full-slice-size,avg-slice-size,inter-procedural-slices,inter-file-slices,build_rate\n')
        build_rate = defaultdict(int)
        for benchmark in benchmarks:
            statements = 0
            bench_list = ['tj-histo', 'json-c-json-c', 'jonas-tig', 'Cyan4973-zstd', 'Phildo-pixQL', 'kr-beanstalkd', 'joyent-http-parser', 'yrutschle-sslh', 'rui314-8cc', 'udp-json-parser', 'cisco-thor']
            bench_list += [ 'libuv-libuv', 'patjak-bcwc_pcie', 'douban-beansdb', 'droe-sslsplit', 'orangeduck-mpc', 'machinezone-tcpkali', 'wg-wrk', 'karthick18-inception', 'vmg-houdini', 'antirez-disque']
            if benchmark.split('/')[-1] in bench_list:
                self.inter_procedural_slices = 0
                self.slice_size = 0
                self.inter_file_slices = 0
                self.min_slice_size = 0
                self.max_slice_size = 0
                self.min_slice_procedures = 0
                self.max_slice_procedures = 0
                self.avg_slice_procedures = 0
                self.min_built_slice_size = 0
                self.max_built_slice_size = 0

                result_data_files = []
                result_control_files = []
                result_data_files, result_control_files = self.get_slice_files(benchmark)
                statements = len(result_data_files)
                #logger.warn('Number of data files = '+str(len(result_data_files)))
                matching_sets = 0
                self.inter_procedural_slices = 0
                self.inter_file_slices = 0
                self.build_rate = 0
                avg_slice_size = 0
                avg_data_slice_size = 0
                avg_control_slice_size = 0
                if len(result_data_files) > 0 and len(result_control_files) > 0:
                    for data_file in result_data_files:
                        f_data_file = open(data_file,'r')
                        data_slice_lines = f_data_file.readlines()
                        f_data_file.close()
                        data_line_set = set()
                        files_in_slice = set()
                        for line in data_slice_lines:
                            if '.h' not in line:
                                if line not in data_line_set and line.strip() != '':
                                    if line.split('\t')[0] not in files_in_slice:
                                        files_in_slice.add(line.split('\t')[0])
                                    data_line_set.add(line)
                                    #self.get_wrapper_function(line)
                        control_file = data_file.replace('result_assert','result_assert_control')
                        f_control_file = open(control_file,'r')
                        control_slice_lines = f_control_file.readlines()
                        f_control_file.close()
                        control_line_set = set()
                        for line in control_slice_lines:
                            if '.h' not in line:
                                if line not in control_line_set and line.strip() != '':
                                    control_line_set.add(line)
                        logger.info(str(len(data_line_set)))
                        logger.info(str(len(control_line_set)))
                        if data_line_set.issubset(control_line_set):
                            matching_sets += 1
                            merged_slices = self.merge_slices(list(data_line_set),defaultdict(list), list(control_line_set),defaultdict(list),1)
                            
                            self.slice_size += len(merged_slices)
                            avg_slice_size += len(merged_slices)

                            if self.min_slice_size == 0:
                                self.min_slice_size = len(merged_slices)
                            if len(merged_slices) < self.min_slice_size:
                                self.min_slice_size = len(merged_slices)
                            if len(merged_slices) > self.max_slice_size:
                                self.max_slice_size = len(merged_slices)

                            avg_data_slice_size += len(data_line_set)
                            avg_control_slice_size += len(control_line_set)
                            if len(files_in_slice) > 1:
                                self.inter_file_slices += 1
                            logger.critical('Slice size for '+data_file+' :'+str(len(merged_slices)))
                            slice_file_location = self.slicer.get_file_path(data_slice_lines[0])
                            slice_code = self.slicer.get_slice_code(merged_slices)
                            self.slicer.generate_slice_file(slice_code)
                            if self.slicer.build_slice_file(slice_file_location) == True:
                                self.build_rate +=1

                                build_rate[benchmark.split('/')[-1]]+=1
                                if self.min_built_slice_size == 0:
                                    self.min_built_slice_size = len(slice_code)
                                if len(slice_code) < self.min_built_slice_size:
                                    self.min_built_slice_size = len(slice_code)
                                if len(slice_code) > self.max_built_slice_size:
                                    self.max_built_slice_size = len(slice_code)

                        else:
                            logger.warn('set mismatch!!')
                            return 0
                    logger.warn('Data slice is subset of control slice in '+benchmark)
                bname = benchmark.split('/')[-1]
                f_result_csv.write(
                    bname + ',' + str(statements) + ',' +
                    str(avg_data_slice_size/100) + ',' +
                    str(avg_control_slice_size/100) + ',' +
                    str(avg_slice_size/100) + ',' +
                    str(self.inter_procedural_slices) + ',' +
                    str(self.inter_file_slices) + ',' +
                    str(build_rate[bname]) + '\n')
                f_build_rate.write(
                    bname + ',' + str(statements) + ',' +
                    str(self.min_built_slice_size) + ',' +
                    str(self.max_built_slice_size) + ',' +
                    str(build_rate[bname]) + '\n')
                f_slice_prop.write(
                    bname + ',' + str(statements) + ',' +
                    str(self.min_slice_size) + ',' +
                    str(self.max_slice_size) + ',' +
                    str(self.slice_size/100) + ',' +
                    str(self.min_slice_procedures) + ',' +
                    str(self.max_slice_procedures) + ',' +
                    str(self.avg_slice_procedures/100) + ',' +
                    str(self.inter_procedural_slices) + ',' +
                    str(self.inter_file_slices) + '\n')
        f_result_csv.close()
        f_build_rate.close()
        f_slice_prop.close()

    def merge_slices(self,data_slice_list,data_slice_fns,control_slice_list,control_slice_fns,depth):
        logger.info('data_slice_fns -'+str(data_slice_fns))
        merged_slices = []
        for data_slice in data_slice_list:
            if data_slice.strip() != '' and len(data_slice.split('\t')) == 2:
                function_decl,start_index,end_index = self.get_wrapper_function(data_slice)
                function_name = function_decl.split('(')[0].split(' ')[-1]
                if function_decl != '':
                    data_slice_fns[function_decl] = [function_name,start_index,end_index]
                else:
                    merged_slices.append(data_slice)
        resolve_call_sites = False
        for control_slice in control_slice_list:
            if control_slice.strip() != '' and len(control_slice.split('\t')) == 2:
                function_decl,start_index,end_index= self.get_wrapper_function(control_slice)
                function_name = function_decl.split('(')[0].split(' ')[-1]
                control_slice_fns[function_decl] = [function_name,start_index,end_index]
                if function_decl in data_slice_fns:
                    has_new_call_sites,data_slice_fns = self.get_new_call_sites(control_slice,data_slice_fns,control_slice_fns)
                    if  has_new_call_sites == True:
                        resolve_call_sites = True
                        logger.info('call site found - '+control_slice)
                    merged_slices.append(control_slice)
        if resolve_call_sites == True and depth < 5:
            return self.merge_slices(merged_slices,data_slice_fns,control_slice_list,control_slice_fns,depth+1)
        else:
            self.avg_slice_procedures += len(data_slice_fns)
            if self.min_slice_procedures == 0:
                self.min_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns)< self.min_slice_procedures:
                self.min_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > self.max_slice_procedures:
                self.max_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > 1:
                self.inter_procedural_slices +=1
            logger.info('control slice fns - '+str(control_slice_fns))
            return merged_slices
    def get_new_call_sites(self,slice_line,data_slice_fns,control_slice_fns):
        keywords = ['if','switch','while']
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        f_cfile = open(file_name,'r')
        lines = f_cfile.readlines()
        is_call_site = False
        line = lines[line_number-1]
        fns_called = []
        if re.search(r'[a-zA-Z]+\([^\)]*\)(\.[^\)]*\))?', line):
            fn_names  = line.split('(')
            index = 0
            for fn_name in fn_names:
                if index == len(fn_names) -1:
                    break
                temp = ''
                logger.info('splitting '+fn_name)
                for i in range(len(fn_name)-1, 0,-1):
                    if fn_name[i].isalnum() == True or fn_name[i]=='_': 
                        temp = fn_name[i] + temp
                    else:
                        break
                index += 1
                if self.has_any_item(temp,keywords) == False:
                    fns_called.append(temp)
                    is_call_site = True
                    logger.info('call site -'+line)
        has_new_call_sites = False
        if is_call_site == True:
            for fn_called in fns_called:
                for key,value in control_slice_fns.items():
                    if value[0] == fn_called:
                        if key not in data_slice_fns:
                            data_slice_fns[key] = value
                            has_new_call_sites = True
        f_cfile.close()
        return has_new_call_sites,data_slice_fns  
    def get_slice_files(self,benchmark):
        result_data_files = []
        result_control_files = []
        print('Entering benchmark: '+benchmark)
        for root, dirs, files in os.walk(benchmark):
            for f in files:
                if f.startswith('result_assert') and 'result_assert_control' not in f:
                    result_data_files.append(root+'/'+f)
                if f.startswith('result_assert_control'):
                    result_control_files.append(root+'/'+f)
        return result_data_files, result_control_files
    def get_wrapper_function(self,slice_line):
        keywords = ['if','switch','while']
        special_char = [';','=','"']
        if len(slice_line.split('\t')) < 2:
            return '',0,0
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        f_cfile = open(file_name,'r')
        lines = f_cfile.readlines()
        line_num = 1
        slice_found = False
        for line in lines:
            if re.search(r'\w\(', line) and line.count('(') < 2 and line.count(')') < 2:
                if self.has_any_item(line,keywords) == False and self.has_any_item(line,special_char) == False:
                    decl_end_index = self.find_decl_end_index(lines,line_num-1)
                    start,end = self.find_block_limits(lines,line_num-1)
                    if line_number >= start and line_number <= end+1:
                        slice_found = True
                        logger.info('Slice - '+slice_line)
                        logger.info('Slice found in function - '+line)
                        logger.info('limits - '+str(start)+' to '+str(end))
                        return line,start,end
            line_num +=1
        if slice_found == False:
            logger.info('Slice '+slice_line+' not found in any function')
        f_cfile.close()
        return '',0,0
    def has_any_item(self,line,item_list):
        for item in item_list:
            if item in line:
                return True
        return False
    def find_block_limits(self,lines,line_index):
        open_brace_count = 0
        close_brace_count = 0
        start_index = line_index
        end_index = line_index
        begin_found = False
        for i in range(line_index, len(lines)):
            open_brace_count += lines[i].count('{')
            close_brace_count += lines[i].count('}')
            if open_brace_count == 1 and begin_found != True:
                start_index = i
                begin_found = True
            if open_brace_count>0 and open_brace_count == close_brace_count:
                end_index = i
                break
        return start_index,end_index
    def find_decl_end_index(self,lines,line_number):
        for i in range(line_number,len(lines)):
            if ')' in lines[i]:
                return i
    def create_control_slice(self,benchmark_folder):
        logger.warn(self.csurf_map)
        benchmarks = []
        for root, dirs, files in os.walk(benchmark_folder):
            #print(dirs)
            for item in dirs:
                benchmarks.append(root+item)
            break
        for benchmark in benchmarks:
            f_used_in = open(benchmark+'/used_input.txt','r')
            for line in f_used_in.readlines():
                f_csurf_in = open(benchmark+'/input.txt','w')
                f_csurf_in.write(line)
                f_csurf_in.close()
                logger.warn(line)
                cfile_name = line.split(':')[0].split('/')[-1]
                line_num =  int(line.split(':')[1])
                try:
                    command = 'csurf -nogui -l /home/nishanth/Workspace/PyHelium/csurf/plugin '+benchmark+'/myproj'
                    #print('Running cmd - '+command)
                    p = Popen(command,shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
                    response,_ = p.communicate(input=None)
                    response = response.decode('utf8')
                    print(response)
                    #p.kill()
                except (Exception) as e:
                    p.kill()
                    print(e)
                response_lines = response.split('\n')
                for i in range(0,len(response_lines)):
                    if 'Slice set size' in response_lines[i]:
                        slice_set_size = int(response_lines[i].split(':')[1].strip())
                        #print('Slize set = '+str(slice_set_size))
                        if(slice_set_size > 0):
                            #add result set to result file.
                            f_result_in = open(benchmark+'/'+'result_assert_control'+cfile_name+str(line_num)+'.txt','w')
                            for j in range(i+1,len(response_lines)):
                                f_result_in.write(response_lines[j]+'\n')
                            f_result_in.close()
            f_used_in.close()
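The wrapper-function lookup in the class above hinges on brace counting: find_block_limits scans forward from a candidate declaration, marks the body start when the running '{' count first reaches one, and marks the end once '{' and '}' counts balance. A tiny standalone check of the same counting idea on a made-up C snippet:

lines = [
    'int add(int a, int b)',
    '{',
    '    int sum = a + b;',
    '    return sum;',
    '}',
]
opens = closes = 0
start = end = 0
begin_found = False
for i, line in enumerate(lines):
    opens += line.count('{')
    closes += line.count('}')
    if opens == 1 and not begin_found:
        start, begin_found = i, True    # first '{' -> body start
    if opens > 0 and opens == closes:
        end = i                         # braces balanced -> body end
        break
print(start, end)                       # -> 1 4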
Example #15
    start = pd.to_datetime('2010-12-13 13:54:10.5-05:00')
    end = pd.to_datetime('2010-12-13 13:54:11.5-05:00')
    
    window_sizes = [32, 64, 128]
    raw = slicer.series['raw'][start:end]
    raw.plot()
    
    for ws in window_sizes:
        slicer.extract_rolling_median(seriesname = 'raw', window_size = ws)
        rm = slicer.series['raw_rolling_median_' + str(ws)][start:end]
        rm.plot(xticks=[i for i in rm.index])
    
    plt.legend(['512Hz EEG'] +
               ['Rolling Median %d window size' % ws for ws in window_sizes],
               loc='best')
    plt.ylabel(r"Potential ($\mu$V)")
    plt.xlabel(r"Time ($\mu$Sec)")
    #plt.title('10 Hz rolling median, compared to 512Hz signal')
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%S.%f'))
    ax.set_ylim(ax.get_ylim()[::-1])
    pdfpages.savefig()
    #plt.show()
   
if __name__=="__main__":
    slicer = Slicer()
    print('loading raw from list of csvfiles')
    slicer.load_series_from_csv('raw', sys.argv[1:])
    pp = PdfPages('rolling_median.pdf')
    do_charts(slicer, pp)
    pp.close()
Example #16
def Compute3DDice(PID: Union[int, List[int]],
                  netparams: str,
                  patchsize: int,
                  batch: int = 10,
                  bydim: int = 1,
                  doeval: bool = True,
                  dev: str = 'cpu',
                  step: int = 0,
                  saveout: bool = False,
                  savename: str = 'x') -> List[float]:
    # NB: for deepmed, patchsize means the size of the OUTPUT patch
    # (i.e. if patchsize=9, the network input will be 25x25); handled below.
    # step: stride between patch starts. 0 means non-overlapping patches;
    # K means each patch starts at prev_patch_start + K.
    # saveout: whether to save the full subject output (for viewing and debugging).

    # GET NET:
    net, in1, in2, in3D = getNetwork(netparams, dev)
    if doeval:
        net.eval()
    else:
        net.train()
    device = torch.device(dev)
    print('Net loaded.')
    # CUT AND EVAL: loop through cutting smaller pieces, moving to torch and eval
    if isinstance(PID, int):
        PID = [PID]
    segmented = torch.zeros((1, 7), device=dev)
    existing = torch.zeros((1, 7), device=dev)
    intersec = torch.zeros(
        (1, 7), device=dev
    )  #these three needed to gather results, for post dice compute
    Dices = torch.zeros((len(PID), 7), device=dev)
    axes = [0, 2, 3] + ([4] if in3D else [])

    #set the right function to use
    TensorCropping = CenterCropTensor3d
    padding = [(0, 0), (0, patchsize), (0, patchsize),
               (0, patchsize)]  #(16,patchsize+16)
    paddingall = [(0, 0), (0, patchsize), (0, patchsize),
                  (0, patchsize)]  #(16,patchsize+16)

    if in2:  #deep med, we need to pad the input on all sides to be able to cut pieces as wanted
        paddingall[1:] = [(16 + 8, patchsize + 16 + 8)] * 3
        patchsize = patchsize - 16
        #since patchsize, as it goes into slicer, means the size of network output

    if not in3D:
        padding[bydim + 1] = (0, 0)
        paddingall[bydim + 1] = (0, 0)
        TensorCropping = CenterCropTensor

    # LOAD DATA:
    for idx, pid in enumerate(PID):
        #set accumulators to 0:
        segmented.zero_()
        existing.zero_()
        intersec.zero_()

        allin, gt, mask = loadSubject(pid, patchsize // 2)

        size_full = allin[0].shape  #shape of 3d img, one channel

        mask = np.pad(mask, padding[1:], mode='constant')
        gt = np.pad(gt, padding, mode='constant')
        allin = np.pad(allin, paddingall, mode='constant')
        #  print((size_full, gt.shape))
        empty_subj = torch.zeros(
            gt.shape[1:])  #allin.shape[1:]) #cause we dont need channels

        slicer = Slicer(size_full, patchsize, in1, in2, in3D, bydim,
                        step)  #return string slice, include all channels
        # for cutting out the middle part based on step:
        #slice((sf-step)//2, sf-np.ceil((sf-step)/2))
        slicing = "".join([
            f'.narrow({idx}, {(patchsize-step)//2}, {step})'
            for idx in range(2, (4 + in3D))
        ]) if step > 0 else ""
        paddingup = [0, patchsize - step] * 3
        if not in3D:
            paddingup[-1 - bydim * 2] = 0

        print(f'Eval on subj{pid}...')
        with torch.no_grad():
            while slicer.todo > 0:
                gtslices, in1slices, in2slices = slicer.get_batch(
                    batch)  #multiple slices

                gts = np.stack(list(
                    map(eval, [f'gt[{slajs}]' for slajs in gtslices])),
                               axis=0)
                in1s = np.stack(list(
                    map(eval, [f'allin[{slajs}]' for slajs in in1slices])),
                                axis=0)
                #maske = np.stack([eval(f'mask[{slajs[2:]}]') for slajs in gtslices], axis=0)
                maske = np.stack(list(
                    map(eval, [f'mask[{slajs[2:]}]' for slajs in gtslices])),
                                 axis=0)

                # move to torch:
                target_oh = torch.from_numpy(gts).squeeze().to(device)
                data = [torch.from_numpy(in1s).squeeze().float().to(device)
                        ]  #input 1
                if in2:
                    #in2s = np.stack([eval(f'allin[{slajs}]') for slajs in in2slices], axis=0)
                    in2s = np.stack(list(
                        map(eval, [f'allin[{slajs}]' for slajs in in2slices])),
                                    axis=0)
                    data.append(
                        torch.from_numpy(in2s).squeeze().float().to(
                            device))  #input 2

                #run net on data. get output, save sums in dice gather lists
                out = net(*data).exp()
                target_oh, out = TensorCropping(
                    target_oh, out
                )  #in case of PSP net, might be that output is bigger than input/GT
                #dices = AllDices(out, target_oh)
                maske = torch.from_numpy(maske).squeeze().unsqueeze(
                    1).float().to(device)

                #cut only the middle part of OUT, MASKE and TARGET_OH for eval (depending on the step size)
                maske = eval('maske' + slicing)
                target_oh = eval('target_oh' + slicing)
                out = eval('out' + slicing)

                #when summing up, use only the middle of the patches. Depending on how big 'step' was.
                segmented += torch.sum(out * maske, axis=axes)
                existing += torch.sum(target_oh * maske, axis=axes)
                intersec += torch.sum(target_oh * maske * out, axis=axes)

                #save output if required
                if saveout:  #whats faster, simply saving to an existing tensor, or iffing every loop??
                    for idd, slajs in enumerate(gtslices):
                        tmp = torch.argmax(out[idd, ...], dim=0)
                        if not in3D:
                            tmp = tmp.unsqueeze(bydim)
                    # print(tmp.shape)
                        tmp = torch.nn.functional.pad(tmp, paddingup)
                        eval(f'empty_subj[{slajs[2:]}].copy_(tmp)')

        #all saved, now calc actual dices:
        Dices[idx, :] = 2 * intersec / (existing + segmented
                                        )  #calc dice from the gathering lists
        if saveout:
            #save img as npy.
            np.save(f'out{pid}_{savename}.npy', empty_subj.cpu().numpy())

    print('Done.')
    #pidis = [int(p) for p in PIDS]
    dices = np.concatenate((np.array(PID)[:, None], Dices.cpu().numpy()),
                           axis=1)
    np.save(f'dices_{savename}.npy', dices)
    return dices
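The slicing string assembled inside Compute3DDice center-crops each patch with chained Tensor.narrow calls: along every spatial dimension it keeps step elements starting at (patchsize - step) // 2. The same crop written directly, without eval (shapes are illustrative):

import torch

patchsize, step = 32, 16
out = torch.randn(4, 7, patchsize, patchsize)   # batch x classes x H x W

# keep the central `step` elements of each spatial dimension
for dim in (2, 3):
    out = out.narrow(dim, (patchsize - step) // 2, step)
print(out.shape)                                # -> torch.Size([4, 7, 16, 16])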
Example #17
    def __init__(  # pylint: disable=too-many-arguments
            self,
            values,
            base_values=None,
            data=None,
            display_data=None,
            instance_names=None,
            feature_names=None,
            output_names=None,
            output_indexes=None,
            lower_bounds=None,
            upper_bounds=None,
            error_std=None,
            main_effects=None,
            hierarchical_values=None,
            clustering=None,
            compute_time=None):
        self.op_history = []

        self.compute_time = compute_time

        # cloning. TODOsomeday: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        self.output_dims = compute_output_dims(values, base_values, data,
                                               output_names)
        values_shape = _compute_shape(values)

        if output_names is None and len(self.output_dims) == 1:
            output_names = [
                f"Output {i}" for i in range(values_shape[self.output_dims[0]])
            ]

        if len(
                _compute_shape(feature_names)
        ) == 1:  # TODOsomeday: should always be an alias once slicer supports per-row aliases
            if len(values_shape) >= 1 and len(
                    feature_names) == values_shape[0]:
                feature_names = Alias(list(feature_names), 0)
            elif len(values_shape) >= 2 and len(
                    feature_names) == values_shape[1]:
                feature_names = Alias(list(feature_names), 1)

        if len(
                _compute_shape(output_names)
        ) == 1:  # TODOsomeday: should always be an alias once slicer supports per-row aliases
            output_names = Alias(list(output_names), self.output_dims[0])
            # if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
            #     output_names = Alias(list(output_names), 0)
            # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
            #     output_names = Alias(list(output_names), 1)

        if output_names is not None and not isinstance(output_names, Alias):
            l = len(_compute_shape(output_names))
            if l == 0:
                pass
            elif l == 1:
                output_names = Obj(output_names, self.output_dims)
            elif l == 2:
                output_names = Obj(output_names, [0] + list(self.output_dims))
            else:
                raise ValueError(
                    "shap.Explanation does not yet support output_names of order greater than 3!"
                )

        if not hasattr(base_values, "__len__") or len(base_values) == 0:
            pass
        elif len(_compute_shape(base_values)) == len(self.output_dims):
            base_values = Obj(base_values, list(self.output_dims))
        else:
            base_values = Obj(base_values, [0] + list(self.output_dims))

        self._s = Slicer(
            values=values,
            base_values=base_values,
            data=list_wrap(data),
            display_data=list_wrap(display_data),
            instance_names=None if instance_names is None else Alias(
                instance_names, 0),
            feature_names=feature_names,
            output_names=output_names,
            output_indexes=None if output_indexes is None else
            (self.output_dims, output_indexes),
            lower_bounds=list_wrap(lower_bounds),
            upper_bounds=list_wrap(upper_bounds),
            error_std=list_wrap(error_std),
            main_effects=list_wrap(main_effects),
            hierarchical_values=list_wrap(hierarchical_values),
            clustering=None if clustering is None else Obj(clustering, [0]))
Example #18
File: cowc.py  Project: chris010970/cowc
class Cowc:
    def __init__(self, size=(3, 3)):
        """
        placeholder
        """

        # list of train and test directories
        self._annotation_suffix = '_Annotated_Cars.png'

        # 15cm resolution
        self._GSD = 0.15
        self._size = (int(round(
            (size[0] / self._GSD) / 2)), int(round((size[1] / self._GSD) / 2)))

        # xml conversion tweak
        self._custom_item_func = lambda x: 'object'

        # create image slicer
        self._slicer = Slicer()

        return

    def process(self, data_path, out_path):
        """
        create images and annotations for train and validation
        """

        # for each subset
        for subset in ['train', 'test']:

            # locate all images in data path
            path = os.path.join(data_path, subset)
            files = glob.glob(os.path.join(os.path.join(path, '**'), '*.png'),
                              recursive=True)
            files = [x for x in files if 'Annotated' not in x]

            # slice up images
            for f in files:

                slices = self._slicer.process(
                    f, os.path.join(out_path, '{}/images'.format(subset)))

                # check annotation image exists
                pathname = os.path.join(
                    f.replace('.png', self._annotation_suffix))
                if os.path.exists(pathname):

                    # create PASCAL VOC schema for each image slice
                    annotation_image = cv2.imread(pathname)
                    for s in slices:
                        self.getAnnotation(
                            s, annotation_image,
                            os.path.join(out_path,
                                         '{}/annotations'.format(subset)))

        return

    def getAnnotation(self,
                      s,
                      annotation_image,
                      out_path,
                      writeback=False,
                      overwrite=True):
        """
        create annotation xml files encoding bounding box locations
        """

        # create label pathname
        filename = os.path.splitext(os.path.basename(
            s['pathname']))[0] + '.xml'
        annotation_pathname = os.path.join(out_path, filename)

        if not os.path.exists(annotation_pathname) or overwrite:

            # get bounding boxes for cars in aoi
            results, label_locs = self.getBoundingBoxes(s, annotation_image)
            schema = self.getSchema(s, results)

            # create output dir if necessary
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            # write annotation to xml file
            with open(os.path.join(out_path, filename), "w+") as outfile:

                # parse xml into string
                xml = (dicttoxml.dicttoxml(schema, attr_type=False,
                                           item_func=self._custom_item_func,
                                           custom_root='annotation')
                       .replace(b'<annotation>', b'<annotation verified="yes">')
                       .replace(b'<items>', b'')
                       .replace(b'</items>', b''))

                dom = parseString(xml)

                # write xml string to file
                outfile.write(dom.toprettyxml())

            # plot writeback
            if writeback:
                self.drawBoundingBoxes(s['pathname'], results)

        return

    def getBoundingBoxes(self, s, annotation_image, heading='fixed'):
        """
        extract bounding boxes around car locations from annotation image
        """

        # process each slice
        records = []

        # extract window from annotation image
        x0 = s['x0']
        y0 = s['y0']
        window = annotation_image[y0:y0 + s['height'], x0:x0 + s['width']]

        # find locations of non-zero pixels - add zero rotation column
        label_locs = np.where(window > 0)
        label_locs = np.transpose(
            np.vstack(
                [label_locs[0], label_locs[1],
                 np.zeros(len(label_locs[0]))]))

        if label_locs.size > 0:

            # create bounding box for annotated car locations
            for loc in label_locs:
                record = self.getBoundingBox(loc, window.shape)

                # ignore annotated objects close to image edge
                if record:
                    records.append(record)

        return records, label_locs

    def getBoundingBox(self, loc, dims):
        """
        placeholder
        """

        # extrapolate bbox from centroid coords
        record = {}
        yc, xc, angle = loc

        # compute pts along vertical line rotated at mid point
        x0_r, y0_r = self.rotatePoint(xc, yc + self._size[1], xc, yc,
                                      math.radians(angle))
        x1_r, y1_r = self.rotatePoint(xc, yc - self._size[1], xc, yc,
                                      math.radians(angle))

        # compute corner pts orthogonal to rotated line end points
        corner = np.empty((4, 2), float)

        corner[0] = self.rotatePoint(x0_r, y0_r + self._size[0], x0_r, y0_r,
                                     math.radians(angle + 90.0))
        corner[1] = self.rotatePoint(x0_r, y0_r - self._size[0], x0_r, y0_r,
                                     math.radians(angle + 90.0))

        corner[2] = self.rotatePoint(x1_r, y1_r + self._size[0], x1_r, y1_r,
                                     math.radians(angle + 90.0))
        corner[3] = self.rotatePoint(x1_r, y1_r - self._size[0], x1_r, y1_r,
                                     math.radians(angle + 90.0))

        # get min and max coordinates for bbox
        x_min = np.amin(corner[:, 0])
        x_max = np.amax(corner[:, 0])
        y_min = np.amin(corner[:, 1])
        y_max = np.amax(corner[:, 1])

        # check limits
        x_min_c = max(0, x_min)
        y_min_c = max(0, y_min)
        x_max_c = min(x_max, dims[1] - 1)
        y_max_c = min(y_max, dims[0] - 1)

        area = (x_max - x_min) * (y_max - y_min)
        area_c = (x_max_c - x_min_c) * (y_max_c - y_min_c)

        # only retain bboxes not constrained by image edges
        if area_c / area > 0.95:

            record['bbox'] = [x_min_c, y_min_c, x_max_c, y_max_c]

            # readjust perimeter points
            corner[:, 0] = np.where(corner[:, 0] < 0.0, 0.0, corner[:, 0])
            corner[:, 0] = np.where(corner[:, 0] > dims[1] - 1, dims[1] - 1,
                                    corner[:, 0])

            corner[:, 1] = np.where(corner[:, 1] < 0.0, 0.0, corner[:, 1])
            corner[:, 1] = np.where(corner[:, 1] > dims[0] - 1, dims[0] - 1,
                                    corner[:, 1])

            # minimise distance between points
            d1 = np.linalg.norm(corner[1] - corner[2])
            d2 = np.linalg.norm(corner[1] - corner[3])
            if d1 > d2:
                corner[[2, 3]] = corner[[3, 2]]

            record['corner'] = list(corner.flatten())

        return record

    def rotatePoint(self, x, y, xc, yc, angle):
        """
        compute rotation of point around origin
        """

        # Rotate point counterclockwise by a given angle around a given origin.
        qx = xc + math.cos(angle) * (x - xc) - math.sin(angle) * (y - yc)
        qy = yc + math.sin(angle) * (x - xc) + math.cos(angle) * (y - yc)

        return qx, qy
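rotatePoint is the standard counterclockwise rotation of a point p about a centre c, q = c + R(angle)(p - c). A quick standalone sanity check of the same formula:

import math

def rotate_point(x, y, xc, yc, angle):
    # counterclockwise rotation of (x, y) about (xc, yc)
    qx = xc + math.cos(angle) * (x - xc) - math.sin(angle) * (y - yc)
    qy = yc + math.sin(angle) * (x - xc) + math.cos(angle) * (y - yc)
    return qx, qy

# rotating (1, 0) by 90 degrees about the origin gives (0, 1)
print(rotate_point(1, 0, 0, 0, math.radians(90)))   # ~(0.0, 1.0)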

    def getSchema(self, s, records):
        """
        convert annotation into ordered list for conversion into PASCAL VOC schema
        """

        # convert to PASCAL VOC annotation schema
        object_list = []
        for record in records:

            bbox = record['bbox']
            #corner = record[ 'corner' ]
            object_list.append(
                OrderedDict({
                    'name': 'car',
                    'pose': 'Topdown',
                    'truncated': 0,
                    'difficult': 0,
                    'bndbox': {
                        'xmin': bbox[0],
                        'ymin': bbox[1],
                        'xmax': bbox[2],
                        'ymax': bbox[3]
                    }
                    #'segmentation' : ','.join( (str(pt) for pt in corner ) )
                }))

        # return full schema as dictionary
        return OrderedDict({
            'folder': 'images',
            'filename': os.path.basename(s['pathname']),
            'path': os.path.dirname(s['pathname']),
            'source': {
                'database': 'cowc'
            },
            'size': {
                'width': s['width'],
                'height': s['height'],
                'depth': 3
            },
            'segmented': 0,
            'items': object_list
        })

    def drawBoundingBoxes(self, pathname, records):
        """
        placeholder
        """

        # no action if no bboxes
        if len(records) > 0:

            # load image
            img = cv2.imread(pathname)
            height = img.shape[0]
            width = img.shape[1]

            # show image
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            ax = plt.gca()
            fig = plt.gcf()
            fig.canvas.set_window_title(os.path.basename(pathname))
            print(pathname)

            # draw bbox lines
            colors = ['r', 'g', 'y', 'b', 'm', 'c']
            idx = 0
            for record in records:

                x0, y0, x1, y1 = record['bbox']

                color = colors[idx] + '-'
                idx = idx + 1 if idx + 1 < len(colors) else 0

                ax.plot([x0, x1], [y0, y0], color)
                ax.plot([x0, x1], [y1, y1], color)
                ax.plot([x0, x0], [y0, y1], color)
                ax.plot([x1, x1], [y0, y1], color)
                """
                # get run length encoding from perimeter points string
                rl_encoding = mask.frPyObjects( [ record[ 'corner' ] ] , height, width )

                binary_mask = mask.decode( rl_encoding )
                binary_mask = np.amax(binary_mask, axis=2)

                masked = np.ma.masked_where(binary_mask == 0, binary_mask )
                ax.imshow( masked, 'jet', interpolation='None', alpha=0.5 )
                """

            plt.show()

        return
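A note on the xml serialization in getAnnotation: dicttoxml's item_func names the elements generated for list entries, so returning 'object' makes every record in the 'items' list render as an <object> element, which is what the PASCAL VOC schema expects. A minimal sketch with a made-up schema (assuming the dicttoxml package used above):

import dicttoxml
from xml.dom.minidom import parseString

schema = {'size': {'width': 512, 'height': 512},
          'items': [{'name': 'car'}, {'name': 'car'}]}
xml = dicttoxml.dicttoxml(schema, attr_type=False,
                          item_func=lambda x: 'object',
                          custom_root='annotation')
print(parseString(xml).toprettyxml())   # list entries appear as <object> elements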
Example #19
    def __init__(self,
                 expected_value,
                 values,
                 data=None,
                 output_shape=tuple(),
                 interaction_order=0,
                 instance_names=None,
                 input_names=None,
                 output_names=None,
                 output_indexes=None,
                 feature_types=None,
                 lower_bounds=None,
                 upper_bounds=None,
                 main_effects=None,
                 hierarchical_values=None,
                 original_rows=None,
                 clustering=None):
        self.transform_history = []

        input_shape = _compute_shape(data)

        # trim any trailing None shapes since we don't want slicer to try and use those
        if len(input_shape) > 0 and input_shape[-1] is None:
            input_shape = input_shape[:-1]

        values_dims = list(
            range(len(input_shape) + interaction_order + len(output_shape)))
        output_dims = range(
            len(input_shape) + interaction_order, values_dims[-1])

        #main_effects_inds = values_dims[0:len(input_shape)] + values_dims[len(input_shape) + interaction_order:]
        self.output_names = output_names  # TODO: needs to be tracked after slicing still

        kwargs_dict = {}
        if lower_bounds is not None:
            kwargs_dict["lower_bounds"] = (values_dims, Slicer(lower_bounds))
        if upper_bounds is not None:
            kwargs_dict["upper_bounds"] = (values_dims, Slicer(upper_bounds))
        if main_effects is not None:
            kwargs_dict["main_effects"] = (values_dims, Slicer(main_effects))
        if output_indexes is not None:
            kwargs_dict["output_indexes"] = (output_dims,
                                             Slicer(output_indexes))
        if output_names is not None:
            kwargs_dict["output_names"] = (output_dims, Slicer(output_names))
        if hierarchical_values is not None:
            kwargs_dict["hierarchical_values"] = (values_dims,
                                                  Slicer(hierarchical_values))
        if input_names is not None:
            if not is_1d(input_names):
                input_name_dims = values_dims
            else:
                input_name_dims = values_dims[1:]
            kwargs_dict["input_names"] = (input_name_dims, Slicer(input_names))
        if original_rows is not None:
            kwargs_dict["original_rows"] = (values_dims[1:],
                                            Slicer(original_rows))
        if clustering is not None:
            kwargs_dict["clustering"] = ([0], Slicer(clustering))
        if expected_value is not None:
            ndims = len(getattr(expected_value, "shape", []))
            if ndims == len(values_dims):
                kwargs_dict["expected_value"] = (values_dims,
                                                 Slicer(expected_value))
            elif ndims == len(values_dims) - 1:
                kwargs_dict["expected_value"] = (values_dims[1:],
                                                 Slicer(expected_value))
            else:
                raise Exception(
                    "The shape of the passed expected_value does not match the shape of the passed values!"
                )
        # if clustering is not None:
        #     self.clustering = clustering

        super().__init__(data, values, input_shape, output_shape,
                         expected_value, interaction_order, instance_names,
                         input_names, feature_types, **kwargs_dict)
Example #20
class Explanation(object, metaclass=MetaExplanation):
    """ This is currently an experimental feature don't depend on this object yet! :)
    """
    def __init__(self,
                 values,
                 base_values=None,
                 data=None,
                 display_data=None,
                 instance_names=None,
                 feature_names=None,
                 output_names=None,
                 output_indexes=None,
                 lower_bounds=None,
                 upper_bounds=None,
                 main_effects=None,
                 hierarchical_values=None,
                 clustering=None):
        self.transform_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        output_dims = compute_output_dims(values, base_values, data)

        # TODO: should always be an alias once slicer supports per-row aliases
        if len(_compute_shape(feature_names)) == 1:
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(feature_names, 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(feature_names, 1)

        self._s = Slicer(
            values=values,
            base_values=base_values,
            data=data,
            display_data=display_data,
            instance_names=None if instance_names is None else Alias(
                instance_names, 0),
            feature_names=feature_names,
            output_names=None if output_names is None else Alias(
                output_names, output_dims),
            output_indexes=None if output_indexes is None else
            (output_dims, output_indexes),
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            main_effects=main_effects,
            hierarchical_values=hierarchical_values,  # Obj(hierarchical_values, (0, None)),
            clustering=clustering)

    @property
    def shape(self):
        return _compute_shape(self._s.values)

    @property
    def values(self):
        return self._s.values

    @values.setter
    def values(self, new_values):
        self._s.values = new_values

    @property
    def base_values(self):
        return self._s.base_values

    @base_values.setter
    def base_values(self, new_base_values):
        self._s.base_values = new_base_values

    @property
    def data(self):
        return self._s.data

    @data.setter
    def data(self, new_data):
        self._s.data = new_data

    @property
    def display_data(self):
        return self._s.display_data

    @display_data.setter
    def display_data(self, new_display_data):
        self._s.display_data = new_display_data

    @property
    def instance_names(self):
        return self._s.instance_names

    @property
    def output_names(self):
        return self._s.output_names

    @property
    def output_indexes(self):
        return self._s.output_indexes

    @property
    def feature_names(self):
        return self._s.feature_names

    @feature_names.setter
    def feature_names(self, new_feature_names):
        self._s.feature_names = new_feature_names

    @property
    def lower_bounds(self):
        return self._s.lower_bounds

    @property
    def upper_bounds(self):
        return self._s.upper_bounds

    @property
    def main_effects(self):
        return self._s.main_effects

    @main_effects.setter
    def main_effects(self, new_main_effects):
        self._s.main_effects = new_main_effects

    @property
    def hierarchical_values(self):
        return self._s.hierarchical_values

    @hierarchical_values.setter
    def hierarchical_values(self, new_hierarchical_values):
        self._s.hierarchical_values = new_hierarchical_values

    @property
    def clustering(self):
        return self._s.clustering

    @clustering.setter
    def clustering(self, new_clustering):
        self._s.clustering = new_clustering

    def __repr__(self):
        out = ".values =\n" + self.values.__repr__()
        if self.base_values is not None:
            out += "\n\n.base_values =\n" + self.base_values.__repr__()
        if self.data is not None:
            out += "\n\n.data =\n" + self.data.__repr__()
        return out

    def __getitem__(self, item):
        """ This adds support for magic string indexes like "rank(0)".
        """
        if not isinstance(item, tuple):
            item = (item, )

        # convert any OpChains or magic strings
        for i, t in enumerate(item):
            orig_t = t
            if issubclass(type(t), OpChain):
                t = t.apply(self)
                # slicer does not like numpy integer indexes
                if issubclass(type(t), (np.int64, np.int32)):
                    t = int(t)
                # slicer wants lists, not numpy arrays, for indexing
                elif issubclass(type(t), np.ndarray):
                    t = [int(v) for v in t]
            elif issubclass(type(t), Explanation):
                t = t.values
            elif type(t) is str:
                if is_1d(self.feature_names):
                    ind = np.where(np.array(self.feature_names) == t)[0][0]
                    t = int(ind)
                else:
                    new_values = []
                    new_data = []
                    for i in range(len(self.values)):
                        for s, v, d in zip(self.feature_names[i],
                                           self.values[i], self.data[i]):
                            if s == t:
                                new_values.append(v)
                                new_data.append(d)
                    new_self = copy.deepcopy(self)
                    new_self.values = new_values
                    new_self.data = new_data
                    new_self.feature_names = t
                    new_self.clustering = None
                    return new_self
            if issubclass(type(t), np.ndarray):
                t = [int(j) for j in t]
            elif issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)):
                t = int(t)

            if t is not orig_t:
                tmp = list(item)
                tmp[i] = t
                item = tuple(tmp)

        # call slicer for the real work
        new_self = copy.copy(self)
        new_self.transform_history.append(("__getitem__", (item, )))
        new_self._s = self._s.__getitem__(item)

        return new_self
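
    # Hedged usage sketch for the magic-string indexing above (hypothetical names):
    # a plain string selects a feature column by name, and an OpChain index is
    # resolved against self via t.apply(self) before slicing.
    #
    #     e = Explanation(values, feature_names=["age", "income"])
    #     e[:, "age"]   # equivalent to e[:, 0] when feature_names is 1-D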

    def __len__(self):
        return self.shape[0]

    def __copy__(self):
        return Explanation(self.values, self.base_values, self.data,
                           self.display_data, self.instance_names,
                           self.feature_names, self.output_names,
                           self.output_indexes, self.lower_bounds,
                           self.upper_bounds, self.main_effects,
                           self.hierarchical_values, self.clustering)

    def _numpy_func(self, fname, **kwargs):
        new_self = copy.copy(self)
        axis = kwargs.get("axis", None)

        # collapse the slicer to right shape
        if axis == 0:
            new_self = new_self[0]
        elif axis == 1:
            new_self = new_self[1]
        elif axis == 2:
            new_self = new_self[2]

        if self.feature_names is not None and not is_1d(
                self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array(
                [getattr(np, fname)(v) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = getattr(np, fname)(np.array(self.values),
                                                 **kwargs)
            if new_self.data is not None:
                try:
                    new_self.data = getattr(np, fname)(np.array(self.data),
                                                       **kwargs)
                except Exception:  # data may not support this reduction; drop it
                    new_self.data = None
            if new_self.base_values is not None and issubclass(
                    type(axis), int) and len(self.base_values.shape) > axis:
                new_self.base_values = getattr(np, fname)(self.base_values,
                                                          **kwargs)
            elif issubclass(type(axis), int):
                new_self.base_values = None

        if axis == 0 and self.clustering is not None and len(
                self.clustering.shape) == 3:
            if self.clustering.std(0).sum() < 1e-8:
                new_self.clustering = self.clustering[0]
            else:
                new_self.clustering = None

        new_self.transform_history.append((fname, kwargs))

        return new_self

    def mean(self, axis):
        return self._numpy_func("mean", axis=axis)

    def max(self, axis):
        return self._numpy_func("max", axis=axis)

    def min(self, axis):
        return self._numpy_func("min", axis=axis)

    def sum(self, axis):
        return self._numpy_func("sum", axis=axis)

    @property
    def abs(self):
        return self._numpy_func("abs")

    @property
    def argsort(self):
        return self._numpy_func("argsort")

    @property
    def flip(self):
        return self._numpy_func("flip")

    def hclust(self, metric="sqeuclidean", axis=0):
        """ Computes an optimal leaf ordering sort order using hclustering.
        
        hclust(metric="sqeuclidean")
        
        Parameters
        ----------
        metric : string
            A metric supported by scipy clustering.

        axis : int
            The axis to cluster along.
        """
        values = self.values

        if len(values.shape) != 2:
            raise Exception(
                "The hclust order only supports 2D arrays right now!")

        if axis == 1:
            values = values.T

        # compute a hierarchical clustering and return the optimal leaf ordering
        D = sp.spatial.distance.pdist(values, metric)
        cluster_matrix = sp.cluster.hierarchy.complete(D)
        inds = sp.cluster.hierarchy.leaves_list(
            sp.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
        return inds
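
    # Hedged usage sketch: the returned leaf ordering is a plain index array, so it
    # can be passed straight back into __getitem__ to reorder instances:
    #
    #     order = e.hclust(metric="sqeuclidean", axis=0)
    #     e_sorted = e[list(order)]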

    def sample(self, max_samples, replace=False, random_state=0):
        """ Randomly samples the instances (rows) of the Explanation object.

        Parameters
        ----------
        max_samples : int
            The number of rows to sample. Note that if replace=False, fewer than
            max_samples rows will be drawn when explanation.shape[0] < max_samples.
        
        replace : bool
            Sample with or without replacement.
        """
        # np.random.seed returns None, so save and restore the full RNG state instead
        prev_state = np.random.get_state()
        np.random.seed(random_state)
        inds = np.random.choice(self.shape[0],
                                min(max_samples, self.shape[0]),
                                replace=replace)
        np.random.set_state(prev_state)
        return self[list(inds)]

    def _flatten_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.feature_names[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def _use_data_as_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.data[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def percentile(self, q, axis=None):
        new_self = copy.deepcopy(self)
        if self.feature_names is not None and not is_1d(
                self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array(
                [np.percentile(v, q) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = np.percentile(new_self.values, q, axis)
            if new_self.data is not None:
                new_self.data = np.percentile(new_self.data, q, axis)
        #new_self.data = None
        new_self.transform_history.append(("percentile", (axis, )))
        return new_self
Example #21
    def __init__(
        self,
        values,
        base_values=None,
        data=None,
        display_data=None,
        instance_names=None,
        feature_names=None,
        output_names=None,
        output_indexes=None,
        lower_bounds=None,
        upper_bounds=None,
        main_effects=None,
        hierarchical_values=None,
        clustering=None,
        interactions=None,
        feature_groups=None,
    ):
        self.op_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        output_dims = compute_output_dims(values, base_values, data)

        # TODO: should always be an alias once slicer supports per-row aliases
        if len(_compute_shape(feature_names)) == 1:
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(list(feature_names), 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(list(feature_names), 1)

        # TODO: should always be an alias once slicer supports per-row aliases
        if len(_compute_shape(output_names)) == 1:
            values_shape = _compute_shape(values)
            output_names = Alias(list(output_names), output_dims[0])
            # if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
            #     output_names = Alias(list(output_names), 0)
            # elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
            #     output_names = Alias(list(output_names), 1)

        if output_names is not None and not isinstance(output_names, Alias):
            l = len(_compute_shape(output_names))
            if l == 0:
                pass
            elif l == 1:
                output_names = Obj(output_names, output_dims)
            elif l == 2:
                output_names = Obj(output_names, [0] + list(output_dims))
            else:
                raise ValueError(
                    "shap.Explanation does not yet support output_names with more than 2 dimensions!"
                )

        self._s = Slicer(
            values=values,
            base_values=None if base_values is None else Obj(
                base_values, [0] + list(output_dims)),
            data=data,
            display_data=display_data,
            instance_names=None if instance_names is None else Alias(
                instance_names, 0),
            feature_names=feature_names,
            output_names=output_names,
            output_indexes=None if output_indexes is None else
            (output_dims, output_indexes),
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            main_effects=main_effects,
            hierarchical_values=hierarchical_values,
            clustering=None if clustering is None else Obj(clustering, [0]),
            interactions=interactions,
            feature_groups=feature_groups)
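
# Hedged note on the output_names handling above: a 1-D list is wrapped as one name
# per output (Obj over the output dims), while a 2-D list is treated as
# per-row-and-output names (Obj with a leading instance dim). For example, with a
# hypothetical n_rows:
#
#     Explanation(values, output_names=["class0", "class1"])             # 1-D
#     Explanation(values, output_names=[["class0", "class1"]] * n_rows)  # 2-D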
Example #22
class SliceCompare:
    def create_csurf_map(self, map_file):
        file_reader = open(map_file, 'r')
        lines = file_reader.readlines()
        csurf_map = defaultdict()
        for line in lines:
            path, sep, build_name = line.partition('\t')
            csurf_map[path] = build_name.split('\n')[0]
        self.csurf_map = csurf_map
        self.slicer = Slicer()
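
    # The map file parsed above is tab-separated, one "path<TAB>build_name" pair per
    # line; the trailing newline is stripped off the build name. A sketch of the
    # accepted format (hypothetical paths):
    #
    #     src/main.c<TAB>main_build
    #     src/util.c<TAB>util_build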

    def merge_data_control_slices(self, benchmark_folder):
        benchmarks = []
        # only the immediate subdirectories of benchmark_folder are benchmarks
        for root, dirs, files in os.walk(benchmark_folder):
            for item in dirs:
                benchmarks.append(os.path.join(root, item))
            break
        f_build_rate = open('assert_build_rate_ds.csv', 'w')
        f_slice_prop = open('assert_slice_property_ds.csv', 'w')
        f_build_rate.write(
            'Benchmark,Slices,Size of smallest slice built,Size of largest slice built,Build Rate\n'
        )
        f_slice_prop.write(
            'Benchmark,Slices,Smallest slice size,Largest slice size,Average Slice size,Min procedure count, Max procedure count, Avg procedure count,Inter procedural slices, Inter file slices\n'
        )
        f_result_csv = open('assert_result_ds.csv', 'w')
        f_result_csv.write(
            'benchmark,slices,avg-data-slice-size,avg-full-slice-size,avg-slice-size,inter-procedural-slices,inter-file-slices,build_rate\n'
        )
        build_rate = defaultdict(int)
        for benchmark in benchmarks:
            statements = 0
            bench_list = [
                'tj-histo', 'json-c-json-c', 'jonas-tig', 'Cyan4973-zstd',
                'Phildo-pixQL', 'kr-beanstalkd', 'joyent-http-parser',
                'yrutschle-sslh', 'rui314-8cc', 'udp-json-parser', 'cisco-thor'
            ]
            bench_list += [
                'libuv-libuv', 'patjak-bcwc_pcie', 'douban-beansdb',
                'droe-sslsplit', 'orangeduck-mpc', 'machinezone-tcpkali',
                'wg-wrk', 'karthick18-inception', 'vmg-houdini',
                'antirez-disque'
            ]
            if benchmark.split('/')[-1] in bench_list:
                self.inter_procedural_slices = 0
                self.slice_size = 0
                self.inter_file_slices = 0
                self.min_slice_size = 0
                self.max_slice_size = 0
                self.min_slice_procedures = 0
                self.max_slice_procedures = 0
                self.avg_slice_procedures = 0
                self.min_built_slice_size = 0
                self.max_built_slice_size = 0

                result_data_files = []
                result_control_files = []
                result_data_files, result_control_files = self.get_slice_files(
                    benchmark)
                statements = len(result_data_files)
                #logger.warn('Number of data files = '+str(len(result_data_files)))
                matching_sets = 0
                self.inter_procedural_slices = 0
                self.inter_file_slices = 0
                self.build_rate = 0
                avg_slice_size = 0
                avg_data_slice_size = 0
                avg_control_slice_size = 0
                if len(result_data_files) > 0 and len(
                        result_control_files) > 0:
                    for data_file in result_data_files:
                        f_data_file = open(data_file, 'r')
                        data_slice_lines = f_data_file.readlines()
                        f_data_file.close()
                        data_line_set = set()
                        files_in_slice = set()
                        for line in data_slice_lines:
                            if '.h' not in line:
                                if line not in data_line_set and line.strip() != '':
                                    if line.split('\t')[0] not in files_in_slice:
                                        files_in_slice.add(line.split('\t')[0])
                                    data_line_set.add(line)
                                    #self.get_wrapper_function(line)
                        control_file = data_file.replace(
                            'result_assert', 'result_assert_control')
                        f_control_file = open(control_file, 'r')
                        control_slice_lines = f_control_file.readlines()
                        f_control_file.close()
                        control_line_set = set()
                        for line in control_slice_lines:
                            if '.h' not in line:
                                if line not in control_line_set and line.strip() != '':
                                    control_line_set.add(line)
                        logger.info(str(len(data_line_set)))
                        logger.info(str(len(control_line_set)))
                        if data_line_set.issubset(control_line_set):
                            matching_sets += 1
                            merged_slices = self.merge_slices(
                                list(data_line_set), defaultdict(list),
                                list(control_line_set), defaultdict(list), 1)

                            self.slice_size += len(merged_slices)
                            avg_slice_size += len(merged_slices)

                            if self.min_slice_size == 0:
                                self.min_slice_size = len(merged_slices)
                            if len(merged_slices) < self.min_slice_size:
                                self.min_slice_size = len(merged_slices)
                            if len(merged_slices) > self.max_slice_size:
                                self.max_slice_size = len(merged_slices)

                            avg_data_slice_size += len(data_line_set)
                            avg_control_slice_size += len(control_line_set)
                            if len(files_in_slice) > 1:
                                self.inter_file_slices += 1
                            logger.critical('Slice size for ' + data_file +
                                            ' :' + str(len(merged_slices)))
                            slice_file_location = self.slicer.get_file_path(
                                data_slice_lines[0])
                            slice_code = self.slicer.get_slice_code(
                                merged_slices)
                            self.slicer.generate_slice_file(slice_code)
                            if self.slicer.build_slice_file(slice_file_location):
                                self.build_rate += 1

                                build_rate[benchmark.split('/')[-1]] += 1
                                if self.min_built_slice_size == 0:
                                    self.min_built_slice_size = len(slice_code)
                                if len(slice_code) < self.min_built_slice_size:
                                    self.min_built_slice_size = len(slice_code)
                                if len(slice_code) > self.max_built_slice_size:
                                    self.max_built_slice_size = len(slice_code)

                        else:
                            logger.warning('set mismatch!!')
                            return 0
                    logger.warning('Data slice is subset of control slice in ' +
                                   benchmark)
                f_result_csv.write(
                    benchmark.split('/')[-1] + ',' + str(statements) + ',' +
                    str(avg_data_slice_size / 100) + ',' +
                    str(avg_control_slice_size / 100) + ',' +
                    str(avg_slice_size / 100) + ',' +
                    str(self.inter_procedural_slices) + ',' +
                    str(self.inter_file_slices) + ',' +
                    str(build_rate[benchmark.split('/')[-1]]) + '\n')
                f_build_rate.write(
                    benchmark.split('/')[-1] + ',' + str(statements) + ',' +
                    str(self.min_built_slice_size) + ',' +
                    str(self.max_built_slice_size) + ',' +
                    str(build_rate[benchmark.split('/')[-1]]) + '\n')
                f_slice_prop.write(
                    benchmark.split('/')[-1] + ',' + str(statements) + ',' +
                    str(self.min_slice_size) + ',' + str(self.max_slice_size) +
                    ',' + str(self.slice_size / 100) + ',' +
                    str(self.min_slice_procedures) + ',' +
                    str(self.max_slice_procedures) + ',' +
                    str(self.avg_slice_procedures / 100) + ',' +
                    str(self.inter_procedural_slices) + ',' +
                    str(self.inter_file_slices) + '\n')
        f_result_csv.close()
        f_build_rate.close()
        f_slice_prop.close()

    def merge_slices(self, data_slice_list, data_slice_fns, control_slice_list,
                     control_slice_fns, depth):
        logger.info('data_slice_fns -' + str(data_slice_fns))
        merged_slices = []
        for data_slice in data_slice_list:
            if data_slice.strip() != '' and len(data_slice.split('\t')) == 2:
                function_decl, start_index, end_index = self.get_wrapper_function(
                    data_slice)
                function_name = function_decl.split('(')[0].split(' ')[-1]
                if function_decl != '':
                    data_slice_fns[function_decl] = [
                        function_name, start_index, end_index
                    ]
                else:
                    merged_slices.append(data_slice)
        resolve_call_sites = False
        for control_slice in control_slice_list:
            if control_slice.strip() != '' and len(control_slice.split('\t')) == 2:
                function_decl, start_index, end_index = self.get_wrapper_function(
                    control_slice)
                function_name = function_decl.split('(')[0].split(' ')[-1]
                control_slice_fns[function_decl] = [
                    function_name, start_index, end_index
                ]
                if function_decl in data_slice_fns:
                    has_new_call_sites, data_slice_fns = self.get_new_call_sites(
                        control_slice, data_slice_fns, control_slice_fns)
                    if has_new_call_sites:
                        resolve_call_sites = True
                        logger.info('call site found - ' + control_slice)
                    merged_slices.append(control_slice)
        if resolve_call_sites and depth < 5:
            return self.merge_slices(merged_slices, data_slice_fns,
                                     control_slice_list, control_slice_fns,
                                     depth + 1)
        else:
            self.avg_slice_procedures += len(data_slice_fns)
            if self.min_slice_procedures == 0:
                self.min_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) < self.min_slice_procedures:
                self.min_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > self.max_slice_procedures:
                self.max_slice_procedures = len(data_slice_fns)
            if len(data_slice_fns) > 1:
                self.inter_procedural_slices += 1
            logger.info('control slice fns - ' + str(control_slice_fns))
            return merged_slices
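
    # Hedged summary of the merge above: statements are grouped under their wrapper
    # function declarations; control-slice lines whose wrapper also appears in the
    # data slice are kept, and newly discovered call sites trigger another pass,
    # capped at depth 5 to guarantee termination.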

    def get_new_call_sites(self, slice_line, data_slice_fns,
                           control_slice_fns):
        keywords = ['if', 'switch', 'while']
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        f_cfile = open(file_name, 'r')
        lines = f_cfile.readlines()
        is_call_site = False
        line = lines[line_number - 1]
        fns_called = []
        if re.search(r'[a-zA-Z]+\([^\)]*\)(\.[^\)]*\))?', line):
            fn_names = line.split('(')
            index = 0
            for fn_name in fn_names:
                if index == len(fn_names) - 1:
                    break
                temp = ''
                logger.info('splitting ' + fn_name)
                for i in range(len(fn_name) - 1, 0, -1):
                    if fn_name[i].isalnum() or fn_name[i] == '_':
                        temp = fn_name[i] + temp
                    else:
                        break
                index += 1
                if not self.has_any_item(temp, keywords):
                    fns_called.append(temp)
                    is_call_site = True
                    logger.info('call site -' + line)
        has_new_call_sites = False
        if is_call_site:
            for fn_called in fns_called:
                for key, value in control_slice_fns.items():
                    if value[0] == fn_called:
                        if key not in data_slice_fns:
                            data_slice_fns[key] = value
                            has_new_call_sites = True
        f_cfile.close()
        return has_new_call_sites, data_slice_fns

    def get_slice_files(self, benchmark):
        result_data_files = []
        result_control_files = []
        print('Entering benchmark: ' + benchmark)
        for root, dir, files in os.walk(benchmark):
            for f in files:
                if f.startswith('result_assert') and 'result_assert_control' not in f:
                    result_data_files.append(root + '/' + f)
                if f.startswith('result_assert_control'):
                    result_control_files.append(root + '/' + f)
        return result_data_files, result_control_files

    def get_wrapper_function(self, slice_line):
        keywords = ['if', 'switch', 'while']
        special_char = [';', '=', '"']
        if len(slice_line.split('\t')) < 2:
            return '', 0, 0
        file_name = slice_line.split('\t')[0]
        line_number = int(slice_line.split('\t')[1])
        f_cfile = open(file_name, 'r')
        lines = f_cfile.readlines()
        line_num = 1
        slice_found = False
        for line in lines:
            if re.search(r'\w\(', line) and line.count('(') < 2 and line.count(')') < 2:
                if (not self.has_any_item(line, keywords)
                        and not self.has_any_item(line, special_char)):
                    decl_end_index = self.find_decl_end_index(
                        lines, line_num - 1)
                    start, end = self.find_block_limits(lines, line_num - 1)
                    if line_number >= start and line_number <= end + 1:
                        slice_found = True
                        logger.info('Slice - ' + slice_line)
                        logger.info('Slice found in function - ' + line)
                        logger.info('limits - ' + str(start) + ' to ' +
                                    str(end))
                        return line, start, end
            line_num += 1
        if not slice_found:
            logger.info('Slice ' + slice_line + ' not found in any function')
        f_cfile.close()
        return '', 0, 0

    def has_any_item(self, line, item_list):
        for item in item_list:
            if item in line:
                return True
        return False

    def find_block_limits(self, lines, line_index):
        open_brace_count = 0
        close_brace_count = 0
        start_index = line_index
        end_index = line_index
        begin_found = False
        for i in range(line_index, len(lines)):
            open_brace_count += lines[i].count('{')
            close_brace_count += lines[i].count('}')
            if open_brace_count == 1 and not begin_found:
                start_index = i
                begin_found = True
            if open_brace_count > 0 and open_brace_count == close_brace_count:
                end_index = i
                break
        return start_index, end_index
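
    # Worked example for the brace counter above (hypothetical listing):
    #
    #     lines = ["int f(", "    int x)", "{", "  if (x) {",
    #              "  }", "  return x;", "}"]
    #     find_block_limits(lines, 0)  # -> (2, 6)
    #
    # line 2 holds the first '{' and line 6 is where the open/close counts balance.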

    def find_decl_end_index(self, lines, line_number):
        for i in range(line_number, len(lines)):
            if ')' in lines[i]:
                return i

    def create_control_slice(self, benchmark_folder):
        logger.warning(self.csurf_map)
        benchmarks = []
        # only the immediate subdirectories of benchmark_folder are benchmarks
        for root, dirs, files in os.walk(benchmark_folder):
            for item in dirs:
                benchmarks.append(os.path.join(root, item))
            break
        for benchmark in benchmarks:
            f_used_in = open(benchmark + '/used_input.txt', 'r')
            for line in f_used_in.readlines():
                f_csurf_in = open(benchmark + '/input.txt', 'w')
                f_csurf_in.write(line)
                f_csurf_in.close()
                logger.warning(line)
                cfile_name = line.split(':')[0].split('/')[-1]
                line_num = int(line.split(':')[1])
                try:
                    command = 'csurf -nogui -l /home/nishanth/Workspace/PyHelium/csurf/plugin ' + benchmark + '/myproj'
                    #print('Running cmd - '+command)
                    p = Popen(command,
                              shell=True,
                              stdin=PIPE,
                              stdout=PIPE,
                              stderr=PIPE)
                    response, _ = p.communicate(input=None)
                    response = response.decode('utf8')
                    print(response)
                    #p.kill()
                except Exception as e:
                    p.kill()
                    print(e)
                response_lines = response.split('\n')
                for i in range(0, len(response_lines)):
                    if 'Slice set size' in response_lines[i]:
                        slice_set_size = int(
                            response_lines[i].split(':')[1].strip())
                        #print('Slize set = '+str(slice_set_size))
                        if slice_set_size > 0:
                            #add result set to result file.
                            f_result_in = open(
                                benchmark + '/' + 'result_assert_control' +
                                cfile_name + str(line_num) + '.txt', 'w')
                            for j in range(i + 1, len(response_lines)):
                                f_result_in.write(response_lines[j] + '\n')
                            f_result_in.close()
            f_used_in.close()
Example #23
class Explanation(object, metaclass=MetaExplanation):
    """ A slicable set of parallel arrays representing a SHAP explanation.
    """
    def __init__(self,
                 values,
                 base_values=None,
                 data=None,
                 display_data=None,
                 instance_names=None,
                 feature_names=None,
                 output_names=None,
                 output_indexes=None,
                 lower_bounds=None,
                 upper_bounds=None,
                 main_effects=None,
                 hierarchical_values=None,
                 clustering=None):
        self.op_history = []

        # cloning. TODO: better cloning :)
        if issubclass(type(values), Explanation):
            e = values
            values = e.values
            base_values = e.base_values
            data = e.data

        output_dims = compute_output_dims(values, base_values, data)

        # TODO: should always be an alias once slicer supports per-row aliases
        if len(_compute_shape(feature_names)) == 1:
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(feature_names) == values_shape[0]:
                feature_names = Alias(list(feature_names), 0)
            elif len(values_shape) >= 2 and len(feature_names) == values_shape[1]:
                feature_names = Alias(list(feature_names), 1)

        # TODO: should always be an alias once slicer supports per-row aliases
        if len(_compute_shape(output_names)) == 1:
            values_shape = _compute_shape(values)
            if len(values_shape) >= 1 and len(output_names) == values_shape[0]:
                output_names = Alias(list(output_names), 0)
            elif len(values_shape) >= 2 and len(output_names) == values_shape[1]:
                output_names = Alias(list(output_names), 1)

        self._s = Slicer(
            values=values,
            base_values=None if base_values is None else Obj(
                base_values, [0] + list(output_dims)),
            data=data,
            display_data=display_data,
            instance_names=None if instance_names is None else Alias(
                instance_names, 0),
            feature_names=feature_names,
            output_names=output_names,  # None if output_names is None else Alias(output_names, output_dims),
            output_indexes=None if output_indexes is None else
            (output_dims, output_indexes),
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            main_effects=main_effects,
            hierarchical_values=hierarchical_values,
            clustering=None if clustering is None else Obj(clustering, [0]))

    @property
    def shape(self):
        return _compute_shape(self._s.values)

    @property
    def values(self):
        return self._s.values

    @values.setter
    def values(self, new_values):
        self._s.values = new_values

    @property
    def base_values(self):
        return self._s.base_values

    @base_values.setter
    def base_values(self, new_base_values):
        self._s.base_values = new_base_values

    @property
    def data(self):
        return self._s.data

    @data.setter
    def data(self, new_data):
        self._s.data = new_data

    @property
    def display_data(self):
        return self._s.display_data

    @display_data.setter
    def display_data(self, new_display_data):
        if issubclass(type(new_display_data), pd.DataFrame):
            new_display_data = new_display_data.values
        self._s.display_data = new_display_data

    @property
    def instance_names(self):
        return self._s.instance_names

    @property
    def output_names(self):
        return self._s.output_names

    @output_names.setter
    def output_names(self, new_output_names):
        self._s.output_names = new_output_names

    @property
    def output_indexes(self):
        return self._s.output_indexes

    @property
    def feature_names(self):
        return self._s.feature_names

    @feature_names.setter
    def feature_names(self, new_feature_names):
        self._s.feature_names = new_feature_names

    @property
    def lower_bounds(self):
        return self._s.lower_bounds

    @property
    def upper_bounds(self):
        return self._s.upper_bounds

    @property
    def main_effects(self):
        return self._s.main_effects

    @main_effects.setter
    def main_effects(self, new_main_effects):
        self._s.main_effects = new_main_effects

    @property
    def hierarchical_values(self):
        return self._s.hierarchical_values

    @hierarchical_values.setter
    def hierarchical_values(self, new_hierarchical_values):
        self._s.hierarchical_values = new_hierarchical_values

    @property
    def clustering(self):
        return self._s.clustering

    @clustering.setter
    def clustering(self, new_clustering):
        self._s.clustering = new_clustering

    def cohorts(self, cohorts):
        """ Split this explanation into several cohorts.

        Parameters
        ----------
        cohorts : int or array
            If this is an integer then we auto build that many cohorts using a decision tree. If this is
            an array then we treat that as an array of cohort names/ids for each instance.
        """

        if isinstance(cohorts, int):
            return _auto_cohorts(self, max_cohorts=cohorts)
        elif isinstance(cohorts, (list, tuple, np.ndarray)):
            cohorts = np.array(cohorts)
            return Cohorts(
                **{name: self[cohorts == name]
                   for name in np.unique(cohorts)})
        else:
            raise Exception(
                "The given set of cohort indicators is not recognized! Please give an array or int."
            )
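
    # Hedged usage sketch (hypothetical labels): pass an int to auto-build cohorts
    # with a decision tree, or one label per instance to split explicitly:
    #
    #     groups = explanation.cohorts(2)
    #     groups = explanation.cohorts(["treated"] * 50 + ["control"] * 50)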

    def __repr__(self):
        out = ".values =\n" + self.values.__repr__()
        if self.base_values is not None:
            out += "\n\n.base_values =\n" + self.base_values.__repr__()
        if self.data is not None:
            out += "\n\n.data =\n" + self.data.__repr__()
        return out

    def __getitem__(self, item):
        """ This adds support for magic string indexes like "rank(0)".
        """
        if not isinstance(item, tuple):
            item = (item, )

        # convert any OpChains or magic strings
        for i, t in enumerate(item):
            orig_t = t
            if issubclass(type(t), OpChain):
                t = t.apply(self)
                # slicer does not like numpy integer indexes
                if issubclass(type(t), (np.int64, np.int32)):
                    t = int(t)
                # slicer wants lists, not numpy arrays, for indexing
                elif issubclass(type(t), np.ndarray):
                    t = [int(v) for v in t]
            elif issubclass(type(t), Explanation):
                t = t.values
            elif type(t) is str:
                if is_1d(self.feature_names):
                    ind = np.where(np.array(self.feature_names) == t)[0][0]
                    t = int(ind)
                else:
                    new_values = []
                    new_base_values = []
                    new_data = []
                    if self.output_names is not None and (
                            self.output_names.ndim >= 2
                            or self.output_names.shape[0] >= 2):
                        for i in range(len(self.values)):
                            for j in range(len(self.output_names[i])):
                                s = self.output_names[i][j]
                                if s == t:
                                    new_values.append(
                                        np.array(self.values[i][:, j]))
                                    new_data.append(np.array(self.data[i]))
                                    new_base_values.append(
                                        self.base_values[i][j])
                        new_self = copy.deepcopy(self)
                        new_self.values = np.array(new_values)
                        new_self.base_values = np.array(new_base_values)
                        new_self.data = np.array(new_data)
                        new_self.output_names = t
                        new_self.feature_names = np.array(new_data)
                        new_self.clustering = None
                    else:
                        for i in range(len(self.values)):
                            for s, v, d in zip(self.feature_names[i],
                                               self.values[i], self.data[i]):
                                if s == t:
                                    new_values.append(v)
                                    new_data.append(d)
                        new_self = copy.deepcopy(self)
                        new_self.values = new_values
                        new_self.data = new_data
                        new_self.feature_names = t
                        new_self.clustering = None
                    return new_self
            if issubclass(type(t), (np.int8, np.int16, np.int32, np.int64)):
                t = int(t)

            if t is not orig_t:
                tmp = list(item)
                tmp[i] = t
                item = tuple(tmp)

        # call slicer for the real work
        new_self = copy.copy(self)
        new_self._s = self._s.__getitem__(item)
        new_self.op_history.append({
            "name": "__getitem__",
            "args": (item, ),
            "prev_shape": self.shape
        })

        return new_self

    def __len__(self):
        return self.shape[0]

    def __copy__(self):
        new_exp = Explanation(self.values, self.base_values, self.data,
                              self.display_data, self.instance_names,
                              self.feature_names, self.output_names,
                              self.output_indexes, self.lower_bounds,
                              self.upper_bounds, self.main_effects,
                              self.hierarchical_values, self.clustering)
        new_exp.op_history = copy.copy(self.op_history)
        return new_exp

    def _apply_binary_operator(self, other, binary_op, op_name):
        new_exp = self.__copy__()
        new_exp.op_history = copy.copy(self.op_history)
        new_exp.op_history.append({
            "name": op_name,
            "args": (other, ),
            "prev_shape": self.shape
        })
        if isinstance(other, Explanation):
            new_exp.values = binary_op(new_exp.values, other.values)
            if new_exp.data is not None:
                new_exp.data = binary_op(new_exp.data, other.data)
            if new_exp.base_values is not None:
                new_exp.base_values = binary_op(new_exp.base_values,
                                                other.base_values)
        else:
            new_exp.values = binary_op(new_exp.values, other)
            if new_exp.data is not None:
                new_exp.data = binary_op(new_exp.data, other)
            if new_exp.base_values is not None:
                new_exp.base_values = binary_op(new_exp.base_values, other)
        return new_exp

    def __add__(self, other):
        return self._apply_binary_operator(other, operator.add, "__add__")

    def __radd__(self, other):
        return self._apply_binary_operator(other, operator.add, "__add__")

    def __sub__(self, other):
        return self._apply_binary_operator(other, operator.sub, "__sub__")

    def __rsub__(self, other):
        return self._apply_binary_operator(other, operator.sub, "__sub__")

    def __mul__(self, other):
        return self._apply_binary_operator(other, operator.mul, "__mul__")

    def __rmul__(self, other):
        return self._apply_binary_operator(other, operator.mul, "__mul__")

    def __truediv__(self, other):
        return self._apply_binary_operator(other, operator.truediv,
                                           "__truediv__")

    def _numpy_func(self, fname, **kwargs):
        new_self = copy.copy(self)
        axis = kwargs.get("axis", None)

        # collapse the slicer to right shape
        if axis == 0:
            new_self = new_self[0]
        elif axis == 1:
            new_self = new_self[1]
        elif axis == 2:
            new_self = new_self[2]
        if axis in [0, 1, 2]:
            # pop off the slicing operation we just used
            new_self.op_history = new_self.op_history[:-1]

        if self.feature_names is not None and not is_1d(
                self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array(
                [getattr(np, fname)(v, 0) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = getattr(np, fname)(np.array(self.values),
                                                 **kwargs)
            if new_self.data is not None:
                try:
                    new_self.data = getattr(np, fname)(np.array(self.data),
                                                       **kwargs)
                except Exception:  # data may not support this reduction; drop it
                    new_self.data = None
            if new_self.base_values is not None and issubclass(
                    type(axis), int) and len(self.base_values.shape) > axis:
                new_self.base_values = getattr(np, fname)(self.base_values,
                                                          **kwargs)
            elif issubclass(type(axis), int):
                new_self.base_values = None

        if axis == 0 and self.clustering is not None and len(
                self.clustering.shape) == 3:
            if self.clustering.std(0).sum() < 1e-8:
                new_self.clustering = self.clustering[0]
            else:
                new_self.clustering = None

        new_self.op_history.append({
            "name": fname,
            "kwargs": kwargs,
            "prev_shape": self.shape,
            "collapsed_instances": axis == 0
        })

        return new_self

    def mean(self, axis):
        return self._numpy_func("mean", axis=axis)

    def max(self, axis):
        return self._numpy_func("max", axis=axis)

    def min(self, axis):
        return self._numpy_func("min", axis=axis)

    def sum(self, axis=None, grouping=None):
        if grouping is None:
            return self._numpy_func("sum", axis=axis)
        elif axis == 1 or len(self.shape) == 1:
            return group_features(self, grouping)
        else:
            raise Exception(
                "Only axis = 1 is supported for grouping right now...")

    # def reshape(self, *args):
    #     return self._numpy_func("reshape", newshape=args)

    @property
    def abs(self):
        return self._numpy_func("abs")

    @property
    def identity(self):
        return self

    @property
    def argsort(self):
        return self._numpy_func("argsort")

    @property
    def flip(self):
        return self._numpy_func("flip")

    def hclust(self, metric="sqeuclidean", axis=0):
        """ Computes an optimal leaf ordering sort order using hclustering.
        
        hclust(metric="sqeuclidean")
        
        Parameters
        ----------
        metric : string
            A metric supported by scipy clustering.

        axis : int
            The axis to cluster along.
        """
        values = self.values

        if len(values.shape) != 2:
            raise Exception(
                "The hclust order only supports 2D arrays right now!")

        if axis == 1:
            values = values.T

        # compute a hierarchical clustering and return the optimal leaf ordering
        D = sp.spatial.distance.pdist(values, metric)
        cluster_matrix = sp.cluster.hierarchy.complete(D)
        inds = sp.cluster.hierarchy.leaves_list(
            sp.cluster.hierarchy.optimal_leaf_ordering(cluster_matrix, D))
        return inds

    def sample(self, max_samples, replace=False, random_state=0):
        """ Randomly samples the instances (rows) of the Explanation object.

        Parameters
        ----------
        max_samples : int
            The number of rows to sample. Note that if replace=False, fewer than
            max_samples rows will be drawn when explanation.shape[0] < max_samples.
        
        replace : bool
            Sample with or without replacement.
        """
        # np.random.seed returns None, so save and restore the full RNG state instead
        prev_state = np.random.get_state()
        np.random.seed(random_state)
        inds = np.random.choice(self.shape[0],
                                min(max_samples, self.shape[0]),
                                replace=replace)
        np.random.set_state(prev_state)
        return self[list(inds)]

    def _flatten_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.feature_names[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def _use_data_as_feature_names(self):
        new_values = {}
        for i in range(len(self.values)):
            for s, v in zip(self.data[i], self.values[i]):
                if s not in new_values:
                    new_values[s] = []
                new_values[s].append(v)
        return new_values

    def percentile(self, q, axis=None):
        new_self = copy.deepcopy(self)
        if self.feature_names is not None and not is_1d(
                self.feature_names) and axis == 0:
            new_values = self._flatten_feature_names()
            new_self.feature_names = np.array(list(new_values.keys()))
            new_self.values = np.array(
                [np.percentile(v, q) for v in new_values.values()])
            new_self.clustering = None
        else:
            new_self.values = np.percentile(new_self.values, q, axis)
            if new_self.data is not None:
                new_self.data = np.percentile(new_self.data, q, axis)
        #new_self.data = None
        new_self.op_history.append({
            "name": "percentile",
            "args": (axis, ),
            "prev_shape": self.shape,
            "collapsed_instances": axis == 0
        })
        return new_self
Example #24
#     Process Raw Data
#==============================================================================
    if args.intype[0] == 'raw':
        if args.interpolate:
            process_series_files.process_all_in_dir(args.indir[0],
                                                    join(out_dir,'data'))
            data_dir = join(out_dir,'data')
            
        """
        else: #just copy the files
            print "Copying data files to ", data_dir
            for csvf in glob.iglob(join(args.indir[0],"*.csv")):
                shutil.copyfile(csvf, join(data_dir, os.path.basename(csvf)))
        """
        print "Instantiating Slicer and loading series"
        slicer = Slicer(taskfile=join(data_dir,'task.xls'))
        filelist = [join(data_dir, f) for f in os.listdir(data_dir)
                    if re.compile(r".*\.csv").match(f)]
        num_subjects = len(filelist)
        slicer.load_series_from_csv('raw', filelist)
        
        if args.stats:
            pp = PdfPages(join(report_dir, 'stats.pdf'))
            stats.plot_all(slicer, pp)
            
            fig, ax = plt.subplots()
            ax.plot(range(1,num_subjects+1))
            plt.title("Number of subjects")            
            pp.savefig(fig)
            pp.close()
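
# A minimal, self-contained sketch of the PdfPages reporting pattern used above.
# Only matplotlib is assumed; 'report.pdf' and the plotted range are hypothetical
# stand-ins for the per-subject series count.
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

with PdfPages('report.pdf') as pp:
    fig, ax = plt.subplots()
    ax.plot(range(1, 11))
    ax.set_title("Number of subjects")
    pp.savefig(fig)     # append the figure as a new PDF page
    plt.close(fig)      # free the figure once it has been written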