def compute_residual(self, save_dir, multi_processing=False, n_processors=1):
    '''Compute residuals for each data batch and save them to save_dir.'''
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    batch_ids = []
    fnames_seg = []
    for batch_id in range(self.reader.n_batches):
        batch_ids.append(batch_id)
        fnames_seg.append(
            os.path.join(save_dir, 'residual_seg{}.npy'.format(batch_id)))

    #self.logger.info("computing residuals")
    if multi_processing:
        batches_in = np.array_split(batch_ids, n_processors)
        fnames_in = np.array_split(fnames_seg, n_processors)
        parmap.starmap(self.subtract_parallel,
                       list(zip(batches_in, fnames_in)),
                       processes=n_processors,
                       pm_pbar=True)
    else:
        for ctr in range(len(batch_ids)):
            self.subtract_parallel([batch_ids[ctr]], [fnames_seg[ctr]])

    self.fnames_seg = fnames_seg
def sen2cor_L2A_batch(res, L1Cdir):
    """
    Batch processing of S2 L1C files in a directory to S2 L2A

    Args:
        res (str/num): resolution, accepts 10, 20, 60, or all
        L1Cdir (str): location of S2 L1C products
    """
    # Put S2 L1C directory names in a list
    L1C_files = list(filter(
        re.compile(r'^S2.*L1C.*SAFE$').search, os.listdir(L1Cdir)))
    print("{} L1C files found in directory".format(len(L1C_files)))
    l1cList = []
    for L1C_file in L1C_files:  # Iterate over directory names
        # Check if the corresponding L2A product already exists
        checker = r'S2._MSIL2A_' + L1C_file[11:]
        checker_list = list(filter(re.compile(checker).search,
                                   os.listdir(L1Cdir)))
        checker2 = r'S2._USER_PRD_MSIL2A_' + L1C_file[20:]
        checker_list = checker_list + list(filter(
            re.compile(checker2).search, os.listdir(L1Cdir)))
        if len(checker_list) == 0:
            # Queue the sen2cor call for this individual product
            print("{} is set for processing".format(L1C_file))
            #sen2cor_L2A(res, L1Cdir+L1C_file)
            l1cList.append((res, L1Cdir + L1C_file))
        else:
            print("{} was already processed, removing from list".format(
                L1C_file))
    parmap.starmap(sen2cor_L2A, l1cList, pm_chunksize=12)
def download_hub(download_dir,
                 file_path,
                 accounts_file,
                 start_date='NOW-30DAYS',
                 end_date='NOW',
                 downloads_per_account=1,  # maximum allowed is 2
                 max_downloads=10):
    start_date = str(start_date)
    end_date = str(end_date)
    if not os.path.isdir(download_dir):
        raise ValueError('download_dir: ' + download_dir +
                         ' does not exist or is inaccessible. Your current '
                         'working directory is ' + os.getcwd() + '.')
    if not os.path.exists(file_path):
        raise ValueError('file_path: ' + file_path +
                         ' does not exist or is inaccessible. Your current '
                         'working directory is ' + os.getcwd() + '.')
    if not os.path.exists(accounts_file):
        raise ValueError('accounts_file: ' + accounts_file +
                         ' does not exist or is inaccessible. Your current '
                         'working directory is ' + os.getcwd() + '.')

    products, credentials = get_products_aoi(file_path,
                                             accounts_file,
                                             start_date=start_date,
                                             end_date=end_date)

    # Creates directory for download files
    owd = os.getcwd()  # original working directory (owd)
    new_dir = download_dir + '/hub%s' % time.strftime('%a%d%b%Y%H%M%S')
    os.mkdir(new_dir)
    os.chdir(new_dir)

    n_accounts = len(credentials)
    n_products = len(products)
    n_threads = n_products // n_accounts * downloads_per_account
    if n_threads < 1:
        n_threads = 1
    if n_threads >= max_downloads:
        n_threads = max_downloads
    div_products = dict_divider(products, n_threads)
    if n_products > 1:
        div_credentials = list(credentials.values()) * (n_accounts // n_threads)
    else:
        div_credentials = [list(credentials.values())[0]]
    parmap.starmap(download, list(zip(div_products, div_credentials)))
    #for elem in products:
    #    print elem
    #    print products
    #    os.system('unzip ' + products[elem]['title'] + '.zip')
    #    os.remove(products[elem]['title'] + '.zip')
    os.chdir(owd)
    text_file = open(download_dir + '/TIME_DIR.txt', 'w')
    text_file.write(new_dir)
    text_file.close()
    return new_dir
def run_voltage_treshold(standardized_path, standardized_dtype,
                         output_directory, run_chunk_sec='full'):
    """Run detection that thresholds on amplitude
    """
    logger = logging.getLogger(__name__)

    CONFIG = read_config()

    # get data reader
    #n_sec_chunk = CONFIG.resources.n_sec_chunk*CONFIG.resources.n_processors
    batch_length = CONFIG.resources.n_sec_chunk
    n_sec_chunk = 0.5
    print("  batch length (sec): ", batch_length,
          " (longer increases speed a bit)")
    print("  length of each seg (sec): ", n_sec_chunk)
    buffer = CONFIG.spike_size
    if run_chunk_sec == 'full':
        chunk_sec = None
    else:
        chunk_sec = run_chunk_sec

    reader = READER(standardized_path, standardized_dtype, CONFIG,
                    batch_length, buffer, chunk_sec)

    # number of processed chunks
    n_mini_per_big_batch = int(np.ceil(batch_length / n_sec_chunk))
    total_processing = int(reader.n_batches * n_mini_per_big_batch)

    # neighboring channels
    channel_index = make_channel_index(CONFIG.neigh_channels,
                                       CONFIG.geom, steps=2)

    if CONFIG.resources.multi_processing:
        parmap.starmap(run_voltage_threshold_parallel,
                       list(zip(np.arange(reader.n_batches))),
                       reader,
                       n_sec_chunk,
                       CONFIG.detect.threshold,
                       channel_index,
                       output_directory,
                       processes=CONFIG.resources.n_processors,
                       pm_pbar=True)
    else:
        for batch_id in range(reader.n_batches):
            run_voltage_threshold_parallel(batch_id, reader, n_sec_chunk,
                                           CONFIG.detect.threshold,
                                           channel_index, output_directory)
def apply_heuristics(self, heuristics, primitive_matrix, feat_combos,
                     beta_opt, mode=None):
    """
    Apply given heuristics to given feature matrix X and abstain by beta

    heuristics: list of pre-trained logistic regression models
    feat_combos: primitive indices to apply heuristics to
    beta: best beta value for associated heuristics
    """
    if f'{heuristics[0].__class__}' == "<class 'program_synthesis.synthesizer.dummyKneighborClassifier'>":
        if self.cuda:
            ######## gpu
            L = np.zeros((np.shape(primitive_matrix)[0], len(heuristics)))
            for i, hf in enumerate(heuristics):
                L[:, i] = marginals_to_labels_cuda(
                    hf, beta_opt[i], feat_combos[i],
                    primitive_matrix[:, feat_combos[i]], self)
            return L
        else:
            ######## using parmap
            L_ = parmap.starmap(marginals_to_labels,
                                list(zip(heuristics, beta_opt, feat_combos)),
                                primitive_matrix,
                                self,
                                pm_pbar=True)
            return np.transpose(np.array(L_))

            ######## single-core
            # L = np.zeros((np.shape(primitive_matrix)[0], len(heuristics)))
            # for i, hf in enumerate(heuristics):
            #     L[:, i] = marginals_to_labels(hf, beta_opt[i], feat_combos[i],
            #                                   primitive_matrix, self, mode=mode)
            # return L
    else:
        ######## using parmap
        L_ = parmap.starmap(marginals_to_labels,
                            list(zip(heuristics, beta_opt, feat_combos)),
                            primitive_matrix,
                            self,
                            mode=mode,
                            pm_pbar=True)
        return np.transpose(np.array(L_))
def append_price():
    # '코드리스트_test.xlsx' is the ticker-list workbook; '종목코드' = ticker code
    codes = pd.read_excel('코드리스트_test.xlsx',
                          converters={'종목코드': str})['종목코드']
    # codes = ['095570']
    pages = range(1, 1000)

    for code in codes:
        # columns: date, close, change, open, high, low, volume
        csv = pd.DataFrame(
            columns=['날짜', '종가', '전일비', '시가', '고가', '저가', '거래량'])
        path = "./temp/" + str(code) + ".db"
        con = sqlite3.connect(path)
        csv.to_sql('price', con, if_exists='replace')
        input_list = list(itertools.product([code], pages))
        parmap.starmap(get_price, input_list, pm_pbar=True)
def get_evoked_map(mouse):
    lofiles, lofilenames = get_file_list(base_dir, mouse)
    print(lofilenames)
    lop = get_distance_var(lofiles)
    all_frames = get_video_frames(lofiles)

    print("Aligning all video frames...")
    all_frames = parmap.starmap(shift_frames, zip(all_frames, lop))
    all_frames = np.asarray(all_frames, dtype=np.float32)
    print(np.shape(all_frames))

    new_all_frames = parmap.map(process_frames_evoked, all_frames)

    all_frames = np.reshape(
        all_frames,
        (all_frames.shape[0] * all_frames.shape[1],
         all_frames.shape[2], all_frames.shape[3]))
    save_to_file("conc_RAW.raw", all_frames, np.float32)

    print("Creating array..")
    new_all_frames = np.asarray(new_all_frames, dtype=np.float32)
    print("Averaging together...")
    new_all_frames = np.mean(new_all_frames, axis=0)
    print(np.shape(new_all_frames))
    save_to_file("evoked_trial_noBP_GSR.raw", new_all_frames, np.float32)
def get_location(self):
    """
    Extracts the location of each pixel in the satellite image
    """
    self.ncols = self.satellite_gdal.RasterXSize // 2
    self.nrows = self.satellite_gdal.RasterYSize // 2
    self.length_df = self.nrows * self.ncols
    print('Columns, rows', self.ncols, self.nrows)
    cols_grid, rows_grid = np.meshgrid(range(0, self.ncols),
                                       range(0, self.nrows))
    self.cols_grid = cols_grid.flatten()
    self.rows_grid = rows_grid.flatten()
    print('Checking the meshgrid procedure works')
    # getting a series of lat lon points for each pixel
    self.geotransform = self.satellite_gdal.GetGeoTransform()
    print('Getting locations')
    self.location_series = np.array(
        parmap.starmap(pixel_to_coordinates,
                       zip(self.cols_grid, self.rows_grid),
                       self.geotransform,
                       processes=self.processes))
    print('Converting to Points')
    pool = Pool(self.processes)
    self.location_series = pool.map(point_wrapper, self.location_series)
def pairwise(self, pairwiseList, nprocs=1, **kwargs):
    """Pairwise read mapping based on list of references and reads to map.

    Args:
        pairwiseList -- list of tuples (i,j,refIndex,readFile)
            'i' and 'j' are comparison indices
        nprocs -- max number of parallel mapper calls
        kwargs -- passed to mapper method
    """
    # adding kwargs to function
    new_mapper = partial(self, **kwargs)

    # making trimmed list of tuples
    trimmed = [(i[2], i[3]) for i in pairwiseList]

    # calling mapper
    samFiles = parmap.starmap(new_mapper, trimmed, processes=nprocs)

    # creating a numpy array for output
    #simSamFiles = np.array([['' for i in range(n_refs)] for j in range(n_refs)], dtype=object)

    # appending samFiles to tuples
    pairwiseList2 = []
    for i, samFile in enumerate(samFiles):
        pairwiseList2.append(pairwiseList[i] + (samFile,))

    return pairwiseList2
def parallel(self, names, mg, nprocs=1, **kwargs):
    """Calling mapper using multiple processors.

    Args:
        names -- NameFile instance with iter_names() method
        mg -- MetaFile instance with readFile attrib
        nprocs -- number of parallel calls
        kwargs -- passed to mapper call

    Return:
        refSamFile attrib set for each name in names
    """
    # making list of tuples (indexFile, readFile)
    lt = [(name.get_indexFile(), mg.get_readFile())
          for name in names.iter_names()]

    # altering function kwargs
    new_mapper = partial(self, **kwargs)

    # calling mapper
    samFiles = parmap.starmap(new_mapper, lt, processes=nprocs)

    # adding samFile attrib to name instances
    for i, name in enumerate(names.iter_names()):
        name.set_refSamFile(samFiles[i])
def NCC_best_template_search(c1, c2, im1, im2, width=60,
                             c1_init=None, c2_init=None, search_w=100):
    '''
    Finds the best center according to block matching search
    '''
    searchx, searchy = find_search_pixel(c1, c2, search_w)
    if c1_init is None:
        xv, yv = find_template_pixel(c1, c2, width,
                                     im1.shape[1], im1.shape[0])
    else:
        xv, yv = find_template_pixel(c1_init, c2_init, width,
                                     im1.shape[1], im1.shape[0])
    # wrap in an array so the elementwise comparison below works
    NCC_all = np.array(parmap.starmap(get_NCC,
                                      zip(np.ravel(searchx),
                                          np.ravel(searchy)),
                                      im1, im2, width, yv, xv,
                                      pm_parallel=True, pm_processes=2))
    maxNCC = np.max(NCC_all)
    if np.sum(NCC_all == -1) > 0:
        print('Number of weird NCC {}'.format(np.sum(NCC_all == -1)))
    if maxNCC == -1:
        print('VERY WEIRD ALL NCC ARE -1')
    idx = np.argmax(NCC_all)
    best_c1, best_c2 = np.ravel(searchx)[idx], np.ravel(searchy)[idx]
    return best_c1, best_c2, maxNCC
def compute_norm_dist_to_tissue_parallel(locs, tissue_mat_new,
                                         target_df, result_df):
    num_cores = mp.cpu_count()
    if num_cores > math.floor(target_df.shape[0] / 2):
        num_cores = int(math.floor(target_df.shape[0] / 2))
    ttt = np.array_split(target_df, num_cores, axis=0)
    tuples = [(l, d, u, c) for l, d, u, c in zip(
        repeat(locs, num_cores),
        repeat(tissue_mat_new, num_cores),
        ttt,
        repeat(result_df, num_cores))]
    # repeat(p_cutoff, num_cores))]
    dist_results = parmap.starmap(compute_norm_dist_to_tissue, tuples,
                                  pm_processes=num_cores, pm_pbar=True)
    dd = [dist_results[i][0] for i in range(len(dist_results))]
    dist_val = reduce(operator.add, dd)
    gg = [dist_results[i][1] for i in range(len(dist_results))]
    genes = reduce(operator.add, gg)
    re = [dist_results[i][2] for i in range(len(dist_results))]
    recal_genes = reduce(operator.add, re)
    dist_norm = [val / max(dist_val) for val in dist_val]
    dist_df = pd.DataFrame([genes, dist_val, dist_norm]).T
    dist_df.columns = ['geneID', 'dist', 'norm_dist']
    dist_df.index = genes
    return dist_df, recal_genes
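# The snippet above uses a pattern that recurs throughout this collection:
# np.array_split() chunks the workload across cores while itertools.repeat()
# duplicates the shared, read-only arguments, and the zipped tuples feed
# parmap.starmap. A minimal, self-contained sketch of that pattern follows;
# the worker `row_sums` and the toy data are illustrative, not from the
# original code.
import math
import multiprocessing as mp
from itertools import repeat

import numpy as np
import parmap


def row_sums(chunk, scale):
    # worker: operates on one chunk with a shared scale parameter
    return (chunk.sum(axis=1) * scale).tolist()


if __name__ == '__main__':
    data = np.arange(40.0).reshape(10, 4)
    num_cores = min(mp.cpu_count(), math.floor(data.shape[0] / 2))
    chunks = np.array_split(data, num_cores, axis=0)
    tuples = list(zip(chunks, repeat(2.0, num_cores)))
    partial_results = parmap.starmap(row_sums, tuples,
                                     pm_processes=num_cores)
    # concatenate the per-chunk results back into one flat list
    result = [x for part in partial_results for x in part]
    print(result)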
def recalc_dist_to_tissue_parallel(locs, data_norm, cellGraph, gmmDict,
                                   test_genes, tissue_mat_new, add_sf=30,
                                   unary_scale_factor=100, label_cost=10,
                                   algorithm='expansion'):
    num_cores = mp.cpu_count()
    if num_cores > math.floor(len(test_genes) / 2):
        num_cores = int(math.floor(len(test_genes) / 2))
    ttt = np.array_split(test_genes, num_cores, axis=0)
    tuples = [(l, d, c, g, t, ts, a, u, ll, al)
              for l, d, c, g, t, ts, a, u, ll, al in zip(
                  repeat(locs, num_cores),
                  repeat(data_norm, num_cores),
                  repeat(cellGraph, num_cores),
                  repeat(gmmDict, num_cores),
                  ttt,
                  repeat(tissue_mat_new, num_cores),
                  repeat(add_sf, num_cores),
                  repeat(unary_scale_factor, num_cores),
                  repeat(label_cost, num_cores),
                  repeat(algorithm, num_cores))]
    dist_results = parmap.starmap(recalc_dist_to_tissue, tuples,
                                  pm_processes=num_cores, pm_pbar=True)
    result_df_new = pd.DataFrame()
    best_dist_df = pd.DataFrame()
    for i in range(len(dist_results)):
        result_df_new = pd.concat([result_df_new, dist_results[i][0]])
        best_dist_df = pd.concat([best_dist_df, dist_results[i][1]])
    return result_df_new, best_dist_df
def identify_spatial_genes(locs, data_norm, cellGraph, gmmDict,
                           smooth_factor=10, unary_scale_factor=100,
                           label_cost=10, algorithm='expansion'):
    '''
    main function to identify spatially variable genes

    :param locs: spatial coordinates (n, 2)
    :param data_norm: normalized gene expression
    :param smooth_factor: default 10
    :param unary_scale_factor: default 100
    :param label_cost: default 10
    :param algorithm: default 'expansion'
    :rtype: prediction: a dataframe
    '''
    # pool = mp.Pool()
    num_cores = mp.cpu_count()
    if num_cores > math.floor(data_norm.shape[1] / 2):
        num_cores = int(math.floor(data_norm.shape[1] / 2))
    ttt = np.array_split(data_norm, num_cores, axis=1)
    tuples = [(l, d, c, g, s, u, b, a)
              for l, d, c, g, s, u, b, a in zip(
                  repeat(locs, num_cores),
                  ttt,
                  repeat(cellGraph, num_cores),
                  repeat(gmmDict, num_cores),
                  repeat(smooth_factor, num_cores),
                  repeat(unary_scale_factor, num_cores),
                  repeat(label_cost, num_cores),
                  repeat(algorithm, num_cores))]
    results = parmap.starmap(compute_spatial_genomewise_optimize_gmm, tuples,
                             pm_processes=num_cores, pm_pbar=True)
    # pool.close()

    # p_values, genes, diff_p_values, exp_diff, smooth_factors, pred_labels
    nnn = [results[i][0] for i in np.arange(len(results))]
    nodes = reduce(operator.add, nnn)
    ppp = [results[i][1] for i in np.arange(len(results))]
    p_values = reduce(operator.add, ppp)
    ggg = [results[i][2] for i in np.arange(len(results))]
    genes = reduce(operator.add, ggg)
    # exp_ppp = [results[i][3] for i in np.arange(len(results))]
    # exp_pvalues = reduce(operator.add, exp_ppp)
    # exp_ddd = [results[i][4] for i in np.arange(len(results))]
    # exp_diffs = reduce(operator.add, exp_ddd)
    fff = [results[i][3] for i in np.arange(len(results))]
    s_factors = reduce(operator.add, fff)
    lll = [results[i][4] for i in np.arange(len(results))]
    pred_labels = reduce(operator.add, lll)

    best_p_values = [min(i) for i in p_values]
    fdr = multi.multipletests(np.array(best_p_values), method='fdr_bh')[1]
    #exp_fdr = multi.multipletests(np.array(exp_pvalues), method='fdr_bh')[1]
    labels_array = np.array(pred_labels).reshape(
        len(genes), pred_labels[0].shape[0])
    data_array = np.array((genes, p_values, fdr, s_factors, nodes),
                          dtype=object).T
    t_array = np.hstack((data_array, labels_array))
    c_labels = ['p_value', 'fdr', 'smooth_factor', 'nodes']
    for i in np.arange(labels_array.shape[1]) + 1:
        temp_label = 'label_cell_' + str(i)
        c_labels.append(temp_label)
    result_df = pd.DataFrame(t_array[:, 1:],
                             index=t_array[:, 0],
                             columns=c_labels)
    return result_df
def sampling(self):
    """
    Constructs a weighted sample of images from the GeoDataFrame

    Returns:
        (array) sample_idx: index of sampled images

    Note:
        Keras uses the last x percent of data in cross validation.
        Have to shuffle here to ensure that the last ten percent isn't
        just the southern-most rows of information.
    """
    # Getting the sum of urban pixels for each patch
    self.pop_array = self.df_image['pop_density'].fillna(0)
    self.pop_array = np.array(self.pop_array).reshape(
        (self.nrows, self.ncols))
    print('extract patches')
    self.image_slicer(self.pop_array)
    print('get locations for individual frames')
    pool = Pool(self.processes)
    cols_grid = pool.map(adder, self.indices[:, 0])
    rows_grid = pool.map(adder, self.indices[:, 1])
    print('Max of cols grid after slicing:', max(cols_grid))
    print('Max of rows grid after slicing:', max(rows_grid))
    self.frame_location_series = parmap.starmap(
        pixel_to_coordinates,
        zip(cols_grid, rows_grid),
        self.geotransform,
        processes=self.processes)
    print('converting locations to Points')
    self.frame_location_series = \
        pool.map(Point, self.frame_location_series)
    pop_count = np.array([np.mean(patch) for patch in self.patches])
    self.df_sample = pd.DataFrame(pop_count, columns=['pop_ave'])
    # Getting the locations
    self.df_sample['location'] = self.frame_location_series
    # Creating sample weights
    seed = 1975
    self.pop_mean_sample = self.df_sample.sample(
        frac=self.sample_rate,
        replace=True,
        weights='pop_ave',
        random_state=seed)
    self.sample_idx = np.array(self.pop_mean_sample.index.values)
    # ensuring that we get some values with zero population
    self.df_zero_sample = self.df_sample[self.df_sample['pop_ave'] == 0]
    self.pop_mean_sample_zero = self.df_zero_sample.sample(
        frac=self.sample_rate, replace=True, random_state=seed)
    self.sample_idx_zero = np.array(self.pop_mean_sample_zero.index.values)
    # combining with >0 sample
    self.sample_idx = np.concatenate(
        [self.sample_idx, self.sample_idx_zero])
    self.pop_output_data = np.concatenate(
        [self.pop_mean_sample, self.pop_mean_sample_zero]).T[0]
    # shuffling so that we don't have the zero pop areas at end of sample
    p = np.random.permutation(len(self.sample_idx))
    self.sample_idx = self.sample_idx[p]
    self.pop_output_data = np.array(
        self.pop_output_data[p]).reshape((len(self.pop_output_data), 1))
def exec18():
    # load business days ('영업일' = business day)
    dates = pd.read_excel('workingdays_201912.xlsx', converters={'영업일': str})
    date1 = dates['영업일']
    # date1 = ['2020.06.03']  # for testing

    # load ticker codes ('종목코드' = ticker code)
    codes = pd.read_excel('코드리스트.xlsx', converters={'종목코드': str})
    code = codes['종목코드']
    # code = ['011790']  # for testing

    # run over each loaded date
    for date in date1:
        print(date)
        date = [date]
        input_list = list(itertools.product(date, code))
        parmap.starmap(excute, input_list, pm_pbar=True)
def parallel_starmap(f, args, processes=1):
    """ Wrapper function for 'parmap.starmap': parallelises the computations
    in 'starmap' form if required. If only one process is needed,
    computations are performed serially """
    if processes == 1:
        return [f(*arg) for arg in args]
    return parmap.starmap(f, args, processes=processes)
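# A quick usage sketch for the wrapper above, assuming parallel_starmap is
# in scope; the worker `hypot` and the argument list are illustrative, not
# from the original. With processes=1 the list comprehension runs serially;
# any larger value hands the same argument tuples to parmap.starmap, so both
# paths return identical results.
import math


def hypot(a, b):
    # worker: one call per argument tuple
    return math.sqrt(a * a + b * b)


if __name__ == '__main__':
    args = [(3, 4), (5, 12), (8, 15)]
    assert parallel_starmap(hypot, args, processes=1) == \
        parallel_starmap(hypot, args, processes=2)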
def calculate_mean_utt_length(raw_wavs, sr):
    n_worker = int(cpu_count() * cpu_rate)
    logging.info(
        "{}% resources ({} cpu core(s)) will be used for mean utterance length calculation"
        .format(cpu_rate * 100, n_worker))
    # return pool.starmap(_calculate_mean_utt_length, zip(raw_wavs, repeat(sr)))
    return parmap.starmap(_calculate_mean_utt_length,
                          list(zip(raw_wavs, repeat(sr))),
                          pm_processes=n_worker,
                          pm_pbar=True)
def pre_landsat_batch(data_dir, out_dir, ref_raster):
    # Get a list of Landsat product directory names
    landsat_products = list(filter(
        re.compile(r'^L.*[0-9]$').search, os.listdir(data_dir)))

    for key in landsat_products:
        ldir = list(filter(re.compile('tif$').search,
                           os.listdir(data_dir + key)))

        # Create directories in dest location
        if not os.path.exists(out_dir + key):
            os.makedirs(out_dir + key)

        # Put bands in a list
        bands = list(
            map(
                lambda x: (data_dir + key + '/' + x,
                           out_dir + key + '/ref_' + x.split('_')[-2] +
                           x.split('_')[-1],
                           ref_raster), ldir))

        parmap.starmap(pre_process_landsat, bands)
def uncompress_files(data_dir, unzip_dir=None):
    """
    Unzips every zipfile in the path, and stores in directory with
    zipfile name + .SAFE

    Args:
        data_dir (str): directory where zipfiles are located
        unzip_dir (str): directory where files will be unzipped,
            default is data_dir
    """
    # List all zip files in directory
    eo_zip_files = list(filter(re.compile('zip$').search,
                               os.listdir(data_dir)))
    # List all tar files in directory
    eo_tar_files = list(filter(re.compile('tar.gz$').search,
                               os.listdir(data_dir)))

    # Check if a data folder exists
    if unzip_dir is None:
        unzip_direc = data_dir
    else:
        unzip_direc = unzip_dir
        if not os.path.exists(unzip_direc):
            os.makedirs(unzip_direc)
            print('New directory {} was created'.format(unzip_direc))

    # Make sure uncompress path ends with slash
    #if unzip_direc[-1] != '/':
    #    unzip_direc = unzip_direc + '/'

    # Put parameter sets in tuples
    eo_zip_files = list(map(lambda x: (unzip_direc, data_dir, x),
                            eo_zip_files))
    eo_tar_files = list(map(lambda x: (unzip_direc, data_dir, x),
                            eo_tar_files))

    parmap.starmap(unzip_eo, eo_zip_files)
    parmap.starmap(untar_eo, eo_tar_files)
def multiGMM(data_norm):
    num_cores = mp.cpu_count()
    if num_cores > math.floor(data_norm.shape[1] / 2):
        num_cores = int(math.floor(data_norm.shape[1] / 2))
    # print(num_cores)
    ttt = np.array_split(data_norm, num_cores, axis=1)
    #print(ttt)
    tuples = [d for d in zip(ttt)]  # zip() over one list yields 1-tuples
    gmmDict_ = parmap.starmap(gmm_model, tuples,
                              pm_processes=num_cores, pm_pbar=True)
    gmmDict = {}
    for i in np.arange(len(gmmDict_)):
        gmmDict.update(gmmDict_[i])  # dict.update() merges each chunk's dict
    return gmmDict
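# Note on the snippet above: zip(ttt) wraps each chunk in a 1-tuple so that
# parmap.starmap unpacks it back into a single positional argument. When the
# worker takes exactly one argument, parmap.map over the chunks is the more
# direct equivalent. A minimal sketch of that equivalence; the worker
# `chunk_stats` is illustrative, not from the original.
import numpy as np
import parmap


def chunk_stats(chunk):
    # worker: one small result dict per chunk
    return {float(chunk[0, 0]): chunk.mean()}


if __name__ == '__main__':
    chunks = np.array_split(np.arange(24.0).reshape(4, 6), 3, axis=1)
    via_starmap = parmap.starmap(chunk_stats, [(c,) for c in chunks])
    via_map = parmap.map(chunk_stats, chunks)
    assert via_starmap == via_map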
def calculate_speaking_rate(raw_wavs, texts, sr, dictionary):
    n_worker = int(cpu_count() * cpu_rate)
    logging.info(
        "{}% resources ({} cpu core(s)) will be used for speaking_rate calculation"
        .format(cpu_rate * 100, n_worker))
    # rates = pool.starmap(_calculate_speaking_rate, zip(raw_wavs, texts, repeat(sr), repeat(dictionary)))
    rates = parmap.starmap(_calculate_speaking_rate,
                           list(zip(raw_wavs, texts, repeat(sr),
                                    repeat(dictionary))),
                           pm_processes=n_worker,
                           pm_pbar=True)
    return rates
def sen2_batch(res, dir):
    # Creates a list of arguments based on number of files to run
    os.chdir(dir)
    datafiles = os.listdir(dir)
    slist = []

    for files in datafiles:
        # checks for L1C folders
        checker1 = "L1C"
        if files[7:10] == checker1 or files[16:19] == checker1:
            slist.append((res, files))
        # checks for unfinished L2A folders and deletes them
        checker2 = "L2A"
        if files[7:10] == checker2 or files[16:19] == checker2:
            for subf in os.listdir(os.path.join(dir, files)):
                if len(subf) <= 8:
                    shutil.rmtree(files, ignore_errors=True)

    # resolving the bug that shows an error after deleting files
    if not os.path.isdir(dir):
        raise ValueError('working_directory: ' + dir +
                         ' does not exist or is inaccessible. Your current '
                         'working directory is ' + os.getcwd() + '.')

    # goes for optimal processing setup since GIPP parallelizing is set to AUTO
    parmap.starmap(sen2_single, slist)

    dir_L1C = dir
    return dir_L1C
def find_optimal_beta(self, heuristics, X, feat_combos, ground):
    """
    Returns optimal beta for given heuristics

    heuristics: list of pre-trained logistic regression models
    X: primitive matrix
    feat_combos: feature indices to apply heuristics to
    ground: ground truth associated with X data
    """
    if f'{heuristics[0].__class__}' == "<class 'program_synthesis.synthesizer.dummyKneighborClassifier'>":
        ######## single-core, gpu
        if self.cuda:
            beta_opt = []
            # for i, hf in enumerate(heuristics):
            #     X_.append(all_pair_dist_cuda(self.val_primitive_matrix[:, feat_combos[i]],
            #                                  X[:, feat_combos[i]], feat_combos[i]))
            #
            # marginals = parmap.map(heuristics[0].predict_proba, X_, self.val_ground)
            #
            # for i, hf in enumerate(heuristics):
            #     beta_opt.append((self.beta_optimizer(marginals[i], ground)))
            for i, hf in enumerate(heuristics):
                X_ = all_pair_dist_cuda(
                    cp.array(self.val_primitive_matrix[:, feat_combos[i]]),
                    cp.array(X[:, feat_combos[i]]),
                    feat_combos[i])
                marginals = hf.predict_proba_cuda(X_, self.val_ground)
                beta_opt.append(self.beta_optimizer_cuda(marginals, ground))
            self.betas.append(beta_opt[-1])
        else:
            ######## with parmap
            # beta_opt = parmap.starmap(self.find_dist_and_proba_and_beta,
            #                           list(zip(heuristics, feat_combos)),
            #                           X, ground, pm_pbar=True)
            # self.betas = beta_opt

            ######## single-core, numba
            beta_opt = []
            for i, hf in enumerate(heuristics):
                #print(i, end='\r')
                X_ = all_pair_dist(self.val_primitive_matrix[:, feat_combos[i]],
                                   X[:, feat_combos[i]], feat_combos[i])
                marginals = hf.predict_proba(X_, self.val_ground)
                beta_opt.append(self.beta_optimizer(marginals, ground))
            self.betas.append(beta_opt[-1])
    else:
        ######## with parmap
        beta_opt = parmap.starmap(self.find_proba_and_beta,
                                  list(zip(heuristics, feat_combos)),
                                  X, ground, pm_pbar=True)
        self.betas = beta_opt
    return beta_opt
def cross_val_proba_score_parmap(estimator, X, y, scoring=multilabel_prec,
                                 scoring_arg1=1, scoring_arg2=5, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True)
    scores = parmap.starmap(parmap_wrap, kf.split(X), X, y, estimator,
                            scoring=scoring,
                            scoring_arg1=scoring_arg1,
                            scoring_arg2=scoring_arg2)
    cv_score = np.asarray(scores).mean()
    return cv_score
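# The snippet above feeds KFold's (train_index, test_index) pairs straight
# into parmap.starmap, with X, y and the estimator passed once as shared
# positional arguments. A minimal sketch of that pattern with a hypothetical
# scoring worker; `fold_accuracy` is illustrative, not the original
# parmap_wrap.
import numpy as np
import parmap
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold


def fold_accuracy(train_idx, test_idx, X, y, estimator):
    # fit on the training fold, score on the held-out fold
    model = estimator.fit(X[train_idx], y[train_idx])
    return model.score(X[test_idx], y[test_idx])


if __name__ == '__main__':
    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 3))
    y = (X.sum(axis=1) > 0).astype(int)
    kf = KFold(n_splits=5, shuffle=True, random_state=0)
    scores = parmap.starmap(fold_accuracy, kf.split(X), X, y,
                            LogisticRegression())
    print(np.mean(scores))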
def calc_silhouette_per_gene(genes=None, expr=None, dissim=None,
                             examine_top=0.1, seed=-1, num_cores=8,
                             bar=True):
    if genes is None or expr is None or dissim is None:
        sys.stderr.write("Need genes, expr, dissim\n")
        return
    if seed != -1 and seed >= 0:
        np.random.seed(seed)
    sys.stdout.write("Started 2 " + "\n")
    sil = []
    ncell = expr.shape[1]
    ex = int((1.0 - examine_top) * 100.0)
    subexpr = np.array_split(expr, num_cores, axis=0)
    subgenes = np.array_split(np.array(genes), num_cores)
    subgenes = [i.tolist() for i in subgenes]
    print('number of cores : ' + str(num_cores))
    dis_list = []
    for i in range(num_cores):
        dis_list.append(dissim)
    assert len(subexpr) == len(subgenes)
    tuples = [(expr, ncell, genes, exa, dis)
              for expr, ncell, genes, exa, dis in zip(
                  subexpr,
                  repeat(ncell, num_cores),
                  subgenes,
                  repeat(examine_top, num_cores),
                  dis_list)]
    results = parmap.starmap(process, tuples,
                             pm_processes=num_cores, pm_pbar=bar)
    for i in np.arange(len(results)):
        sil += results[i]
    res = []
    for ig, g in enumerate(genes):
        this_avg = sil[ig][1]
        this_sil = sil[ig][2]
        res.append((g, this_sil))
    res.sort(key=itemgetter(1), reverse=True)
    return res
def apply_rotation_correction(SHAPE_ROTATION_RESULTS_PATH,
                              APPLY_SHAPE_ROTATION_RESULTS_PATH):
    '''
    > 2.4
    SHAPE_ROTATION_RESULTS_PATH - source
    APPLY_SHAPE_ROTATION_RESULTS_PATH - export
    '''
    import parmap

    ## read correction results
    df_corrections = pd.read_csv(SHAPE_ROTATION_RESULTS_PATH)
    #display(df_corrections.head())
    #display(df_corrections.Rotate.value_counts())

    # apply only once
    if not os.path.exists(APPLY_SHAPE_ROTATION_RESULTS_PATH):
        print(APPLY_SHAPE_ROTATION_RESULTS_PATH,
              ' does not exist -> apply rotation on all images')
        '''
        apply rotation to ALL files
        takes ~2 min (threaded with the setting below)
        '''
        N_CPU = multiprocessing.cpu_count() - 10
        args_1 = df_corrections.file.to_list()
        args_2 = df_corrections.Rotate.to_list()
        all_args = list(zip(args_1, args_2))
        results = parmap.starmap(correct_img_rotation,
                                 all_args,
                                 pm_pbar=True,
                                 pm_parallel=True,
                                 pm_processes=N_CPU)
        pd.DataFrame(list(zip(args_1, args_2, results)),
                     columns=['file', 'was_rotated', 'outcome']
                     ).to_csv(APPLY_SHAPE_ROTATION_RESULTS_PATH, index=False)
    else:
        print('already applied rotation correction to data!')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-ann_dir", help="Annotation directory")
    parser.add_argument("-clueweb_dir", help="Clueweb directory")
    parser.add_argument("-output_dir", help="Output directory")
    parser.add_argument("-num_processes", help="Number of processes to run")
    args = parser.parse_args()
    num_processes = int(args.num_processes)

    # Iterate over each subdirectory in the clueweb dir
    for subdir, dirs, files in os.walk(args.clueweb_dir):
        for folder in dirs:
            ann_dir = os.path.join(args.ann_dir, folder)
            clueweb_dir = os.path.join(args.clueweb_dir, folder)
            output_dir = os.path.join(args.output_dir, folder)

            ann_iter = sorted(os.listdir(ann_dir))
            clueweb_iter = sorted(os.listdir(clueweb_dir))

            # ann_dir and clueweb_dir sometimes have a different number of
            # files. Loop through and create a new ann list with the
            # correct files.
            ann_list = []
            for cw_file in clueweb_iter:
                for ann_file in ann_iter:
                    # Split to only get the filename, not file extensions
                    if cw_file.split(".")[0] == ann_file.split(".")[0]:
                        ann_list.append(ann_file)

            kwargs = {'processes': num_processes}
            start = time.time()
            # Read and clean files in parallel
            results = parmap.starmap(read_and_clean_files,
                                     zip(clueweb_iter, ann_list),
                                     clueweb_dir, ann_dir, **kwargs)
            end = time.time()
            print("Time used reading and cleaning all files", end - start)

            start = time.time()
            # Initiate indexer
            indexer = Indexer(output_dir)
            # Index all the cleaned records
            indexer.index_files(results)
            end = time.time()
            print("Time used indexing all files", end - start)
def calculate_f0(raw_wavs, sr, frame_period=5, parallel=True):
    f0s = []
    logging.info("Calculating f0 ...")
    if not parallel:
        for wav in raw_wavs:
            f0s.append(_calculate_f0(wav, sr, frame_period))
    else:
        n_worker = int(cpu_count() * cpu_rate)
        logging.info(
            "{}% resources ({} cpu core(s)) will be used for f0 calculation".
            format(cpu_rate * 100, n_worker))
        # f0s = pool.starmap(_calculate_f0, zip(raw_wavs, repeat(sr), repeat(frame_period)))
        f0s = parmap.starmap(_calculate_f0,
                             list(zip(raw_wavs, repeat(sr),
                                      repeat(frame_period))),
                             pm_processes=n_worker,
                             pm_pbar=True)
    return f0s
def max_posterior(gmm, U, coords, covar=None):
    import multiprocessing
    import parmap
    pool = multiprocessing.Pool()
    n_chunks, chunksize = gmm._mp_chunksize()

    log_p = [[] for k in range(gmm.K)]
    log_S = np.zeros(len(coords))
    H = np.zeros(len(coords), dtype="bool")
    k = 0
    for log_p[k], U[k], _ in \
            parmap.starmap(pygmmis._Estep, zip(range(gmm.K), U),
                           gmm, coords, covar, None,
                           pool=pool, pm_chunksize=chunksize):
        log_S[U[k]] += np.exp(log_p[k])  # actually S, not logS
        H[U[k]] = 1
        k += 1
    log_S[H] = np.log(log_S[H])

    max_q = np.zeros(len(coords))
    max_k = np.zeros(len(coords), dtype='uint32')
    for k in range(gmm.K):
        q_k = np.exp(log_p[k] - log_S[U[k]])
        max_k[U[k]] = np.where(max_q[U[k]] < q_k, k, max_k[U[k]])
        max_q[U[k]] = np.maximum(max_q[U[k]], q_k)
    return max_k
def test_warn_wrong_argument_starmap(self):
    with warnings.catch_warnings(record=True) as w:
        parmap.starmap(range, [(0, 2), (2, 5)], processes=-3)
        assert len(w) == 1
def training_and_testing(ARGS):
    # Check conditions
    if ARGS['list_columns']:
        list_columns = list(sorted(ARGS['list_columns']))
    if not ARGS['list_columns']:
        list_columns = [
            'CADD_phred', 'SIFTval', 'VEST4_score', 'gnomAD_exomes_AF'
        ]
    if ARGS['flag']:
        flag = list(sorted(ARGS['flag']))
    if not ARGS['flag']:
        flag = [
            "REVEL_score",
            "ClinPred_score",
            "M-CAP_score",
            "fathmm-XF_coding_score",
            "Eigen-raw_coding",
            "PrimateAI_score",
        ]

    if not os.path.exists(ARGS['output_dir'] +
                          '/TRAIN/training.csv.gz') or not os.path.exists(
                              ARGS['output_dir'] + '/TEST/testing.csv.gz'):
        logger.warn(
            '--train_and_test mode selected but training and testing file '
            'not found, creation with the following parameters : '
            '--ratio : ' + str(ARGS['ratio']) + ', --proportion : ' +
            str(ARGS['proportion']))
        ARGS['force_datasets'] = True
    if os.path.exists(ARGS['output_dir'] +
                      '/TRAIN/training.csv.gz') or os.path.exists(
                          ARGS['output_dir'] + '/TEST/testing.csv.gz'):
        logger.info('Training and testing file found')

    if ARGS['combinatory'] is True:
        pass

    # if enabled, erase previously generated training and testing files from
    # a global dataframe to create new ones
    if ARGS['force_datasets'] is True:
        utils.mkdir(ARGS['output_dir'])
        utils.mkdir(ARGS['output_dir'] + '/TRAIN')
        utils.mkdir(ARGS['output_dir'] + '/TEST')
        logger.warn('Creating new files or overwriting old ones')

        prop = ARGS['proportion']
        t = float(round(prop / (1 - prop), 2))
        ratio = ARGS['ratio']

        tmp = pd.read_csv(filepath_or_buffer=ARGS['input'],
                          sep='\t',
                          compression='gzip',
                          encoding='utf-8',
                          low_memory=False)

        if list_columns and flag:
            # Selection of specific columns to be used from a global dataframe
            # Example: df with 10 columns, --list_columns column1 column2 column5
            tmp = select_columns_pandas.select_columns_pandas(
                tmp, list_columns, flag)

        logger.info(tmp)

        # Use of input parameters to build training and testing dataframes
        # (proportion, ratio of data between train and test). Special
        # attention is paid to remove overlap between evaluation|test sets
        # and the training dataset to prevent any overfitting.
        complete_data_path = tmp.loc[tmp['True_Label'] == 1]
        complete_data_path = complete_data_path.sample(frac=1)
        complete_data_begn = tmp.loc[tmp['True_Label'] == -1]
        complete_data_begn = complete_data_begn.sample(frac=1)

        max_size = max(complete_data_path.shape[0],
                       complete_data_begn.shape[0])
        min_size = min(complete_data_path.shape[0],
                       complete_data_begn.shape[0])
        if max_size > (t * min_size):
            max_size = min_size * t
        elif max_size < (t * min_size):
            min_size = max_size / t

        if min_size < 1000 and min(complete_data_path.shape[0],
                                   complete_data_begn.shape[0]) == \
                complete_data_path.shape[0]:
            logger.warn(
                'CAREFUL : Size of the pathogenic dataset will be < 1000 samples'
            )

        eval_test_size = ratio
        train_path = complete_data_path.head(
            n=int(round(min_size * (1 - eval_test_size))))
        train_begn = complete_data_begn.head(
            n=int(round(max_size * (1 - eval_test_size))))
        eval_path = complete_data_path.tail(
            n=int(round(min_size * eval_test_size)))
        eval_begn = complete_data_begn.tail(
            n=int(round(min_size * eval_test_size)))
        eval_path.dropna(inplace=True)
        eval_begn.dropna(inplace=True)

        complete_training = pd.concat([train_path, train_begn
                                       ]).drop_duplicates(keep='first')
        complete_training = complete_training[
            complete_training.columns.drop(
                list(complete_training.filter(regex='pred|flag')))]
        complete_training.dropna(inplace=True)

        # Some stats on pathogenic and benign variant numbers in both
        # training and testing dataframes
        logger.info('Training - Path : ' + str(complete_training[
            complete_training['True_Label'] == 1].shape[0]))
        logger.info('Training - Benign : ' + str(complete_training[
            complete_training['True_Label'] == -1].shape[0]))

        min_size_eval = min(eval_path.shape[0], eval_begn.shape[0])
        complete_eval = pd.concat([
            eval_path.sample(frac=1).head(min_size_eval),
            eval_begn.sample(frac=1).head(min_size_eval)
        ]).drop_duplicates(keep='first')
        logger.info(
            'Testing - Path : ' +
            str(complete_eval[complete_eval['True_Label'] == 1].shape[0]))
        logger.info(
            'Testing - Benign : ' +
            str(complete_eval[complete_eval['True_Label'] == -1].shape[0]))

        # Dumping data
        complete_training.to_csv(path_or_buf=ARGS['output_dir'] +
                                 '/TRAIN/training.csv.gz',
                                 sep='\t',
                                 compression='gzip',
                                 encoding='utf-8',
                                 index=False)
        complete_eval.to_csv(path_or_buf=ARGS['output_dir'] +
                             '/TEST/testing.csv.gz',
                             sep='\t',
                             compression='gzip',
                             encoding='utf-8',
                             index=False)

    check_dir_train = False
    if os.path.isdir(ARGS['output_dir'] + '/TRAIN/Models'):
        check_dir_train = True

    if (ARGS['force_training'] is True) or (check_dir_train is False):
        # Training model
        # TrainingClassification(input_data=ARGS['output_dir'] + '/TRAIN/training.csv.gz',
        #                        classifiers=classifiers,
        #                        standardize=ARGS['standardize'],
        #                        output=ARGS["output_dir"],
        #                        logger=logger,
        #                        cv=ARGS['cross_validation']
        #                        )
        TestingClassification(input_data=ARGS['output_dir'] +
                              '/TEST/testing.csv.gz',
                              standardize=ARGS['standardize'],
                              output_dir=ARGS["output_dir"],
                              model_dir=ARGS['model'],
                              logger=logger,
                              threshold=ARGS['threshold'])

    # Generation of a histogram to see the most important features used in
    # the built model
    # histo_weights.histo_and_metrics(folder=ARGS['output_dir'], logger=logger)

    # This parameter, if enabled, will build all possible combinations from
    # a single dataframe if sources are mentioned.
    # Example: a global dataframe based on 3 databases (2 pathogenic: ClinVar
    # and HGMD, 1 benign: gnomAD) was generated. The following lines will
    # generate 2 evaluation sets, (clinvar|gnomAD) and (HGMD|gnomAD), with
    # various MAF thresholds (<0.01, <0.001, 0.0001, AC=1 (singleton), AF=0),
    # and each of these combinations will be tested with the previously
    # generated outputs. (Overlapping is checked between these combinations
    # and the training dataset.)
    if ARGS['eval'] and ARGS['eval'].endswith('.csv.gz'):
        # TODO : CHANGE NAME
        print('\n\n')
        logger.info('--BUILDING & TESTING ON EVALUATION SETS--')
        output_dir = ARGS['output_dir']
        eval_output_dir = output_dir
        eval_output_dir = eval_output_dir.split('/')
        eval_output_dir[-1] = 'EVALUATION_SETS_' + eval_output_dir[-1]
        eval_output_dir = "/".join(eval_output_dir)
        if os.path.isdir(eval_output_dir):
            pass
        else:
            utils.mkdir(eval_output_dir)
        # if ARGS['list_columns'] and ARGS['flag']:
        combination_pandas.combination_pandas(
            ARGS['eval'],
            output_dir + '/TRAIN/training.csv.gz',
            eval_output_dir,
            logger,
            list_columns,
            flag,
            CV=ARGS['cross_validation_evaluation'])
        # else:
        #     combination_pandas.combination_pandas(ARGS['eval'], ARGS['output_dir'] + '/TRAIN/training.csv.gz', output_dir, CV=ARGS['cross_validation_evaluation'])
        l_dir = os.listdir(eval_output_dir)
        print(list(zip(l_dir)))

        parmap.starmap(test_eval_mp,
                       list(zip(l_dir)),
                       pm_pbar=True,
                       pm_processes=ARGS['threads'])

        # Plots are automatically generated to visualize performance across
        # various scenarios for the different combinations
        print('\n\n')
        logger.info('--GENERATING PLOTS & STATS--')
        utils.mkdir(eval_output_dir + '/PLOTS_AND_MEAN_TABLE')
        # maf_plot.violin_plot_scores(eval_output_dir, logger)
        # maf_plot.maf_plot_maf_0(eval_output_dir, ARGS['cross_validation_evaluation'], logger)
        maf_plot.maf_plot_others(eval_output_dir,
                                 ARGS['cross_validation_evaluation'], logger)
dt = [0.2]
N = [1]
sigma = [0.5, 1, 2]
mu = [0]
corr_time = [1000]
repetitions = 1000
n_tau = 10
tau_list = [np.linspace(1, 20, n_tau)]
# seed_list = np.arange(n_tau * repetitions).reshape((repetitions, n_tau))
seed_list = np.arange(repetitions)

values = list(product([H], tau_list, dt, N, mu, sigma, corr_time, seed_list))
results = parmap.starmap(dd_wrapper, values, pm_chunksize=3, pm_pbar=True)
results = np.array(results)
print(results.shape)

# Adapt results to input
results = results.reshape((3, repetitions, n_tau))
results_mean = results.mean(axis=-2)
results_std = results.std(axis=-2) / np.sqrt(repetitions - 1)

plt.errorbar(tau_list[0], results_mean[0, :], results_std[0, :])
plt.errorbar(tau_list[0], results_mean[1, :], results_std[1, :])
plt.errorbar(tau_list[0], results_mean[2, :], results_std[2, :])
plt.show()
# plt.errorbar(tau_list[0], results_mean, results_std)
def test_starmap(self):
    items = [(1, 2), (3, 4), (5, 6)]
    pfalse = parmap.starmap(_identity, items, 5, 6, pm_parallel=False)
    ptrue = parmap.starmap(_identity, items, 5, 6, pm_parallel=True)
    self.assertEqual(pfalse, ptrue)
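# As the test above shows, positional arguments placed after the iterable
# (here 5 and 6) are appended to every call, so each element expands to
# _identity(a, b, 5, 6). A minimal stand-alone sketch of the same semantics;
# the worker `combine` is illustrative, not part of parmap's test suite.
import parmap


def combine(a, b, offset, scale):
    # first two arguments come from the tuple, the last two are shared
    return (a + b + offset) * scale


if __name__ == '__main__':
    items = [(1, 2), (3, 4), (5, 6)]
    serial = parmap.starmap(combine, items, 10, 2, pm_parallel=False)
    parallel = parmap.starmap(combine, items, 10, 2, pm_parallel=True)
    assert serial == parallel == [26, 34, 42]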
def main(): """ Demostration Options """ logging.basicConfig(level = logging.DEBUG) # If using parallel functionality, you must call this to set the appropriate # logging level gp.classifier.set_multiclass_logging_level(logging.DEBUG) # np.random.seed(50) # Feature Generation Parameters and Demonstration Options SAVE_OUTPUTS = True # We don't want to make files everywhere for a demo. SHOW_RAW_BINARY = True test_range_min = -2.0 test_range_max = +2.0 test_ranges = (test_range_min, test_range_max) n_train = 100 n_query = 1000 n_dims = 2 # <- Must be 2 for vis n_cores = None # number of cores for multi-class (None -> default: c-1) walltime = 300.0 approxmethod = 'laplace' # 'laplace' or 'pls' multimethod = 'OVA' # 'AVA' or 'OVA', ignored for binary problem fusemethod = 'EXCLUSION' # 'MODE' or 'EXCLUSION', ignored for binary responsename = 'probit' # 'probit' or 'logistic' batch_start = False entropy_threshold = None n_draws = 6 n_draws_est = 2500 rows_subplot = 2 cols_subplot = 3 assert rows_subplot * cols_subplot >= n_draws # Decision boundaries db1 = lambda x1, x2: (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) < 1.6) & \ ((x1 + x2) < 1.5) db2 = lambda x1, x2: (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) > 0.3) db3 = lambda x1, x2: ((x1 + x2) < 2) & ((x1 + x2) > -2.2) db4 = lambda x1, x2: ((x1 - 0.75)**2 + (x2 + 0.8)**2 > 0.3**2) db5 = lambda x1, x2: ((x1/2)**2 + x2**2 > 0.3) db6 = lambda x1, x2: (((x1)/8)**2 + (x2 + 1.5)**2 > 0.2**2) db7 = lambda x1, x2: (((x1)/8)**2 + ((x2 - 1.4)/1.25)**2 > 0.2**2) db4a = lambda x1, x2: ((x1 - 1.25)**2 + (x2 - 1.25)**2 > 0.5**2) & ((x1 - 0.75)**2 + (x2 + 1.2)**2 > 0.6**2) & ((x1 + 0.75)**2 + (x2 + 1.2)**2 > 0.3**2) & ((x1 + 1.3)**2 + (x2 - 1.3)**2 > 0.4**2) db5a = lambda x1, x2: ((x1/2)**2 + x2**2 > 0.3) & (x1 > 0) db5b = lambda x1, x2: ((x1/2)**2 + x2**2 > 0.3) & (x1 < 0) & ((x1 + 0.75)**2 + (x2 - 1.2)**2 > 0.6**2) db1a = lambda x1, x2: (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) < 1.6) & \ ((x1 + x2) < 1.6) | ((x1 + 0.75)**2 + (x2 + 1.2)**2 < 0.6**2) db1b = lambda x1, x2: (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) < 1.6) & ((x1/2)**2 + (x2)**2 > 0.4**2) & \ ((x1 + x2) < 1.5) | ((x1 + 0.75)**2 + (x2 - 1.5)**2 < 0.4**2) | ((x1 + x2) > 2.25) & (x1 < 1.75) & (x2 < 1.75) # | (((x1 + 0.25)/4)**2 + (x2 + 1.5)**2 < 0.32**2) # & (((x1 + 0.25)/4)**2 + (x2 + 1.5)**2 > 0.18**2) db1c = lambda x1, x2: (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) < 1.6) & ((x1/2)**2 + (x2)**2 > 0.4**2) & \ ((x1 + x2) < 1.5) | ((x1 + 0.75)**2 + (x2 - 1.5)**2 < 0.4**2) | ((x1 + x2) > 2.25) & (x1 < 1.75) & (x2 < 1.75) | (((x1 + 0.25)/4)**2 + (x2 + 1.75)**2 < 0.32**2) & (((x1 + 0.25)/4)**2 + (x2 + 1.75)**2 > 0.18**2) db8 = lambda x1, x2: (np.sin(2*x1 + 3*x2) > 0) | (((x1 - 1)**2 + x2**2/4) * (0.9*(x1 + 1)**2 + x2**2/2) < 1.4) & \ ((x1 + x2) < 1.5) | (x1 < -1.9) | (x1 > +1.9) | (x2 < -1.9) | (x2 > +1.9) | ((x1 + 0.75)**2 + (x2 - 1.5)**2 < 0.3**2) # db9 = lambda x1, x2: ((x1)**2 + (x2)**2 < 0.3**2) | ((x1)**2 + (x2)**2 > 0.5**2) | decision_boundary = [db5b, db1c, db4a] # [db5b, db1c, db4a, db8, db6, db7] """ Data Generation """ # # # Training Points # shrink = 0.8 # test_range_min *= shrink # test_range_max *= shrink # X1 = np.random.normal(loc = np.array([test_range_min, test_range_min]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X2 = np.random.normal(loc = np.array([test_range_min, test_range_max]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X3 = np.random.normal(loc = np.array([test_range_max, 
test_range_min]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X4 = np.random.normal(loc = np.array([test_range_max, test_range_max]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X5 = np.random.normal(loc = np.array([0, test_range_min]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X6 = np.random.normal(loc = np.array([test_range_min, 0]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X7 = np.random.normal(loc = np.array([test_range_max, 0]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # X8 = np.random.normal(loc = np.array([0, test_range_max]), scale = 0.9*np.ones(n_dims), size = (int(n_train/8), n_dims)) # test_range_min /= shrink # test_range_max /= shrink # X = np.concatenate((X1, X2, X3, X4, X5, X6, X7, X8), axis = 0) # X = np.random.uniform(test_range_min, test_range_max, # size = (n_train, n_dims)) X_s = np.array([[0.0, 0.0], [-0.2, 0.3], [-0.1, -0.1], [0.05, 0.25], [-1.1, 0.0], [-0.5, 0.0], [-0.4, -0.7], [-0.1, -0.1], [test_range_min, test_range_min], [test_range_min, test_range_max], [test_range_max, test_range_max], [test_range_max, test_range_min]]) X_f = np.array([[1.4, 1.6], [1.8, 1.2], [-1.24, 1.72], [-1.56, -1.9], [-1.9, 1.0], [-0.5, -1.2], [-1.4, -1.9], [0.4, -1.2], [test_range_min, test_range_max], [test_range_max, test_range_max], [test_range_max, test_range_min], [test_range_min, test_range_min]]) n_track = 25 X_s = np.random.uniform(test_range_min, test_range_max, size = (n_track, n_dims)) X_f = test_range_max * np.random.standard_cauchy(n_track * n_dims).reshape(n_track, n_dims) X_f = np.random.uniform(test_range_min, test_range_max, size = (n_track, n_dims)) X = generate_line_paths(X_s, X_f, n_points = 15) x1 = X[:, 0] x2 = X[:, 1] # Query Points Xq = np.random.uniform(test_range_min, test_range_max, size = (n_query, n_dims)) xq1 = Xq[:, 0] xq2 = Xq[:, 1] n_train = X.shape[0] n_query = Xq.shape[0] logging.info('Training Points: %d' % n_train) # Training Labels y = gp.classifier.utils.make_decision(X, decision_boundary) y_unique = np.unique(y) assert y_unique.dtype == int if y_unique.shape[0] == 2: mycmap = cm.get_cmap(name = 'bone', lut = None) mycmap2 = cm.get_cmap(name = 'BrBG', lut = None) else: mycmap = cm.get_cmap(name = 'gist_rainbow', lut = None) mycmap2 = cm.get_cmap(name = 'gist_rainbow', lut = None) """ Classifier Training """ # Training fig = plt.figure() gp.classifier.utils.visualise_decision_boundary( test_range_min, test_range_max, decision_boundary) plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.title('Training Labels') plt.xlabel('x1') plt.ylabel('x2') cbar = plt.colorbar() cbar.set_ticks(y_unique) cbar.set_ticklabels(y_unique) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) plt.gca().patch.set_facecolor('gray') print('Plotted Training Set') plt.show() # Training print('===Begin Classifier Training===') optimiser_config = gp.OptConfig() optimiser_config.sigma = gp.auto_range(kerneldef) optimiser_config.walltime = walltime # User can choose to batch start each binary classifier with different # initial hyperparameters for faster training if batch_start: if y_unique.shape[0] == 2: initial_hyperparams = [100, 0.1, 0.1] elif multimethod == 'OVA': initial_hyperparams = [ [356.468, 0.762, 0.530], \ [356.556, 0.836, 0.763], \ [472.006, 1.648, 1.550], \ [239.720, 1.307, 0.721] ] elif multimethod == 'AVA': initial_hyperparams = [ [14.9670, 0.547, 0.402], \ [251.979, 1.583, 1.318], \ [420.376, 1.452, 0.750], \ [780.641, 
1.397, 1.682], \ [490.353, 2.299, 1.526], \ [73.999, 1.584, 0.954]] else: raise ValueError batch_config = gp.batch_start(optimiser_config, initial_hyperparams) else: batch_config = optimiser_config # Obtain the response function responsefunction = gp.classifier.responses.get(responsename) # Train the classifier! learned_classifier = gp.classifier.learn(X, y, kerneldef, responsefunction, batch_config, multimethod = multimethod, approxmethod = approxmethod, train = True, ftol = 1e-6, processes = n_cores) # Print learned kernels print_function = gp.describer(kerneldef) gp.classifier.utils.print_learned_kernels(print_function, learned_classifier, y_unique) # Print the matrix of learned classifier hyperparameters logging.info('Matrix of learned hyperparameters') gp.classifier.utils.print_hyperparam_matrix(learned_classifier) """ Classifier Prediction """ # Prediction yq_prob = gp.classifier.predict(Xq, learned_classifier, fusemethod = fusemethod) yq_pred = gp.classifier.classify(yq_prob, y) yq_entropy = gp.classifier.entropy(yq_prob) logging.info('Caching Predictor...') predictors = gp.classifier.query(learned_classifier, Xq) logging.info('Computing Expectance...') yq_exp_list = gp.classifier.expectance(learned_classifier, predictors) logging.info('Computing Covariance...') yq_cov_list = gp.classifier.covariance(learned_classifier, predictors) logging.info('Drawing from GP...') yq_draws = gp.classifier.draws(n_draws, yq_exp_list, yq_cov_list, learned_classifier) logging.info('Computing Linearised Entropy...') yq_linearised_entropy = gp.classifier.linearised_entropy( yq_exp_list, yq_cov_list, learned_classifier) logging.info('Linearised Entropy is {0}'.format(yq_linearised_entropy)) """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" THE GAP BETWEEN ANALYSIS AND PLOTS """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" """ Classifier Prediction Results (Plots) """ logging.info('Plotting... 
please wait') Xq_plt = gp.classifier.utils.query_map(test_ranges, n_points = 250) yq_truth_plt = gp.classifier.utils.make_decision(Xq_plt, decision_boundary) """ Plot: Ground Truth """ # Training fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map(yq_truth_plt, test_ranges, cmap = mycmap) plt.title('Ground Truth') plt.xlabel('x1') plt.ylabel('x2') cbar = plt.colorbar() cbar.set_ticks(y_unique) cbar.set_ticklabels(y_unique) gp.classifier.utils.visualise_decision_boundary( test_range_min, test_range_max, decision_boundary) logging.info('Plotted Prediction Labels') """ Plot: Training Set """ # Training fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_decision_boundary( test_range_min, test_range_max, decision_boundary) plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.title('Training Labels') plt.xlabel('x1') plt.ylabel('x2') cbar = plt.colorbar() cbar.set_ticks(y_unique) cbar.set_ticklabels(y_unique) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) plt.gca().patch.set_facecolor('gray') logging.info('Plotted Training Set') """ Plot: Query Computations """ # Compute Linearised and True Entropy for plotting logging.info('Plot: Caching Predictor...') predictor_plt = gp.classifier.query(learned_classifier, Xq_plt) logging.info('Plot: Computing Expectance...') expectance_latent_plt = \ gp.classifier.expectance(learned_classifier, predictor_plt) logging.info('Plot: Computing Variance...') variance_latent_plt = \ gp.classifier.variance(learned_classifier, predictor_plt) logging.info('Plot: Computing Linearised Entropy...') entropy_linearised_plt = gp.classifier.linearised_entropy( expectance_latent_plt, variance_latent_plt, learned_classifier) logging.info('Plot: Computing Equivalent Standard Deviation') eq_sd_plt = gp.classifier.equivalent_standard_deviation( entropy_linearised_plt) logging.info('Plot: Computing Prediction Probabilities...') yq_prob_plt = gp.classifier.predict_from_latent( expectance_latent_plt, variance_latent_plt, learned_classifier, fusemethod = fusemethod) logging.info('Plot: Computing True Entropy...') yq_entropy_plt = gp.classifier.entropy(yq_prob_plt) logging.info('Plot: Computing Class Predicitons') yq_pred_plt = gp.classifier.classify(yq_prob_plt, y_unique) if isinstance(learned_classifier, list): logging.info('Plot: Computing Naive Linearised Entropy...') args = [(expectance_latent_plt[i], variance_latent_plt[i], learned_classifier[i]) for i in range(len(learned_classifier))] entropy_linearised_naive_plt = \ np.array(parmap.starmap(gp.classifier.linearised_entropy, args)).sum(axis = 0) Xq_meas = gp.classifier.utils.query_map(test_ranges, n_points = 10) predictor_meas = gp.classifier.query(learned_classifier, Xq_meas) exp_meas = gp.classifier.expectance(learned_classifier, predictor_meas) cov_meas = gp.classifier.covariance(learned_classifier, predictor_meas) logging.info('Objective Measure: Computing Joint Linearised Entropy...') entropy_linearised_meas = gp.classifier.linearised_entropy( exp_meas, cov_meas, learned_classifier) logging.info('Objective Measure: Computing Monte Carlo Joint Entropy...') # start_time = time.clock() # entropy_monte_carlo_meas = gp.classifier.monte_carlo_joint_entropy(exp_meas, cov_meas, learned_classifier, n_draws = n_draws_est) # logging.info('Sampling took %.4f seconds' % (time.clock() - start_time)) entropy_linearised_mean_meas = entropy_linearised_plt.mean() entropy_true_mean_meas = yq_entropy_plt.mean() mistake_ratio = (yq_truth_plt - 
yq_pred_plt).nonzero()[0].shape[0] / yq_truth_plt.shape[0] if isinstance(learned_classifier, list) & False: """ Plot: Latent Function Expectance """ for i in range(len(expectance_latent_plt)): fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map( expectance_latent_plt[i], test_ranges, levels = [0.0], vmin = -np.max(np.abs(expectance_latent_plt[i])), vmax = np.max(np.abs(expectance_latent_plt[i])), cmap = cm.coolwarm) plt.title('Latent Funtion Expectance %s' % gp.classifier.utils.binary_classifier_name( learned_classifier[i], y_unique)) plt.xlabel('x1') plt.ylabel('x2') plt.colorbar() plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) logging.info('Plotted Latent Function Expectance on Training Set') """ Plot: Latent Function Variance """ for i in range(len(variance_latent_plt)): fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map( variance_latent_plt[i], test_ranges, cmap = cm.coolwarm) plt.title('Latent Funtion Variance %s' % gp.classifier.utils.binary_classifier_name( learned_classifier[i], y_unique)) plt.xlabel('x1') plt.ylabel('x2') plt.colorbar() plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) logging.info('Plotted Latent Function Variance on Training Set') """ Plot: Prediction Probabilities """ for i in range(len(yq_prob_plt)): fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map(yq_prob_plt[i], test_ranges, levels = [0.5], cmap = cm.coolwarm) plt.title('Prediction Probabilities (Class %d)' % y_unique[i]) plt.xlabel('x1') plt.ylabel('x2') plt.colorbar() plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) logging.info('Plotted Prediction Probabilities on Training Set') """ Plot: Prediction Labels """ # Query (Prediction Map) fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map(yq_pred_plt, test_ranges, boundaries = True, cmap = mycmap) plt.title('Prediction [Miss Ratio: %.3f %s]' % (100 * mistake_ratio, '%')) plt.xlabel('x1') plt.ylabel('x2') cbar = plt.colorbar() cbar.set_ticks(y_unique) cbar.set_ticklabels(y_unique) logging.info('Plotted Prediction Labels') """ Plot: Prediction Entropy onto Training Set """ # Query (Prediction Entropy) fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map(yq_entropy_plt, test_ranges, threshold = entropy_threshold, cmap = cm.coolwarm) plt.title('Prediction Entropy [ACE = %.4f]' % (entropy_true_mean_meas)) plt.xlabel('x1') plt.ylabel('x2') plt.colorbar() plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) logging.info('Plotted Prediction Entropy on Training Set') """ Plot: Linearised Prediction Entropy onto Training Set """ # Query (Linearised Entropy) fig = plt.figure(figsize = (15, 15)) gp.classifier.utils.visualise_map(entropy_linearised_plt, test_ranges, threshold = entropy_threshold, cmap = cm.coolwarm) plt.title('Linearised Prediction Entropy [FLE = %.4f, ALE = %.4f]' % (entropy_linearised_meas, entropy_linearised_mean_meas)) plt.xlabel('x1') plt.ylabel('x2') plt.colorbar() plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap) plt.xlim((test_range_min, test_range_max)) plt.ylim((test_range_min, test_range_max)) logging.info('Plotted Linearised Prediction Entropy on Training Set') """ Plot: 
""" Plot: Exponentiated Linearised Prediction Entropy onto Training Set """
# Query (equivalent standard deviation)
fig = plt.figure(figsize = (15, 15))
gp.classifier.utils.visualise_map(eq_sd_plt, test_ranges,
    threshold = entropy_threshold, cmap = cm.coolwarm)
plt.title('Equivalent Standard Deviation')
plt.xlabel('x1')
plt.ylabel('x2')
plt.colorbar()
plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap)
plt.xlim((test_range_min, test_range_max))
plt.ylim((test_range_min, test_range_max))
logging.info('Plotted Exponentiated Linearised Prediction Entropy '
    '(Equivalent Standard Deviation) on Training Set')

""" Plot: Naive Linearised Prediction Entropy onto Training Set """
if isinstance(learned_classifier, list):
    # Query (naive linearised entropy)
    fig = plt.figure(figsize = (15, 15))
    gp.classifier.utils.visualise_map(
        entropy_linearised_naive_plt, test_ranges,
        threshold = entropy_threshold, cmap = cm.coolwarm)
    plt.title('Naive Linearised Prediction Entropy')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.colorbar()
    plt.scatter(x1, x2, c = y, marker = 'x', cmap = mycmap)
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))
    logging.info('Plotted Naive Linearised Prediction Entropy on Training Set')

""" Plot: Sample Query Predictions """
# Visualise predictions on the sampled query points
fig = plt.figure(figsize = (15, 15))
gp.classifier.utils.visualise_decision_boundary(
    test_range_min, test_range_max, decision_boundary)
plt.scatter(xq1, xq2, c = yq_pred, marker = 'x', cmap = mycmap)
plt.title('Predicted Query Labels')
plt.xlabel('x1')
plt.ylabel('x2')
cbar = plt.colorbar()
cbar.set_ticks(y_unique)
cbar.set_ticklabels(y_unique)
plt.xlim((test_range_min, test_range_max))
plt.ylim((test_range_min, test_range_max))
plt.gca().patch.set_facecolor('gray')
logging.info('Plotted Sample Query Labels')

""" Plot: Sample Query Draws """
# Visualise label draws on the sampled query points
fig = plt.figure(figsize = (19.2, 10.8))
for i in range(n_draws):
    plt.subplot(rows_subplot, cols_subplot, i + 1)
    gp.classifier.utils.visualise_decision_boundary(
        test_range_min, test_range_max, decision_boundary)
    plt.scatter(xq1, xq2, c = yq_draws[i], marker = 'x', cmap = mycmap)
    plt.title('Query Label Draws')
    plt.xlabel('x1')
    plt.ylabel('x2')
    cbar = plt.colorbar()
    cbar.set_ticks(y_unique)
    cbar.set_ticklabels(y_unique)
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))
    plt.gca().patch.set_facecolor('gray')
logging.info('Plotted Sample Query Draws')

""" Save Outputs """
# Save all figures
if SAVE_OUTPUTS:
    save_directory = "response_%s_approxmethod_%s" \
        "_training_%d_query_%d_walltime_%d" \
        "_method_%s_fusemethod_%s/" \
        % (responsename, approxmethod, n_train, n_query,
           walltime, multimethod, fusemethod)
    full_directory = gp.classifier.utils.create_directories(
        save_directory, home_directory = 'Figures/', append_time = True)
    gp.classifier.utils.save_all_figures(full_directory)
    shutil.copy2('./receding_horizon_path_planning.py', full_directory)

logging.info('Modeling Done')

""" Path Planning """

""" Setup Path Planning """
xq_now = np.array([[0., 0.]])
horizon = (test_range_max - test_range_min) + 0.5
n_steps = 30
theta_bound = np.deg2rad(30)
theta_add_init = -np.deg2rad(10) * np.ones(n_steps)
theta_add_init[0] = np.deg2rad(180)
theta_add_low = -theta_bound * np.ones(n_steps)
theta_add_high = theta_bound * np.ones(n_steps)
theta_add_low[0] = 0.0
theta_add_high[0] = 2 * np.pi
r = horizon / n_steps
choice_walltime = 1500.0
xtol_rel = 1e-2
ftol_rel = 1e-4
k_step = 1

""" Initialise Values """
# The data observed so far
X_now = X.copy()
y_now = y.copy()
# Observe the current location
yq_now = gp.classifier.utils.make_decision(xq_now[[-1]], decision_boundary)

# Add the observed data to the training set
X_now = np.concatenate((X_now, xq_now[[-1]]), axis = 0)
y_now = np.append(y_now, yq_now)

# Add the new location to the array of travelled coordinates
xq1_nows = xq_now[:, 0]
xq2_nows = xq_now[:, 1]
yq_nows = yq_now.copy()

# Figures reused for plotting the current situation at each step
fig1 = plt.figure(figsize = (15, 15))
fig2 = plt.figure(figsize = (15, 15))
fig3 = plt.figure(figsize = (15, 15))
fig4 = plt.figure(figsize = (15, 15))
fig5 = plt.figure(figsize = (20, 20))

# Start exploring
i_trials = 0
n_trials = 2000
entropy_linearised_array = np.nan * np.ones(n_trials)
# entropy_monte_carlo_array = np.nan * np.ones(n_trials)
entropy_linearised_mean_array = np.nan * np.ones(n_trials)
entropy_true_mean_array = np.nan * np.ones(n_trials)
entropy_opt_array = np.nan * np.ones(n_trials)
mistake_ratio_array = np.nan * np.ones(n_trials)
m_step = 0

while i_trials < n_trials:

    """ Path Planning """
    logging.info('m_step = %d, k_step = %d' % (m_step, k_step))
    if m_step <= k_step:
        # Propose a place to observe
        xq_abs_opt, theta_add_opt, entropy_opt = \
            go_optimised_path(theta_add_init, xq_now[-1], r,
                learned_classifier, test_ranges,
                theta_add_low = theta_add_low,
                theta_add_high = theta_add_high,
                walltime = choice_walltime,
                xtol_rel = xtol_rel, ftol_rel = ftol_rel,
                globalopt = False, objective = 'LE',
                n_draws = n_draws_est)
        logging.info('Optimal Joint Entropy: %.5f' % entropy_opt)
        m_step = keep_going_until_surprise(xq_abs_opt, learned_classifier,
            decision_boundary)
        logging.info('Taking %d steps' % m_step)
    else:
        m_step -= 1
        theta_add_opt = theta_add_init.copy()
        xq_abs_opt = forward_path_model(theta_add_init, r, xq_now[-1])
        logging.info('%d steps left' % m_step)

    xq_now = xq_abs_opt[:k_step]
    theta_add_init = initiate_with_continuity(theta_add_opt, k_step = k_step)
    np.clip(theta_add_init, theta_add_low + 1e-4, theta_add_high - 1e-4,
        out = theta_add_init)

    # Observe the current location
    yq_now = gp.classifier.utils.make_decision(xq_now, decision_boundary)

    # Add the observed data to the training set
    X_now = np.concatenate((X_now, xq_now), axis = 0)
    y_now = np.append(y_now, yq_now)

    # Add the new locations to the array of travelled coordinates
    xq1_nows = np.append(xq1_nows, xq_now[:, 0])
    xq2_nows = np.append(xq2_nows, xq_now[:, 1])
    yq_nows = np.append(yq_nows, yq_now)

    # Update the model with the new observations
    logging.info('Learning Classifier...')
    batch_config = \
        gp.classifier.batch_start(optimiser_config, learned_classifier)
    try:
        learned_classifier = gp.classifier.learn(X_now, y_now, kerneldef,
            responsefunction, batch_config,
            multimethod = multimethod, approxmethod = approxmethod,
            train = True, ftol = 1e-6, processes = n_cores)
    except Exception as e:
        logging.warning(e)
        # Fall back to updating the model without re-training
        # the hyperparameters
        try:
            learned_classifier = gp.classifier.learn(X_now, y_now, kerneldef,
                responsefunction, batch_config,
                multimethod = multimethod, approxmethod = approxmethod,
                train = False, ftol = 1e-6, processes = n_cores)
        except Exception as e:
            logging.warning(e)
    logging.info('Finished Learning')

    # This is the finite horizon optimal route
    xq1_proposed = xq_abs_opt[:, 0][k_step:]
    xq2_proposed = xq_abs_opt[:, 1][k_step:]
    yq_proposed = gp.classifier.classify(
        gp.classifier.predict(xq_abs_opt, learned_classifier),
        y_unique)[k_step:]

    """ Computing Analysis Maps """
    # Compute the linearised and true entropy for plotting
    logging.info('Plot: Caching Predictor...')
    predictor_plt = gp.classifier.query(learned_classifier, Xq_plt)
    logging.info('Plot: Computing Expectance...')
    expectance_latent_plt = \
        gp.classifier.expectance(learned_classifier, predictor_plt)
    logging.info('Plot: Computing Variance...')
    variance_latent_plt = \
        gp.classifier.variance(learned_classifier, predictor_plt)
    logging.info('Plot: Computing Linearised Entropy...')
    entropy_linearised_plt = gp.classifier.linearised_entropy(
        expectance_latent_plt, variance_latent_plt, learned_classifier)
    logging.info('Plot: Computing Equivalent Standard Deviation...')
    eq_sd_plt = gp.classifier.equivalent_standard_deviation(
        entropy_linearised_plt)
    logging.info('Plot: Computing Prediction Probabilities...')
    yq_prob_plt = gp.classifier.predict_from_latent(
        expectance_latent_plt, variance_latent_plt, learned_classifier,
        fusemethod = fusemethod)
    logging.info('Plot: Computing True Entropy...')
    yq_entropy_plt = gp.classifier.entropy(yq_prob_plt)
    logging.info('Plot: Computing Class Predictions...')
    yq_pred_plt = gp.classifier.classify(yq_prob_plt, y_unique)

    predictor_meas = gp.classifier.query(learned_classifier, Xq_meas)
    exp_meas = gp.classifier.expectance(learned_classifier, predictor_meas)
    cov_meas = gp.classifier.covariance(learned_classifier, predictor_meas)

    logging.info('Objective Measure: Computing Linearised Joint Entropy...')
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    entropy_linearised_meas = gp.classifier.linearised_entropy(
        exp_meas, cov_meas, learned_classifier)
    logging.info('Computation took %.4f seconds'
        % (time.perf_counter() - start_time))
    logging.info('Linearised Joint Entropy: %.4f' % entropy_linearised_meas)
    # logging.info('Objective Measure: Computing Monte Carlo Joint Entropy...')
    # start_time = time.perf_counter()
    # entropy_monte_carlo_meas = gp.classifier.monte_carlo_joint_entropy(
    #     exp_meas, cov_meas, learned_classifier, n_draws = n_draws_est)
    # logging.info('Computation took %.4f seconds'
    #     % (time.perf_counter() - start_time))
    # logging.info('Monte Carlo Joint Entropy: %.4f' % entropy_monte_carlo_meas)

    entropy_linearised_mean_meas = entropy_linearised_plt.mean()
    entropy_true_mean_meas = yq_entropy_plt.mean()
    mistake_ratio = (yq_truth_plt - yq_pred_plt).nonzero()[0].shape[0] \
        / yq_truth_plt.shape[0]

    entropy_linearised_array[i_trials] = entropy_linearised_meas
    # entropy_monte_carlo_array[i_trials] = entropy_monte_carlo_meas
    entropy_linearised_mean_array[i_trials] = entropy_linearised_mean_meas
    entropy_true_mean_array[i_trials] = entropy_true_mean_meas
    entropy_opt_array[i_trials] = entropy_opt
    mistake_ratio_array[i_trials] = mistake_ratio

    # Find the bounds of the entropy predictions
    vmin1 = entropy_linearised_plt.min()
    vmax1 = entropy_linearised_plt.max()
    vmin2 = yq_entropy_plt.min()
    vmax2 = yq_entropy_plt.max()
    vmin3 = eq_sd_plt.min()
    vmax3 = eq_sd_plt.max()

    """ Linearised Entropy Map """
    # Prepare Figure 1
    plt.figure(fig1.number)
    plt.clf()
    plt.title('Linearised entropy [horizon = %.2f, FLE = %.2f, ALE = %.2f, '
        'ACE = %.2f, TPE = %.2f]'
        % (horizon, entropy_linearised_meas, entropy_linearised_mean_meas,
           entropy_true_mean_meas, entropy_opt))
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))

    # Plot linearised entropy
    gp.classifier.utils.visualise_map(entropy_linearised_plt, test_ranges,
        cmap = cm.coolwarm, vmin = vmin1, vmax = vmax1)
    plt.colorbar()

    # Plot training set on top
    plt.scatter(x1, x2, c = y, s = 40, marker = 'x', cmap = mycmap)

    # Plot the path on top
    plt.scatter(xq1_nows, xq2_nows, c = yq_nows, s = 60, facecolors = 'none',
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_nows, xq2_nows, c = 'w')
    plt.scatter(xq_now[:, 0], xq_now[:, 1], c = yq_now, s = 120,
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    # Plot the proposed path
    plt.scatter(xq1_proposed, xq2_proposed, c = yq_proposed, s = 60,
        marker = 'D', vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_proposed, xq2_proposed, c = 'w')

    # Plot the horizon
    gp.classifier.utils.plot_circle(xq_now[-1], horizon, c = 'k',
        marker = '.')

    # Save the plot (assumes SAVE_OUTPUTS above created `full_directory`)
    plt.gca().set_aspect('equal', adjustable = 'box')
    plt.savefig('%sentropy_linearised_step%d.png'
        % (full_directory, i_trials + 1))

    """ Equivalent Standard Deviation Map """
    # Prepare Figure 2
    plt.figure(fig2.number)
    plt.clf()
    plt.title('Equivalent SD [horizon = %.2f, FLE = %.2f, ALE = %.2f, '
        'ACE = %.2f, TPE = %.2f]'
        % (horizon, entropy_linearised_meas, entropy_linearised_mean_meas,
           entropy_true_mean_meas, entropy_opt))
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))

    # Plot equivalent standard deviation
    gp.classifier.utils.visualise_map(eq_sd_plt, test_ranges,
        cmap = cm.coolwarm, vmin = vmin3, vmax = vmax3)
    plt.colorbar()

    # Plot training set on top
    plt.scatter(x1, x2, c = y, s = 40, marker = 'x', cmap = mycmap)

    # Plot the path on top
    plt.scatter(xq1_nows, xq2_nows, c = yq_nows, s = 60, facecolors = 'none',
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_nows, xq2_nows, c = 'w')
    plt.scatter(xq_now[:, 0], xq_now[:, 1], c = yq_now, s = 120,
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)

    # Plot the proposed path
    plt.scatter(xq1_proposed, xq2_proposed, c = yq_proposed, s = 60,
        marker = 'D', vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_proposed, xq2_proposed, c = 'w')

    # Plot the horizon
    gp.classifier.utils.plot_circle(xq_now[-1], horizon, c = 'k',
        marker = '.')

    # Save the plot
    plt.gca().set_aspect('equal', adjustable = 'box')
    plt.savefig('%seq_sd_step%d.png' % (full_directory, i_trials + 1))

    """ True Entropy Map """
    # Prepare Figure 3
    plt.figure(fig3.number)
    plt.clf()
    plt.title('True entropy [horizon = %.2f, FLE = %.2f, ALE = %.2f, '
        'ACE = %.2f, TPE = %.2f]'
        % (horizon, entropy_linearised_meas, entropy_linearised_mean_meas,
           entropy_true_mean_meas, entropy_opt))
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))

    # Plot true entropy
    gp.classifier.utils.visualise_map(yq_entropy_plt, test_ranges,
        cmap = cm.coolwarm, vmin = vmin2, vmax = vmax2)
    plt.colorbar()

    # Plot training set on top
    plt.scatter(x1, x2, c = y, s = 40, marker = 'x', cmap = mycmap)

    # Plot the path on top
    plt.scatter(xq1_nows, xq2_nows, c = yq_nows, s = 60, facecolors = 'none',
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_nows, xq2_nows, c = 'w')
    plt.scatter(xq_now[:, 0], xq_now[:, 1], c = yq_now, s = 120,
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)

    # Plot the proposed path
    plt.scatter(xq1_proposed, xq2_proposed, c = yq_proposed, s = 60,
        marker = 'D', vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_proposed, xq2_proposed, c = 'w')

    # Plot the horizon
    gp.classifier.utils.plot_circle(xq_now[-1], horizon, c = 'k',
        marker = '.')

    # Save the plot
    plt.gca().set_aspect('equal', adjustable = 'box')
    plt.savefig('%sentropy_true_step%d.png' % (full_directory, i_trials + 1))

    """ Class Prediction Map """
    # Prepare Figure 4
    plt.figure(fig4.number)
    plt.clf()
    plt.title('Class predictions [Miss Ratio: %.3f %%]'
        % (100 * mistake_ratio))
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.xlim((test_range_min, test_range_max))
    plt.ylim((test_range_min, test_range_max))
    # Plot class predictions
    gp.classifier.utils.visualise_map(yq_pred_plt, test_ranges,
        boundaries = True, cmap = mycmap2,
        vmin = y_unique[0], vmax = y_unique[-1])
    cbar = plt.colorbar()
    cbar.set_ticks(y_unique)
    cbar.set_ticklabels(y_unique)

    # Plot training set on top
    plt.scatter(x1, x2, c = y, s = 40, marker = 'v', cmap = mycmap)

    # Plot the path on top
    plt.scatter(xq1_nows, xq2_nows, c = yq_nows, s = 60, marker = 'o',
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_nows, xq2_nows, c = 'w')
    plt.scatter(xq_now[:, 0], xq_now[:, 1], c = yq_now, s = 120,
        vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)

    # Plot the proposed path
    plt.scatter(xq1_proposed, xq2_proposed, c = yq_proposed, s = 60,
        marker = 'D', vmin = y_unique[0], vmax = y_unique[-1], cmap = mycmap)
    plt.plot(xq1_proposed, xq2_proposed, c = 'w')

    # Plot the horizon
    gp.classifier.utils.plot_circle(xq_now[-1], horizon, c = 'k',
        marker = '.')

    # Save the plot
    plt.gca().set_aspect('equal', adjustable = 'box')
    plt.savefig('%sclass_prediction_step%d.png'
        % (full_directory, i_trials + 1))

    """ Metric History """
    # Prepare Figure 5
    plt.figure(fig5.number)
    plt.clf()
    plt.subplot(5, 1, 1)
    plt.plot(np.arange(i_trials + 1),
        entropy_linearised_array[:(i_trials + 1)])
    plt.title('Field Linearised Entropy')
    plt.ylabel('Field Linearised Entropy')
    # plt.subplot(6, 1, 2)
    # plt.plot(np.arange(i_trials + 1),
    #     entropy_monte_carlo_array[:(i_trials + 1)])
    # plt.title('Field True Entropy through Monte Carlo')
    # plt.ylabel('Field True Entropy')
    plt.subplot(5, 1, 2)
    plt.plot(np.arange(i_trials + 1),
        entropy_linearised_mean_array[:(i_trials + 1)])
    plt.title('Average Linearised Entropy')
    plt.ylabel('Average Linearised Entropy')
    plt.subplot(5, 1, 3)
    plt.plot(np.arange(i_trials + 1),
        entropy_true_mean_array[:(i_trials + 1)])
    plt.title('Average True Entropy')
    plt.ylabel('Average True Entropy')
    plt.subplot(5, 1, 4)
    plt.plot(np.arange(i_trials + 1), entropy_opt_array[:(i_trials + 1)])
    plt.title('Joint entropy of path chosen each iteration')
    plt.ylabel('Joint Entropy')
    plt.subplot(5, 1, 5)
    plt.gca().get_xaxis().get_major_formatter().set_useOffset(False)
    plt.plot(np.arange(i_trials + 1),
        100 * mistake_ratio_array[:(i_trials + 1)])
    plt.title('Prediction Miss Ratio')
    plt.ylabel('Prediction Miss Ratio (%)')
    plt.xlabel('Steps')

    # Save the plot
    plt.savefig('%sentropy_history%d.png' % (full_directory, i_trials + 1))
    logging.info('Plotted and Saved Iteration')

    # Move on to the next step
    i_trials += 1

    # Periodically checkpoint the learned classifier and metric histories
    if i_trials % 50 == 0:
        np.savez('%slearned_classifier_trial%d.npz'
            % (full_directory, i_trials),
            learned_classifier = learned_classifier)
        np.savez('%sentropy_linearised_array_trial%d.npz'
            % (full_directory, i_trials),
            entropy_linearised_array = entropy_linearised_array)
        np.savez('%sentropy_linearised_mean_array_trial%d.npz'
            % (full_directory, i_trials),
            entropy_linearised_mean_array = entropy_linearised_mean_array)
        np.savez('%sentropy_true_mean_array_trial%d.npz'
            % (full_directory, i_trials),
            entropy_true_mean_array = entropy_true_mean_array)
        np.savez('%sentropy_opt_array_trial%d.npz'
            % (full_directory, i_trials),
            entropy_opt_array = entropy_opt_array)
        np.savez('%smistake_ratio_array_trial%d.npz'
            % (full_directory, i_trials),
            mistake_ratio_array = mistake_ratio_array)

# When finished, save the learned classifier
np.savez('%slearned_classifier_final.npz' % full_directory,
    learned_classifier = learned_classifier)

# Show everything!
plt.show()
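# Aside (added, not part of the original script): a minimal sketch of
# reloading the checkpoints written by np.savez above. The learned classifier
# is pickled into an object array, so np.load needs allow_pickle=True to
# recover it; the metric histories load as plain float arrays. File names
# here are illustrative.
import numpy as np

with np.load('learned_classifier_final.npz', allow_pickle=True) as data:
    # an object array (0-d or 1-d depending on the classifier type);
    # .tolist() recovers the original Python object(s)
    learned_classifier = data['learned_classifier'].tolist()

with np.load('entropy_linearised_array_trial50.npz') as data:
    entropy_linearised_array = data['entropy_linearised_array']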
def main(fasta, output_dir, bacterial_query, email, viral_db,
         keep_tmp=False, threads=20, evalue=0.001, outfmt=5):
    verbose_logging(**locals())
    fasta_name = op.basename(fasta)
    sample = get_sample(fasta_name)
    try:
        tmpdir = tf.mkdtemp("_tmp", "%s_" % sample, tf.tempdir)
        # rename the fa headers while writing to temp working dir
        tmpfasta = preprocess_fasta(fasta, tmpdir)
        blastp_xml = op.join(tmpdir, "%s.blastp.xml" % sample)
        blast_db = op.join(tmpdir, "%s.blastdb" % sample)
        tmp_viral_db = op.join(tmpdir, op.basename(viral_db))
        copytree(viral_db, tmp_viral_db)
        tmpquery = op.join(tmpdir, op.basename(bacterial_query))
        shutil.copyfile(bacterial_query, tmpquery)
        # prodigal
        p_proteins, p_genes, p_genbank, p_score = prodigal(tmpfasta, sample,
                                                           tmpdir)
        # tRNAscan-SE
        trna_output = os.path.join(tmpdir, "%s.trnascan" % sample)
        trna_output = trnascan(tmpfasta, o=trna_output, B=None)
        # GC content and skew
        gc_output = gc_content(tmpfasta)
        # tetramer PCA
        tetramerPCA_output = tetramerPCA(tmpfasta, window=1600, step=200)
        # blastp
        blastp(query=p_proteins, db='nr', out=blastp_xml, evalue=evalue,
               num_alignments=10, num_threads=threads, outfmt=outfmt)
        # build a BLAST database from the predicted proteins
        # ('in' is a Python keyword, hence the dict unpacking)
        makeblastdb(**{'in': p_proteins, 'parse_seqids': None,
                       'dbtype': 'prot', 'out': blast_db})
        # blastx; viral
        viral_xmls, viral_queries = batch_blastx(blast_db, tmp_viral_db,
                                                 tmpdir, evalue, threads,
                                                 outfmt)
        viral_bams = parmap.starmap(xml_to_bam,
                                    zip(viral_xmls, viral_queries),
                                    p_proteins, processes=12)
        viral_coverages = parmap.map(per_contig_coverage, viral_bams,
                                     p_proteins, processes=12)
        viral_pileups = parmap.map(samtools_mpileup, viral_bams, processes=12)
        # blastx; bacterial
        bacterial_xml = os.path.join(tmpdir,
                                     "%s.bacterial_fraction.xml" % sample)
        blastx(query=tmpquery, db=blast_db, out=bacterial_xml, evalue=evalue,
               max_target_seqs=1, num_threads=threads, outfmt=5,
               query_gencode=11)
        bacterial_bam = xml_to_bam(bacterial_xml, bacterial_query, p_proteins)
        bacterial_coverage = per_contig_coverage(bacterial_bam, p_proteins)
        bacterial_pileup = samtools_mpileup(bacterial_bam)
    except Exception as e:
        print(e.__doc__)
        print(str(e))
        raise
    finally:
        # always remove viral fastas; don't copy back from ram
        if op.exists(tmp_viral_db):
            shutil.rmtree(tmp_viral_db)
        # gzip all of the files in the temp dir
        gzip_all(tmpdir, ignore=['pdf', 'bam', 'bai'])
        # copy over the files
        runcmd("cp -R -v {src}/* {dst}".format(src=tmpdir, dst=output_dir))
        if not keep_tmp:
            # delete the temp working directory
            shutil.rmtree(tmpdir)
    if email:
        send_email(to=email, subject=op.basename(__file__),
                   message="finished processing %s; results were copied "
                           "to %s" % (fasta, output_dir))
    logging.info("Complete.")
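# Aside (added): parmap forwards extra positional arguments to every call, so
# the viral-fraction step above fans out over the (xml, query) pairs while
# passing the same p_proteins value each time. A minimal, self-contained
# sketch of that calling convention (the stub function and values are
# illustrative, not part of the pipeline above):
import parmap

def xml_to_bam_stub(xml, query, proteins):
    return "%s + %s vs %s" % (xml, query, proteins)

pairs = [("a.xml", "q1.fa"), ("b.xml", "q2.fa")]
# Each tuple is unpacked, then the fixed argument is appended:
# xml_to_bam_stub("a.xml", "q1.fa", "proteins.faa"), ...
bams = parmap.starmap(xml_to_bam_stub, pairs, "proteins.faa", processes=2)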
                  arg.get("rows"), arg.get("columns"),
                  arg.get("data_type") if op != "sm" else "float",
                  arg["values"]), file=tf)
    except Exception as e:
        print("Failed to generate", test_file, "with error",
              traceback.format_exc())
        try:
            os.unlink(test_file)
        except OSError:
            # the file may not have been created yet
            pass


if __name__ == "__main__":
    op_to_args = defaultdict(list)
    for dirpath, dirnames, filenames in os.walk("."):
        if dirpath == ".":
            continue
        operation = os.path.basename(dirpath)
        for output_filename in filenames:
            if ".out" not in output_filename:
                continue
            output_filepath = os.path.join(dirpath, output_filename)
            args = get_args(output_filepath)
            op_to_args[operation].append(args)
    file_params = chain.from_iterable(
        [(op, arg) for arg in args] for op, args in op_to_args.items()
    )
    list(parmap.starmap(gen_test_file, file_params))
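# Aside (added): the generator expression above flattens the per-operation
# argument lists into a single stream of (op, arg) tuples, which
# parmap.starmap then unpacks into gen_test_file(op, arg) calls. A small,
# self-contained illustration of the same flattening pattern:
from itertools import chain

op_to_args = {"add": [1, 2], "sm": [3]}
file_params = chain.from_iterable(
    [(op, arg) for arg in args] for op, args in op_to_args.items()
)
print(list(file_params))  # [('add', 1), ('add', 2), ('sm', 3)]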
def align_frames(mouse, dir, freq):
    lofiles, lofilenames = get_file_list(dir + "Videos/", mouse)
    print(lofilenames)
    lop = get_distance_var(lofiles)
    all_frames = np.asarray(get_video_frames(lofiles), dtype=np.uint8)

    print("Aligning all video frames...")
    # Shift each session's frames by its measured offset, in parallel
    all_frames = parmap.starmap(shift_frames, zip(all_frames, lop))
    ## for i in range(len(lop)):
    ##     for frame in all_frames[i]:
    ##         frame = image_registration.fft_tools.shift2d(
    ##             frame, lop[i].dx, lop[i].dy)
    print(np.shape(all_frames))

    # Filter, normalise and post-process each aligned session in parallel
    new_all_frames = parmap.map(process_frames, all_frames, freq, mouse, dir)
    '''
    count = 0
    for frames in all_frames:
        print(np.shape(frames))
        save_to_file("Green/" + lofilenames[count][:-4] + "_aligned.raw",
                     frames, np.float32)
        print("Calculating mean...")
        avg_pre_filt = calculate_avg(frames)
        print("Temporal filter...")
        frames = cheby_filter(frames)
        frames += avg_pre_filt
        save_to_file("Green/Cheby/" + lofilenames[count][:-4] +
                     "_BPFilter_0.1-1Hz.raw", frames, np.float32)
        print("Calculating DF/F0...")
        frames = calculate_df_f0(frames)
        save_to_file("Green/DFF/" + lofilenames[count][:-4] + "_DFF.raw",
                     frames, np.float32)
        print("Applying MASKED GSR...")
        #frames = gsr(frames)
        frames = masked_gsr(frames, save_dir + "202_mask.raw")
        save_to_file("Green/GSR/" + lofilenames[count][:-4] + "_GSR.raw",
                     frames, np.float32)
        print("Getting SD map...")
        sd = standard_deviation(frames)
        save_to_file("Green/SD_maps/" + lofilenames[count][:-4] + "_SD.raw",
                     frames, np.float32)
        new_all_frames.append(frames)
        count += 1
    '''

    print("Creating array...")
    new_all_frames = np.asarray(new_all_frames, dtype=np.float32)
    all_frames = np.asarray(all_frames, dtype=np.float32)

    print("Joining files...")
    new_all_frames = np.reshape(
        new_all_frames,
        (new_all_frames.shape[0] * new_all_frames.shape[1],
         new_all_frames.shape[2], new_all_frames.shape[3]))
    all_frames = np.reshape(
        all_frames,
        (all_frames.shape[0] * all_frames.shape[1],
         all_frames.shape[2], all_frames.shape[3]))

    print("Shapes:")
    print(np.shape(all_frames))
    print(np.shape(new_all_frames))

    # Zero out NaNs left over from the processing steps
    where_are_NaNs = np.isnan(new_all_frames)
    new_all_frames[where_are_NaNs] = 0

    save_to_file("FULL_conc.raw", new_all_frames, np.float32)
    save_to_file("conc_RAW.raw", all_frames, np.float32)

    sd = standard_deviation(new_all_frames)
    save_to_file("FULL_SD.raw", sd, np.float32)

    print("Displaying correlation map...")
    mapper = CorrelationMapDisplayer(new_all_frames)
    mapper.display('spectral', -0.3, 1.0)
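# Aside (added): a minimal illustration of the NaN masking used above —
# boolean indexing with np.isnan zeroes invalid pixels in place, which keeps
# the later standard-deviation and correlation maps finite:
import numpy as np

frames = np.array([[1.0, np.nan], [np.nan, 4.0]], dtype=np.float32)
frames[np.isnan(frames)] = 0
print(frames)  # [[1. 0.] [0. 4.]]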