def serial_gradcam(model, smpl_census_imnames, indices, ind_batched_list, class_):
    """Grad-CAM plus guided backpropagation over batches of census images.

    Assumes numpy as np and a notebook tqdm aliased as tqdmn at module level,
    along with module-level W, H, conv_name and the GuidedBackprop,
    load_prepared_img and grad_cam_batch helpers defined elsewhere in the repo.
    """
    # CAM
    full_grad_cams, full_grad_cam_bgs = [], []
    # Guided BackPropagation
    print("Guided BackPropagation")
    guided_bprop = GuidedBackprop(model, output_index=class_)
    print("Batching")
    for i in tqdmn(range(len(ind_batched_list) - 1)):
        batch_smpl_census_imnames = smpl_census_imnames[ind_batched_list[i]:ind_batched_list[i + 1]]
        batch_sample_census_cell_imgs = [load_prepared_img(im)
                                         for im in batch_smpl_census_imnames]
        batch_classes = [class_ for j in range(ind_batched_list[i], ind_batched_list[i + 1])]
        grad_cams, grad_cam_rzs = grad_cam_batch(model,
                                                 np.stack(batch_sample_census_cell_imgs),
                                                 batch_classes, conv_name)
        masks = [guided_bprop.get_mask(img) for img in tqdmn(batch_sample_census_cell_imgs)]
        images = np.stack([np.sum(np.abs(mask), axis=2) for mask in tqdmn(masks)])
        # Combination: modulate the resized Grad-CAMs by the guided-backprop saliency
        gradcam_bgs = np.multiply(grad_cam_rzs, images)
        # Clip each combined map at its own 99th percentile to suppress outliers
        upper_percs = np.percentile(gradcam_bgs, 99, (1, 2))
        gradcam_bgs = np.minimum(gradcam_bgs, np.stack([k * np.ones((W, H)) for k in upper_percs]))
        full_grad_cams.append(grad_cams)
        full_grad_cam_bgs.append(gradcam_bgs)
    #
    full_grad_cams = np.vstack(full_grad_cams) if len(full_grad_cams) > 1 else full_grad_cams
    # Assumed completion (the snippet is truncated here): serialize_batch
    # unpacks two return values, so the combined maps are stacked and
    # returned alongside the raw Grad-CAMs.
    full_grad_cam_bgs = np.vstack(full_grad_cam_bgs) if len(full_grad_cam_bgs) > 1 else full_grad_cam_bgs
    return full_grad_cams, full_grad_cam_bgs
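# --- Added sketch (not from the original source) ----------------------------
# The 99th-percentile clipping above, shown in isolation so it can be tested
# without a model. The (N, H, W) shape and the helper name are assumptions
# made for illustration only.
import numpy as np

def clip_at_percentile(heatmaps, q=99):
    """Clip each (H, W) map in an (N, H, W) stack at its own q-th percentile."""
    caps = np.percentile(heatmaps, q, axis=(1, 2))    # one cap per map
    return np.minimum(heatmaps, caps[:, None, None])  # broadcast the caps

hm = np.ones((2, 32, 32))
hm[0, 0, 0] = 1e6
print(clip_at_percentile(hm).max())  # 1.0: the outlier pixel is capped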
import numpy as np
from numpy import linalg as LA
from tqdm import tqdm_notebook as tqdmn  # assumed import for the tqdmn alias


def matrix_factorization(X, P, Q, K, steps, alpha, beta):
    """Factorize X into P (n x K) and Q (m x K) by SGD over observed entries."""
    nonnull = np.where(~np.isnan(X))
    Q = Q.T
    for step in tqdmn(range(steps)):
        for idx in tqdmn(range(nonnull[0].size), leave=False):
            i, j = nonnull[0][idx], nonnull[1][idx]
            # calculate the error of the element
            eij = X[i][j] - np.dot(P[i, :], Q[:, j])
            # Frobenius norms of P and Q for regularization
            sum_of_norms = LA.norm(P) + LA.norm(Q)
            eij += (beta / 2.0) * sum_of_norms
            # compute the gradient from the error
            P[i, :] += alpha * (2 * eij * Q[:, j] - beta * P[i, :])
            Q[:, j] += alpha * (2 * eij * P[i, :] - beta * Q[:, j])
        V = P.dot(Q)
        error = np.sum(np.power(X[nonnull] - V[nonnull], 2))
        if error < 0.001:
            break
    return P, Q.T
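# --- Added usage sketch (not from the original source) ----------------------
# Fits a rank-2 factorization of a small ratings matrix with missing entries.
# The matrix and hyperparameters below are toy values chosen for illustration.
import numpy as np

rng = np.random.default_rng(0)
X = np.array([[5.0, 3.0, np.nan],
              [4.0, np.nan, 1.0],
              [1.0, 1.0, 5.0]])
K = 2
P0 = rng.random((X.shape[0], K))
Q0 = rng.random((X.shape[1], K))
P, Q = matrix_factorization(X, P0, Q0, K, steps=500, alpha=0.01, beta=0.02)
print(P.dot(Q.T))  # observed cells are roughly recovered; NaNs get predictions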
from typing import Union

import pandas as pd
from rdkit import Chem
from tqdm import tqdm, tqdm_notebook as tqdmn  # assumed import for the tqdmn alias


def smiles2mol(
    df: pd.DataFrame,
    smiles_column_name,
    mols_column_name,
    drop_nulls: bool = True,
    progressbar: Union[None, str] = None,
) -> pd.DataFrame:
    """
    Convert a column of SMILES strings into RDKit Mol objects.

    Automatically drops invalid SMILES, as determined by RDKit.

    Method chaining usage:

    .. code-block:: python

        df = (
            pd.DataFrame(...)
            .smiles2mol(smiles_column_name='smiles', mols_column_name='mols')
        )

    A progressbar can optionally be used.

    - Pass in "notebook" to show a tqdm notebook progressbar
      (ipywidgets must be enabled with your Jupyter installation).
    - Pass in "terminal" to show a tqdm progressbar. Better suited for
      use with scripts.
    - None is the default value - no progress bar will be shown.

    :param df: pandas DataFrame.
    :param smiles_column_name: Name of column that holds the SMILES strings.
    :param mols_column_name: Name to be given to the new mols column.
    :param drop_nulls: Whether to drop rows whose mols failed to be constructed.
    :param progressbar: Whether to show a progressbar or not.
    """
    valid_progress = ["notebook", "terminal", None]
    if progressbar not in valid_progress:
        raise ValueError(f"progressbar kwarg must be one of {valid_progress}")

    if progressbar is None:
        df[mols_column_name] = df[smiles_column_name].apply(
            lambda x: Chem.MolFromSmiles(x)
        )
    else:
        if progressbar == "notebook":
            tqdmn().pandas(desc="mols")
        elif progressbar == "terminal":
            tqdm.pandas(desc="mols")
        df[mols_column_name] = df[smiles_column_name].progress_apply(
            lambda x: Chem.MolFromSmiles(x)
        )

    if drop_nulls:
        df.dropna(subset=[mols_column_name], inplace=True)
    df.reset_index(inplace=True, drop=True)
    return df
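# --- Added usage sketch (not from the original source) ----------------------
# Calls smiles2mol directly on a tiny frame; requires rdkit and pandas. The
# invalid third SMILES string is there to demonstrate drop_nulls.
import pandas as pd

df_demo = pd.DataFrame({"smiles": ["CCO", "c1ccccc1", "not_a_smiles"]})
out = smiles2mol(df_demo, "smiles", "mols", drop_nulls=True, progressbar=None)
print(len(out))  # 2 - the row whose SMILES failed to parse was dropped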
from joblib import Parallel, delayed


def parallel_make_dataset(im_data, CPU_USE, null_thresh=1):
    # Extract all images in chunks distributed according to CPU_USE;
    # `chunks` and `parallel_folder_extraction` are repo helpers (see the
    # sketch below).
    if CPU_USE > 1:
        pre_full = Parallel(n_jobs=CPU_USE)(
            delayed(parallel_folder_extraction)(im_arr, null_thresh=null_thresh)
            for im_arr in tqdmn(chunks(im_data, CPU_USE)))
    else:
        pre_full = [parallel_folder_extraction(im_arr, null_thresh=null_thresh)
                    for im_arr in tqdmn(chunks(im_data, CPU_USE))]
    # Flatten the per-chunk results into a single list
    return [data for pre in pre_full for data in pre]
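# --- Added sketch (not from the original source) ----------------------------
# `chunks` is not defined in this snippet. A common implementation, and an
# assumption about the original helper, splits the list into n roughly equal
# slices so each joblib worker gets one slice:
def chunks(lst, n):
    """Yield n roughly equal-sized slices of lst."""
    size = max(1, -(-len(lst) // n))  # ceil(len(lst) / n)
    for i in range(0, len(lst), size):
        yield lst[i:i + size]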
def smiles2mol(
    df: pd.DataFrame,
    smiles_col: str,
    mols_col: str,
    drop_nulls: bool = True,
    progressbar: Union[None, str] = None,
):
    """
    Convert a column of SMILES strings into RDKit Mol objects.

    Automatically drops invalid SMILES, as determined by RDKit.

    Method chaining usage:

    .. code-block:: python

        df = (
            pd.DataFrame(...)
            .smiles2mol(smiles_col='smiles', mols_col='mols')
        )

    :param df: pandas DataFrame.
    :param smiles_col: Name of column that holds the SMILES strings.
    :param mols_col: Name to be given to the new mols column.
    :param drop_nulls: Whether to drop rows whose mols failed to be constructed.
    :param progressbar: Whether to show a progressbar; one of "notebook",
        "terminal", or None (no progress bar).
    """
    valid_progress = ["notebook", "terminal", None]
    if progressbar not in valid_progress:
        raise ValueError(f"progressbar kwarg must be one of {valid_progress}")

    if progressbar is None:
        df[mols_col] = df[smiles_col].apply(lambda x: Chem.MolFromSmiles(x))
    else:
        if progressbar == "notebook":
            tqdmn().pandas(desc="mols")
        elif progressbar == "terminal":
            tqdm.pandas(desc="mols")
        df[mols_col] = df[smiles_col].progress_apply(
            lambda x: Chem.MolFromSmiles(x))

    if drop_nulls:
        df.dropna(subset=[mols_col], inplace=True)
    df.reset_index(inplace=True, drop=True)
    return df
def serialize_batch(model, ua_data, gdf_full_im_df, indices, ind_list,
                    class_poor, class_rich, ideal_workload):
    """Compute Grad-CAM raster statistics for the tiles listed in ind_list.

    Relies on module-level W, H, MAX_BS and IMG_OUTPUT_DIR, on rasterio's
    from_bounds, and on the compute_statistics helper defined elsewhere.
    """
    print("Overlaying")
    test_cores = [gpd.overlay(ua_data.iloc[indices[ind]],
                              gdf_full_im_df.iloc[ind:(ind + 1)],
                              how='intersection')
                  for ind in tqdmn(ind_list)]
    print("Bounding")
    ts = [from_bounds(gdf_full_im_df[ind:(ind + 1)].bounds.minx.values[0],
                      gdf_full_im_df[ind:(ind + 1)].bounds.miny.values[0],
                      gdf_full_im_df[ind:(ind + 1)].bounds.maxx.values[0],
                      gdf_full_im_df[ind:(ind + 1)].bounds.maxy.values[0],
                      W, H)
          for ind in tqdmn(ind_list)]
    print("Generating Images")
    sample_datas = [gdf_full_im_df.iloc[ind] for ind in tqdmn(ind_list)]
    smpl_census_imnames = [IMG_OUTPUT_DIR + val.path2im for val in tqdmn(sample_datas)]
    #
    data = []
    for batch_idx in range(0, len(ind_list), ideal_workload):
        batch_ind_list = ind_list[batch_idx:(batch_idx + ideal_workload)]
        batch_smpl_census_imnames = smpl_census_imnames[batch_idx:(batch_idx + ideal_workload)]
        batch_indices = indices[batch_idx:(batch_idx + ideal_workload)]
        # Cut points every MAX_BS items, closed with the list length
        # (see the sketch after this function)
        ind_batched_list = list(np.arange(0, len(batch_ind_list), MAX_BS))
        if ind_batched_list[-1] != len(batch_ind_list):
            ind_batched_list.append(len(batch_ind_list))
        #
        print("GradCaming POOR")
        gcams_poor, gbgs_poor = serial_gradcam(model, batch_smpl_census_imnames,
                                               batch_indices, ind_batched_list,
                                               class_poor)
        print("GradCaming RICH")
        gcams_rich, gbgs_rich = serial_gradcam(model, batch_smpl_census_imnames,
                                               batch_indices, ind_batched_list,
                                               class_rich)
        print("Computing raster statistics")
        for j, ind in tqdmn(enumerate(batch_ind_list)):
            val_idINSPIRE = gdf_full_im_df.iloc[ind:(ind + 1)].idINSPIRE.values[0]
            data.append(
                compute_statistics(gbgs_poor[j], gbgs_rich[j],
                                   gcams_poor[j], gcams_rich[j],
                                   ts[ind], test_cores[ind],
                                   val_idINSPIRE, class_poor, class_rich))
    return data
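# --- Added sketch (not from the original source) ----------------------------
# The cut-point construction above, in isolation: boundaries every MAX_BS
# items, closed with the list length, consumed pairwise by serial_gradcam.
# MAX_BS = 4 is a hypothetical value for illustration.
import numpy as np

MAX_BS = 4
batch_ind_list = list(range(10))
cut_points = list(np.arange(0, len(batch_ind_list), MAX_BS))
if cut_points[-1] != len(batch_ind_list):
    cut_points.append(len(batch_ind_list))
print(cut_points)  # [0, 4, 8, 10]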
if __name__ == '__main__':
    print("Generating Full DataSet")
    full_im_df = generate_full_idINSPIRE(UA_DIR, AERIAL_DIR, CENSUS_DIR, IMG_OUTPUT_DIR)
    city_assoc = pd.read_csv(IMG_OUTPUT_DIR + "city_assoc.csv")
    full_im_df_ua = pd.merge(full_im_df, city_assoc, on="idINSPIRE")
    full_im_df_ua = full_im_df_ua[full_im_df_ua.FUA_NAME == city]
    #
    gdf_full_im_df = full_im_df_ua.to_crs({'init': 'epsg:3035'})
    #
    print("Generating UA DataSet")
    ua_data = gpd.GeoDataFrame(pd.concat(
        [gpd.read_file(d)
         for d in tqdmn(glob.glob(UA_DIR + "**/Shapefiles/*UA2012.shp"))]))
    ua_data.crs = {'init': 'epsg:3035'}
    #
    print("Joining UA + Full")
    indices = sjoin(ua_data, gdf_full_im_df)
    #
    print("Loading Model")
    # indices to distribute among cores
    folds_data = pd.concat(
        [pd.read_csv(fold_file, header=0, sep=",")
         for fold_file in glob.glob(MODEL_OUTPUT_DIR + "/*last_best_models.csv")],
        axis=0).reset_index(drop=True)
    best_model_city = folds_data.loc[folds_data["Validation loss"].idxmin()]["Model file"]
    print("Loading Weights {}".format(best_model_city))
    #
    eff_model = load_model(MODEL_OUTPUT_DIR + best_model_city,