def multi_rollout(envs, pause=.2):
    """Roll out one full episode in each environment and collect trajectories.

    Parameters
    ----------
    envs : iterable
        Environments exposing ``reset()`` and ``step(action)``, where
        ``step`` returns ``(state, reward, done)``.
    pause : float, optional
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    tuple of lists
        ``(states, rewards, log_probs, entropies)`` accumulated over all
        episodes, in visitation order.
    """
    states, rewards, log_probs, entropies = [], [], [], []

    # play an episode, appending every step to the shared lists above
    def helper(env):
        state = env.reset()
        while True:  # Don't infinite loop while learning
            # select an action (select_action is a module-level policy helper)
            action, log_prob, entropy = select_action(to_array(state))
            states.append(list(to_array(state)))
            log_probs.append(log_prob)
            entropies.append(entropy)
            # take the action and move to next state
            state, reward, done = env.step(action)
            rewards.append(reward)
            if done:
                break

    # BUG FIX: the original ran every episode twice -- once in this serial
    # loop and once more via ``parallel(delayed(helper)(...))``.  The joblib
    # pass appended to pickled copies of the lists inside worker processes,
    # so its results were silently discarded; it only duplicated the work.
    # Only the serial rollout (whose appends are actually kept) remains.
    for myenv in envs:
        helper(myenv)
    return states, rewards, log_probs, entropies
def test_bias_batch_recommend_pickle5(ml_folds: MLFolds):
    """Evaluate a damped-Bias TopN recommender while loky uses 'pickle5'.

    Temporarily switches the loky serializer to ``pickle5``, runs a 2-job
    batch evaluation, restores the previous serializer, and checks that the
    resulting recommendations have positive nDCG.
    """
    from joblib.externals import loky

    ranker = TopN(Bias(damping=5))
    previous_pickler = loky.backend.reduction.get_loky_pickler_name()
    try:
        loky.set_loky_pickler('pickle5')
        recs = ml_folds.eval_all(ranker, n_jobs=2)
    finally:
        # Always restore the serializer, even if evaluation fails.
        loky.set_loky_pickler(previous_pickler)
    ml_folds.check_positive_ndcg(recs)
import numpy as np import pandas as pd import psutil from scipy.stats import norm as normal from tqdm import tqdm # PKG from deepsig.conversion import ( ArrayLike, ScoreCollection, score_pair_conversion, ) from deepsig.utils import _progress_iter, _get_num_models # MISC set_loky_pickler("dill") # Avoid weird joblib error with multi_aso @score_pair_conversion def aso( scores_a: ArrayLike, scores_b: ArrayLike, confidence_level: float = 0.95, num_comparisons: int = 1, num_samples: int = 1000, num_bootstrap_iterations: int = 1000, dt: float = 0.005, num_jobs: int = 1, show_progress: bool = True, seed: Optional[int] = None, _progress_bar: Optional[tqdm] = None,
def cluster_datasets_rsa(args) -> None:
    """Cluster PanDDA datasets by real-space electron-density similarity.

    Pipeline: load datasets, truncate on resolution, load xmaps, align all
    models to a reference structure, resample maps onto a common grid,
    rescale, image-align, embed, cluster, then write per-cluster mean maps
    and a labelled-embedding CSV to ``args.out_dir``.

    NOTE(review): despite the ``-> None`` annotation this returns
    ``cluster_df`` -- presumably a pandas DataFrame; confirm against callers.

    ``args`` must provide ``processor``, ``n_procs``, ``data_dirs`` and
    ``out_dir`` attributes (argparse-style namespace).
    """
    # Get executor
    # NOTE(review): if args.processor is neither "joblib" nor "map",
    # ``executor`` is never bound and the mapdict calls below will raise.
    if args.processor == "joblib":
        set_loky_pickler("pickle")  # make loky workers use stdlib pickle
        executor = ExecutorJoblib(int(args.n_procs))
    elif args.processor == "map":
        executor = ExecutorMap()
    # presumably a serial map()-style executor, used for the alignment step
    executor_map = ExecutorMap()
    print("defined executor")

    # Get Dataset
    print("Loading datasets")
    dataset = MultiCrystalDataset.mcd_from_pandda_input_dir(
        p.Path(args.data_dirs))
    # dataset.datasets = {key: value for key, value in list(dataset.datasets.items())[:150]}
    print("got datasets")
    print("\tBefore tuncation on res there are {} datasets".format(
        len(dataset.datasets)))
    dataset = truncate_dataset_on_resolution(dataset)
    print("\tAfter truncationt on res here are {} datasets".format(
        len(dataset.datasets)))

    # Select lowest res dataset
    min_res = get_min_res(dataset)

    # Load xmaps
    print("Getting exmpas")
    xmaps = mapdict(
        Loader(min_res=min_res),
        dataset.datasets,
        executor,
    )

    # Align models to ref: first dataset's structure is the reference
    reference_dtag = list(dataset.datasets.keys())[0]
    reference_model = dataset.datasets[reference_dtag].structure.structure

    # Get model alignmetns
    aligners = {}
    for dtag, d in dataset.datasets.items():
        # print("\tAligning {} to {}".format(dtag, reference_dtag))
        aligners[dtag] = StructureAligner(
            reference_model,
            d.structure.structure,
        )
    alignments = mapdict(
        wrap_call,
        aligners,
        executor_map,
    )

    # Sample Xmaps uniformly
    # grid_params = [50,50,50]
    # Grid dimensions follow the reference cell edge lengths (presumably in
    # Angstroms -- confirm against the clipper cell API).
    reference_cell = xmaps[reference_dtag].xmap.cell
    grid_params = (
        int(reference_cell.a),
        int(reference_cell.b),
        int(reference_cell.c),
    )
    print("Grid params are: {}".format(grid_params))
    samplers = {}
    for dtag, xmap in xmaps.items():
        # rtop packs the mobile->reference transform: index 0 = rotation,
        # index 1 = translation (as consumed below).
        rtop = alignments[dtag]
        mobile_to_ref_translation = rtop[1]
        mobile_to_ref_rotation = rtop[0]
        # print("\tDataset {} translation is: {}".format(dtag, mobile_to_ref_translation))
        # print("\tDataset {} rotation is: {}".format(dtag, mobile_to_ref_rotation.flatten()))
        samplers[dtag] = Sampler(
            xmap,
            grid_params,
            mobile_to_ref_translation,
            mobile_to_ref_rotation,
        )
    nxmaps = mapdict(
        wrap_call,
        samplers,
        executor,
    )

    # Convert nxmaps to np
    print("Converting xmaps to np")
    xmaps_np = mapdict(
        nxmap_to_numpy,
        nxmaps,
        executor,
    )

    # Rescale
    rescaler = Rescaler()
    xmaps_np = mapdict(
        rescaler,
        xmaps_np,
        executor,
    )
    postcondtion_scaling(xmaps_np)

    # Align xmaps (image alignment against the first map as the static image)
    print("aligning xmaps")
    static_image = xmaps_np[list(xmaps_np.keys())[0]]
    aligners = {}
    for dtag, xmap_np in xmaps_np.items():
        aligners[dtag] = ImageAligner(
            static_image,
            xmap_np,
        )
    xmaps_aligned = mapdict(
        wrap_call,
        aligners,
        executor,
    )

    # Embed the xmaps into a latent space
    # NOTE(review): the embedding uses the *unaligned* xmaps_np while the
    # cluster mean maps below use xmaps_aligned -- confirm this is intended.
    print("Dimension reducing")
    xmap_embedding = embed_xmaps(xmaps_np.values())

    # Cluster the embedding
    print("clustering")
    clustered_xmaps = cluster_embedding(xmap_embedding)

    # Make dataframe with cluster and position
    print("getting cluster dataframe")
    cluster_df = get_cluster_df(
        xmaps_np,
        xmap_embedding,
        clustered_xmaps,
    )
    cluster_df_summary(cluster_df)

    # Get clusters
    print("associating xmaps with clusters")
    map_clusters = get_map_clusters(
        cluster_df,
        xmaps_aligned,
    )

    # Make mean maps (run serially via a fresh map executor)
    executor_seriel = ExecutorMap()
    mean_maps_np = mapdict(
        lambda x: make_mean_map(x),
        map_clusters,
        executor_seriel,
    )

    # Output the mean maps
    print("Outputting mean maps")
    template_map = xmaps[list(xmaps.keys())[0]]
    # print(template_map)
    # cell = dataset.datasets[list(dataset.datasets.keys())[0]].reflections.hkl_info.cell
    # NOTE(review): clipper Cell_descr conventionally takes angles in
    # degrees; np.pi / 2 (~1.57) looks like radians -- confirm intent.
    cell = clipper_python.Cell(
        clipper_python.Cell_descr(
            grid_params[0],
            grid_params[1],
            grid_params[2],
            np.pi / 2,
            np.pi / 2,
            np.pi / 2,
        ))
    for cluster_num, mean_map_np in mean_maps_np.items():
        output_mean_nxmap(
            mean_map_np,
            cell,
            p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
            grid_params,
        )
    # output_mean_nxmap(mean_map_np_list,
    #                   cell,
    #                   p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
    #                   )

    # Ouptut the csv
    print("Outputting csv")
    output_labeled_embedding_csv(
        cluster_df,
        str(p.Path(args.out_dir) / "labelled_embedding.csv"),
    )

    # Output the graph
    # output_cluster_graph(cluster_df, str(out_dir / "output_graph.png"))

    return cluster_df
def cluster_datasets(args) -> None:
    """Cluster PanDDA datasets by electron-density similarity.

    Pipeline: load datasets, truncate on resolution, load xmaps on a grid
    derived from the reference dataset, structure-align the maps, rescale,
    filter to a common grid, embed, cluster, then write per-cluster mean
    maps and a labelled-embedding CSV to ``args.out_dir``.

    NOTE(review): despite the ``-> None`` annotation this returns
    ``cluster_df``; confirm the intended annotation against callers.
    """
    # Get executor
    # NOTE(review): ``executor`` stays unbound for any other processor value.
    if args.processor == "joblib":
        set_loky_pickler("pickle")  # make loky workers use stdlib pickle
        executor = ExecutorJoblib(20)  # worker count hard-coded to 20 here
    elif args.processor == "map":
        executor = ExecutorMap()
    executor_map = ExecutorMap()  # NOTE(review): assigned but unused below
    print("defined executor")

    # Get Dataset
    print("Loading datasets")
    dataset = MultiCrystalDataset.mcd_from_pandda_input_dir(
        p.Path(args.data_dirs))
    # dataset.datasets = {key: value for key, value in list(dataset.datasets.items())[:150]}
    print("got datasets")
    print("\tBefore tuncation on res there are {} datasets".format(
        len(dataset.datasets)))
    dataset = truncate_dataset_on_resolution(dataset)
    print("\tAfter truncationt on res here are {} datasets".format(
        len(dataset.datasets)))
    #
    # # unit_cell_clustering_labels = cluster_unit_cell_params(dataset)
    # print("\tBefore tuncation there are {} datasets".format(len(dataset.datasets)))
    # dataset = truncate_dataset_on_clustering(dataset, unit_cell_clustering_labels)
    # print("\tAfter truncationt here are {} datasets".format(len(dataset.datasets)))

    # Select lowest res dataset
    min_res = get_min_res(dataset)
    # dataset = truncate_on_res(dataset)
    # d = dataset.datasets[list(dataset.datasets.keys())[0]]
    # xmap = MCDXMap.xmap_from_dataset(d,
    #                                  resolution=Resolution(min_res),
    #                                  )
    # print(xmap)
    # xmap_np = xmap_to_numpy_crystalographic_axis(xmap)
    # print(np.std(xmap_np))
    #
    # with open("test.pkl", "wb") as f:
    #     pickle.dump(xmap, f)
    #
    # with open("test.pkl", "rb") as f:
    #     xmap_reloaded = pickle.load(f)
    #
    # xmap_reloaded_np = xmap_to_numpy_crystalographic_axis(xmap_reloaded)
    # print(np.std(xmap_reloaded_np))
    #
    # exit()

    # Load xmaps
    print("Getting exmpas")
    # xmaps = mapdict(lambda d: MCDXMap.xmap_from_dataset(d,
    #                                                     resolution=Resolution(min_res),
    #                                                     ),
    #                 dataset.datasets,
    #                 executor,
    #                 )
    reference_dataset = dataset.datasets[list(dataset.datasets.keys())[0]]
    # print(dir(reference_dataset))
    # Grid sampling derived from the reference dataset's spacegroup/cell at
    # the common (lowest) resolution; all maps are loaded onto this grid.
    reference_grid = clipper_python.Grid_sampling(
        reference_dataset.reflections.hkl_info.spacegroup,
        reference_dataset.reflections.hkl_info.cell,
        Resolution(min_res),
    )
    grid_params = (
        reference_grid.nu,
        reference_grid.nv,
        reference_grid.nw,
    )
    xmaps = mapdict(
        Loader(
            min_res=None,
            grid_params=grid_params,
        ),
        dataset.datasets,
        executor,
    )
    # print("Xmap std is: {}".format(np.std(xmaps[list(xmaps.keys())[0]].xmap.export_numpy())))

    # Convert xmaps to np
    print("Converting xmaps to np")
    # xmaps_np = mapdict(xmap_to_numpy_crystalographic_axis,
    #                    xmaps,
    #                    executor,
    #                    )
    # postcondition_alignment(xmaps_np)
    # xmaps_np = mapdict(interpolate_uniform_grid,
    #                    xmaps,
    #                    executor,
    #                    )
    # print("\tXmap representative shape is: {}".format(xmaps_np[list(xmaps_np.keys())[0]].shape))
    static_structure = dataset.datasets[list(
        dataset.datasets.keys())[0]].structure.structure
    # print("Got static structure")
    aligners = {}
    for dtag, xmap in xmaps.items():
        aligners[dtag] = StructureAligner(
            static_structure,
            dataset.datasets[dtag].structure.structure,
            xmap,
        )
    xmaps_aligned = mapdict(
        wrap_call,
        aligners,
        executor,
    )

    # Rescale
    rescaler = Rescaler()
    xmaps_np = mapdict(
        rescaler,
        xmaps_aligned,
        executor,
    )
    postcondtion_scaling(xmaps_np)

    # Align xmaps
    print("aligning xmaps")
    # static_map = xmaps_np[list(xmaps_np.keys())[0]]
    # aligner = Aligner(static_map)
    # xmaps_aligned = mapdict(aligner,
    #                         xmaps_np,
    #                         executor,
    #                         )
    xmaps_np = filter_on_grid(xmaps_np)

    # Embed the xmaps into a latent space
    print("Dimension reducing")
    xmap_embedding = embed_xmaps(xmaps_np.values())

    # Cluster the embedding
    print("clustering")
    clustered_xmaps = cluster_embedding(xmap_embedding)

    # Make dataframe with cluster and position
    print("getting cluster dataframe")
    cluster_df = get_cluster_df(
        xmaps_np,
        xmap_embedding,
        clustered_xmaps,
    )
    cluster_df_summary(cluster_df)

    # Get clusters
    print("associating xmaps with clusters")
    map_clusters = get_map_clusters(
        cluster_df,
        xmaps_aligned,
    )
    # print("Map clusters: {}".format(map_clusters))

    # Make mean maps (run serially via a fresh map executor)
    executor_seriel = ExecutorMap()
    mean_maps_np = mapdict(
        lambda x: make_mean_map(x),
        map_clusters,
        executor_seriel,
    )
    # print("Mean maps mp: {}".format(mean_maps_np))

    # Output the mean maps
    print("Outputting mean maps")
    template_map = xmaps[list(xmaps.keys())[0]]
    # print(template_map)
    # cell = dataset.datasets[list(dataset.datasets.keys())[0]].reflections.hkl_info.cell
    # NOTE(review): clipper Cell_descr conventionally takes angles in
    # degrees; np.pi / 2 (~1.57) looks like radians -- confirm intent.
    # Also NOTE(review): ``cell`` is built but never used on this path.
    cell = clipper_python.Cell(
        clipper_python.Cell_descr(100, 100, 100, np.pi / 2, np.pi / 2,
                                  np.pi / 2))
    for cluster_num, mean_map_np_list in mean_maps_np.items():
        output_mean_map(
            template_map,
            mean_map_np_list,
            p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
        )
    # output_mean_nxmap(mean_map_np_list,
    #                   cell,
    #                   p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
    #                   )

    # Ouptut the csv
    print("Outputting csv")
    output_labeled_embedding_csv(
        cluster_df,
        str(p.Path(args.out_dir) / "labelled_embedding.csv"),
    )

    # Output the graph
    # output_cluster_graph(cluster_df, str(out_dir / "output_graph.png"))

    return cluster_df
def cluster_datasets_tm(args) -> None:
    """Recentre a reference model, align every dataset model to it, and write
    the aligned structures to ``args.out_dir``.

    The map-clustering tail of this function is unreachable: ``exit()`` is
    called right after the aligned structures are output.  The tail is kept
    verbatim from the original for reference.

    NOTE(review): despite ``-> None`` the (unreachable) tail ends with
    ``return cluster_df``.

    ``args`` must provide ``processor``, ``data_dirs`` and ``out_dir``.
    """
    # Get executor
    # NOTE(review): ``executor`` stays unbound for any other processor value.
    if args.processor == "joblib":
        set_loky_pickler("pickle")
        executor = ExecutorJoblib(20)
    elif args.processor == "map":
        executor = ExecutorMap()
    executor_map = ExecutorMap()
    print("defined executor")

    # Get Dataset
    print("Loading datasets")
    dataset = MultiCrystalDataset.mcd_from_pandda_input_dir(
        p.Path(args.data_dirs))
    print("got datasets")
    print("\tBefore tuncation on res there are {} datasets".format(
        len(dataset.datasets)))
    dataset = truncate_dataset_on_resolution(dataset)
    print("\tAfter truncationt on res here are {} datasets".format(
        len(dataset.datasets)))

    # Select lowest res dataset
    min_res = get_min_res(dataset)

    # # Align structures
    # aligners =
    # alignments = mapdict(wrap_call,
    #                      alginers,
    #                      dataset.datasets.items(),
    #                      )
    #
    # # Load xmaps
    # print("Getting exmpas")
    # xmaps = mapdict(Loader(min_res=None,
    #                        grid_params=grid_params,
    #                        ),
    #                 dataset.datasets,
    #                 executor,
    #                 )

    # Sample to reference frame
    cell = clipper_python.Cell(
        clipper_python.Cell_descr(100, 100, 100, np.pi / 2, np.pi / 2,
                                  np.pi / 2))
    spacegroup = clipper_python.Spacegroup(clipper_python.Spgr_descr("1"))

    # Get reference model (first dataset) and dump it before recentring
    reference_model = list(dataset.datasets.items())[0][1].structure.structure
    io = PDBIO()
    io.set_structure(reference_model)
    io.save('out_before.pdb')

    # Recentre the reference model on its mean atomic coordinate (identity
    # rotation; pure translation).
    atoms_list = np.array(
        [atom.get_coord() for atom in reference_model.get_atoms()])
    print(atoms_list)
    mean_coords = np.mean(atoms_list, axis=0)
    print(mean_coords)
    # rotation = rotmat(Vector(0,1,0), Vector(1, 0, 0))
    rotation = np.eye(3)
    translation = np.array(mean_coords, 'f')
    for atom in reference_model.get_atoms():
        atom.transform(rotation, -translation)
    io = PDBIO()
    io.set_structure(reference_model)
    io.save('out.pdb')
    # exit()

    # Get model alignmetns
    # BUG FIX: the original lambdas captured the loop variable ``d`` by
    # reference (late binding), so every deferred aligner ran against the
    # *last* dataset.  Binding ``d`` as a default argument freezes the
    # per-dataset value at definition time.
    aligners = {
        dtag: lambda d=d: align(ref_structure=reference_model,
                                mobile_structure=d.structure.structure)
        for dtag, d in dataset.datasets.items()
    }
    print(aligners)
    alignments = mapdict(
        wrap_call,
        aligners,
        executor_map,
    )

    # align structures
    def align_structures(alignment, structure, output_path):
        # Apply the computed alignment in place and save the moved model.
        alignment.apply(structure)
        io = PDBIO()
        io.set_structure(structure)
        print("Saving to {}".format(output_path))
        io.save(str(output_path))
        return structure

    # BUG FIX: same late-binding problem as above -- ``dtag`` and ``d`` are
    # bound as defaults so each thunk keeps its own dataset.
    structure_aligners = {
        dtag: lambda dtag=dtag, d=d: align_structures(
            alignments[dtag],
            d.structure.structure,
            p.Path(args.out_dir) / dtag,
        )
        for dtag, d in dataset.datasets.items()
    }
    print(structure_aligners)
    print("aligning and outputting structures")
    aligned_structures = mapdict(
        wrap_call,
        structure_aligners,
        executor_map,
    )
    # structure_aligners = {dtag: lambda: alignment.apply(dataset.dataset[dtag].structure.structure)
    #                       for dtag, alignment
    #                       in alignments.items()
    #                       }
    # aligned_structures = mapdict(wrap_call,
    #                              structure_aligners,
    #                              executor_map,
    #                              )
    #
    # # ouput aligned structures
    # structure_outputters = {dtag: lambda: PDBIO().set_structure()}

    exit()

    # ------------------------------------------------------------------
    # Everything below is unreachable (kept verbatim from the original).
    # NOTE(review): ``xmaps`` is never defined in this function, so this
    # tail would raise NameError even without the exit() above.
    # ------------------------------------------------------------------

    # Convert xmaps to np
    print("Converting xmaps to np")
    static_structure = dataset.datasets[list(
        dataset.datasets.keys())[0]].structure.structure
    # print("Got static structure")
    aligners = {}
    for dtag, xmap in xmaps.items():
        aligners[dtag] = StructureAligner(
            static_structure,
            dataset.datasets[dtag].structure.structure,
            xmap,
        )
    xmaps_aligned = mapdict(
        wrap_call,
        aligners,
        executor,
    )

    # Rescale
    rescaler = Rescaler()
    xmaps_np = mapdict(
        rescaler,
        xmaps_aligned,
        executor,
    )
    postcondtion_scaling(xmaps_np)

    # Align xmaps
    print("aligning xmaps")
    xmaps_np = filter_on_grid(xmaps_np)

    # Embed the xmaps into a latent space
    print("Dimension reducing")
    xmap_embedding = embed_xmaps(xmaps_np.values())

    # Cluster the embedding
    print("clustering")
    clustered_xmaps = cluster_embedding(xmap_embedding)

    # Make dataframe with cluster and position
    print("getting cluster dataframe")
    cluster_df = get_cluster_df(
        xmaps_np,
        xmap_embedding,
        clustered_xmaps,
    )
    cluster_df_summary(cluster_df)

    # Get clusters
    print("associating xmaps with clusters")
    map_clusters = get_map_clusters(
        cluster_df,
        xmaps_aligned,
    )

    # Make mean maps
    executor_seriel = ExecutorMap()
    mean_maps_np = mapdict(
        lambda x: make_mean_map(x),
        map_clusters,
        executor_seriel,
    )

    # Output the mean maps
    print("Outputting mean maps")
    template_map = xmaps[list(xmaps.keys())[0]]
    # print(template_map)
    # cell = dataset.datasets[list(dataset.datasets.keys())[0]].reflections.hkl_info.cell
    cell = clipper_python.Cell(
        clipper_python.Cell_descr(100, 100, 100, np.pi / 2, np.pi / 2,
                                  np.pi / 2))
    for cluster_num, mean_map_np_list in mean_maps_np.items():
        output_mean_map(
            template_map,
            mean_map_np_list,
            p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
        )
    # output_mean_nxmap(mean_map_np_list,
    #                   cell,
    #                   p.Path(args.out_dir) / "{}.ccp4".format(cluster_num),
    #                   )

    # Ouptut the csv
    print("Outputting csv")
    output_labeled_embedding_csv(
        cluster_df,
        str(p.Path(args.out_dir) / "labelled_embedding.csv"),
    )

    # Output the graph
    # output_cluster_graph(cluster_df, str(out_dir / "output_graph.png"))

    return cluster_df
def cluster_datasets_luigi(ccp4_map_paths, out_dir, processor,
                           n_procs) -> pd.DataFrame:
    """Cluster pre-computed CCP4 maps and write per-cluster mean maps.

    Parameters
    ----------
    ccp4_map_paths : dict
        Mapping dtag -> {"path": <ccp4 file path>}; only the "path" key is
        read here.
    out_dir : path-like
        Output directory for "<cluster>.ccp4" mean maps and the CSV.
    processor : str
        "joblib" for parallel loading, "map" for serial.
    n_procs : int-like
        Worker count for the joblib executor.

    Returns
    -------
    pd.DataFrame
        Per-dataset cluster labels and embedding coordinates.
    """
    # Get executor
    # NOTE(review): ``executor`` stays unbound for other processor values.
    if processor == "joblib":
        set_loky_pickler("pickle")  # make loky workers use stdlib pickle
        executor = ExecutorJoblib(int(n_procs))
    elif processor == "map":
        executor = ExecutorMap()

    # Load xmaps to np
    print("Getting exmpas")
    loaders = {}
    for dtag in ccp4_map_paths:
        loaders[dtag] = LoaderCCP4(ccp4_map_paths[dtag]["path"])
    xmaps_np = mapdict(
        wrap_call,
        loaders,
        executor,
    )

    # Truncate to same shape: crop every map to the element-wise minimum
    # shape so they can be stacked/embedded together.
    dims = np.vstack([xmap.shape for xmap in xmaps_np.values()])
    print(dims)
    truncation_shape = np.min(
        dims,
        axis=0,
    )
    print("The truncation shape is: {}".format(truncation_shape))
    xmaps_np = {
        dtag: xmap_np[:truncation_shape[0], :truncation_shape[1], :
                      truncation_shape[2], ]
        for dtag, xmap_np in xmaps_np.items()
    }

    # Embed the xmaps into a latent space
    print("Dimension reducing")
    xmap_embedding = embed_xmaps(xmaps_np.values())

    # Cluster the embedding
    print("clustering")
    # clustered_xmaps = cluster_embedding(xmap_embedding)
    # ``cluster_embedding`` returns a fitted clusterer -- presumably HDBSCAN
    # given the exemplars_/outlier_scores_ attributes; confirm.
    clustering = cluster_embedding(xmap_embedding)
    exemplars = clustering.exemplars_
    clustered_xmaps = clustering.labels_
    outlier_scores = clustering.outlier_scores_
    print("Exemplars: {}".format(exemplars))
    print("Outlier scores: {}".format(outlier_scores))
    print("Labels: {}".format(clustered_xmaps))
    dtags = np.array(list(xmaps_np.keys()))
    # For each cluster (label -1 = noise, skipped) pick the member with the
    # lowest outlier score as its exemplar.
    cluster_exemplars = {}
    for cluster_num in np.unique(clustered_xmaps):
        if cluster_num == -1:
            continue
        cluster_dtags = dtags[clustered_xmaps == cluster_num]
        cluster_outlier_scores = outlier_scores[clustered_xmaps == cluster_num]
        cluster_exemplars[cluster_num] = cluster_dtags[np.argmin(
            cluster_outlier_scores)]
        print("Outlier scores dict: {}".format({
            dtag: outlier_score
            for dtag, outlier_score in zip(
                list(cluster_dtags),
                list(cluster_outlier_scores),
            )
        }))
    print("Cluster exemplars: {}".format(cluster_exemplars))

    # Make dataframe with cluster and position
    print("getting cluster dataframe")
    cluster_df = get_cluster_df(
        xmaps_np,
        xmap_embedding,
        clustered_xmaps,
    )
    cluster_df_summary(cluster_df)

    # Get clusters
    print("associating xmaps with clusters")
    map_clusters = get_map_clusters(
        cluster_df,
        xmaps_np,
    )

    # Make mean maps (run serially via a fresh map executor)
    executor_seriel = ExecutorMap()
    mean_maps_np = mapdict(
        lambda x: make_mean_map(x),
        map_clusters,
        executor_seriel,
    )

    # Output the mean maps, using the first input map as the grid template
    print("Outputting mean maps")
    template_nxmap = load_ccp4_map(list(ccp4_map_paths.values())[0]["path"])
    for cluster_num, mean_map_np in mean_maps_np.items():
        dataset_clustering.xmap_utils.save_nxmap_from_template(
            template_nxmap,
            mean_map_np,
            p.Path(out_dir) / "{}.ccp4".format(cluster_num),
        )

    # Ouptut the csv
    print("Outputting csv")
    output_labeled_embedding_csv(
        cluster_df,
        str(p.Path(out_dir) / "labelled_embedding.csv"),
    )
    print("Cluster DF is: \n{}".format(cluster_df))
    return cluster_df
import matplotlib.colors as mcolors
from PIL import Image
import multiprocessing as mp
from multiprocessing import Process, freeze_support, Manager
from time import sleep
from collections import deque
from multiprocessing.managers import BaseManager
import contextlib
from multiprocessing import Manager
from multiprocessing import Pool
import dill as Pickle
from joblib import Parallel, delayed
from joblib.externals.loky import set_loky_pickler

# Route loky worker serialization through the stdlib pickler.
set_loky_pickler('pickle')

from multiprocessing import Pool, RLock

# Columns of the trajectory records handled by this module
# (presumably a GPS/trip table -- confirm against the data source):
# idtrajectory;
# idterm;
# idtrace_o;
# idtrace_d;
# latitude;
# longitude;
# timedate;
# tripdistance_m;
# triptime_s;
# checkcode;
# breaktime_s;
# geom;
# has been developped to mitigate these issues. # # To have fast pickling with ``loky``, it is possible to rely on ``pickle`` to # serialize all communications between the main process and the workers with # the ``loky`` backend. This can be done by setting the environment variable # ``LOKY_PICKLER=pickle`` before the script is launched. Here we use an # internal programmatic switch ``loky.set_loky_pickler`` for demonstration # purposes but it has the same effect as setting ``LOKY_PICKLER``. Note that # this switch should not be used as it has some side effects with the workers. # # Now set the `loky_pickler` to use the pickle serialization from stdlib. Here, # we do not pass the desired function ``func_async`` as it is not picklable # but it is replaced by ``id`` for demonstration purposes. set_loky_pickler('pickle') t_start = time.time() Parallel(n_jobs=2)(delayed(id)(large_list) for _ in range(1)) print("With pickle serialization: {:.3f}s".format(time.time() - t_start)) ############################################################################### # However, the function and objects defined in ``__main__`` are not # serializable anymore using ``pickle`` and it is not possible to call # ``func_async`` using this pickler. # def func_async(i, *args): return 2 * i