def __init__(self, config):
    DataContainer.__init__(self, config)
    try:
        self.timepoint = int(config['timepoint'])
    except KeyError as key:
        raise EnrichError("Missing required config value '%s'" % key,
                          self.name)
    except ValueError as value:
        raise EnrichError("Invalid parameter value %s" % value, self.name)

    if 'align variants' in config:
        if config['align variants']:
            self.aligner = Aligner()
        else:
            self.aligner = None
    else:
        self.aligner = None

    if 'report filtered reads' in config:
        self.report_filtered_reads = config['report filtered reads']
    else:
        self.report_filtered_reads = self.verbose

    # initialize data
    self.counts = dict()       # pandas dataframes
    self.counts_file = dict()  # paths to saved counts
    self.filters = None        # dictionary
    self.filter_stats = None   # dictionary

def __init__(self, config):
    DataContainer.__init__(self, config)
    self.conditions = dict()
    self.control = None
    self.use_scores = True
    self.normalize_wt = False
    try:
        if 'normalize wt' in config:
            if config['normalize wt'] is True:
                self.normalize_wt = True
        for cnd in config['conditions']:
            if not cnd['label'].isalnum():
                raise EnrichError(
                    "Alphanumeric label required for condition "
                    "'{label}'".format(label=cnd['label']), self.name)
            for sel_config in cnd['selections']:
                # assign output base if not present
                if 'output directory' not in sel_config:
                    sel_config['output directory'] = self.output_base
            if cnd['label'] not in self.conditions:
                self.conditions[cnd['label']] = [
                    selection.Selection(x) for x in cnd['selections']]
            else:
                raise EnrichError(
                    "Non-unique condition label '{label}'".format(
                        label=cnd['label']), self.name)
            if 'control' in cnd:
                if cnd['control']:
                    if self.control is None:
                        self.control = self.conditions[cnd['label']]
                    else:
                        raise EnrichError("Multiple control conditions",
                                          self.name)
    except KeyError as key:
        raise EnrichError(
            "Missing required config value {key}".format(key=key), self.name)

    all_selections = list()
    for key in self.conditions:
        all_selections.extend(self.conditions[key])
    for dtype in all_selections[0].df_dict:
        if all(dtype in x.df_dict for x in all_selections):
            self.df_dict[dtype] = True
    if len(self.df_dict.keys()) == 0:
        raise EnrichError("No enrichment data present across all selections",
                          self.name)

    # ensure consistency for score usage
    if not all(x.use_scores for x in all_selections):
        self.use_scores = False
    # ensure consistency for wild type normalization
    for sel in all_selections:
        sel.normalize_wt = self.normalize_wt

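# Illustrative config fragment for the experiment constructor above. Only the
# keys it reads ('normalize wt', 'conditions', and per-condition 'label',
# 'selections', 'control') are shown; the selection sub-configs and all values
# are placeholders, not from the original source.
example_experiment_config = {
    'normalize wt': True,
    'conditions': [
        {
            'label': 'treated',
            'control': False,
            'selections': [],  # list of Selection config dicts (elided in this sketch)
        },
        {
            'label': 'untreated',
            'control': True,   # at most one condition may be the control
            'selections': [],
        },
    ],
}
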
def records_to_dm(records):
    dc_dict = {}
    # each value is a list of (threshold, fa, fn) records for one activity
    for activity, activity_records in records.items():
        fa_array = [e[1] for e in activity_records]
        fn_array = [e[2] for e in activity_records]
        threshold = [e[0] for e in activity_records]
        dc = DataContainer(fa_array, fn_array, threshold, label=activity)
        dc.line_options['color'] = None
        dc_dict[activity] = dc
    return dc_dict

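# Sketch of the expected input, inferred from the e[0]/e[1]/e[2] indexing in
# records_to_dm above: a mapping from activity name to a list of
# (threshold, fa, fn) tuples. The activity name and values are made up.
example_records = {
    'example_activity': [(0.1, 0.30, 0.90), (0.5, 0.10, 0.60), (0.9, 0.01, 0.20)],
}
example_dms = records_to_dm(example_records)  # {'example_activity': DataContainer}
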
def __init__(self, config):
    DataContainer.__init__(self, config)
    try:
        self.timepoint = int(config['timepoint'])
    except KeyError as key:
        raise EnrichError(
            "Missing required config value '{key}'".format(key=key),
            self.name)
    except ValueError as value:
        raise EnrichError(
            "Invalid parameter value {value}".format(value=value), self.name)

    if 'report filtered reads' in config:
        self.report_filtered = config['report filtered reads']
    else:
        self.report_filtered = False

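# Hypothetical config fragment for the constructor above; 'timepoint' and
# 'report filtered reads' are the only keys it reads directly, and anything
# required by DataContainer.__init__ is assumed to be handled there.
example_seqlib_config = {
    'timepoint': 0,
    'report filtered reads': True,
}
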
def call_loader(path, logger):
    try:
        # use os.path.isfile instead of catching FileNotFoundError
        # for Python 2 support
        if os.path.isfile(path):
            dc = DataContainer.load(path)
            if hasattr(dc, "data_container_version") and \
                    dc.data_container_version == "2.0":
                return dc
            else:
                logger.error("Error: This type of data container is not "
                             "supported (data_container_version not found "
                             "or < 2.0)")
                DMRenderExit(logger)
        else:
            logger.error("FileNotFoundError: No such file or directory: "
                         "'{}'".format(path))
            DMRenderExit(logger)
    except IOError as e:
        logger.error("IOError: {}".format(str(e)))
        DMRenderExit(logger)
    except UnicodeDecodeError as e:
        logger.error("UnicodeDecodeError: {}\n".format(str(e)))
        DMRenderExit(logger)

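# Minimal usage sketch; 'scores.dm' is a made-up file name for a pickled
# DataContainer saved with data_container_version "2.0". DMRenderExit is
# expected to terminate on failure, so a returned object can be used directly.
logger = logging.getLogger('DMlog')
dm = call_loader('scores.dm', logger)
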
def _export_records(records, prefix):
    opts = {}
    if (len(records) > 0):
        dc_dict = records_to_dm(records)

        # per-activity DET curves
        for activity, dc in dc_dict.items():
            dc.activity = activity
            dc.fa_label = prefix
            dc.fn_label = "PMISS"
            save_dm(dc, dm_dir, "{}_{}.dm".format(prefix, activity))
            log(1, "[Info] Plotting {} DET curve for {}".format(prefix, activity))
            opts['title'] = activity
            save_DET(dc, figure_dir, "DET_{}_{}.png".format(prefix, activity),
                     no_ppf, opts)

        # aggregate curve across activities
        mean_label = "{}_mean_byfa".format(prefix)
        dc_agg = DataContainer.aggregate(dc_dict.values(),
                                         output_label=mean_label,
                                         average_resolution=500)
        dc_agg.activity = "AGGREGATED"
        dc_agg.fa_label = prefix
        dc_agg.fn_label = "PMISS"
        save_dm(dc_agg, dm_dir, "{}.dm".format(mean_label))
        log(1, "[Info] Plotting mean {} curve for {} activities".format(
            prefix, len(dc_dict.values())))
        save_DET(dc_agg, figure_dir, "DET_{}.png".format(mean_label),
                 no_ppf, opts)

        # combined plots
        log(1, "[Info] Plotting combined {} DET curves".format(prefix))
        opts['title'] = "All Activities"
        save_DET(dc_dict.values(), figure_dir,
                 "DET_{}_{}.png".format(prefix, "COMBINED"), no_ppf, opts)
        opts['title'] = "All Activities and Aggregate"
        save_DET(list(dc_dict.values()) + [dc_agg], figure_dir,
                 "DET_{}_{}.png".format(prefix, "COMBINEDAGG"), no_ppf, opts)

def __init__(self, config):
    DataContainer.__init__(self, config)
    self.conditions = dict()
    self.control = None
    self.use_scores = True
    try:
        for cnd in config['conditions']:
            if not cnd['label'].isalnum():
                raise EnrichError("Alphanumeric label required for condition "
                                  "'%s'" % cnd['label'], self.name)
            for sel_config in cnd['selections']:
                # assign output base if not present
                if 'output directory' not in sel_config:
                    sel_config['output directory'] = self.output_base
            self.conditions[cnd['label']] = [selection.Selection(x)
                                             for x in cnd['selections']]
            if cnd['control']:
                if self.control is None:
                    self.control = self.conditions[cnd['label']]
                else:
                    raise EnrichError("Multiple control conditions", self.name)
    except KeyError as key:
        raise EnrichError("Missing required config value %s" % key, self.name)

    all_selections = list()
    for key in self.conditions:
        all_selections.extend(self.conditions[key])
    for dtype in all_selections[0].df_dict:
        if all(dtype in x.df_dict for x in all_selections):
            self.df_dict[dtype] = True
    if len(self.df_dict.keys()) == 0:
        raise EnrichError("No enrichment data present across all selections",
                          self.name)

    for key in self.conditions:
        if any(len(x.timepoints) == 2 for x in self.conditions[key]):
            self.use_scores = False

def compute_auc(self, output_dir):
    prefix = ["RFA", "TFA"]
    auc_data = []
    mean_auc = []
    for p in prefix:
        for activity, activity_properties in self.activity_index.items():
            try:
                dm_data = DataContainer.load(
                    output_dir + "/dm/" + "{}_{}.dm".format(p, activity))
                auc_data = auc_data + get_auc_new(dm_data, p, activity)
            except Exception as E:
                # skip protocols that don't compute TFA metrics
                pass
    mean_auc = get_auc_mean(auc_data)
    return auc_data, mean_auc

def compute_auc(self, output_dir):
    prefix = ["RFA", "TFA"]
    auc_data = []
    mean_auc = []
    for p in prefix:
        for activity, activity_properties in self.activity_index.items():
            try:
                dm_data = DataContainer.load(
                    output_dir + "/dm/" + "{}_{}.dm".format(p, activity))
                auc_data = auc_data + get_auc_new(dm_data, p, activity)
            except Exception as E:
                print(E)
                print(output_dir + "/dm/" + "{}_{}.dm".format(p, activity) +
                      " DNE")
    mean_auc = get_auc_mean(auc_data)
    return auc_data, mean_auc

def single_point_dm(fa_point, fn_point, threshold, file_name,
                    label=None, fa_label=None, fn_label=None):
    my_dm = DataContainer(fa_array=[fa_point], fn_array=[fn_point],
                          threshold=[threshold], label=label,
                          fa_label=fa_label, fn_label=fn_label)
    my_dm.validate_array_input()
    my_dm.dump(file_name)

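# Usage sketch with made-up values: write a single-operating-point DM file.
# The file name and label are placeholders; "RFA"/"PMISS" follow the labels
# used elsewhere in these snippets.
single_point_dm(fa_point=0.05, fn_point=0.40, threshold=0.5,
                file_name="single_point.dm",
                label="system_A", fa_label="RFA", fn_label="PMISS")
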
def __init__(self, config):
    DataContainer.__init__(self, config)
    self.libraries = dict()
    self.timepoints = list()
    self.use_scores = True
    self.normalize_wt = False
    self.ns_carryover_fn = None
    self.ns_carryover_kwargs = None
    self.use_barcode_variation = False
    try:
        if 'barcodes' in config:
            if 'map file' in config['barcodes']:
                self.barcode_map = BarcodeMap(config['barcodes']['map file'])
            else:
                self.barcode_map = None
        else:
            self.barcode_map = None

        libnames = list()
        bcmfiles = list()
        for lib in config['libraries']:
            if 'output directory' not in lib:
                lib['output directory'] = self.output_base
            libtype = seqlib_type(lib)
            if libtype is None:
                raise EnrichError("Unrecognized SeqLib config", self.name)
            elif libtype == "BarcodeVariantSeqLib":
                new = BarcodeVariantSeqLib(lib, barcode_map=self.barcode_map)
                bcmfiles.append(new.barcode_map.filename)
            else:
                new = globals()[libtype](lib)
            if new.output_base is None:
                new.set_output_base(self.output_base)
            if new.timepoint not in self.libraries:
                self.libraries[new.timepoint] = list()
            self.libraries[new.timepoint].append(new)
            libnames.append(new.name)
        self.timepoints = sorted(self.libraries.keys())

        if len(set(libnames)) != len(libnames):
            raise EnrichError("Non-unique library names", self.name)

        if len(bcmfiles) == len(libnames):  # all BarcodeVariant
            if len(set(bcmfiles)) == 1:     # all the same BarcodeMap
                self.use_barcode_variation = True
                unify_barcode_maps = False
                if self.barcode_map is None:
                    # same BarcodeMap specified for all SeqLibs
                    unify_barcode_maps = True
                elif bcmfiles[0] != self.barcode_map.filename:
                    # all SeqLibs are overriding the Selection BarcodeMap
                    unify_barcode_maps = True
                else:
                    # this BarcodeMap is being used for all SeqLibs
                    pass
                if unify_barcode_maps:
                    self.barcode_map = self.libraries[0][0].barcode_map
                    for tp in self.timepoints:
                        for lib in self.libraries[tp]:
                            lib.barcode_map = self.barcode_map

        self.set_filters(config['filters'], {'min count': 0,
                                             'min input count': 0,
                                             'min rsquared': 0.0,
                                             'max barcode variation': None})

        if 'carryover correction' in config:
            if config['carryover correction']['method'] == "nonsense":
                self.ns_carryover_fn = nonsense_ns_carryover_apply_fn
                self.ns_carryover_kwargs = {'position':
                    int(config['carryover correction']['position'])}
            # add additional methods here using "elif" blocks
            else:
                raise EnrichError("Unrecognized nonspecific carryover "
                                  "correction", self.name)

        if 'normalize wt' in config:
            if config['normalize wt'] is True:
                self.normalize_wt = True
    except KeyError as key:
        raise EnrichError("Missing required config value %s" % key, self.name)
    except ValueError as value:
        raise EnrichError("Invalid parameter value %s" % value, self.name)

    if len(self.timepoints) < 2:
        raise EnrichError("Insufficient number of timepoints", self.name)
    elif len(self.timepoints) == 2:
        self.use_scores = False
    if 0 not in self.timepoints:
        raise EnrichError("Missing timepoint 0", self.name)
    if self.timepoints[0] != 0:
        raise EnrichError("Invalid negative timepoint", self.name)

    # identify what kind of counts data is present in all timepoints
    dtype_counts = list()
    for tp in self.timepoints:
        for lib in self.libraries[tp]:
            dtype_counts.extend(lib.df_dict.keys())
    dtype_counts = Counter(dtype_counts)
    for dtype in dtype_counts:
        if dtype_counts[dtype] == len(config['libraries']):
            self.df_dict[dtype] = True
    if 'barcodes_unmapped' in self.df_dict.keys():
        # special case for BarcodeVariantSeqLib
        del self.df_dict['barcodes_unmapped']
    if 'barcodes_low_abuncande' in self.df_dict.keys():
        # special case for BarcodeVariantSeqLib
        del self.df_dict['barcodes_low_abuncande']
    if len(self.df_dict.keys()) == 0:
        raise EnrichError("No count data present across all timepoints",
                          self.name)

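# Hypothetical Selection config fragment covering the keys read by the
# constructor above ('barcodes', 'libraries', 'filters', 'carryover
# correction', 'normalize wt'). The file name, filter value, and library
# sub-configs are placeholders.
example_selection_config = {
    'barcodes': {'map file': 'barcode_map.txt'},
    'libraries': [],  # list of SeqLib configs, one or more per timepoint (elided)
    'filters': {'min count': 10},
    'carryover correction': {'method': 'nonsense', 'position': 120},
    'normalize wt': True,
}
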
def evaluate_input(args):
    """This function parses and evaluates the arguments from the command line
    interface. It returns the list of DM files loaded, along with any custom
    plot and line options provided.

    The function parses the input argument and the potential custom options
    arguments (plot and lines). It first infers the type of input provided.
    The following 3 input types are supported:
        - type 1: a .txt file containing a path to a .dm file per line
        - type 2: a single .dm path
        - type 3: a custom list of pairs of dictionaries (see the input help
          from the command line parser)
    Then it loads custom (or default, if not provided) plot and line options
    per DM file.

    Args:
        args (argparse.Namespace): the result of the call of parse_args() on
            the ArgumentParser object

    Returns:
        Result (tuple): A tuple containing
            - DM_list (list): list of DM objects
            - plot_opts (dict): dictionary of plot options
    """
    def call_loader(path, logger):
        try:
            # Python2 support
            if os.path.isfile(path):
                dc = DataContainer.load(path)
                if hasattr(dc, "data_container_version") and \
                        dc.data_container_version == "2.0":
                    return dc
                else:
                    logger.error("Error: This type of data container is not "
                                 "supported (data_container_version not "
                                 "found or < 2.0)")
                    DMRenderExit(logger)
            else:
                logger.error("FileNotFoundError: No such file or directory: "
                             "'{}'".format(path))
                DMRenderExit(logger)
        except IOError as e:
            logger.error("IOError: {}".format(str(e)))
            DMRenderExit(logger)
        except UnicodeDecodeError as e:
            logger.error("UnicodeDecodeError: {}\n".format(str(e)))
            DMRenderExit(logger)

    logger = logging.getLogger('DMlog')
    DM_list = list()

    # Case 1: text file containing one path per line
    if args.input.endswith('.txt'):
        logger.debug("Input of type 1 detected")
        input_type = 1
        if os.path.isfile(args.input):
            with open(args.input) as f:
                fp_list = f.read().splitlines()
        else:
            logger.error("FileNotFoundError: No such file or directory: "
                         "'{}'".format(args.input))
            DMRenderExit(logger)

        for dm_file_path in fp_list:
            label = dm_file_path
            # We handle a potential label provided
            if ':' in dm_file_path:
                dm_file_path, label = dm_file_path.rsplit(':', 1)

            dm_obj = call_loader(dm_file_path, logger)
            dm_obj.path = dm_file_path
            dm_obj.label = label if dm_obj.label is None else dm_obj.label
            dm_obj.show_label = True
            DM_list.append(dm_obj)

    # Case 2: One dm pickled file
    elif args.input.endswith('.dm'):
        logger.debug("Input of type 2 detected")
        input_type = 2
        dm_obj = call_loader(args.input, logger)
        dm_obj.path = args.input
        dm_obj.label = args.input if dm_obj.label is None else dm_obj.label
        dm_obj.show_label = True
        DM_list = [dm_obj]

    # Case 3: String containing a list of input with their metadata
    elif args.input.startswith('[[') and args.input.endswith(']]'):
        logger.debug("Input of type 3 detected")
        input_type = 3
        try:
            input_list = literal_eval(args.input)
            for dm_data, dm_opts in input_list:
                logger.debug("dm_data: {}".format(dm_data))
                logger.debug("dm_opts: {}".format(dm_opts))
                dm_file_path = dm_data['path']
                dm_obj = call_loader(dm_file_path, logger)
                dm_obj.path = dm_file_path
                dm_obj.label = dm_data['label'] if dm_data['label'] is not \
                    None else dm_obj.label
                dm_obj.show_label = dm_data['show_label']
                dm_obj.line_options = dm_opts
                dm_obj.line_options['label'] = dm_obj.label
                DM_list.append(dm_obj)
        except ValueError as e:
            if not all([len(x) == 2 for x in input_list]):
                logger.error("ValueError: Invalid input format. All "
                             "sub-lists must be a pair of two dictionaries."
                             "\n-> {}".format(str(e)))
            else:
                logger.error("ValueError: {}".format(str(e)))
            DMRenderExit(logger)
        except SyntaxError as e:
            logger.error("SyntaxError: The input provided is invalid."
                         "\n-> {}".format(str(e)))
            DMRenderExit(logger)

    else:
        logger.error("The input type does not match any of the following "
                     "inputs:\n- .txt file containing one file path per line"
                     "\n- .dm file\n- a list of pair "
                     "[{'path':'path/to/dm_file','label':str,"
                     "'show_label':bool}, "
                     "**{any matplotlib.lines.Line2D properties}].\n")
        DMRenderExit(logger)

    # Assertion: all the fa_labels and fn_labels MUST match across DM files
    fa_label = set([x.fa_label for x in DM_list])
    fn_label = set([x.fn_label for x in DM_list])
    assert (len(fa_label) == 1), \
        "Error: DM files have mixed FA_labels {}".format(fa_label)
    assert (len(fn_label) == 1), \
        "Error: DM files have mixed FN_labels {}".format(fn_label)

    if (args.aggregate is not None):
        logger.debug("Creating aggregated Line")
        try:
            dm_data, dm_opts = literal_eval(args.aggregate)
            dm_obj = DataContainer.aggregate(DM_list,
                                             output_label="TFA_mean_byfa",
                                             average_resolution=500)
            dm_obj.label = dm_data['label'] if dm_data['label'] is not None \
                else dm_obj.label
            dm_obj.activity = dm_obj.label
            dm_obj.fa_label = fa_label.pop()
            dm_obj.fn_label = fn_label.pop()
            dm_obj.show_label = dm_data['show_label']
            dm_obj.line_options = dm_opts
            dm_obj.line_options['label'] = dm_obj.label
            DM_list.append(dm_obj)
            if dm_data['path'] is not None:
                fname = "{}/{}".format(args.outputFolder, dm_data['path'])
                logger.debug("Writing aggregated Line to {}".format(fname))
                dm_obj.dump(fname)
        except ValueError as e:
            logger.error("ValueError: The aggregate option had a value "
                         "error {}".format(str(e)))
            DMRenderExit(logger)
        except SyntaxError as e:
            logger.error("SyntaxError: The aggregate option provided is "
                         "invalid.\n-> {}".format(str(e)))
            DMRenderExit(logger)

    # *-* Options Processing *-*

    # General plot options
    if not args.plotOptionJsonFile:
        logger.info("Generating the default plot options...")
        plot_opts = Render.gen_default_plot_options(
            args.plotType, DM_list[0].fa_label, DM_list[0].fn_label,
            plot_title=args.plotTitle)
    else:
        logger.info("Loading of the plot options from the json config file...")
        if os.path.isfile(args.plotOptionJsonFile):
            with open(args.plotOptionJsonFile, 'r') as f:
                plot_opts = json.load(f)
            validate_plot_options(plot_opts)
        else:
            logger.error("FileNotFoundError: No such file or directory: "
                         "'{}'".format(args.plotOptionJsonFile))
            DMRenderExit(logger)

    # Line options
    if args.lineOptionJsonFile and input_type != 3:
        logger.info("Loading of the lines options from the json config file "
                    "and overriding data container line settings...")
        if os.path.isfile(args.lineOptionJsonFile):
            with open(args.lineOptionJsonFile, 'r') as f:
                opts_list = json.load(f)
            if len(opts_list) != len(DM_list):
                print("ERROR: the number of the line options is different "
                      "from the number of the DM objects: ({} < {})".format(
                          len(opts_list), len(DM_list)))
                DMRenderExit(logger)
            else:
                for dm, line_options in zip(DM_list, opts_list):
                    dm.line_options = line_options
        else:
            logger.error("FileNotFoundError: No such file or directory: "
                         "'{}'".format(args.lineOptionJsonFile))
            DMRenderExit(logger)

    if args.confidenceInterval:
        plot_opts['confidence_interval'] = True

    return DM_list, plot_opts

def run(self):
    agstart = time.time()

    for i in xrange(self.no_sims):
        logging.info("Going for simulation %d" % (i + 1))
        gc.collect()

        run_id = str(uuid4())
        with DataContainer(self.config, run_id, self.aggregate_id) as dc:
            p = Progress(self.config['model']['no_steps'])

            model_class = None
            if (self.market_type == 1):
                logging.info("Using default Market")
                model_class = Market
            elif (self.market_type == 2):
                logging.info("Using ShuffleIRSMarket")
                model_class = ShuffleIRSMarket
            elif (self.market_type == 3):
                logging.info("Using SortedIRSMarket")
                model_class = SortedIRSMarket
            elif (self.market_type == 4):
                logging.info("Using RandomSortedIRSMarket")
                model_class = SortedRandomIRSMarket
            elif (self.market_type == 5):
                logging.info("Using RandomShuffleIRSMarket")
                model_class = ShuffleRandomIRSMarket
            elif (self.market_type == 6):
                logging.info("Using ConstantRandomShuffleIRSMarket")
                model_class = ConstShuffleIRSMarket
            elif (self.market_type == 7):
                logging.info("Using quick CRS-IRS-Mkt")
                model_class = sim
            else:
                raise ValueError("No such market type")

            p.start()
            start = time.time()
            with model_class(self.config['model'], dc, p.update) as m:
                m.run()
            t = time.time() - start
            p.finish()
            print ""
            logging.info("Run took %f seconds" % t)

            if (self.config['analysis']['do_analysis']):
                start = time.time()
                self.do_analysis(dc, run_id)
                t = time.time() - start
                logging.info("Analysis took %f seconds" % t)

            if (self.save_data):
                start = time.time()
                dc.save_data()
                t = time.time() - start
                logging.info("Saving data took %f seconds" % t)

            gc.collect()

        print ""
        print ""
        gc.collect()

    dt = (time.time() - agstart) / 60
    logging.info("Experiment took %f minutes" % dt)

    if (self.config['aggregate']['do_aggregate'] and self.save_data):
        start = time.time()
        self.do_aggregate(dc, run_id)
        logging.info('Aggregation took %f seconds' % (time.time() - start))

def __init__(self, config):
    DataContainer.__init__(self, config)
    self.libraries = dict()
    self.timepoints = list()
    try:
        if 'barcodes' in config:
            if 'map file' in config['barcodes']:
                self.barcode_map = BarcodeMap(config['barcodes']['map file'])
            else:
                self.barcode_map = None
        else:
            self.barcode_map = None

        libnames = list()
        for lib in config['libraries']:
            if 'output directory' not in lib:
                lib['output directory'] = self.output_base
            libtype = seqlib_type(lib)
            if libtype is None:
                raise EnrichError("Unrecognized SeqLib config", self.name)
            elif libtype == "BarcodeVariantSeqLib":
                new = BarcodeVariantSeqLib(lib, barcode_map=self.barcode_map)
            else:
                new = globals()[libtype](lib)
            if new.output_base is None:
                new.set_output_base(self.output_base)
            if new.timepoint not in self.libraries:
                self.libraries[new.timepoint] = list()
            self.libraries[new.timepoint].append(new)
            libnames.append(new.name)
        self.timepoints = sorted(self.libraries.keys())

        if len(set(libnames)) != len(libnames):
            raise EnrichError("Non-unique library names", self.name)

        self.set_filters(config['filters'], {'min count': 0,
                                             'min input count': 0,
                                             'min rsquared': 0.0,
                                             'max barcode variation': None})

        if 'carryover correction' in config:
            if config['carryover correction']['method'] == "nonsense":
                self.ns_carryover_fn = nonsense_ns_carryover_apply_fn
                self.ns_carryover_kwargs = {'position':
                    int(config['carryover correction']['position'])}
            # add additional methods here using "elif" blocks
            else:
                raise EnrichError("Unrecognized nonspecific carryover "
                                  "correction", self.name)
        else:
            self.ns_carryover_fn = None
            self.ns_carryover_kwargs = None
    except KeyError as key:
        raise EnrichError("Missing required config value %s" % key, self.name)
    except ValueError as value:
        raise EnrichError("Invalid parameter value %s" % value, self.name)

    if len(self.libraries.keys()) < 2:
        raise EnrichError("Insufficient number of timepoints", self.name)
    if 0 not in self.timepoints:
        raise EnrichError("Missing timepoint 0", self.name)
    if self.timepoints[0] != 0:
        raise EnrichError("Invalid negative timepoint", self.name)

    # identify what kind of counts data is present in all timepoints
    dtype_counts = list()
    for tp in self.timepoints:
        for lib in self.libraries[tp]:
            dtype_counts.extend(lib.counts.keys())
    dtype_counts = Counter(dtype_counts)
    for dtype in dtype_counts:
        if dtype_counts[dtype] == len(config['libraries']):
            self.df_dict[dtype] = True
    if 'barcodes_unmapped' in self.df_dict.keys():
        # special case for BarcodeVariantSeqLib
        del self.df_dict['barcodes_unmapped']
    if len(self.df_dict.keys()) == 0:
        raise EnrichError("No count data present across all timepoints",
                          self.name)

    try:
        if 'correction' in config:
            if config['correction']['method'] == "stop":
                if not self.libraries[0].is_coding():
                    raise EnrichError("Invalid correction method for "
                                      "noncoding sequences", self.name)
                else:
                    config['correction']['length percentile']  # must exist
                    self.correction = config['correction']
        else:
            self.correction = None
    except KeyError as key:
        raise EnrichError("Missing required config value %s" % key, self.name)

def initialize(self):
    DataContainer.initialize(self)
    self.__style = self.get_style()
    self.__style &= ~ListView.STYLE_WRAP
    self.set_style(self.__style)

def do_run(steps, no_banks, threshold, max_tenure, max_irs_value,
           avalanche_fraction=0.9):
    #steps = 10000
    save = False
    save_risk = False
    save_risk_avalanche_time_series = False
    save_dist = False
    save_giant_component = False
    save_avalanche_progression = False
    save_critical_info = False
    save_avalanche_tree = False
    save_degree_distribution = False
    no_connection_scatter_moments = 0
    connection_scatter_moments = np.random.randint(
        0, steps, no_connection_scatter_moments)

    seed = np.random.randint(0, 1000)
    dcconfig = {
        'model': {
            'no_banks': no_banks,
            'no_steps': steps,
            'threshold': threshold,
            'sigma': 1,
            'max_irs_value': max_irs_value,
            'irs_threshold': -1,
            'dissipation': 0.0,
            'max_tenure': max_tenure
        },
        'analysis': {
            'data_to_save': ['defaults']
        },
        'file_root': './simulation_data/',
        'market_type': 7,
        'seed': seed
    }
    measure_no_steps = 2 * dcconfig['model']['max_tenure']

    ###########################################################################
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)
    s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
            connection_scatter_moments, seed,
            avalanche_fraction=avalanche_fraction)
    s.save_degree_distribution = save_degree_distribution
    if (s.save_degree_distribution):
        s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
        s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)
    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component):
        s.giant_components = np.zeros(s.no_steps)

    ###########################################################################
    start = time.time()
    p.start()
    tme, size = s.run()
    print
    p.finish()

    defaulting_bank = s.defaulting_bank_no
    start_at = tme - measure_no_steps + 1
    print "Large enough avalanche found at %d of size %d" % (tme, size)
    print
    print "Run took %d seconds" % (time.time() - start)
    print
    print "Going for the analysis"

    ###########################################################################
    ## Actual stuff thats needed
    dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
    p = Progress(steps)
    s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
            connection_scatter_moments, seed, start_at, defaulting_bank,
            avalanche_fraction=avalanche_fraction)

    nb = dcconfig['model']['no_banks']
    s.measured_balances = np.zeros((measure_no_steps, nb))
    s.measured_gross_balances = np.zeros((measure_no_steps, nb))
    s.degrees = np.zeros((measure_no_steps, nb))
    s.no_irs = np.zeros((measure_no_steps, nb))
    #s.giant_component = []
    s.defaulted_nodes = []
    s.irs_pb = []
    s.network = np.zeros((nb, nb))
    s.irs_creations = np.zeros(steps)
    s.irs_removals = np.zeros(steps)
    #################
    s.save_degree_distribution = save_degree_distribution
    s.save_avalanche_progression = save_avalanche_progression
    s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
    s.collect_critical_info = save_critical_info
    s.save_giant_component = save_giant_component
    s.save_avalanche_tree = save_avalanche_tree
    s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
    if (s.save_avalanche_tree):
        os.makedirs(s.avalanche_tree_file_path)
    if (save_giant_component):
        s.giant_components = np.zeros(s.no_steps)

    ###########################################################################
    start = time.time()
    p.start()
    tme, size = s.run()
    p.finish()
    print
    print "Large enough avalanche found at %d of size %d" % (tme, size)

    if s.save_avalanche_progression:
        print "Saving avalanche progression"
        file_path = './simulation_data/avalanche_progression/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.avalanche_progressions, fp)
            pickle.dump(dcconfig, fp)

    if s.collect_critical_info:
        print "Critical info"
        file_path = './simulation_data/critical/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.critical_info, fp)
            pickle.dump(s.max_default_size_t.tolist(), fp)
            if (s.save_giant_component):
                pickle.dump(s.giant_components.tolist(), fp)
            pickle.dump(dcconfig, fp)

    if len(connection_scatter_moments) > 0:
        print "Connection Scatters"
        file_path = './simulation_data/connection_scatters/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.connection_scatters, fp)

    if save_dist:
        file_path = './simulation_data/dists/%s.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.trials, fp)
            pickle.dump(dcconfig['model']['no_banks'], fp)

    if (True):
        os.makedirs("./simulation_data/large_avalanche_data/%s" % dc.aggregate_id)
        print "Saving stuff"
        file_path = './simulation_data/large_avalanche_data/%s/degrees.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.degrees.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/no_irs.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.no_irs.tolist(), fp)
            pickle.dump(s.irs_pb, fp)

        file_path = './simulation_data/large_avalanche_data/%s/balances.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.measured_balances.tolist(), fp)
            pickle.dump(s.measured_gross_balances.tolist(), fp)

        #file_path = './simulation_data/large_avalanche_data/%s/gc.bin' % dc.aggregate_id
        #with file(file_path, 'wb') as fp:
        #    pickle.dump(s.giant_component, fp)

        file_path = './simulation_data/large_avalanche_data/%s/network.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.network.tolist(), fp)

        file_path = './simulation_data/large_avalanche_data/%s/defaulted.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.defaulted_nodes, fp)

        file_path = './simulation_data/large_avalanche_data/%s/irs_data.bin' % dc.aggregate_id
        with file(file_path, 'wb') as fp:
            pickle.dump(s.irs_creations.tolist(), fp)
            pickle.dump(s.irs_removals.tolist(), fp)

        dcconfig['failed_bank'] = s.defaulting_bank_no
        file_path = './simulation_data/large_avalanche_data/%s/config.json' % dc.aggregate_id
        with open(file_path, 'w') as fp:
            json.dump(dcconfig, fp, indent=4)

    print dc.aggregate_id

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)

    c = {'sigma': 1,
         'no_banks': 30,
         'no_steps': 200,
         'irs_threshold': 15,
         'max_irs_value': 20,
         'max_tenure': 80,
         'no_sims': 1,
         'threshold': 4}

    mkt = ShuffleIRSMarket(c,
                           DataContainer({'file_root': './test',
                                          'model': {'no_steps': 1}},
                                         str(uuid4()), str(uuid4())),
                           None)

    banks = mkt.banks
    b1 = banks[0]
    b1.__balance__ = -5
    b2 = banks[1]
    b2.__balance__ = 5
    b3 = banks[2]
    b3.__balance__ = 3

    b1.set_dirty()
    b2.set_dirty()
    b3.set_dirty()

        'no_steps': steps,
        'threshold': 10,
        'sigma': 1,
        'max_irs_value': 7,  # 4,
        'irs_threshold': -1,
        'dissipation': 0.0,
        'max_tenure': 400
    },
    'analysis': {
        'data_to_save': ['defaults']
    },
    'file_root': './simulation_data/',
    'market_type': 7
}

dc = DataContainer(dcconfig, str(uuid4()), str(uuid4()))
p = Progress(steps)
s = sim(dcconfig['model'], dc, p.update, save_risk, save_dist,
        connection_scatter_moments)
s.save_degree_distribution = save_degree_distribution
if (s.save_degree_distribution):
    s.degrees = np.zeros((steps, dcconfig['model']['no_banks']))
    s.no_irs = np.zeros((steps, dcconfig['model']['no_banks']))
s.save_avalanche_progression = save_avalanche_progression
s.save_risk_avalanche_time_series = save_risk_avalanche_time_series
s.collect_critical_info = save_critical_info
s.save_giant_component = save_giant_component
s.save_avalanche_tree = save_avalanche_tree
s.avalanche_tree_file_path = './simulation_data/trees/%s/' % dc.aggregate_id
s.save_degree_on_default = save_degree_on_default

cur_config = copy.deepcopy(dcconfig)
cur_config['model']['no_banks'] = no_banks
cur_config['model']['max_irs_value'] = max_irs_value
cur_config['model']['max_tenure'] = tenure
cur_config['model']['threshold'] = threshold
aggregate_id = str(uuid4())

for i in xrange(no_reps):
    print "Starting run %d of %d" % (cnt, nosims)
    run_id = str(uuid4())
    p = Progress(steps)
    start = time.time()

    with DataContainer(cur_config, run_id, aggregate_id) as dc:
        with sim(cur_config['model'], dc, p.update, save_risk, False) as cursim:
            config_sim(cursim)
            p.start()
            cursim.run()
            p.finish()

            if (save):
                dc.save_defaults()
                dc.save_run()

            if cursim.save_degree_distribution:
                directory = './simulation_data/k/irs_value_%s' % max_irs_value
                file_path = '%s/%s_%s.bin' % (directory, dc.aggregate_id, i)
