def train_hmm(clr: cooler.Cooler, mix_num: int = 3, discore_fn=di_score):
    """Train a Gaussian-mixture HMM on directionality-index (DI) scores.

    :param clr: Cooler object holding the Hi-C contact matrix.
    :param mix_num: Number of Gaussian mixture components per state.
    :param discore_fn: Function computing the DI score from a sparse matrix.
    :return: The fitted HMM model.
    """
    model = ghmm_model(STATES, INIT_TRANSITION, INIT_PROB, END_PROB,
                       init_mean_fn(mix_num), init_var_fn(mix_num))

    di_dict = {}
    for chrom in clr.chromnames:
        matrix = clr.matrix(sparse=True).fetch(chrom).tocsr()
        di_array = discore_fn(matrix)
        gap_mask = remove_small_gap(
            np.isnan(clr.bins().fetch(chrom)['weight'].values))
        di_dict[chrom] = split_diarray(di_array, gap_mask)

    train_data = []
    for chrom_di in di_dict.values():
        train_data.extend(di for di in chrom_di.values())

    model.fit(
        train_data,
        algorithm='baum-welch',
        max_iterations=10000,
        stop_threshold=1e-5,
        n_jobs=CPU_CORE - 5,
        verbose=False
    )

    return model
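# Hedged usage sketch for train_hmm: the cooler path/resolution and the
# module-level constants it relies on (STATES, INIT_TRANSITION, INIT_PROB,
# END_PROB, CPU_CORE, ...) are assumed to be defined elsewhere.
import cooler

clr = cooler.Cooler('sample.mcool::/resolutions/10000')
model = train_hmm(clr, mix_num=3)  # Baum-Welch fit on per-chromosome DI scores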
def make_slices(
    clr: Cooler,
    regions: Dict[str, np.ndarray],
    names: Optional[dict] = None,
    force_disjoint: bool = False,
) -> Tuple[Dict[str, List[np.ndarray]], Dict[str, List[str]]]:
    # Fetch relevant bin_ids from the cooler file
    b_ids, n_ids = fetch_bins_from_cooler(cooler=clr, regions=regions, names=names)
    if force_disjoint:
        # Identify unique bin_ids and isolate disjoint regions
        slices = {chrom: get_unique_bins(b_ids=b_ids[chrom]) for chrom in b_ids}
        n_ids = {}
        for chrom in slices:
            n_ids[chrom] = []
            for sl in slices[chrom]:
                # start, end, bins and node names for region
                stl = clr.bins()[sl[0]]["start"].values[0]
                el = clr.bins()[sl[-1] + 1]["end"].values[0]
                sl_id = f"{chrom}:{stl}-{el}"
                n_ids[chrom].append(sl_id)
    else:
        slices = {chrom: [np.array(item) for item in b_ids[chrom]]
                  for chrom in b_ids}

    return slices, n_ids
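# Hedged usage sketch for make_slices; the path and region coordinates are
# illustrative. With force_disjoint=True the two overlapping regions below
# would be merged into a single disjoint slice.
import numpy as np
from cooler import Cooler

clr = Cooler('sample.cool')
regions = {'chr1': np.array([[1000000, 2000000],
                             [1500000, 2500000]])}
slices, n_ids = make_slices(clr, regions, force_disjoint=True)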
def make_bins(
    clr: Cooler,
    sites: Dict[str, np.ndarray],
    names: Optional[Dict[str, np.ndarray]] = None,
):
    # Returns (bins, names, bad_sites) when names are given, else (bins, bad_sites).
    bins = {}
    outnames = {}
    bad_sites = {}
    for chrom in sites:
        cbins = clr.bins().fetch(chrom)
        start = cbins['start'].values[0]
        # Convert genomic positions to bin offsets within the chromosome
        site_locs = ((sites[chrom] - start) / clr.binsize).astype('int')
        # Drop sites that fall beyond the last bin of the chromosome
        good_sites = site_locs < cbins.shape[0]
        bad_sites[chrom] = np.where(~good_sites)[0]
        bins[chrom] = site_locs[good_sites]
        if names is not None:
            outnames[chrom] = np.array(names[chrom])[good_sites]

    if names is not None:
        return bins, outnames, bad_sites
    else:
        return bins, bad_sites
def __init__(self, source_uri, bins, chunksize, batchsize, map=map):
    from cooler.api import Cooler
    self._map = map
    self.source_uri = source_uri
    self.chunksize = chunksize
    self.batchsize = batchsize

    clr = Cooler(source_uri)
    self._size = clr.info['nnz']
    self.old_binsize = clr.binsize
    self.old_chrom_offset = clr._load_dset('indexes/chrom_offset')
    self.old_bin1_offset = clr._load_dset('indexes/bin1_offset')
    self.gs = GenomeSegmentation(clr.chromsizes, bins)
    self.new_binsize = get_binsize(bins)
    # Aggregation only supports integer coarsening of the source resolution
    assert self.new_binsize % self.old_binsize == 0
    self.factor = self.new_binsize // self.old_binsize
def get_coolers(self, table, res=1000000):
    names = table['lib_name'].values
    cool_dict = defaultdict(list)
    for name in names:
        if name not in self.metadata['lib_name'].values:
            print(f'Name: {name} not found in metadata. Skipping')
            continue
        cool_dict['lib_name'].append(name)
        flag = True
        for cpath in self.cooler_paths:
            if f'{name}.hg38.mapq_30.1000.mcool' in os.listdir(cpath):
                flag = False
                cool = Cooler(
                    cpath + f'{name}.hg38.mapq_30.1000.mcool::/resolutions/{res}')
                cool_dict[f'cooler_{res}'].append(cool)
        if flag:
            print(
                f'Cooler not found matching {name}. Appending np.nan to appropriate row'
            )
            cool_dict[f'cooler_{res}'].append(np.nan)

    df = pd.DataFrame(cool_dict)
    df = table.copy(deep=True).merge(df, on='lib_name', how='outer')
    return df
def fetch_bins_from_cooler(
    cooler: Cooler,
    regions: Dict[str, np.ndarray],
    names: Optional[dict] = None,
) -> Tuple[Dict[str, List[List[np.int64]]], Dict[str, List[str]]]:
    # Fetch relevant bin_ids from the cooler file
    b_ids = {}
    n_ids = {}
    for chrom in regions:
        b_ids[chrom] = []
        n_ids[chrom] = []
        for idx, row in enumerate(regions[chrom]):
            b_add = list(
                cooler.bins()
                .fetch("{}:{}-{}".format(chrom, row[0], row[1]))
                .index.values
            )
            try:
                n_ids[chrom].append(names[chrom][idx])
            except (TypeError, KeyError, IndexError):
                # Fall back to a UCSC-style region string when no name is given
                n_ids[chrom].append("{}:{}-{}".format(chrom, row[0], row[1]))
            b_ids[chrom].append(b_add)
    return b_ids, n_ids
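# Hedged usage sketch for fetch_bins_from_cooler; when no names dict is given,
# UCSC-style region strings are generated as names. Path and coordinates are
# illustrative.
import numpy as np
from cooler import Cooler

clr = Cooler('sample.cool')
regions = {'chr1': np.array([[0, 500000]])}
b_ids, n_ids = fetch_bins_from_cooler(clr, regions)
# n_ids == {'chr1': ['chr1:0-500000']}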
def coords_to_bins(clr: cooler.Cooler, coords: pd.DataFrame) -> np.ndarray:
    """
    Converts genomic coordinates to a list of bin ids based on the whole
    genome contact map.

    Parameters
    ----------
    clr : cooler.Cooler
        Cooler object holding the whole genome contact map.
    coords : pandas.DataFrame
        Table of genomic coordinates, with columns chrom, pos.

    Returns
    -------
    numpy.array of ints :
        Indices in the whole genome matrix contact map.
    """
    # Snap each position to the start of its bin
    coords.pos = (coords.pos // clr.binsize) * clr.binsize
    # Coordinates are merged with bins, both indices are kept in memory so that
    # the indices of matching bins can be returned in the order of the input
    # coordinates
    idx = (clr.bins()[:]
           .reset_index()
           .rename(columns={"index": "bin_idx"})
           .merge(
               coords.reset_index().rename(columns={"index": "coord_idx"}),
               left_on=["chrom", "start"],
               right_on=["chrom", "pos"],
               how="right",
           )
           .set_index("bin_idx")
           .sort_values("coord_idx")
           .index.values)
    return idx
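# Hedged usage sketch for coords_to_bins; file path and positions are illustrative.
import cooler
import pandas as pd

clr = cooler.Cooler('sample.cool')
coords = pd.DataFrame({'chrom': ['chr1', 'chr2'], 'pos': [15000, 1020000]})
bin_ids = coords_to_bins(clr, coords)  # one bin index per row, in input order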
def matrix_balance(cool_uri, nproc=1, chunksize=int(1e7), mad_max=5,
                   min_nnz=10, min_count=0, ignore_diags=1,
                   tol=1e-5, max_iters=1000):
    '''
    Perform separate matrix balancing for regions with different copy numbers
    and output the bias vector in the "sweight" column.
    '''
    cool_path, group_path = util.parse_cooler_uri(cool_uri)
    # Overwrite the existing sweight column
    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        if 'sweight' in grp['bins']:
            del grp['bins']['sweight']

    clr = Cooler(cool_uri)

    try:
        if nproc > 1:
            pool = balance.Pool(nproc)
            map_ = pool.imap_unordered
        else:
            map_ = map

        bias, stats = iterative_correction(
            clr,
            chunksize=chunksize,
            tol=tol,
            min_nnz=min_nnz,
            min_count=min_count,
            mad_max=mad_max,
            max_iters=max_iters,
            ignore_diags=ignore_diags,
            rescale_marginals=True,
            use_lock=False,
            map=map_)
    finally:
        if nproc > 1:
            pool.close()

    if not stats['converged']:
        logger.error('Iteration limit reached without convergence')
        logger.error('Storing final result. Check log to assess convergence.')

    with h5py.File(cool_path, 'r+') as h5:
        grp = h5[group_path]
        # add the bias column to the file
        h5opts = dict(compression='gzip', compression_opts=6)
        grp['bins'].create_dataset('sweight', data=bias, **h5opts)
        grp['bins']['sweight'].attrs.update(stats)
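# Hedged usage sketch for matrix_balance; the URI and tuning values are
# illustrative. Afterwards the per-region bias vector can be read back from
# the 'sweight' bin column.
matrix_balance('sample.cool', nproc=4, mad_max=5, min_nnz=10)
sweight = Cooler('sample.cool').bins()[:]['sweight']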
def correlate(x_mcool, y_mcool, output_prefix, resolution):
    from pore_c.analyses.matrix import correlate
    from cooler import Cooler

    file_paths = catalogs.MatrixCorrelationCatalog.generate_paths(output_prefix)

    x_cool = Cooler(str(x_mcool) + f"::/resolutions/{resolution}")
    y_cool = Cooler(str(y_mcool) + f"::/resolutions/{resolution}")

    x_chrom_names = set(x_cool.chromnames)
    y_chrom_names = set(y_cool.chromnames)

    if x_chrom_names != y_chrom_names:
        x_not_y = x_chrom_names - y_chrom_names
        y_not_x = y_chrom_names - x_chrom_names
        if x_not_y and y_not_x:
            raise ValueError(
                f"Chromosomes are not sub/supersets x:{x_not_y}, y:{y_not_x}")
        elif x_not_y:
            logger.warning(
                f"Extra chromosomes in x, will not be included in calculations: {x_not_y}"
            )
        else:
            logger.warning(
                f"Extra chromosomes in y, will not be included in calculations: {y_not_x}"
            )

    metadata = correlate(
        x_cool,
        y_cool,
        xy_path=file_paths["xy"],
        coefficients_path=file_paths["coefficients"],
        resolution=resolution)
    metadata["resolution"] = resolution
    metadata["x"]["path"] = str(x_mcool)
    metadata["y"]["path"] = str(y_mcool)

    matrix_cat = catalogs.MatrixCorrelationCatalog.create(file_paths, metadata, {})
    logger.info(str(matrix_cat))
def parse_cooler(
    cooler_file: str,
    regions: Dict[str, np.ndarray],
) -> Tuple[Cooler, List[np.ndarray]]:
    # Load cooler
    c = Cooler(cooler_file)

    # Fetch relevant bin_ids from the cooler file (names are not needed here)
    b_ids, _ = fetch_bins_from_cooler(cooler=c, regions=regions)

    # Identify unique bin_ids and isolate disjoint regions
    slices = get_unique_bins(b_ids)

    return c, slices
def preprocess_hic(
    clr: cooler.Cooler,
    min_contacts: Optional[int] = None,
    region: Optional[str] = None,
) -> sp.csr_matrix:
    """
    Given an input cooler object, returns the preprocessed Hi-C matrix.
    Preprocessing involves (in that order): subsetting region, subsampling
    contacts, normalisation, detrending (obs / exp). Balancing weights must
    be pre-computed in the referenced cool file. Region must be in UCSC format.
    """
    # Load raw matrix and subset region if requested
    mat = clr.matrix(sparse=True, balance=False)
    bins = clr.bins()
    if region is None:
        mat = mat[:]
        bins = bins[:]
    else:
        mat = mat.fetch(region)
        bins = bins.fetch(region)
    try:
        biases = bins["weight"].values
    except KeyError as err:
        sys.stderr.write("Error: Input cooler must be balanced.\n")
        raise err
    # get to same coverage if requested and matrix is not empty
    if mat.sum() and (min_contacts is not None):
        mat = cup.subsample_contacts(mat, min_contacts).tocoo()
    valid = cup.get_detectable_bins(mat, n_mads=5)

    # balance region with weights precomputed on the whole matrix
    mat.data = mat.data * biases[mat.row] * biases[mat.col]
    # Detrend for P(s)
    mat = cup.detrend(mat.tocsr(), smooth=False, detectable_bins=valid[0])
    # Replace NaNs by 0s
    mat.data = np.nan_to_num(mat.data)
    mat.eliminate_zeros()
    return mat
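# Hedged usage sketch for preprocess_hic on an assumed, pre-balanced cooler
# (the 'weight' column must exist); path and region are illustrative.
import cooler

clr = cooler.Cooler('sample.cool')
mat = preprocess_hic(clr, min_contacts=100000, region='chr1:1000000-5000000')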
def _aggregate(self, span):
    from cooler.api import Cooler
    lo, hi = span

    clr = Cooler(self.source_uri)
    # convert_enum=False returns chroms as raw ints
    table = clr.pixels(join=True, convert_enum=False)
    chunk = table[lo:hi]
    # logger.info('{} {}'.format(lo, hi))
    print('{} {}'.format(lo, hi))

    # use the "start" point as anchor for re-binning
    # XXX - alternatives: midpoint anchor, proportional re-binning
    binsize = self.gs.binsize
    chrom_binoffset = self.gs.chrom_binoffset
    chrom_abspos = self.gs.chrom_abspos
    start_abspos = self.gs.start_abspos

    chrom_id1 = chunk['chrom1'].values
    chrom_id2 = chunk['chrom2'].values
    start1 = chunk['start1'].values
    start2 = chunk['start2'].values
    if binsize is None:
        abs_start1 = chrom_abspos[chrom_id1] + start1
        abs_start2 = chrom_abspos[chrom_id2] + start2
        chunk['bin1_id'] = np.searchsorted(
            start_abspos, abs_start1, side='right') - 1
        chunk['bin2_id'] = np.searchsorted(
            start_abspos, abs_start2, side='right') - 1
    else:
        rel_bin1 = np.floor(start1 / binsize).astype(int)
        rel_bin2 = np.floor(start2 / binsize).astype(int)
        chunk['bin1_id'] = chrom_binoffset[chrom_id1] + rel_bin1
        chunk['bin2_id'] = chrom_binoffset[chrom_id2] + rel_bin2

    grouped = chunk.groupby(['bin1_id', 'bin2_id'], sort=False)
    return grouped['count'].sum().reset_index()
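# A minimal, self-contained sketch of the "start-point anchor" re-binning used
# above when the target bins are variable-width (binsize is None): each pixel
# goes to the bin whose start precedes its absolute start position. The
# numbers are made up for illustration.
import numpy as np

start_abspos = np.array([0, 100, 250, 400])  # hypothetical bin starts
abs_start = np.array([0, 99, 100, 399])      # hypothetical pixel anchors
bin_id = np.searchsorted(start_abspos, abs_start, side='right') - 1
print(bin_id)  # -> [0 0 1 2]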
def main():
    """Balance of plant of a boiling water nuclear reactor.

    Attributes
    ----------
    end_time: float
        End of the flow time in SI unit.
    time_step: float
        Size of the time step between port communications in SI unit.
    use_mpi: bool
        If set to `True` use MPI otherwise use Python multiprocessing.
    """

    # Preamble
    end_time = 30.0 * unit.minute
    time_step = 30.0  # seconds
    show_time = (True, 5 * unit.minute)
    use_mpi = False  # True for MPI; False for Python multiprocessing
    plot_results = True  # True for enabling plotting section below
    params = get_params()  # parameters for BoP BWR

    #*****************************************************************************
    # Define Cortix system

    # System top level
    plant = Cortix(use_mpi=use_mpi, splash=True)

    # Network
    plant_net = plant.network = Network()

    params['start-time'] = 0.0
    params['end-time'] = end_time
    params['shutdown-time'] = 999.0 * unit.hour
    params['shutdown-mode'] = False

    #*****************************************************************************
    # Create reactor module
    reactor = BWR(params)
    reactor.name = 'BWR'
    reactor.save = True
    reactor.time_step = time_step
    reactor.end_time = end_time
    reactor.show_time = show_time
    reactor.RCIS = True

    # Add reactor module to network
    plant_net.module(reactor)

    #*****************************************************************************
    # Create turbine high pressure module
    params['turbine_inlet_pressure'] = 2
    params['turbine_outlet_pressure'] = 0.5
    params['high_pressure_turbine'] = True
    #params_turbine = reactor.params
    #params_turbine.inlet_pressure = 2
    #params.turbine_outlet_pressure = 0.5

    turbine_hp = Turbine(params)
    turbine_hp.name = 'High Pressure Turbine'
    turbine_hp.save = True
    turbine_hp.time_step = time_step
    turbine_hp.end_time = end_time

    # Add turbine high pressure module to network
    plant_net.module(turbine_hp)

    #*****************************************************************************
    # Create turbine low pressure 1 module
    params['turbine_inlet_pressure'] = 0.5
    params['turbine_outlet_pressure'] = 0.005
    params['high_pressure_turbine'] = False
    params['steam flowrate'] = params['steam flowrate'] / 2

    turbine_lp1 = Turbine(params)
    turbine_lp1.name = 'Low Pressure Turbine 1'
    turbine_lp1.save = True
    turbine_lp1.time_step = time_step
    turbine_lp1.end_time = end_time

    plant_net.module(turbine_lp1)

    #*****************************************************************************
    # Create turbine low pressure 2 module
    params['turbine_inlet_pressure'] = 0.5
    params['turbine_outlet_pressure'] = 0.005
    params['high_pressure_turbine'] = False

    turbine_lp2 = Turbine(params)
    turbine_lp2.name = 'Low Pressure Turbine 2'
    turbine_lp2.save = True
    turbine_lp2.time_step = time_step
    turbine_lp2.end_time = end_time

    plant_net.module(turbine_lp2)

    #*****************************************************************************
    # Create condenser module
    params['steam flowrate'] = params['steam flowrate'] * 2

    condenser = Condenser()
    condenser.name = 'Condenser'
    condenser.save = True
    condenser.time_step = time_step
    condenser.end_time = end_time

    plant_net.module(condenser)

    #*****************************************************************************
    # Create the RCIS cooler module
    params['RCIS-shutdown-time'] = 5 * unit.minute

    rcis = Cooler(params)
    rcis.name = 'RCIS'
    rcis.save = True
    rcis.time_step = time_step
    rcis.end_time = end_time

    plant_net.module(rcis)

    #*****************************************************************************
    # Create the BoP network connectivity
    plant_net.connect([reactor, 'coolant-outflow'], [turbine_hp, 'inflow'])
    plant_net.connect([turbine_hp, 'outflow-1'], [turbine_lp1, 'inflow'])
    plant_net.connect([turbine_hp, 'outflow-2'], [turbine_lp2, 'inflow'])
    plant_net.connect([turbine_lp1, 'outflow-1'], [condenser, 'inflow-1'])
    plant_net.connect([turbine_lp2, 'outflow-1'], [condenser, 'inflow-2'])
    plant_net.connect([condenser, 'outflow'], [reactor, 'coolant-inflow'])
    plant_net.connect([reactor, 'RCIS-outflow'], [rcis, 'coolant-inflow'])
    plant_net.connect([rcis, 'coolant-outflow'], [reactor, 'RCIS-inflow'])
    #plant_net.connect([rcis, 'signal-in'], [reactor, 'signal-out'])

    plant_net.draw(engine='dot', node_shape='folder')

    #*****************************************************************************
    # Run network dynamics simulation
    plant.run()

    #*****************************************************************************
    # Plot results

    if plot_results and (plant.use_multiprocessing or plant.rank == 0):

        # Reactor plots
        reactor = plant_net.modules[0]

        (quant, time_unit) = reactor.neutron_phase.get_quantity_history('neutron-dens')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('startup-neutron-dens.png', dpi=300)

        (quant, time_unit) = reactor.neutron_phase.get_quantity_history('delayed-neutrons-cc')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('startup-delayed-neutrons-cc.png', dpi=300)

        (quant, time_unit) = reactor.coolant_outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('startup-coolant-outflow-temp.png', dpi=300)

        (quant, time_unit) = reactor.reactor_phase.get_quantity_history('fuel-temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('startup-fuel-temp.png', dpi=300)

        # Turbine high pressure plots
        turbine_hp = plant_net.modules[1]

        (quant, time_unit) = turbine_hp.outflow_phase.get_quantity_history('power')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='High Pressure Turbine Power')
        plt.grid()
        plt.savefig('startup-turbine-hp-power.png', dpi=300)

        (quant, time_unit) = turbine_hp.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='High Pressure Turbine Outflow Temperature')
        plt.grid()
        plt.savefig('startup-turbine-hp-outflow-temp.png', dpi=300)

        # Turbine low pressure graphs
        turbine_lp1 = plant_net.modules[2]

        (quant, time_unit) = turbine_lp1.outflow_phase.get_quantity_history('power')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='Low Pressure Turbine 1 Power')
        plt.grid()
        plt.savefig('startup-turbine-lp1-power.png', dpi=300)

        (quant, time_unit) = turbine_lp1.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='Low Pressure Turbine 1 Outflow Temperature')
        plt.grid()
        plt.savefig('startup-turbine-lp1-outflow-temp.png', dpi=300)

        # Condenser graphs
        condenser = plant_net.modules[4]  # condenser is the fifth module added

        (quant, time_unit) = condenser.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('startup-condenser-outflow-temp.png', dpi=300)

    # Set up initial values for the shutdown simulation from the startup end state
    turbine1_outflow_temp = turbine_hp.outflow_phase.get_value('temp', end_time)
    turbine1_chi = turbine_hp.outflow_phase.get_value('quality', end_time)
    turbine1_power = turbine_hp.outflow_phase.get_value('power', end_time)

    turbine2_outflow_temp = turbine_lp1.outflow_phase.get_value('temp', end_time)
    turbine2_chi = turbine_lp1.outflow_phase.get_value('quality', end_time)
    turbine2_power = turbine_lp1.outflow_phase.get_value('power', end_time)

    condenser_runoff_temp = condenser.outflow_phase.get_value('temp', end_time)

    delayed_neutron_cc = reactor.neutron_phase.get_value('delayed-neutrons-cc', end_time)
    n_dens = reactor.neutron_phase.get_value('neutron-dens', end_time)
    fuel_temp = reactor.reactor_phase.get_value('fuel-temp', end_time)
    coolant_temp = reactor.coolant_outflow_phase.get_value('temp', end_time)
    # Values loaded into params when they are needed (module instantiation)

    # Properly shutdown simulation
    plant.close()

    # Now we run shutdown as a separate simulation with starting parameters equal
    # to the ending values of the startup simulation

    #**************************************************************************************************

    # Preamble
    start_time = 0.0 * unit.minute
    end_time = 60 * unit.minute
    time_step = 30.0  # seconds
    show_time = (True, 5 * unit.minute)
    use_mpi = False  # True for MPI; False for Python multiprocessing
    plot_results = True  # True for enabling plotting section below
    params = get_params()  # clear params, just to be safe

    #*****************************************************************************
    # Define Cortix system

    # System top level
    plant = Cortix(use_mpi=use_mpi, splash=True)

    # Network
    plant_net = plant.network = Network()

    params['start-time'] = start_time
    params['end-time'] = end_time
    params['shutdown-time'] = 0.0
    params['shutdown-mode'] = True

    #*****************************************************************************
    # Create reactor module
    params['delayed-neutron-cc'] = delayed_neutron_cc
    params['n-dens'] = n_dens
    params['fuel-temp'] = fuel_temp
    params['coolant-temp'] = coolant_temp
    params['operating-mode'] = 'shutdown'

    reactor = BWR(params)
    reactor.name = 'BWR'
    reactor.save = True
    reactor.time_step = time_step
    reactor.end_time = end_time
    reactor.show_time = show_time
    reactor.RCIS = False

    # Add reactor module to network
    plant_net.module(reactor)

    #*****************************************************************************
    # Create turbine high pressure module
    params['turbine_inlet_pressure'] = 2
    params['turbine_outlet_pressure'] = 0.5
    params['high_pressure_turbine'] = True
    params['turbine-outflow-temp'] = turbine1_outflow_temp
    params['turbine-chi'] = turbine1_chi
    params['turbine-work'] = turbine1_power
    params['turbine-inflow-temp'] = coolant_temp
    #params_turbine = reactor.params
    #params_turbine.inlet_pressure = 2
    #params.turbine_outlet_pressure = 0.5

    turbine_hp = Turbine(params)
    turbine_hp.name = 'High Pressure Turbine'
    turbine_hp.save = True
    turbine_hp.time_step = time_step
    turbine_hp.end_time = end_time

    # Add turbine high pressure module to network
    plant_net.module(turbine_hp)

    #*****************************************************************************
    # Create turbine low pressure 1 module
    params['turbine_inlet_pressure'] = 0.5
    params['turbine_outlet_pressure'] = 0.005
    params['high_pressure_turbine'] = False
    params['steam flowrate'] = params['steam flowrate'] / 2
    params['turbine-outflow-temp'] = turbine2_outflow_temp
    params['turbine-inflow-temp'] = turbine1_outflow_temp
    params['turbine-chi'] = turbine2_chi
    params['turbine-work'] = turbine2_power

    turbine_lp1 = Turbine(params)
    turbine_lp1.name = 'Low Pressure Turbine 1'
    turbine_lp1.save = True
    turbine_lp1.time_step = time_step
    turbine_lp1.end_time = end_time

    plant_net.module(turbine_lp1)

    #*****************************************************************************
    # Create turbine low pressure 2 module
    params['turbine_inlet_pressure'] = 0.5
    params['turbine_outlet_pressure'] = 0.005
    params['high_pressure_turbine'] = False

    turbine_lp2 = Turbine(params)
    turbine_lp2.name = 'Low Pressure Turbine 2'
    turbine_lp2.save = True
    turbine_lp2.time_step = time_step
    turbine_lp2.end_time = end_time

    plant_net.module(turbine_lp2)

    #*****************************************************************************
    # Create condenser module
    params['steam flowrate'] = params['steam flowrate'] * 2
    params['condenser-runoff-temp'] = condenser_runoff_temp

    condenser = Condenser()
    condenser.name = 'Condenser'
    condenser.save = True
    condenser.time_step = time_step
    condenser.end_time = end_time

    plant_net.module(condenser)

    #*****************************************************************************
    # Create the RCIS cooler module
    params['RCIS-shutdown-time'] = -1 * unit.minute

    rcis = Cooler(params)
    rcis.name = 'RCIS'
    rcis.save = True
    rcis.time_step = time_step
    rcis.end_time = end_time

    plant_net.module(rcis)

    #*****************************************************************************
    # Create the BoP network connectivity
    plant_net.connect([reactor, 'coolant-outflow'], [turbine_hp, 'inflow'])
    plant_net.connect([turbine_hp, 'outflow-1'], [turbine_lp1, 'inflow'])
    plant_net.connect([turbine_hp, 'outflow-2'], [turbine_lp2, 'inflow'])
    plant_net.connect([turbine_lp1, 'outflow-1'], [condenser, 'inflow-1'])
    plant_net.connect([turbine_lp2, 'outflow-1'], [condenser, 'inflow-2'])
    plant_net.connect([condenser, 'outflow'], [reactor, 'coolant-inflow'])
    plant_net.connect([reactor, 'RCIS-outflow'], [rcis, 'coolant-inflow'])
    plant_net.connect([rcis, 'coolant-outflow'], [reactor, 'RCIS-inflow'])
    #plant_net.connect([rcis, 'signal-in'], [reactor, 'signal-out'])

    plant_net.draw(engine='dot', node_shape='folder')

    #*****************************************************************************
    # Run network dynamics simulation
    plant.run()

    #*****************************************************************************
    # Plot results

    if plot_results and (plant.use_multiprocessing or plant.rank == 0):

        # Reactor plots
        reactor = plant_net.modules[0]

        (quant, time_unit) = reactor.neutron_phase.get_quantity_history('neutron-dens')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('shutdown-neutron-dens.png', dpi=300)

        (quant, time_unit) = reactor.neutron_phase.get_quantity_history('delayed-neutrons-cc')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('shutdown-delayed-neutrons-cc.png', dpi=300)

        (quant, time_unit) = reactor.coolant_outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('shutdown-coolant-outflow-temp.png', dpi=300)

        (quant, time_unit) = reactor.reactor_phase.get_quantity_history('fuel-temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('shutdown-fuel-temp.png', dpi=300)

        # Turbine high pressure plots
        turbine_hp = plant_net.modules[1]

        (quant, time_unit) = turbine_hp.outflow_phase.get_quantity_history('power')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='High Pressure Turbine Power')
        plt.grid()
        plt.savefig('shutdown-turbine-hp-power.png', dpi=300)

        (quant, time_unit) = turbine_hp.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='High Pressure Turbine Outflow Temperature')
        plt.grid()
        plt.savefig('shutdown-turbine-hp-outflow-temp.png', dpi=300)

        # Turbine low pressure graphs
        turbine_lp1 = plant_net.modules[2]

        (quant, time_unit) = turbine_lp1.outflow_phase.get_quantity_history('power')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='Low Pressure Turbine 1 Power')
        plt.grid()
        plt.savefig('shutdown-turbine-lp1-power.png', dpi=300)

        (quant, time_unit) = turbine_lp1.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']',
                   title='Low Pressure Turbine 1 Outflow Temperature')
        plt.grid()
        plt.savefig('shutdown-turbine-lp1-outflow-temp.png', dpi=300)

        # Condenser graphs
        condenser = plant_net.modules[4]

        (quant, time_unit) = condenser.outflow_phase.get_quantity_history('temp')
        quant.plot(x_scaling=1 / unit.minute, x_label='Time [m]',
                   y_label=quant.latex_name + ' [' + quant.unit + ']')
        plt.grid()
        plt.savefig('shutdown-condenser-outflow-temp.png', dpi=300)

    # Shutdown the simulation
    plant.close()
#!/usr/bin/env python
# encoding: utf-8

from cooler import Cooler

if __name__ == '__main__':
    near_cool = Cooler()
    near_cool.run()
def _single_clr_edge_and_node_info_from_sites(c: cooler.Cooler,
                                              sites: Dict[str, np.ndarray],
                                              balance: Optional[bool] = True,
                                              join: Optional[bool] = False):
    '''
    Given some cooler and a dictionary of sites (with chromosomes as keys),
    return the submatrices retrieved from these slices within the Hi-C map.
    Submatrices are returned in sparse COO format with an edge_idxs dictionary,
    an edge_attrs dictionary and a node_info dictionary. Optionally users can
    balance the Hi-C matrix before retrieval of matrix information. Since
    multiple chromosomes and slices per chromosome can be supplied, users may
    optionally join regions into one larger region consisting of the given
    slices concatenated together. This function does not actually do the
    joining procedure since the passed slices may not be disjoint.

    :param c: Cooler file object.
    :type c: cooler.Cooler
    :param sites: Dictionary with chromosomes as keys and arrays of sites as
                  values. Multiple sites are allowed per chromosome.
    :type sites: Dict[str, np.ndarray]
    :param balance: Whether to perform matrix balancing on the Hi-C matrix
                    before retrieving individual slices.
    :type balance: Optional[bool]
    :param join: Whether to retrieve Hi-C matrix information corresponding to
                 the interface between slices. This is only recommended if
                 slices are disjoint since the interface isn't well defined
                 otherwise.
    :type join: Optional[bool]
    '''
    # Iterate through slices, adding in edge indexes and edge attributes
    edge_idxs = {}
    edge_attrs = {}
    sub_graph_nodes = {}
    chroms = list(sites.keys())
    for idx, chrom1 in enumerate(chroms):
        edge_idxs[chrom1] = {}
        edge_attrs[chrom1] = {}
        sub_graph_nodes[chrom1] = {}
        for chrom2 in chroms[idx:]:
            if chrom1 != chrom2 and not join:
                continue
            mat = c.matrix(balance=balance).fetch(chrom1, chrom2)
            mat = mat[sites[chrom1], :]
            mat = mat[:, sites[chrom2]]
            mat = coo(mat)

            b1 = c.bins().fetch(chrom1).index.values[sites[chrom1]]
            b2 = c.bins().fetch(chrom2).index.values[sites[chrom2]]

            edge_index = np.concatenate(
                [b1[mat.row][None, :], b2[mat.col][None, :]],
                axis=0,
            )
            edge_data = mat.data[:, None]
            if chrom1 != chrom2:
                # Mirror trans edges so the graph is undirected
                edge_index = np.append(edge_index, edge_index[::-1, :], axis=1)
                edge_data = np.append(edge_data, edge_data, axis=0)

            edge_data[np.isnan(edge_data)] = 0

            ind = np.lexsort((edge_index[0, :], edge_index[1, :]))
            edge_index = edge_index[:, ind]
            edge_data = edge_data[ind, :]

            edge_idxs[chrom1][chrom2] = [edge_index]
            edge_attrs[chrom1][chrom2] = [edge_data]
            if chrom1 == chrom2:
                sub_graph_nodes[chrom1][chrom2] = [b1]
            else:
                sub_graph_nodes[chrom1][chrom2] = [np.append(b1, b2)]

    return edge_idxs, edge_attrs, sub_graph_nodes
def _single_clr_edge_and_node_info_from_slices(c: cooler.Cooler,
                                               slices: Dict[str, List[np.ndarray]],
                                               balance: Optional[bool] = True,
                                               join: Optional[bool] = False):
    '''
    Given some cooler and a dictionary of slices (with chromosomes as keys),
    return the submatrices retrieved from these slices within the Hi-C map.
    Submatrices are returned in sparse COO format with an edge_idxs dictionary,
    an edge_attrs dictionary and a node_info dictionary. Optionally users can
    balance the Hi-C matrix before retrieval of matrix information. Since
    multiple chromosomes and slices per chromosome can be supplied, users may
    optionally join regions into one larger region consisting of the given
    slices concatenated together. This function does not actually do the
    joining procedure since the passed slices may not be disjoint.

    :param c: Cooler file object.
    :type c: cooler.Cooler
    :param slices: Dictionary with chromosomes as keys and lists of slices as
                   values. Multiple slices are allowed per chromosome.
    :type slices: Dict[str, List[np.ndarray]]
    :param balance: Whether to perform matrix balancing on the Hi-C matrix
                    before retrieving individual slices.
    :type balance: Optional[bool]
    :param join: Whether to retrieve Hi-C matrix information corresponding to
                 the interface between slices. This is only recommended if
                 slices are disjoint since the interface isn't well defined
                 otherwise.
    :type join: Optional[bool]
    '''
    # Iterate through slices, adding in edge indexes and edge attributes
    edge_idxs = {}
    edge_attrs = {}
    sub_graph_nodes = {}
    chroms = list(slices.keys())
    for cidx1, chrom1 in enumerate(chroms):
        edge_idxs[chrom1] = {}
        edge_attrs[chrom1] = {}
        sub_graph_nodes[chrom1] = {}
        for chrom2 in chroms[cidx1:]:
            if chrom1 != chrom2 and not join:
                continue
            edge_idxs[chrom1][chrom2] = []
            edge_attrs[chrom1][chrom2] = []
            sub_graph_nodes[chrom1][chrom2] = []
            for idx, s1 in enumerate(slices[chrom1]):
                if chrom1 == chrom2:
                    # don't want to repeat region pairings
                    slist = slices[chrom1][idx:]
                else:
                    slist = slices[chrom2]
                for jdx, s2 in enumerate(slist):
                    if s1[0] == s2[0] and jdx != 0:
                        continue
                    if s1[0] != s2[0] and not join:
                        continue
                    mat = c.matrix(balance=balance, sparse=True)[s1[0]:s1[-1] + 1,
                                                                 s2[0]:s2[-1] + 1]

                    edge_index = np.concatenate(
                        [s1[mat.row][None, :], s2[mat.col][None, :]],
                        axis=0,
                    )
                    edge_data = mat.data[:, None]
                    if np.sum(s1 - s2) != 0:
                        edge_index, edge_data = make_edges_bidirectional(
                            edge_index, edge_data)

                    ind = np.lexsort((edge_index[0, :], edge_index[1, :]))
                    edge_index = edge_index[:, ind]
                    edge_data = edge_data[ind, :]

                    edge_idxs[chrom1][chrom2].append(edge_index)
                    edge_attrs[chrom1][chrom2].append(edge_data)
                    if s1[0] == s2[0]:
                        sub_graph_nodes[chrom1][chrom2].append(s1)
                    else:
                        sub_graph_nodes[chrom1][chrom2].append(np.append(s1, s2))

    return edge_idxs, edge_attrs, sub_graph_nodes
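# Hedged usage sketch: pull sparse edge info for two disjoint bin slices on one
# chromosome; path and bin ranges are illustrative.
import numpy as np
import cooler

clr = cooler.Cooler('sample.cool')
slices = {'chr1': [np.arange(0, 100), np.arange(200, 300)]}
edge_idxs, edge_attrs, node_info = _single_clr_edge_and_node_info_from_slices(
    clr, slices, balance=True, join=False)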
def export_to_cooler(
    contact_table,
    output_prefix,
    cooler_resolution,
    fragment_table,
    chromsizes,
    query,
    query_columns=None,
    by_haplotype=False,
):
    results = []
    if query_columns:
        columns = query_columns[:]
    else:
        columns = []
    columns.extend(["align1_fragment_id", "align2_fragment_id"])
    if by_haplotype:
        columns.extend(["align1_haplotype", "align2_haplotype"])

    contact_df = dd.read_parquet(contact_table,
                                 engine=PQ_ENGINE,
                                 version=PQ_VERSION,
                                 columns=columns,
                                 index=False)
    if query:
        contact_df = contact_df.query(query)

    chrom_dict = pd.read_csv(chromsizes,
                             sep="\t",
                             header=None,
                             names=["chrom", "size"],
                             index_col=["chrom"],
                             squeeze=True)
    # create even-width bins using cooler
    bins_df = binnify(chrom_dict, cooler_resolution)
    bins_df.index.name = "bin_id"
    # convert to ranges for overlap
    bins = pr.PyRanges(bins_df.reset_index().rename(columns={
        "start": "Start",
        "end": "End",
        "chrom": "Chromosome"
    }))

    fragment_df = dd.read_parquet(fragment_table,
                                  engine=PQ_ENGINE,
                                  version=PQ_VERSION).compute()
    midpoint_df = pr.PyRanges(
        fragment_df.reset_index()[[
            "chrom", "start", "end", "fragment_id"
        ]].assign(start=lambda x: ((x.start + x.end) * 0.5).round(0).astype(
            int)).eval("end = start + 1").rename(columns={
                "chrom": "Chromosome",
                "start": "Start",
                "end": "End"
            }))
    # use a pyranges join to assign fragments to bins
    fragment_to_bin = midpoint_df.join(
        bins, how="left").df[["fragment_id", "bin_id"]]
    fragment_to_bin = fragment_to_bin.set_index(
        "fragment_id").sort_index()  # .astype(np.uint32)
    nulls = fragment_to_bin["bin_id"] == -1
    if nulls.any():
        logger.warning(
            "Some fragments did not overlap bins, removing from analysis:\n{}".
            format(fragment_to_bin[nulls].join(fragment_df)))
        fragment_to_bin = fragment_to_bin[~nulls]

    # use a join to assign each end of a contact to a bin
    binned_contacts = (contact_df.merge(
        fragment_to_bin,
        how="inner",
        right_index=True,
        left_on="align1_fragment_id").merge(
            fragment_to_bin,
            how="inner",
            right_index=True,
            left_on="align2_fragment_id",
            suffixes=[None, "_2"]).rename(columns={
                "bin_id": "bin1_id",
                "bin_id_2": "bin2_id"
            }))

    if not by_haplotype:
        cooler_path = output_prefix + ".cool"
        # group size == number of contacts per bin_pair
        pixels = binned_contacts.groupby(
            ["bin1_id",
             "bin2_id"]).size().rename("count").astype(np.int32).reset_index()
        create_cooler(cooler_path,
                      bins_df,
                      pixels,
                      ordered=True,
                      symmetric_upper=True,
                      ensure_sorted=True)
        c = Cooler(cooler_path)
        logger.info(f"Created cooler: {c.info}")
        results.append(cooler_path)
    else:
        tmp_parquet = output_prefix + ".tmp.pq"
        pixels = (
            # create a key to group by haplotype pair, order of haplotypes doesn't matter
            binned_contacts.assign(
                hap_key=lambda x: x[["align1_haplotype", "align2_haplotype"]
                                    ].apply(lambda y: "{}_{}".format(*sorted(
                                        y)).replace("-1", "nohap"),
                                            axis=1,
                                            meta="object")
            ).groupby(["hap_key", "bin1_id",
                       "bin2_id"]).size().rename("count").astype(
                           np.int32).reset_index().astype({"hap_key": "category"}))

        # save to a temporary parquet file, this might not be necessary
        # but want to avoid the whole contact matrix hitting memory
        pixels.to_parquet(
            tmp_parquet,
            write_metadata_file=True,
            partition_on=["hap_key"],
            write_index=False,
            engine=PQ_ENGINE,
            version=PQ_VERSION,
        )

        pixels = dd.read_parquet(tmp_parquet,
                                 engine=PQ_ENGINE,
                                 version=PQ_VERSION,
                                 columns=["hap_key"],
                                 index=False)
        hap_keys = pixels["hap_key"].unique().compute()
        # create a cooler for each haplotype pair
        for hap_key in hap_keys:
            cooler_path = f"{output_prefix}.{hap_key}.cool"
            pixels = dd.read_parquet(
                tmp_parquet,
                filters=[("hap_key", "==", hap_key)],
                index=False,
                engine=PQ_ENGINE,
                version=PQ_VERSION,
                columns=["bin1_id", "bin2_id", "count"],
            )
            create_cooler(cooler_path,
                          bins_df,
                          pixels,
                          ordered=True,
                          symmetric_upper=True,
                          ensure_sorted=True)
            c = Cooler(cooler_path)
            logger.info(f"Created cooler: {c.info}")
            results.append(cooler_path)

        shutil.rmtree(tmp_parquet)

    return results
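# Hedged usage sketch for export_to_cooler; all paths, the resolution, and the
# query column are assumptions for illustration only.
paths = export_to_cooler(
    contact_table='contacts.parquet',
    output_prefix='sample',
    cooler_resolution=10000,
    fragment_table='fragments.parquet',
    chromsizes='hg38.chrom.sizes',
    query='align1_mapping_quality > 30',
    query_columns=['align1_mapping_quality'],
)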
from cooler import Cooler

# input_uri = "unzoomifiable_5kb.cool"
input_uri = "missing_bin_cooler_40kb.cool"

clr = Cooler(input_uri)
print(clr.pixels(join=True)[17872226])

table = clr.pixels(join=True)[17800000:]
print(table[table['chrom2'].isnull()])
#           chrom1   start1     end1 chrom2  start2  end2  count
# 17872226   chr88  2640000  2680000    NaN     NaN   NaN      1

########################
# TODO
#########################
# Now i'm on the same page with Nezar
# bad pixel is found ...
# FIND OUT - HOW that happened ?!?!?!?!?! ...
#
# processes that might be involved:
#
# pairix ${pairs_lib}
#
# cooler cload pairix \
#     --nproc ${task.cpus} \
#     --assembly ${params.input.genome.assembly} \
class T_control:

    def __init__(self, fan_pin, peltier_pin, step_pin, thermometer_pin):
        self.cooler = Cooler(Pin(fan_pin, Pin.OUT), Pin(peltier_pin, Pin.OUT))
        self.cooler.fanOn()
        self.cooler.coolerHigh()
        self.pid = Pid(17, 1, 0.02, 0, 0.995)  # (temperature, P, I, D, memoryFactor)
        self.thermometer = Thermometer(thermometer_pin)
        self.pump = PWMPump(Pin(step_pin, Pin.OUT))
        # Ramp the pump up gradually before settling at full speed
        for i in range(10):
            self.pump.pwm.freq(i * 1500)
            utime.sleep(0.1)
        self.pump.pwm.freq(15000)
        #self.pump = VariablePump(Pin(step_pin, Pin.OUT))
        #self.pump.setSpeed(0.90)
        #self.pump.startMotor()

    def call(self):
        self.t = utime.time()
        read = -self.pid.update(self.thermometer.read())
        if read > 2:
            self.cooler.coolerHigh()
            self.pump.pwm.freq(10000)
        elif read >= 1:
            self.cooler.coolerLow()
            self.pump.pwm.freq(10000)
        elif read >= -1:
            self.cooler.coolerLow()
            self.pump.pwm.freq(6666 + int(3333 * read))
        else:
            self.cooler.coolerLow()
            self.pump.pwm.freq(200)

    def __loop(self):
        while self.isRunning:
            self.call()
            utime.sleep(10)

    def startPID(self):
        self.isRunning = True
        _thread.start_new_thread(self.__loop, ())

    def stopPID(self):
        self.isRunning = False
def get_pairing_score_obs_exp(
    clr: cooler.Cooler,
    expected: pd.DataFrame,
    windowsize: int = 4 * 10**4,
    func: Callable = np.mean,
    regions: pd.DataFrame = pd.DataFrame(),
    norm: bool = True,
    arms: pd.DataFrame = pd.DataFrame(),
) -> pd.DataFrame:
    """Takes a cooler file (clr), an expected dataframe (expected; maybe
    generated by getExpected), a windowsize (windowsize), a summary function
    (func) and a set of genomic regions to calculate the pairing score as
    follows: A square with side-length windowsize is created for each of the
    entries in the supplied genomic regions and the summary function applied
    to the Hi-C pixels (obs/exp values) at the location in the supplied cooler
    file. The results are returned as a dataframe. If no regions are supplied,
    regions are constructed for each bin in the cooler file to construct a
    genome-wide pairing score."""
    # Check whether genomic regions were supplied
    if len(regions) == 0:
        # If no regions are supplied, pregenerate all bins; drop bins with nan weights
        regions = clr.bins()[:].dropna()
        # find midpoint of each bin to assign windows to each midpoint
        regions.loc[:, "mid"] = (regions["start"] + regions["end"]) // 2
    # check that norm is only set if genomewide pairingScore is calculated
    elif norm:
        raise ValueError("Norm flag can only be set with genomeWide pairingScore!")
    # drop nan rows from regions
    regions = regions.dropna()
    # fix indices
    regions.index = range(len(regions))
    regions.loc[:, "binID"] = range(len(regions))
    # Chromosomal arms are needed so each process only extracts a subset from the file
    if len(arms) == 0:
        arms = get_arms_hg19()
    # extract all windows
    windows = assign_regions(windowsize, clr.binsize, regions["chrom"],
                             regions["mid"], arms)
    # add binID to later merge piles
    windows.loc[:, "binID"] = regions["binID"]
    windows = windows.dropna()
    # generate pileup
    pile = do_pileup_obs_exp(clr, expected, windows, collapse=False)
    # convert to dataframe
    pile_frame = pile_to_frame(pile)
    # replace inf with nan
    pile_frame = pile_frame.replace([np.inf, -np.inf], np.nan)
    # apply function to each row (row = individual window)
    summarized = pile_frame.apply(func, axis=1)
    # subset regions with regions that were assigned windows
    output = pd.merge(regions, windows, on="binID", suffixes=("", "_w")).dropna()
    # add results
    output.loc[:, "PairingScore"] = summarized
    # normalize by median
    if norm:
        output.loc[:, "PairingScore"] = output["PairingScore"] - np.median(
            output.dropna()["PairingScore"])
    return output[["chrom", "start", "end", "PairingScore"]]
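# Hedged usage sketch for get_pairing_score_obs_exp. `expected_df` is assumed
# to be an obs/exp expected-counts dataframe computed elsewhere (the docstring
# mentions a getExpected helper); the cooler path is illustrative.
import cooler

clr = cooler.Cooler('sample.mcool::/resolutions/10000')
pairing = get_pairing_score_obs_exp(clr, expected_df, windowsize=40000)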
for i in range(0, len(spans), batchsize):
    try:
        lock.acquire()
        print("right before collapse ...{} {}".format(
            i, spans[i:i + batchsize]))
        results = self._map(self.aggregate, spans[i:i + batchsize])
    finally:
        lock.release()
    for df in results:
        # yield {k: v.values for k, v in six.iteritems(df)}
        yield df


input_uri = ""
c = Cooler(input_uri)
new_bins = binnify(c.chromsizes, 2 * c.binsize)
iterator = CoolerAggregator(input_uri, new_bins, 1000000, batchsize=1, map=map)

# # last message before it fails ...
# # INFO:cooler:17868809 17872380
# for ii in iterator:
#     print(ii)

# from cooler.api import Cooler
lo, hi = 17869999, 17872300
# lo, hi = 17868809, 17872380
clr = Cooler(input_uri)
from flask import Flask, redirect, url_for, render_template, request
from cooler import Cooler

# globals
app = Flask(__name__)
cooler = Cooler(21, 20, 16)


@app.route('/', methods=('GET', 'POST'))
def index():
    if request.method == 'GET':
        return render_template('index.html', **cooler.__dict__)
    if request.method == 'POST':
        cooler.control_mode = request.form['mode']
        if request.form['mode'] == 'automatic':
            # TODO: check rationality of thresholds
            try:
                cooler.min_threshold = float(request.form['min_threshold'])
            except Exception as E:
                pass
            try:
                cooler.max_threshold = float(request.form['max_threshold'])
            except Exception as E:
                pass
            cooler.set_speed(False)
            return redirect('/update')
        if request.form['mode'] == 'manual':