def run():
    width = 25
    height = 6
    layers = chunked(data, width * height)
    image = [
        first(pixel_layers, key=lambda x: x in ['0', '1'])
        for pixel_layers in zip(*layers)
    ]
    for r in chunked(image, width):
        print(''.join(r).replace('0', ' ').replace('1', '#'))
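# A minimal sketch of the layer-flattening idea above, assuming
# boltons.iterutils provides the list-returning `chunked` and `first`.
# The toy data is the small worked example for this puzzle: a 2x2 image,
# four layers, with '2' pixels treated as transparent.
from boltons.iterutils import chunked, first

def _demo_flatten_layers():
    data = '0222112222120000'
    width, height = 2, 2
    layers = chunked(data, width * height)   # -> ['0222', '1122', '2212', '0000']
    image = [first(px, key=lambda x: x in ['0', '1'])
             for px in zip(*layers)]         # first opaque pixel per position
    assert image == ['0', '1', '1', '0']     # rows render as '01' / '10'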
def campaign_visits_to_geojson(rpc, campaign_id, geojson_file):
    """
    Export the geo location information for all the visits of a campaign into
    the `GeoJSON <http://geojson.org/>`_ format.

    :param rpc: The connected RPC instance to load the information with.
    :type rpc: :py:class:`.KingPhisherRPCClient`
    :param campaign_id: The ID of the campaign to load the information for.
    :param str geojson_file: The destination file for the GeoJSON data.
    """
    ips_for_georesolution = {}
    ip_counter = collections.Counter()
    for visit in rpc.remote_table('visits', query_filter={'campaign_id': campaign_id}):
        ip_counter.update((visit.visitor_ip,))
        visitor_ip = ipaddress.ip_address(visit.visitor_ip)
        if not isinstance(visitor_ip, ipaddress.IPv4Address):
            continue
        if visitor_ip.is_loopback or visitor_ip.is_private:
            continue
        if visitor_ip not in ips_for_georesolution:
            ips_for_georesolution[visitor_ip] = visit.first_visit
        elif ips_for_georesolution[visitor_ip] > visit.first_visit:
            ips_for_georesolution[visitor_ip] = visit.first_visit
    ips_for_georesolution = [ip for (ip, _) in sorted(ips_for_georesolution.items(), key=lambda x: x[1])]

    locations = {}
    for ip_addresses in iterutils.chunked(ips_for_georesolution, 50):
        locations.update(rpc.geoip_lookup_multi(ip_addresses))

    points = []
    for ip, location in locations.items():
        if not (location.coordinates and location.coordinates[0] and location.coordinates[1]):
            continue
        points.append(geojson.Feature(geometry=location, properties={'count': ip_counter[ip], 'ip-address': ip}))
    feature_collection = geojson.FeatureCollection(points)

    with open(geojson_file, 'w') as file_h:
        json_ex.dump(feature_collection, file_h)
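# A minimal, self-contained sketch of the kind of GeoJSON output built above,
# using the `geojson` package directly; the coordinates, count, and IP string
# here are made up for illustration.
import geojson

def _demo_geojson_export(path='visits.geojson'):
    point = geojson.Point((-83.75, 42.28))  # (longitude, latitude)
    feature = geojson.Feature(geometry=point,
                              properties={'count': 3, 'ip-address': '203.0.113.7'})
    collection = geojson.FeatureCollection([feature])
    with open(path, 'w') as file_h:
        geojson.dump(collection, file_h)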
def d2scan(*args, time=None, md=None):
    """
    Scan over one multi-motor trajectory relative to current positions.

    Parameters
    ----------
    *args
        patterned like (``motor1, start1, stop1,`` ...,
        ``motorN, startN, stopN, intervals``)
        where 'intervals' is the number of strides (number of points - 1).
        Motors can be any 'setable' object (motor, temp controller, etc.)
    time : float, optional
        applied to any detectors that have a `count_time` setting
    md : dict, optional
        metadata
    """
    if len(args) % 3 != 1:
        raise ValueError("wrong number of positional arguments")
    motors = []
    for motor, start, stop in chunked(args[:-1], 3):
        motors.append(motor)
    intervals = list(args)[-1]
    num = 1 + intervals

    inner = inner_spec_decorator('d2scan', time, motors=motors)(
        relative_inner_product_scan)

    return (yield from inner(gs.DETS, num, *(args[:-1]), md=md))
def chunk_outer_product_args(args):
    '''Scan over a mesh; each motor is on an independent trajectory.

    Parameters
    ----------
    args
        patterned like (``motor1, start1, stop1, num1,``
                        ``motor2, start2, stop2, num2, snake2,``
                        ``motor3, start3, stop3, num3, snake3,`` ...
                        ``motorN, startN, stopN, numN, snakeN``)

        The first motor is the "slowest", the outer loop. For all motors
        except the first motor, there is a "snake" argument: a boolean
        indicating whether to follow a snake-like, winding trajectory or a
        simple left-to-right trajectory.

    See Also
    --------
    `bluesky.plan_patterns.outer_product`

    Yields
    ------
    (motor, start, stop, num, snake)
    '''
    args = list(args)
    # The first (slowest) axis is never "snaked." Insert False to
    # make it easy to iterate over the chunks of args.
    args.insert(4, False)
    if len(args) % 5 != 0:
        raise ValueError("wrong number of positional arguments")
    yield from chunked(args, 5)
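# A quick sketch of how the generator above regroups a flat argument list,
# assuming boltons' list-returning `chunked` and placeholder motor names.
from boltons.iterutils import chunked

def _demo_chunk_outer_product_args():
    args = ['mx', 0, 1, 5,        # slowest axis: caller passes no snake flag
            'my', 0, 2, 3, True]  # inner axis: snake=True
    chunks = list(chunk_outer_product_args(args))
    assert chunks[0] == ['mx', 0, 1, 5, False]  # False inserted automatically
    assert chunks[1] == ['my', 0, 2, 3, True]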
def sorted_bounds(disjoint=False, max_value=10000, max_len=100,
                  remove_duplicates=False):
    if disjoint:
        # Since we accumulate later (integer division keeps the bound an int,
        # as st.integers requires):
        max_value //= max_len
    s = st.lists(st.integers(min_value=0, max_value=max_value),
                 min_size=0, max_size=20)
    if disjoint:
        s = s.map(accumulate).map(list)
    # Select only cases with even-length lists
    s = s.filter(lambda x: len(x) % 2 == 0)
    # Convert to list of 2-tuples
    s = s.map(
        lambda x: [tuple(q) for q in iterutils.chunked(sorted(x), size=2)])
    # Remove cases with zero-length intervals
    s = s.filter(lambda x: all([a[0] != a[1] for a in x]))
    if remove_duplicates:
        # (this will always succeed if disjoint=True)
        s = s.filter(lambda x: x == list(set(x)))
    # Sort intervals and result
    return s.map(sorted)
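# A hedged usage sketch: feeding the strategy above into a hypothesis test.
# The property checked here is illustrative only; `sorted_bounds` itself
# still needs its module's imports (st, iterutils, accumulate).
from hypothesis import given

@given(sorted_bounds(disjoint=True))
def test_intervals_are_disjoint(intervals):
    # Accumulated, sorted, zero-length-filtered intervals should not overlap.
    for (a_lo, a_hi), (b_lo, b_hi) in zip(intervals, intervals[1:]):
        assert a_hi <= b_lo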
def _get_crossval_split(stimuli, fixations, split_count, included_splits,
                        random=True, stratified_attributes=None):
    if stratified_attributes is not None:
        return _get_stratified_crossval_split(
            stimuli, fixations, split_count, included_splits,
            random=random, stratified_attributes=stratified_attributes)

    inds = list(range(len(stimuli)))
    if random:
        print("Using random shuffles for crossvalidation")
        rst = np.random.RandomState(seed=42)
        rst.shuffle(inds)
        inds = list(inds)
    size = int(np.ceil(len(inds) / split_count))
    chunks = chunked(inds, size=size)
    inds = []
    for split_nr in included_splits:
        inds.extend(chunks[split_nr])
    stimuli, fixations = create_subset(stimuli, fixations, inds)
    return stimuli, fixations
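# A tiny sketch of the fold arithmetic above, assuming boltons'
# list-returning `chunked`: 10 items, 3 folds, keep folds 0 and 2.
import numpy as np
from boltons.iterutils import chunked

def _demo_crossval_folds():
    inds = list(range(10))
    split_count = 3
    size = int(np.ceil(len(inds) / split_count))   # -> 4
    chunks = chunked(inds, size=size)              # -> [[0..3], [4..7], [8, 9]]
    selected = [i for nr in (0, 2) for i in chunks[nr]]
    assert selected == [0, 1, 2, 3, 8, 9]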
def current_user_saved_tracks_contains(track_ids: Iterable[str]) -> List[bool]:
    spotify = client.spotify_client()
    saved = []
    # The endpoint accepts at most 50 IDs per call.
    for chunk in iterutils.chunked(track_ids, 50):
        saved.extend(spotify.current_user_saved_tracks_contains(chunk))
    return saved
def run(argv: List[str]) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', choices=['slurm', 'sge'], required=True)
    parser.add_argument('--step', choices=['parse', 'augment-phenos', 'manhattan', 'qq'], required=True)
    args = parser.parse_args(argv)

    def should_process(pheno: Dict[str, Any]) -> bool:
        if args.step == "parse":
            from . import parse_input_files
            get_input_filepaths = parse_input_files.get_input_filepaths
            get_output_filepaths = parse_input_files.get_output_filepaths
        elif args.step == "augment-phenos":
            from . import augment_phenos
            get_input_filepaths = augment_phenos.get_input_filepaths
            get_output_filepaths = augment_phenos.get_output_filepaths
        elif args.step == "manhattan":
            from . import manhattan
            get_input_filepaths = manhattan.get_input_filepaths
            get_output_filepaths = manhattan.get_output_filepaths
        elif args.step == "qq":
            from . import qq
            get_input_filepaths = qq.get_input_filepaths
            get_output_filepaths = qq.get_output_filepaths
        else:
            raise Exception("No implementation for step {}".format(args.step))
        return PerPhenoParallelizer().should_process_pheno(
            pheno,
            get_input_filepaths=get_input_filepaths,
            get_output_filepaths=get_output_filepaths,
        )

    idxs = [i for i, pheno in enumerate(get_phenolist()) if should_process(pheno)]
    if not idxs:
        print('All phenos are up-to-date!')
        exit(0)
    jobs = chunked(idxs, N_AT_A_TIME)

    batch_filepath = get_dated_tmp_path('{}-{}'.format(args.engine, args.step)) + '.sh'
    tmp_path = get_tmp_path(args.step)
    mkdir_p(tmp_path)

    with open(batch_filepath, 'w') as f:
        f.write(header_template[args.engine].format(n_jobs=len(jobs) - 1, tmp_path=tmp_path))
        f.write('\n\njobs=(\n')
        for job in jobs:
            f.write(','.join(map(str, job)) + '\n')
        f.write(')\n\n')
        f.write('export PHEWEB_DATADIR={!r}\n'.format(conf.get_data_dir()))
        f.write(sys.argv[0] + ' conf num_procs=1 ' + args.step +
                ' --phenos=${jobs[$' + array_id_variable[args.engine] + ']}\n')

    print('Run:\n{} {}\n'.format(submit_command[args.engine], batch_filepath))
    print('Monitor with `{} <jobid>`\n'.format(monitor_command[args.engine]))
    print('output will be in {}'.format(tmp_path))
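# A small sketch of how the pheno indices above become job-array entries,
# with boltons.iterutils.chunked and a made-up batch size.
from boltons.iterutils import chunked

def _demo_job_array_lines():
    idxs = [0, 1, 4, 5, 6, 9]
    jobs = chunked(idxs, 4)                     # -> [[0, 1, 4, 5], [6, 9]]
    lines = [','.join(map(str, job)) for job in jobs]
    assert lines == ['0,1,4,5', '6,9']          # one comma-joined entry per array task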
def __init__(self, detectors, num, *args):
    if len(args) % 3 != 0:
        raise ValueError("wrong number of positional arguments")
    self.detectors = detectors
    self.num = num
    self._args = args
    self.motors = []
    for motor, start, stop in chunked(self.args, 3):
        self.motors.append(motor)
def cycler(self):
    # Build a Cycler for ScanND.
    cyclers = []
    for motor, start, stop in chunked(self.args, 3):
        init_pos = self._init_pos[motor]
        steps = init_pos + np.linspace(start, stop, num=self.num, endpoint=True)
        c = cycler(motor, steps)
        cyclers.append(c)
    return functools.reduce(operator.add, cyclers)
def input_data(data, w2v_model):
    # Flatten the nested token lists, then look up each word's embedding and
    # split it into length-1 chunks (one sub-array per vector component).
    list_words = []
    ti = []
    for sublist in data:
        for item in sublist:
            list_words.append(item)
    for words in list_words:
        ti.append(np.array(iterutils.chunked(list(w2v_model.wv[words]), 1)))
    return ti
def polite_chunk(todays_users, size):
    chunked_users = chunked(todays_users, size)
    if len(chunked_users) <= 1 or len(chunked_users[-1]) == size:
        return chunked_users
    # Redistribute a short final chunk across the earlier chunks.
    chunked_users, last_chunk = chunked_users[:-1], chunked_users[-1]
    for i, user in enumerate(last_chunk):
        chunked_users[i].append(user)
    return chunked_users
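# A quick check of the redistribution above (user names are made up):
# the lone leftover 'g' is folded into the first full chunk.
def _demo_polite_chunk():
    users = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    assert polite_chunk(users, 3) == [['a', 'b', 'c', 'g'], ['d', 'e', 'f']]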
def aucell4r(df_rnk: pd.DataFrame, signatures: Sequence[Type[GeneSignature]],
             auc_threshold: float = 0.05, noweights: bool = False, normalize: bool = False,
             num_workers: int = cpu_count()) -> pd.DataFrame:
    """
    Calculate enrichment of gene signatures for single cells.

    :param df_rnk: The rank matrix (n_cells x n_genes).
    :param signatures: The gene signatures or regulons.
    :param auc_threshold: The fraction of the ranked genome to take into account for the calculation
        of the Area Under the recovery Curve.
    :param noweights: Should the weights of the genes part of a signature be used in calculation of enrichment?
    :param normalize: Normalize the AUC values to a maximum of 1.0 per regulon.
    :param num_workers: The number of cores to use.
    :return: A dataframe with the AUCs (n_cells x n_modules).
    """
    if num_workers == 1:
        # Show progress bar ...
        aucs = pd.concat([enrichment4cells(df_rnk,
                                           module.noweights() if noweights else module,
                                           auc_threshold=auc_threshold)
                          for module in tqdm(signatures)]).unstack("Regulon")
        aucs.columns = aucs.columns.droplevel(0)
    else:
        # Decompose the rankings dataframe: the index and columns are shared with the child
        # processes via pickling.
        genes = df_rnk.columns.values
        cells = df_rnk.index.values
        # The actual rankings are shared directly. This is possible because during a fork from a
        # parent process the child process inherits the memory of the parent process. A RawArray is
        # used instead of a synchronized Array because these rankings are read-only.
        shared_ro_memory_array = RawArray(DTYPE_C, mul(*df_rnk.shape))
        array = np.frombuffer(shared_ro_memory_array, dtype=DTYPE)
        # Copy the contents of df_rnk into this shared memory block using row-major ordering.
        array[:] = df_rnk.values.flatten(order='C')

        # The resulting AUCs are returned via a synchronized array.
        auc_mtx = Array('d', len(cells) * len(signatures))  # Double precision floats.

        # Convert the modules to modules with uniform weights if necessary.
        if noweights:
            signatures = list(map(lambda m: m.noweights(), signatures))

        # Do the analysis in separate child processes.
        chunk_size = ceil(float(len(signatures)) / num_workers)
        processes = [Process(target=_enrichment,
                             args=(shared_ro_memory_array, chunk, genes, cells, auc_threshold,
                                   auc_mtx, (chunk_size * len(cells)) * idx))
                     for idx, chunk in enumerate(chunked(signatures, chunk_size))]
        for p in processes:
            p.start()
        for p in processes:
            p.join()

        # Reconstitute the results array, using C (row-major) ordering.
        aucs = pd.DataFrame(data=np.ctypeslib.as_array(auc_mtx.get_obj()).reshape(len(signatures), len(cells)),
                            columns=pd.Index(data=cells, name='Cell'),
                            index=pd.Index(data=list(map(attrgetter("name"), signatures)), name='Regulon')).T
    return aucs / aucs.max(axis=0) if normalize else aucs
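# The shared-memory pattern used above, reduced to a minimal standalone
# sketch: a read-only RawArray is filled once in the parent and read by
# worker processes. Names, sizes, and the `_read_slice` helper are
# illustrative only; call the demo under `if __name__ == '__main__':` on
# platforms that spawn rather than fork.
import numpy as np
from multiprocessing import Process, RawArray

def _read_slice(shared, n, start, stop):
    view = np.frombuffer(shared, dtype=np.float64, count=n)
    print(view[start:stop].sum())  # read-only access; no lock needed

def _demo_rawarray_sharing():
    n = 8
    shared = RawArray('d', n)
    np.frombuffer(shared, dtype=np.float64)[:] = np.arange(n, dtype=np.float64)
    workers = [Process(target=_read_slice, args=(shared, n, i * 4, (i + 1) * 4))
               for i in range(2)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()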
def loader_thread_routine(self, store):
    self._ips_for_georesolution = {}
    super(CampaignViewVisitsTab, self).loader_thread_routine(store)
    ips_for_geores = [ip for (ip, _) in sorted(self._ips_for_georesolution.items(), key=lambda x: x[1])]
    locations = {}
    for ip_addresses in iterutils.chunked(ips_for_geores, 50):
        locations.update(self.rpc.geoip_lookup_multi(ip_addresses))
    for row in store:
        if row[2] in locations:
            row[5] = str(locations[row[2]])
def _pre_scan(self):
    self._offsets = {}
    for motor, start, stop, num, snake in chunked(self.args, 5):
        ret = yield Msg('read', motor)
        if len(ret.keys()) > 1:
            raise NotImplementedError("Can't DScan this motor")
        key, = ret.keys()
        current_value = ret[key]['value']
        self._offsets[motor] = current_value
    yield from super()._pre_scan()
def cycler(self):
    # Build a Cycler for ScanND.
    cyclers = []
    for motor, start, stop in chunked(self.args, 3):
        init_pos = self._init_pos[motor]
        steps = init_pos + np.linspace(start, stop, num=self.num, endpoint=True)
        c = cycler(motor, steps)
        cyclers.append(c)
    return functools.reduce(operator.add, cyclers)
def download(df_path, out_dir):
    """Download HTRC feature volumes in batches of 100."""
    df = pd.read_json(df_path)
    for htids in tqdm(chunked(list(df.htid), 100)):
        try:
            download_file(htids, outdir=out_dir)
        except Exception as e:
            print(e)
def cycler(self):
    # Build a Cycler for ScanND.
    cyclers = []
    snake_booleans = []
    for motor, start, stop, num, snake in chunked(self.args, 5):
        init_pos = self._init_pos[motor]
        steps = init_pos + np.linspace(start, stop, num=num, endpoint=True)
        c = cycler(motor, steps)
        cyclers.append(c)
        snake_booleans.append(snake)
    return snake_cyclers(cyclers, snake_booleans)
def get_briefing_links(idx=0):
    resp_bytes = get_url(BASE_IDX_URL + str(idx))
    resp_tree = soupparser.fromstring(resp_bytes)
    items = resp_tree.cssselect('div.views-row .field-content')
    # The selected elements alternate date, link; pair them up.
    date_link_elems = chunked(items, 2)
    ret = [(date.text_content(),
            link.text_content(),
            qualify_url(link.find('a').get('href')))
           for date, link in date_link_elems]
    return ret
def main():
    input_file, output_file = get_paths_from_command_line()
    with open(input_file, "r") as f:
        # Each record spans 46 lines of the input file.
        lines = iterutils.chunked(f.readlines(), 46)
    pool = Pool()
    lines = list(pool.map(transform_line, lines))
    pool.close()
    with open(output_file, "w") as f:
        lines = {idx: line for idx, line in enumerate(lines)}
        dump(lines, f, indent=4)
def _pre_scan(self): "Get current position for each motor." self._init_pos = {} for motor, start, stop, num, snake in chunked(self.args, 5): ret = yield Msg('read', motor) if len(ret.keys()) > 1: raise NotImplementedError("Can't DScan a multiaxis motor") key, = ret.keys() current_value = ret[key]['value'] self._init_pos[motor] = current_value yield from super()._pre_scan()
def _pre_scan(self):
    # Build a Cycler for ScanND.
    cyclers = []
    snake_booleans = []
    for motor, start, stop, num, snake in chunked(self.args, 5):
        offset = self._offsets[motor]
        steps = offset + np.linspace(start, stop, num=num, endpoint=True)
        c = cycler(motor, steps)
        cyclers.append(c)
        snake_booleans.append(snake)
    self.cycler = snake_cyclers(cyclers, snake_booleans)
    yield from super()._pre_scan()
def __init__(self, detectors, *args):
    args = list(args)
    # The first (slowest) axis is never "snaked." Insert False to
    # make it easy to iterate over the chunks of args.
    args.insert(4, False)
    if len(args) % 5 != 0:
        raise ValueError("wrong number of positional arguments")
    self.detectors = detectors
    self._args = args
    self.motors = []
    for motor, start, stop, num, snake in chunked(self.args, 5):
        self.motors.append(motor)
def absolute_mesh(*args, time=None, md=None):
    if (len(args) % 4) == 1:
        if time is not None:
            raise ValueError('wrong number of positional arguments')
        args, time = args[:-1], args[-1]
    total_points = 1
    for motor, start, stop, num in chunked(args, 4):
        total_points *= num
    yield from _pre_scan(total_points=total_points, count_time=time)
    yield from spec_api.mesh(*args, time=time, md=md)
def _resolve_geolocations(self, all_ips):
    geo_locations = {}
    public_ips = []
    for visitor_ip in all_ips:
        ip = ipaddress.ip_address(visitor_ip)
        if ip.is_private or ip.is_loopback:
            continue
        public_ips.append(visitor_ip)
    public_ips.sort()
    for ip_chunk in iterutils.chunked(public_ips, 100):
        geo_locations.update(self.rpc.geoip_lookup_multi(ip_chunk))
    return geo_locations
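# The standard-library checks used above for skipping non-routable
# addresses, shown on sample values.
import ipaddress

def _demo_ip_filtering():
    assert ipaddress.ip_address('10.0.0.1').is_private
    assert ipaddress.ip_address('127.0.0.1').is_loopback
    assert not ipaddress.ip_address('8.8.8.8').is_private  # routable; kept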
def mesh(*args, time=None, md=None):
    """
    Scan over a mesh; each motor is on an independent trajectory.

    Parameters
    ----------
    *args
        patterned like (``motor1, start1, stop1, num1,``
                        ``motor2, start2, stop2, num2,``
                        ``motor3, start3, stop3, num3,`` ...
                        ``motorN, startN, stopN, numN``)
        The first motor is the "slowest", the outer loop.
    md : dict, optional
        metadata
    """
    if len(args) % 4 != 0:
        raise ValueError("wrong number of positional arguments")
    motors = []
    shape = []
    extents = []
    for motor, start, stop, num in chunked(args, 4):
        motors.append(motor)
        shape.append(num)
        extents.append([start, stop])

    # outer_product_scan expects a 'snake' parameter for all but the first motor.
    chunked_args = iter(chunked(args, 4))
    new_args = list(next(chunked_args))
    for chunk in chunked_args:
        new_args.extend(list(chunk) + [False])

    # shape goes in (rr, cc)
    # extents go in (x, y)
    inner = inner_spec_decorator('mesh', time, motors=motors, shape=shape,
                                 extent=list(chain(*extents[::-1])))(
        outer_product_scan)

    return (yield from inner(gs.DETS, *new_args, md=md))
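# A sketch of the argument rewriting above: snake=False is appended after
# every motor chunk except the first (placeholder motor names).
from boltons.iterutils import chunked

def _demo_mesh_args():
    args = ['mx', 0, 1, 5, 'my', 0, 2, 3]
    chunks = iter(chunked(args, 4))
    new_args = list(next(chunks))
    for chunk in chunks:
        new_args.extend(list(chunk) + [False])
    assert new_args == ['mx', 0, 1, 5, 'my', 0, 2, 3, False]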
def _post_scan(self):
    # Return the motor to its original position.
    yield from super()._post_scan()
    try:
        init_pos = self._init_pos
        delattr(self, '_init_pos')
    except AttributeError:
        raise RuntimeError("Trying to run _post_scan code for a DScan "
                           "without running the _pre_scan code to get "
                           "the baseline position.")
    for motor, start, stop, num, snake in chunked(self.args, 5):
        yield Msg('set', motor, init_pos[motor], block_group='A')
    yield Msg('wait', None, 'A')
def __init__(self, detectors, num, *args):
    if len(args) % 3 != 0:
        raise ValueError("wrong number of positional arguments")
    self.detectors = detectors
    self.num = num
    self._args = args
    self._motors = []
    extents = []
    for motor, start, stop in chunked(self.args, 3):
        self._motors.append(motor)
        extents.append([start, stop])
    self.extents = tuple(extents)
    self.setup_attrs()
def run():
    width = 25
    height = 6
    layers = chunked(data, width * height)
    # For each layer, pair the number of '0's with the checksum
    # (count of '1's times count of '2's).
    layersinfo = [(l.count('0'), l.count('1') * l.count('2')) for l in layers]
    # Keep the entry for the layer with the fewest zeros.
    r = None
    for i in layersinfo:
        if not r or r[0] > i[0]:
            r = i
    print(r)
def _pre_scan(self):
    # bluesky increments the scan id by one in open_run,
    # so set it appropriately
    gs.RE.md['scan_id'] = get_next_scan_id() - 1
    total_points = 1
    for motor, start, stop, num, snake in chunked(self.args, 5):
        total_points *= num
    if hasattr(self, '_pre_scan_calculate'):
        yield from self._pre_scan_calculate()
    yield from scan_setup(self.detectors, total_points=total_points)
    yield from super()._pre_scan()
def switch(self):
    if self.contract in SWITCH_SET:
        # Split the talon into groups of cards the declarer may exchange.
        talon_set = iterutils.chunked(self.talon, SWITCH_SET[self.contract])
        self.cards_taken, self.cards_dropped = self.players[self.declarer].switch(
            talon_set, self.hands[self.declarer])
        self.hands[self.declarer] = [
            e for e in (self.hands[self.declarer] + self.cards_taken)
            if e not in self.cards_dropped
        ]
        self.hands[self.declarer].sort(key=CARDS.index)
        self.talon = [e for e in self.talon if e not in self.cards_taken]
    return
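# A toy illustration of the talon grouping above: six talon cards split into
# groups of three (card names are made up; the real group size depends on
# the contract).
from boltons.iterutils import chunked

def _demo_talon_groups():
    talon = ['c1', 'c2', 'c3', 'c4', 'c5', 'c6']
    assert chunked(talon, 3) == [['c1', 'c2', 'c3'], ['c4', 'c5', 'c6']]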
def _pre_scan(self):
    # Build a Cycler for ScanND.
    num = self.num
    self.cycler = None
    for motor, start, stop in chunked(self.args, 3):
        offset = self._offsets[motor]
        steps = offset + np.linspace(start, stop, num=num, endpoint=True)
        c = cycler(motor, steps)
        # Special-case the first pass because there is no
        # multiplicative identity for cyclers.
        if self.cycler is None:
            self.cycler = c
        else:
            self.cycler += c
    yield from super()._pre_scan()
def _post_scan(self):
    yield from super()._post_scan()
    try:
        init_pos = self._init_pos
        delattr(self, "_init_pos")
    except AttributeError:
        raise RuntimeError(
            "Trying to run _post_scan code for a DScan "
            "without running the _pre_scan code to get "
            "the baseline position."
        )
    # Return the motors to their original positions.
    for motor, start, stop in chunked(self.args, 3):
        yield Msg("set", motor, init_pos[motor], block_group="A")
    yield Msg("wait", None, "A")
def get_albums(album_ids: Iterable[str]) -> Dict[str, objects.Album]:
    spotify = client.spotify_client()
    albums = []
    # The endpoint accepts at most 20 album IDs per call.
    for chunk in iterutils.chunked(album_ids, 20):
        albums.extend(spotify.albums(chunk)['albums'])
    tracks = get_tracks(albums)
    albums_by_id = {}
    for album in albums:
        album = schemas.Album().load(album)
        album.tracks = tracks[album.id]
        albums_by_id[album.id] = album
    return albums_by_id
def __init__(self, data_source, batch_size=1, ratio_used=1.0, shuffle=True):
    self.ratio_used = ratio_used
    self.shuffle = shuffle

    shapes = data_source.get_shapes()
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for shape in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)

    if self.shuffle:
        for indices in shape_indices:
            random.shuffle(indices)

    self.batches = sum([chunked(indices, size=batch_size)
                        for indices in shape_indices], [])
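# A small sketch of the shape-bucketed batching above: items are grouped by
# shape first, so every batch is shape-homogeneous (toy shapes, batch_size=2,
# assuming boltons' list-returning `chunked`).
from boltons.iterutils import chunked

def _demo_shape_batches():
    shapes = [(2, 2), (3, 3), (2, 2), (2, 2), (3, 3)]
    unique_shapes = sorted(set(shapes))
    shape_indices = [[] for _ in unique_shapes]
    for k, shape in enumerate(shapes):
        shape_indices[unique_shapes.index(shape)].append(k)
    batches = sum([chunked(indices, size=2) for indices in shape_indices], [])
    assert batches == [[0, 2], [3], [1, 4]]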
def __call__(self, *args, time=None, subs=None, **kwargs):
    args = list(args)
    if len(args) % 4 == 1:
        if time is None:
            time = args.pop(-1)
        else:
            raise ValueError("wrong number of positional arguments")
    original_times = _set_acquire_time(time)
    # Rebuild the argument list chunk by chunk; in-place inserts would shift
    # the indices of later chunks.
    new_args = []
    for i, (motor, start, stop, intervals) in enumerate(chunked(args, 4)):
        # intervals -> intervals + 1
        new_args.extend([motor, start, stop, intervals + 1])
        # never snake; SPEC doesn't know how
        if i != 0:
            new_args.append(False)
    result = super().__call__(*new_args, subs=subs, **kwargs)
    _unset_acquire_time(original_times)
    return result
def absolute_mesh(dets, *args, time=None, md=None):
    if (len(args) % 4) == 1:
        if time is not None:
            raise ValueError('wrong number of positional arguments')
        args, time = args[:-1], args[-1]

    total_points = 1
    new_args = []
    add_snake = False
    for motor, start, stop, num in chunked(args, 4):
        total_points *= num
        new_args += [motor, start, stop, num]
        # outer_product_scan expects a snake flag after every motor but the first.
        if add_snake:
            new_args += [False]
        add_snake = True

    yield from _pre_scan(dets, total_points=total_points, count_time=time)
    return (yield from plans.outer_product_scan(dets, *new_args,
                                                md=md, per_step=one_nd_step))
def scan_steps(dets, *args, time=None, per_step=None, md=None):
    '''
    Absolute scan over an arbitrary N-dimensional trajectory.

    Parameters
    ----------
    ``*args`` : {Positioner, list/sequence}
        Patterned like (``motor1, motor1_positions, ...,
        motorN, motorN_positions``)
        where motorN_positions is a list/tuple/sequence of absolute positions
        for motorN to go to.
    time : float, optional
        applied to any detectors that have a `count_time` setting
    per_step : callable, optional
        hook for customizing action of inner loop (messages per step)
        See docstring of bluesky.plans.one_nd_step (the default) for details.
    md : dict, optional
        metadata
    '''
    if len(args) % 2 == 1:
        if time is not None:
            raise ValueError('Wrong number of positional arguments')
        args, time = args[:-1], args[-1]

    cyclers = [cycler(motor, steps) for motor, steps in chunked(args, 2)]
    cyc = sum(cyclers[1:], cyclers[0])
    total_points = len(cyc)

    if md is None:
        md = {}
    from collections import ChainMap
    md = ChainMap(md, {'plan_name': 'scan_steps'})
    plan = plans.scan_nd(dets, cyc, md=md, per_step=per_step)
    plan = plans.configure_count_time_wrapper(plan, time)
    yield from _pre_scan(dets, total_points=total_points, count_time=time)
    return (yield from plans.reset_positions_wrapper(plan))
def __init__(self, detectors, *args):
    args = list(args)
    # The first (slowest) axis is never "snaked." Insert False to
    # make it easy to iterate over the chunks of args.
    args.insert(4, False)
    if len(args) % 5 != 0:
        raise ValueError("wrong number of positional arguments")
    self.detectors = detectors
    self._motors = []
    self._args = args
    shape = []
    extent = []
    snaking = []
    for motor, start, stop, num, snake in chunked(self.args, 5):
        self._motors.append(motor)
        shape.append(num)
        extent.append([start, stop])
        snaking.append(snake)
    self.shape = tuple(shape)
    self.extents = tuple(extent)
    self.snaking = tuple(snaking)
    self.setup_attrs()
def inner_product(num, args):
    '''Scan over one multi-motor trajectory.

    Parameters
    ----------
    num : integer
        number of steps
    args : list of {Positioner, Positioner, int}
        patterned like (``motor1, start1, stop1, ...,
        motorN, startN, stopN``)
        Motors can be any 'setable' object (motor, temp controller, etc.)

    Returns
    -------
    cyc : cycler
    '''
    if len(args) % 3 != 0:
        raise ValueError("wrong number of positional arguments")

    cyclers = []
    for motor, start, stop in chunked(args, 3):
        steps = np.linspace(start, stop, num=num, endpoint=True)
        c = cycler(motor, steps)
        cyclers.append(c)
    return functools.reduce(operator.add, cyclers)
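# How adding cyclers forms a single coupled (inner-product) trajectory:
# `+` zips equal-length cyclers together. Axis names here are placeholders.
import numpy as np
from cycler import cycler

def _demo_inner_product_cycler():
    cx = cycler('x', np.linspace(0, 1, num=3, endpoint=True))
    cy = cycler('y', np.linspace(10, 30, num=3, endpoint=True))
    steps = list(cx + cy)  # both motors step together
    assert steps[0] == {'x': 0.0, 'y': 10.0}
    assert steps[-1] == {'x': 1.0, 'y': 30.0}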
def add_extras(articles, lang, project, log_rec):
    '''\
    Add images and summaries to articles in groups.
    '''
    log_rec['len_art'] = len(articles)
    ret = []
    article_groups = chunked(articles, DEFAULT_GROUP_SIZE)
    for article_group in article_groups:
        titles = [a['article'] for a in article_group]
        images = get_images(titles, lang, project)
        summaries = get_summaries(titles, lang, project)
        for article in article_group:
            title = article['title']
            article['image_url'] = images.get(title, DEFAULT_IMAGE)
            if word_filter(title) or word_filter(article['image_url']):
                article['image_url'] = DEFAULT_IMAGE
            summary = summaries.get(title, '')
            summary = crisco.shorten(summary, lang, 400)
            article['summary'] = summary
            ret.append(article)
    log_rec.success('finished adding images/summaries for {len_art} articles')
    return ret
def run(argv):

    def should_process(pheno):
        return PerPhenoParallelizer().should_process_pheno(
            pheno,
            get_input_filepaths=lambda pheno: pheno['assoc_files'],
            get_output_filepaths=lambda pheno: common_filepaths['parsed'](pheno['phenocode']),
        )

    idxs = [i for i, pheno in enumerate(get_phenolist()) if should_process(pheno)]
    if not idxs:
        print('All phenos are up-to-date!')
        exit(0)
    jobs = chunked(idxs, N_AT_A_TIME)

    sbatch_filepath = get_dated_tmp_path('slurm-parse') + '.sh'
    tmp_path = get_tmp_path('')
    with open(sbatch_filepath, 'w') as f:
        f.write('''\
#!/bin/bash
#SBATCH --cpus-per-task=4
#SBATCH --mem=1G
#SBATCH --time=5-0:0
#SBATCH --array=0-{n_jobs}
#SBATCH --output={tmp_path}/slurm-%j.out
#SBATCH --error={tmp_path}/slurm-%j.out

jobs=(
'''.format(n_jobs=len(jobs) - 1, tmp_path=tmp_path))
        for job in jobs:
            f.write(','.join(map(str, job)) + '\n')
        f.write(')\n\n')
        f.write('export PHEWEB_DATADIR={!r}\n'.format(conf.data_dir))
        f.write(sys.argv[0] + ' conf num_procs=4 parse --phenos=${jobs[$SLURM_ARRAY_TASK_ID]}\n')

    print('Run:\nsbatch {}\n'.format(sbatch_filepath))
    print('Monitor with `squeue --long --array --job <jobid>`\n')
    print('output will be in {}/slurm-*.out'.format(tmp_path))
def _post_scan(self):
    # Return the motor to its original position.
    yield from super()._post_scan()
    for motor, start, stop, num, snake in chunked(self.args, 5):
        yield Msg('set', motor, self._offsets[motor], block_group='A')
    yield Msg('wait', None, 'A')
def _post_scan(self):
    # Return the motor to its original position.
    yield from super()._post_scan()
    for motor, start, stop in chunked(self.args, 3):
        yield Msg("set", motor, self._offsets[motor], block_group="A")
    yield Msg("wait", None, "A")