def parse_ag_opinion(html: Response) -> OpinionParseResult:
    """Extract the fields of an opinion page into an ``OpinionParseResult``.

    The summary, title and date are each read with ``first`` using a CSS
    selector.  The body text is every ``.body-content p`` text node,
    passed through ``normalize_whitespace`` and joined with newlines.
    Citations are the distinct, sorted regex matches found in that body
    text, wrapped in ``CitationSet``.

    Note: ``all``, ``map`` and ``join`` are used here in selector/curried
    form (``all(html, selector)``, ``map(fn)``, ``join(sep)``), so they must
    be this module's helpers rather than the builtins.
    """
    summary = first(html, css=".page-top__subtitle--re p::text", expected="summary")
    title = first(html, css="h1.page-top__title--opinion::text", expected="title")
    date = first(html, css="time::text", expected="date")

    paragraphs = all(html, ".body-content p::text")
    cleaned_paragraphs = map(normalize_whitespace)(paragraphs)
    full_text = join("\n")(cleaned_paragraphs)

    # Three dash-separated digit groups, optionally followed by a
    # parenthesised suffix that must end in a letter, digit or dash.
    raw_citations = re.findall(
        r"\d+-\d+-\d+(?:\([-().A-Za-z0-9]*[-A-Za-z0-9]\))?", full_text)
    citation_set = CitationSet(sorted(set(raw_citations)))

    return OpinionParseResult(
        summary=summary,
        title=title,
        is_official=title.startswith("Official"),
        date=opinion_date_to_iso8601(date),
        full_text=full_text,
        source_url=html.url,
        citations=citation_set,
    )
def test_preprocess():
    """Save the test image after each successive prefix of the preprocessing chain.

    The result of applying the first ``k`` steps to ``1.jpg`` is written to
    ``<k + 1>.jpg`` in the current directory.
    """
    test_im_fn = '1.jpg'
    test_im = PIL.Image.open(test_im_fn)
    # preproc_normalise is currently left out of the chain.
    steps = (
        preproc_resize,
        preproc_grayscale,
        preproc_rescale,
        preproc_smooth,
        preproc_thresh,
    )
    for applied in range(1, len(steps) + 1):
        # Re-run the chain from the untouched input each time.
        pipe(test_im, *steps[:applied]).save('{}.jpg'.format(applied + 1))
def top_model_crps_id(model):
    """
    Looks up the corpus for the supplied model via
    ``find_best_fit_model_corpus_id`` and returns its ``'id'`` entry.
    """
    corpus = find_best_fit_model_corpus_id(model)
    read_id = tlz.curried.get_in(['id'])
    return read_id(corpus)
async def delete(self, id_):
    """Remove the entry keyed by ``id_`` and report whether anything was removed.

    ``self.items`` is replaced by the mapping returned from ``dissoc``; the
    return value is True when that mapping has a different length from the
    previous one.
    """
    remaining = dissoc(self.items, id_)
    has_changed = len(self.items) != len(remaining)
    self.items = remaining
    return has_changed
def add_to_data_frame(df):
    """Add 'lat' and 'long' columns derived from the 'Locations' column.

    Each location is sent through ``query_gmap_geocode`` and then
    ``gmap_query_result_to_latlng``; the first element of every result goes
    to 'lat' and the second to 'long'.  ``df`` is modified in place and
    returned.

    An empty 'Locations' column makes the unpacking below raise
    ``ValueError``, because ``zip(*...)`` then yields nothing.
    """
    def to_latlng(location):
        return f.pipe(location, query_gmap_geocode, gmap_query_result_to_latlng)

    coordinates = df['Locations'].map(to_latlng)
    # Transpose the per-row pairs into one sequence per column.
    latitudes, longitudes = zip(*coordinates)
    df['lat'] = latitudes
    df['long'] = longitudes
    return df
def top_model_kmeans_clstrs(model, n_clusters=8, **kwargs):
    """
    Fits a kmeans model on the corpus obtained for ``model`` and returns
    the cluster labels.

    The corpus comes from ``top_model_corpus_df(model)``.  ``n_clusters``
    and any extra keyword arguments are forwarded to ``kmeans_clusters``.
    """
    corpus_df = top_model_corpus_df(model)
    # Call kmeans_clusters directly with the corpus.  Calling it with keyword
    # arguments only, as before, works solely if kmeans_clusters is curried;
    # a plain function would raise TypeError for the missing corpus argument.
    return kmeans_clusters(corpus_df, n_clusters=n_clusters, **kwargs)
def test_reform_combination():
    """Chain the reforms onto the base system and check the result is truthy."""
    base_system = france_data_tax_benefit_system
    reforms = (plfr2014, plf2015, plf2016, ayrault_muet)
    combined = pipe(base_system, *reforms)
    assert combined
def preproc_thresh(image):
    """Threshold image using Otsu's method.

    The image is converted with ``pil_2_numpy``, compared element-wise
    against ``skimage.filters.threshold_otsu`` of itself, and the resulting
    boolean array is converted back with ``numpy_2_pil``.
    """
    pixels = pil_2_numpy(image)
    above_threshold = pixels > skimage.filters.threshold_otsu(pixels)
    return numpy_2_pil(above_threshold)
def main():
    """Run the line-clustering chain on ``images/crane1.png`` and save ``final.png``.

    The image goes through ``binarize``, ``get_lines``, ``cluster_lines`` and
    ``cluster_coordinates``, which together yield ``(merged, lines)``.
    ``merged`` is then passed to ``cluster_axis`` for axis 1 and axis 0
    before everything is handed to ``save_img``.
    """
    img = cv2.imread('images/crane1.png')
    merged, lines = cluster_coordinates(cluster_lines(get_lines(binarize(img))))
    # Axis 1 first (y axis), then axis 0 (x axis).
    for axis in (1, 0):
        cluster_axis(merged, axis)
    save_img(merged, lines, "final.png", img)
def get_reviews(x):
    """Return the ``.text`` of each review element found under ``x``.

    Elements are looked up through ``or_pipe`` with two class-name lookups
    ("reviewText", then "review-text") and a default of ``[]``.
    Presumably ``or_pipe`` returns the first lookup that yields a result;
    confirm against its definition.
    """
    review_elements = or_pipe(
        x,
        _.find_elements_by_class_name("reviewText"),
        _.find_elements_by_class_name("review-text"),
        default=[],
    )
    # `map` is used in curried form: build the mapper, then apply it.
    extract_text = map(lambda element: element.text)
    return extract_text(review_elements)
def _clean_text(self, aspirate_result_text: str) -> str:
    """Apply the text clean-up steps, in order, to ``aspirate_result_text``."""
    cleanup_steps = (
        _remove_signature,
        _remove_inline_space,
        _remove_end_space,
        _remove_reporting_system,
        _replace_add,
    )
    text = aspirate_result_text
    for step in cleanup_steps:
        text = step(text)
    return text
def task_scheduler2() -> None:
    """Schedule and run downloads for every (code, period) statement not yet stored.

    The set of wanted pairs is every currently listed ``ts_code`` combined
    with the year-end periods ``20171231``, ``20181231`` and ``20191231``.
    Pairs already present in the index of the stored 'balancesheet'
    statement are skipped.  The remainder is split into batches of 36, and
    batch ``i`` is scheduled ``i * 30`` seconds after start.

    The function blocks in ``s.run()`` until all scheduled batches have run
    and then returns normally, which is why it is annotated ``-> None``
    rather than ``NoReturn``.
    """
    s = sched.scheduler(time.time, time.sleep)

    balancesheet = rim_db.get_ts_statement('balancesheet')
    code_year = balancesheet.index
    completion_of_code_year = set(
        zip(code_year.get_level_values(0), code_year.get_level_values(1)))

    # Enumerate the company codes that are currently available.
    listed = ts.pro_api().stock_basic(
        exchange='', list_status='L', fields='ts_code')
    codes = [t.ts_code for t in listed.itertuples()]
    # Build the (code, year) tuples.
    code_year_set = set(
        product(codes, [f"{y}1231" for y in range(2017, 2020)]))

    undo_code_year_set = code_year_set - completion_of_code_year

    # Group into batches of 36 so the downloads can be rate limited.
    batches = groupby(enumerate(undo_code_year_set), key=lambda y: y[0] // 36)
    for i, jobs in batches:
        # NOTE(review): each entry passed on is an (index, (code, period))
        # pair, not a bare (code, period) tuple - confirm that
        # download_and_save_statement expects this shape.
        # NOTE(review): the original comment said "36 download tasks per
        # minute", but batches are spaced 30 seconds apart - confirm the
        # intended rate.
        s.enter(
            i * 30,
            1,
            download_and_save_statement,
            kwargs={'code_year_lst': list(jobs)})

    s.run()
def preprocess(image):
    """Run ``image`` through the preprocessing steps in order and return the result."""
    # preproc_normalise is currently left out of the chain.
    stages = (
        preproc_resize,
        preproc_grayscale,
        preproc_rescale,
        preproc_smooth,
        preproc_thresh,
    )
    processed = image
    for stage in stages:
        processed = stage(processed)
    return processed
def predictions_most_frequent(label, entry):
    """Return the distinct ``data_keys`` value combinations among the predictions.

    Predictions in ``entry['predictions']`` are grouped by the tuple of their
    values for ``label['data_keys']`` and each group is replaced by its size.
    The resulting histogram is passed through
    ``_ordered_dict_sorted_by_value``, and each of its keys is turned back
    into a ``{data_key: value}`` dict.  The order of the results is whatever
    order that helper gives the histogram keys.
    """
    data_keys = label['data_keys']
    data_fn = juxt(map(op.itemgetter, data_keys))

    grouped = groupby(data_fn, entry['predictions'])
    counts = valmap(len, grouped)
    histogram = _ordered_dict_sorted_by_value(counts)

    return map(lambda values: dict(zip(data_keys, values)), histogram.keys())
async def create(self, dto: CreateTodoItemDto):
    """Create a ``TodoItem`` from ``dto``, store it, and return it.

    The new id is one more than the last key currently in ``self.items``.
    When the store is empty there is no last key to build on, so ids start
    at 1 instead of the lookup failing.  ``self.items`` is replaced by the
    mapping returned from ``assoc``.
    """
    # Guard the empty store: taking the last of no keys cannot succeed.
    previous_key = last(self.items.keys()) if self.items else 0
    new_item = TodoItem(
        id=previous_key + 1, msg=dto.msg, is_done=dto.is_done)
    self.items = assoc(self.items, new_item.id, new_item)
    return new_item
def kmeans_clstrs_with_corpus(corpusdf, **kwargs):
    """
    Not needed.
    Returns a deep copy of the corpus dataframe with the kmeans cluster
    labels added as a ``clusters`` column.  Keyword arguments are forwarded
    to ``kmeans_clusters``.
    """
    corpus_copy = copy.deepcopy(corpusdf)
    cluster_labels = kmeans_clusters(corpus_copy, **kwargs)
    return corpus_copy.assign(clusters=cluster_labels)
def go_review_page(rank):
    """Build a callable that opens the review anchor at position ``rank``.

    The returned callable takes a page-like object, finds its review anchors
    with ``find_review_anchers``, picks the one at ``rank``, prints its
    ``tag_name`` and clicks it.
    """
    def log_tag_name(element):
        # Debug aid: show which element was picked, then pass it through.
        print(element.tag_name)
        return element

    def open_review(page):
        return pipe(
            page,
            find_review_anchers,
            _[rank],
            log_tag_name,
            _.click(),
        )

    return open_review
def find_block_devs(self, folder):
    """Map major_minor to path for every device path that starts with ``folder``.

    Walks the ``paths`` of each entry in ``self.block_device_nodes`` and
    keeps those beginning with ``folder``.  When one device has several
    matching paths, the last one wins.
    """
    return {
        node['major_minor']: dev_path
        for node in self.block_device_nodes.itervalues()
        for dev_path in node['paths']
        if dev_path.startswith(folder)
    }
def _validate_predictor_params(self, params):
    """Validate ``params`` against the predictor schema and extract the hyper params.

    Returns ``(hypers, errors)``.  ``errors`` is the ``Invalid`` /
    ``MultipleInvalid`` exception raised by the schema, or ``None`` when
    validation passes.  Validation failures are printed, not raised, and the
    hyper params are computed either way.
    """
    validate = self._get_predictor_schema()
    try:
        validate(params)
    except (Invalid, MultipleInvalid) as exc:
        print("Error validing predictor params: %s" % exc)
        errors = exc
    else:
        errors = None

    # Keep only the hyper params (presumably filtered on a leading '_';
    # see _keep_hyper_params), then coerce them.
    kept = self._keep_hyper_params(params)
    hypers = self._coerce_hypers(kept)
    return hypers, errors
def kmeans_clusters(corpusdf, n_clusters=8, random_state=1, n_init=100, **kwargs):
    """
    Fits a mini batch kmeans model on the supplied corpus and returns the
    cluster labels.

    ``n_clusters``, ``random_state`` and ``n_init`` are passed to
    ``MiniBatchKMeans``.  Extra keyword arguments are accepted but not used.
    """
    estimator = MiniBatchKMeans(
        n_clusters=n_clusters,
        random_state=random_state,
        n_init=n_init,
    )
    fitted = estimator.fit(corpusdf)
    return fitted.labels_
def __compileRegex(self):
    """Expand the ``$name`` placeholders in ``self.regex`` and compile the result.

    Every entry of ``self.revars`` is passed to ``__preparevalue`` (with the
    first character of its key dropped) to obtain ``{name: pattern}``
    mappings.  For each such pair a ``replace`` call swapping ``$name`` for
    the named group ``(?P<name>pattern)`` with count 1 is applied to the
    template.  The compiled pattern is stored on ``self.compiled``.
    """
    key_pattern = dict()
    for key, value in self.revars.items():
        key_pattern.update(self.__preparevalue(key[1:], value))

    template = self.regex
    for name, pattern in key_pattern.items():
        substitute = callOnObject(
            'replace', f'${name}', rf'(?P<{name}>{pattern})', 1)
        template = substitute(template)

    self.compiled = re.compile(template)
def preprocess_skimage(image):
    """Convert to grayscale, apply gamma 0.75, then upscale by 2.

    Other steps tried here and currently disabled: bilateral denoising,
    histogram equalisation, local and Otsu thresholding, and integer
    conversion.
    """
    gray = color.rgb2gray(image)
    gamma_adjusted = exposure.adjust_gamma(gray, gamma=0.75)
    return transform.pyramid_expand(gamma_adjusted, upscale=2)
def id_check(author):
    """
    checks ID type and calls appropriate function

    With both an ID type and an ID value, the author goes through the
    handler for that type, then ``url_check`` and ``error_check``.  With a
    value but no type, an error message is stored on the author under
    ``error_key``.  In every remaining case the author is passed to
    ``error_check``.
    """
    id_type = author['authorID_Type']
    id_value = author['authorID']
    funcs = {'ORCID': _orcid,
             'GND': _ytc,
             'Scopus': _ytc,
             'WoS': _ytc,
             'Repec': _ytc}

    if id_value:
        if id_type:
            # An ID type missing from `funcs` raises KeyError here.
            return pipe(author, funcs[id_type], url_check, error_check)
        author.update({error_key: u'Please provide type of author ID'})
    return error_check(author)
def paths_to_major_minors(self, device_paths):
    """
    Create a list of device major minors for a list of device paths.

    Each path is looked up with ``self.path_to_major_minor``; lookups that
    come back as None (or any other falsy value) are skipped.

    :param device_paths: The list of paths to get the list of major minors for.
    :return: list of dev_major_minors for the paths that were found; empty
        if none were.
    """
    lookup = self.path_to_major_minor
    return [
        major_minor
        for major_minor in (lookup(device_path) for device_path in device_paths)
        if major_minor
    ]
def main():
    """Convert ``markdown.tmLanguage.json`` into ``rmarkdown.tmLanguage.json``.

    The source grammar is loaded from the ``syntaxes`` directory, passed
    through ``conv_chunkparser`` and written back out as JSON.
    """
    syntaxes_dir_path = project_dir_path / "syntaxes"
    src_json = syntaxes_dir_path / "markdown.tmLanguage.json"
    dst_json = syntaxes_dir_path / "rmarkdown.tmLanguage.json"
    addition_json = syntaxes_dir_path / "addition.json"

    content = json.loads(src_json.read_text())
    # Loaded but not applied: the add_new step that would use it is disabled.
    additions = json.loads(addition_json.read_text())

    converted = conv_chunkparser(content)
    dst_json.write_text(json.dumps(converted))
async def update(self, dto: UpdateTodoItemDto, id_: int):
    """Merge ``dto`` into the stored item ``id_`` and return the new item.

    Returns ``None`` when there is no (truthy) item under ``id_``.
    Otherwise the existing item's fields are overlaid with the fields of
    ``dto`` obtained via ``dict(exclude_defaults=True)``, a new ``TodoItem``
    is built from the result, and ``self.items`` is replaced by the mapping
    returned from ``assoc``.
    """
    existing = get_in([id_], self.items)
    if not existing:
        return None

    # NOTE(review): exclude_defaults drops dto fields that equal their
    # default, so such values cannot overwrite the stored ones - confirm
    # this is intended.
    merged_fields = {
        **existing.dict(),
        **dto.dict(exclude_defaults=True)
    }
    updated = TodoItem(**merged_fields)
    self.items = assoc(self.items, id_, updated)
    return updated
def process_game_data(df: pd.DataFrame) -> pd.DataFrame:
    """Apply the game-data processing steps to ``df`` in order and return the result."""
    steps = (
        rename_features,
        remove_forfeits,
        add_features,
        drop_features,
        clean_team_names,
        create_team_indices,
        remove_duplicate_games,
    )
    frame = df
    for step in steps:
        frame = step(frame)
    return frame
def save_reviews(product_id, tag, reviews):
    """Write one ``tag,text`` line per review to ``data/<product_id>.<tag>.csv``.

    Each review is printed, then transformed by the pipe below (presumably:
    split on newlines, strip and UTF-8-encode each piece, join with single
    spaces) and written after the tag.

    The file is opened in a ``with`` block so it is closed even if a review
    fails to transform or write.

    NOTE(review): the text is written without CSV quoting, so a review
    containing a comma produces extra columns.
    """
    with open("data/{}.{}.csv".format(product_id, tag), "w") as f:
        for review in reviews:
            print(review)
            f.write("{},{}\n".format(
                tag,
                pipe(
                    review,
                    _.split("\n"),
                    map(_.strip()),
                    map(_.encode("utf-8")),
                    SF(" ".join)(_)
                )
            ))
def paths_to_major_minors(node_block_devices, ndt, device_paths):
    """
    Create a list of device major minors for a list of device paths.

    Each path is looked up with the callable returned by
    ``path_to_major_minor(node_block_devices, ndt)``; lookups that come back
    as None (or any other falsy value) are skipped.

    :param node_block_devices: dict of major-minor ids keyed on path
    :param ndt: normalised device table
    :param device_paths: The list of paths to get the list of major minors for.
    :return: list of dev_major_minors for the paths that were found; empty
        if none were.
    """
    lookup = path_to_major_minor(node_block_devices, ndt)
    return [
        major_minor
        for major_minor in (lookup(device_path) for device_path in device_paths)
        if major_minor
    ]
def _init_pipeline(self):
    """Build ``self.text_pipeline``, a callable that applies all text clean-up steps.

    The steps run in list order.  Patterns that contain backslashes are
    written as raw strings; the resulting pattern text is unchanged, but
    Python no longer warns about invalid escape sequences.  The whitespace
    collapse passes ``regex=True`` explicitly because newer pandas versions
    treat the pattern of ``Series.str.replace`` as a literal string by
    default.
    """
    text_operations = [
        # Get rid of URLs
        remove_pattern(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'),
        # Take out retweet header, there is only one
        # NOTE(review): the class [a-z,A-Z] also matches a literal comma -
        # confirm whether that is intended (same for the screenname pattern).
        remove_pattern('RT @[a-z,A-Z]*: '),
        # Get rid of hashtags
        remove_pattern('#'),
        # Get rid of references to other screennames
        remove_pattern('@[a-z,A-Z]*'),
        # Everything that is not a word character or whitespace
        remove_pattern(r'[^\w\s]'),
        # Collapse runs of two or more whitespace characters into one space
        lambda series: series.str.replace(r'\s\s+', ' ', regex=True),
        # Strip series entries
        lambda series: series.str.strip(),
    ]
    self.text_pipeline = lambda data: pipe(data, *text_operations)
def parse_stratagem_results_to_influx(measurement, fs_name, stratagem_results_json):
    """Parse the group counters of a stratagem result with the per-group parsers.

    Each entry of ``stratagem_results_json["group_counters"]`` is reduced to
    a ``(name, counters)`` pair.  The "warn_fids" and "purge_fids" groups are
    dropped, every remaining group is handed to the parser registered under
    its name, and the parser outputs are passed through ``flatten``.

    ``labels`` is not a parameter of this function; it is taken from the
    enclosing scope.
    """
    parse_fns = {
        "size_distribution": partial(parse_size_distribution, measurement, fs_name, labels),
        "user_distribution": partial(parse_user_distribution, measurement, fs_name),
    }
    skipped_groups = ["warn_fids", "purge_fids"]

    group_counters = stratagem_results_json.get("group_counters")
    named_counters = [
        (group.get("name"), group.get("counters")) for group in group_counters
    ]
    wanted = filter(lambda pair: pair[0] not in skipped_groups, named_counters)
    # A group name without a registered parser raises KeyError here.
    parsed = map(lambda pair: parse_fns[pair[0]](pair[1]), wanted)
    return flatten(parsed)
def run(self, args):
    """Send purge/warning scan results to the client and return their location.

    ``args["client_args"]`` unpacks to ``(host, mount_point, uuid,
    report_duration, purge_duration)``.  Nothing is done (and ``None`` is
    returned) when both durations are ``None``.  Otherwise an action is
    prepared for each duration that is set, purge first, and only actions
    whose mailbox file exists under ``MAILBOX_PATH`` are invoked.  The first
    truthy invocation result is logged and returned; ``None`` is returned if
    there is none.
    """
    host, mount_point, uuid, report_duration, purge_duration = args["client_args"]

    if report_duration is None and purge_duration is None:
        return

    candidates = [
        (
            purge_duration,
            "action_purge_stratagem",
            (mount_point, "{}-{}".format(uuid, "purge_fids-fids_expired")),
        ),
        (
            report_duration,
            "action_warning_stratagem",
            (mount_point, "{}-{}".format(uuid, "warn_fids-fids_expiring_soon")),
        ),
    ]
    action_list = [
        (label, action_args)
        for (duration, label, action_args) in candidates
        if duration is not None
    ]

    def mailbox_exists(action):
        # action is (label, (mount_point, mailbox_name))
        return path.exists("{}/{}".format(MAILBOX_PATH, action[1][1]))

    def invoke(action):
        return self.invoke_rust_agent_expect_result(host, action[0], action[1])

    results = map(invoke, filter(mailbox_exists, action_list))
    file_location = next(iter(filter(bool, results)), None)

    if file_location:
        self.log(u"\u2713 Scan results sent to client under:\n{}".format(
            file_location))

    return file_location
def parse_size_distribution(measurement, fs_name, labels, counters):
    """Turn size-distribution counters into stratagem influx points.

    ``counters`` is first passed through ``filter_out_other_counter``.  Each
    remaining counter has its ``"name"`` replaced in place by the
    ``size_distribution_name_table`` entry for the lower-cased name, and is
    then converted with ``create_stratagem_influx_point``.  The result is
    the ``map`` over those points, not a materialised list.
    """
    def rename(counter):
        # Mutates the counter; the translated name is what the tags use.
        counter.update(
            {"name": size_distribution_name_table[counter.get("name").lower()]})
        return counter

    def to_point(counter):
        tags = [
            ("group_name", "size_distribution"),
            ("counter_name", counter.get("name")),
            ("label", labels.get(counter.get("name"))),
            ("fs_name", fs_name),
        ]
        fields = [("count", counter.get("count")), ("size", counter.get("size"))]
        return create_stratagem_influx_point(measurement, tags, fields)

    kept = filter_out_other_counter(counters)
    return map(to_point, map(rename, kept))
def test_pipe():
    """``pipe`` threads the value through the functions from left to right."""
    expected_by_funcs = (
        ((inc,), 2),
        ((inc, inc), 3),
    )
    for funcs, expected in expected_by_funcs:
        assert pipe(1, *funcs) == expected
    # The last step decides the type: iseven yields the False singleton.
    assert pipe(1, double, inc, iseven) is False
def __call__(self, d):
    """Pass ``d`` through every callable in ``self.filters``, in order."""
    result = d
    # Snapshot the filters first, as argument unpacking would.
    for apply_filter in tuple(self.filters):
        result = apply_filter(result)
    return result