def main(files):
    # z.do(print) taps each filename (printing it, then passing it through)
    # before handing it to convert_and_update
    list(map(z.comp(convert_and_update, z.do(print)), files))
key in `ks`""" lowtxt = txt.lower() find_icase = partial(findall_ignore_case, low_txt=lowtxt, txt=txt) casedata = (DataFrame([(k, len(find_all(txt, k)), len(find_icase(k))) for k in ks], columns=['Word', 'Case', 'Nocase']) .query('Case != Nocase').assign(Ratio=lambda x: x.eval('Case / Nocase')) .sort_values('Ratio', ascending=True).reset_index(drop=1) ) return {diffcase: phrase2wd(k) for k in casedata.query('Ratio < @thresh').Word for diffcase in set(find_icase(k))} # Gensim def to_gensim_params(cnf, **kw): gparams = dict( size=cnf.N, # 80, # alpha=cnf.eta, min_alpha=cnf.min_eta, window=cnf.C / 2, sample=0, negative=cnf.K, #[5, 7, 10, 12, 15, 17], 0 sg=1, # iter=4, ) gparams.update( **kw) return gparams phrase2wd = lambda x: '_'.join(re.split(r'[ -]', x)) take = z.comp(list, islice) ilen = lambda xs: sum(1 for _ in xs)
# import itertools as it
from os.path import basename, splitext, join, abspath
import re
import subprocess

import toolz.curried as z
import argparse
import yaml

fst = z.operator.itemgetter(0)
snd = z.operator.itemgetter(1)

NOTEBOOK_DIR = '_notebooks'
# NOTEBOOK_HTML_DIR = 'ipy_html'
NOTEBOOK_HTML_DIR = '_includes'

base = z.comp(fst, splitext, basename)
htmlname = z.comp(z.curry(join, NOTEBOOK_HTML_DIR), '{}.html'.format, base)

pat = re.compile(r'''^---\n
    (.+?)\n
    ---
    ''', re.VERBOSE | re.DOTALL)
# print(pat.findall(txt)[0])


def post_todict(posttxt, ret_yaml=False):
    m = pat.findall(posttxt)
    if not m:
        if ret_yaml:
            return (None, None)
        return
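# --- usage sketch (sample post text invented for illustration): `pat`
# captures everything between a post's opening and closing '---' fences.
_sample = "---\ntitle: Hello\nlayout: post\n---\nBody text"
assert pat.findall(_sample) == ["title: Hello\nlayout: post"]
assert post_todict("no front matter", ret_yaml=True) == (None, None)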
import operator

import toolz.curried as toolz
import pyramda as R

reg_typename = "{}_reg_t".format
rin_name = "{}_rin".format
r_name = "{}_r".format
v_name = "{}_v".format
architecture_id = "two_process_{}".format

# indent(n, text): prefix every line of `text` with n spaces.
# juxt produces (prefix-adding function, list of lines); apply(map) maps the
# former over the latter, and the result is rejoined with newlines.
indent = R.curry_n(
    2,
    toolz.comp(
        "\n".join,
        R.apply(map),
        R.unapply(toolz.juxt([
            toolz.comp(R.add, R.multiply(" "), toolz.first),
            toolz.comp(operator.methodcaller("split", "\n"), toolz.second),
        ]))))
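# --- usage sketch (not from the original source; assumes curry_n supports
# Ramda-style partial application, which its use here implies).
assert indent(4, "a\nb") == "    a\n    b"
assert indent(2)("x") == "  x"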
import re

from bs4 import BeautifulSoup
from ratelimit import limits, sleep_and_retry
from requests import get
from tqdm import tqdm
from yaml import dump
from toolz.curried import comp

data_path = 'data.yaml'
country_list_url = 'https://simple.wikipedia.org/wiki/List_of_European_countries'
border_list_url = 'https://en.wikipedia.org/wiki/List_of_countries_and_territories_by_land_borders'
base_url = 'https://en.wikipedia.org/wiki/'

# Throttle Wikipedia requests to one call per second
wikilimiter = comp(sleep_and_retry, limits(1, 1))


@wikilimiter
def download_countries():
    country_list_page = BeautifulSoup(get(country_list_url).text,
                                      features='html.parser')
    country_table = [row.find_all('a', href=re.compile(r'^/wiki/'))[1:]
                     for row in country_list_page.find('tbody').find_all('tr')[1:]]
    return {str(a[0].string): str(a[-1]['href']).replace('/wiki/', '')
            for a in country_table}


@wikilimiter
def download_borders():
    border_list_page = BeautifulSoup(get(border_list_url).text,
                                     features='html.parser')
    return dict(
        ((q := [str(a.string)
                for a in row.find_all('a', href=re.compile(r'^/wiki/'))
                if str(a.string)[0].isupper()])[0],
         q[1:])
        for row in border_list_page.find('tbody').find_all('tr')[2:]
        if not ('overseas' in str(row) and 'excluding' not in str(row)))


@wikilimiter
def download_coords(capital):
    capital_page = BeautifulSoup(get(base_url + capital).text,
                                 features='html.parser')
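# --- usage sketch (not from the original source): fetch both tables and
# persist them with yaml.dump. `build_dataset` is a hypothetical helper;
# `download_coords` is truncated above, so the coordinate pass is omitted.
def build_dataset():
    data = {'capitals': download_countries(), 'borders': download_borders()}
    with open(data_path, 'w') as f:
        dump(data, f)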
import builtins
from itertools import repeat, islice, count

import numpy as np
import numpy.random as nr
from numpy.linalg import norm
from operator import itemgetter as itg
from pandas import Series, DataFrame, Index
from numba import jit
import toolz.curried as z
from voluptuous import Any, Invalid, Schema, ALLOW_EXTRA

import numba_utils as nbu
import utils as ut

nopython = jit(nopython=True)
map = z.comp(list, builtins.map)  # eager map: shadows the lazy builtin
UNKNOWN = '<UNK>'


# Matrix weight representation
class Cat(object):
    "Join and split W matrices for passing as single arg"

    @staticmethod
    def join(w1, w2):
        return np.hstack([w1, w2.T])

    @staticmethod
    def split(Wall):
        n = Wall.shape[1] // 2  # integer division: float indices would raise
        W1, W2_ = Wall[:, :n], Wall[:, n:]
        return W1, W2_.T
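# --- usage sketch (not from the original source): Cat round-trips an input
# matrix (V x N) and an output matrix (N x V) through one hstacked array.
_w1, _w2 = nr.randn(5, 3), nr.randn(3, 5)
_wall = Cat.join(_w1, _w2)  # shape (5, 6)
_a, _b = Cat.split(_wall)
assert np.allclose(_a, _w1) and np.allclose(_b, _w2)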
import toolz.curried as toolz
import pyramda as R

# VHDL signal type from a signal's bit count: a single bit becomes
# std_ulogic, anything wider a std_logic_vector of matching width
get_sigtype = R.if_else(
    toolz.comp(R.equals(1), len),
    R.always("std_ulogic"),
    toolz.comp("std_logic_vector({} downto 0)".format, R.dec, len))
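# --- usage sketch (not from the original source): the type string follows
# from the length of the bit list passed in.
assert get_sigtype([0]) == "std_ulogic"
assert get_sigtype(list(range(8))) == "std_logic_vector(7 downto 0)"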