Example #1
def hydrate_dataset_part(part, dbc, cdir, dsid, as_blaze=True):
    if dbc is not None:
        logger.info('hydrating with database table')
        res = pipe(part.value, lambda x: DBTBL_FMT.format(dsid=dsid, part=x),
                   dbc.resolve_table)
        res = res if as_blaze else odo(res, pd.DataFrame)
        return res
    else:
        logger.info('hydrating with feather file')
        bzfn = bz.data if as_blaze else identity
        try:
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid,
                                         cdir=cdir,
                                         ftyp=DatasetFileType.FEATHER),
                feather.read_dataframe, bzfn)
        except Exception:
            # fall back to the JSON-records file if the feather read fails
            res = pipe(
                part.value,
                curry(get_datafile_path)(dsid=dsid,
                                         cdir=cdir,
                                         ftyp=DatasetFileType.JSONREC),
                curry(pd.read_json), bzfn)
        return res
Example #2
def post_process_compiled_contracts(compiled_contracts):
    for contract_data in compiled_contracts:
        bytecode = contract_data.get('bytecode')

        if is_string(bytecode):
            bytecode_placeholder_locations = find_placeholder_locations(bytecode)
            bytecode_link_references = normalize_placeholder_link_references(
                bytecode_placeholder_locations,
                compiled_contracts,
            )
        else:
            bytecode_link_references = tuple()

        bytecode_runtime = contract_data.get('bytecode_runtime')
        if is_string(bytecode_runtime):
            bytecode_runtime_placeholder_locations = find_placeholder_locations(
                bytecode_runtime,
            )
            bytecode_runtime_link_references = normalize_placeholder_link_references(
                bytecode_runtime_placeholder_locations,
                compiled_contracts,
            )
        else:
            bytecode_runtime_link_references = tuple()

        yield pipe(
            contract_data,
            partial(assoc, key='linkrefs', value=bytecode_link_references),
            partial(assoc, key='linkrefs_runtime', value=bytecode_runtime_link_references),
        )
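The partial(assoc, ...) pattern above recurs throughout these examples. A minimal, self-contained sketch of how it threads a dict through pipe, with a made-up dict and keys, assuming assoc and pipe come from toolz:

from functools import partial
from toolz import assoc, pipe

contract_data = {'name': 'Token'}
enriched = pipe(
    contract_data,
    # each stage returns a new dict with one extra key
    partial(assoc, key='linkrefs', value=()),
    partial(assoc, key='linkrefs_runtime', value=()),
)
print(enriched)  # {'name': 'Token', 'linkrefs': (), 'linkrefs_runtime': ()}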
Example #3
    def get_compiled_contracts(self, source_file_paths, import_remappings):
        self.logger.debug("Import remappings: %s", import_remappings)
        self.logger.debug("Compiler Settings: %s",
                          pprint.pformat(self.compiler_settings))

        if 'import_remappings' in self.compiler_settings and import_remappings is not None:
            self.logger.warning(
                "Import remappings setting will be overridden by backend settings"
            )

        try:
            compilation_result = compile_files(
                source_file_paths,
                import_remappings=import_remappings,
                **self.compiler_settings)
        except ContractsNotFound:
            return {}

        compiled_contracts = pipe(
            compilation_result,
            normalize_compilation_result,
            post_process_compiled_contracts,
        )

        return compiled_contracts
Example #4
    def facet_map(self):
        facs = (self.flevels.groupby(['facet']).agg({
            'facet_level':
            lambda x: x.dropna().drop_duplicates().tolist()
        }).pipe(lambda xf: u.fill_none(xf)).to_dict(orient='index'))
        return pipe(facs,
                    curry(valmap)(lambda x: x['facet_level']),
                    curry(keyfilter)(lambda x: x != 'Overall'),
                    lambda x: merge(x, self.flevels_r))
Example #5
    def process_text(self, input_text: str, **kwargs) -> str:
        return pipe(
            input_text,
            lambda x: self.clean_pattern.sub(' ', x),
            normalize_hyphenated_words,
            normalize_quotation_marks,
            normalize_unicode,
            normalize_whitespace
        )
Example #6
def normalize(text: str) -> str:
    space = len(text) > 0 and text[-1] in string.whitespace

    text = text.lower()

    text = pipe(text, remove_accents, remove_punctuation, normalize_whitespace)

    if space:
        return f'{text} '

    return text
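The helpers used by normalize are imported elsewhere in the source project; a rough, hypothetical sketch of what they could look like using only the standard library. Note that normalize records the trailing space up front precisely because a whitespace-normalizer like this one strips it:

import string
import unicodedata

def remove_accents(text: str) -> str:
    # decompose characters, then drop the combining marks
    return ''.join(c for c in unicodedata.normalize('NFKD', text)
                   if not unicodedata.combining(c))

def remove_punctuation(text: str) -> str:
    return text.translate(str.maketrans('', '', string.punctuation))

def normalize_whitespace(text: str) -> str:
    # collapse runs of whitespace and trim the ends
    return ' '.join(text.split())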
Example #7
def normalize_compilation_result(compilation_result):
    for key_from_compiler, raw_contract_data in compilation_result.items():
        contract_data = normalize_combined_json_contract_data(raw_contract_data)
        source_path, contract_name = normalize_combined_json_contract_key(
            key_from_compiler,
            contract_data,
        )
        yield pipe(
            contract_data,
            partial(assoc, key='source_path', value=source_path),
            partial(assoc, key='name', value=contract_name),
        )
Example #8
def validate_unique(values):
    if not isdistinct(values):
        duplicates = pipe(
            values,
            frequencies,  # get the frequencies
            partial(valfilter,
                    lambda v: v > 1),  # filter to ones that occur more than once
            sorted,  # sort them
            tuple,  # cast them to an immutable form
        )
        raise ValidationError(
            "The values provided are not unique.  Duplicates: {0}".format(
                ', '.join((str(value) for value in duplicates))))
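Hypothetical usage, assuming isdistinct, frequencies and valfilter come from toolz and ValidationError is a simple Exception subclass defined elsewhere in the project:

validate_unique([1, 2, 3])        # returns None, values are distinct
validate_unique([1, 2, 2, 3, 3])  # raises ValidationError: ... Duplicates: 2, 3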
Example #9
def normalize_compilation_result(compilation_result):
    """
    Take the result from the --standard-json compilation and flatten it into an
    iterable of contract data dictionaries.
    """
    for source_path, file_contracts in compilation_result['contracts'].items():
        for contract_name, raw_contract_data in file_contracts.items():
            contract_data = normalize_standard_json_contract_data(raw_contract_data)
            yield pipe(
                contract_data,
                partial(assoc, key='source_path', value=source_path),
                partial(assoc, key='name', value=contract_name),
            )
Example #10
def upgrade_user_config(user_config, to_version=LATEST_VERSION):
    try:
        current_version = user_config['version']
    except KeyError:
        raise KeyError("No version key found in user config file:\n\n{0}".format(
            pprint.pformat(user_config),
        ))

    upgrade_sequence = get_upgrade_sequence(current_version, to_version, KNOWN_USER_VERSIONS)
    upgrade_functions = tuple(
        USER_UPGRADE_FUNCTIONS[version] for version in upgrade_sequence
    )
    upgraded_user_config = pipe(user_config, *upgrade_functions)
    return upgraded_user_config
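A toy illustration of the same upgrade-pipeline idea, with made-up versions and upgrade functions (the real KNOWN_USER_VERSIONS and USER_UPGRADE_FUNCTIONS live elsewhere in the project); pipe(user_config, *upgrade_functions) applies each upgrade in sequence, exactly as above:

from toolz import assoc, pipe

def upgrade_v1_to_v2(cfg):
    return assoc(cfg, 'version', '2')

def upgrade_v2_to_v3(cfg):
    return assoc(cfg, 'version', '3')

upgrade_functions = (upgrade_v1_to_v2, upgrade_v2_to_v3)
print(pipe({'version': '1'}, *upgrade_functions))  # {'version': '3'}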
Example #11
def validate_unique(values, title="Value"):
    if not isdistinct(values):
        duplicates = pipe(
            values,
            frequencies,  # get the frequencies
            partial(valfilter,
                    lambda v: v > 1),  # filter to ones that occur more than once
            sorted,  # sort them
            tuple,  # cast them to an immutable form
        )
        raise ValidationError(
            "{title} does not contain unique items.  Duplicates: {0}".format(
                ', '.join((str(value) for value in duplicates)),
                title=title,
            ))
Example #12
def serialize_full_transaction(transaction, block, transaction_index, is_pending):
    if is_pending:
        block_number = None
        block_hash = None
        transaction_index = None
    else:
        block_number = block['number']
        block_hash = block['hash']

    return pipe(
        transaction,
        partial(assoc, key='block_number', value=block_number),
        partial(assoc, key='block_hash', value=block_hash),
        partial(assoc, key='transaction_index', value=transaction_index),
    )
Example #13
def validate_unique(values, title="Value"):
    if not isdistinct(values):
        duplicates = pipe(
            values,
            frequencies,  # get the frequencies
            partial(valfilter, lambda v: v > 1),  # filter to ones that occur more than once
            sorted,  # sort them
            tuple,  # cast them to an immutable form
        )
        raise ValidationError(
            "{title} does not contain unique items.  Duplicates: {0}".format(
                ', '.join((str(value) for value in duplicates)),
                title=title,
            )
        )
Example #14
def main():
    # corpus = "\n".join([gt.raw(fileid) for fileid in gt.fileids()])
    # corpus = '\n'.join([' '.join(s) for s in brown.sents()])
    text = open('src/synonymize/hounds_sherlock.txt').read().replace('_', ' ')

    bot = POSifiedText(text)

    try:
        while True:
            cmd = input('Generate sentence?')

            if cmd.lower() in ['n', 'no', 'q', 'quit']:
                break

            print(pipe(bot.make_sentence()))
    except KeyboardInterrupt:
        pass
Example #15
def embedding_groups(
        node_list: List[T],
        persona_embedding_list: List[np.ndarray]) -> Dict[T, List[np.ndarray]]:
    """
    Utility function, which given aligned list of nodes and embedding lists from the model.predict function,
    obtain a dictionary from base graph nodes to a list of embeddings. The order of the embeddings for the
    base nodes is not ordered, and the order may differ on different calls.

    :param node_list: list of base nodes, which is duplicated
    :param persona_embedding_list: corresponding embeddings
    :return: dictionary mapping base nodes to all their embeddings
    """
    return pipe(
        zip(node_list, persona_embedding_list),
        groupby(0),
        valmap(lambda x: list(map(getter(1), x))),
    )
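Hypothetical usage, assuming the curried toolz names the function relies on (pipe, groupby, valmap) plus operator.itemgetter imported as getter:

import numpy as np

nodes = ['a', 'a', 'b']
embeddings = [np.zeros(2), np.ones(2), np.full(2, 5.0)]
groups = embedding_groups(nodes, embeddings)
# groups == {'a': [array([0., 0.]), array([1., 1.])], 'b': [array([5., 5.])]}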
Example #16
def process_sas_survey(svy_cfg, facets, client=None, lgr=logger):
    g = svy_cfg
    prefix = g.s3_url_prefix
    lgr.bind(p=prefix)
    evalr = asteval.Interpreter()
    evalr.symtable['pd.util'] = pd.util
    fn = g.rename_cols
    map_fn = evalr(fn)
    df_munger = curry(sdf.munge_df)(facets=facets, qids=g.qids,
                                    na_syns=g.na_synonyms, col_fn=map_fn,
                                    fmts=g.patch_format, fpc=g.fpc, lgr=lgr)
    lbl_loader = curry(load_variable_labels)(repl=g.replace_labels)
    xpt_loader = curry(load_sas_xport_df)(lgr=lgr)
    dfs = map(
        lambda r: pipe(prefix+r.xpt,
                       delayed(xpt_loader),
                       delayed(df_munger(r=r,
                                         lbls=lbl_loader(prefix+r.format,
                                                         prefix+r.formas)))),
        [r for idx, r in g.meta.iterrows()])
    lgr.info('merging SAS dfs')
    dfs = delayed(pd.concat)(dfs, ignore_index=True)
    scols = delayed(
        lambda xf: list(xf.columns
                          .intersection(set(g.qids)
                                        .union(facets))))(dfs)
    lgr.info('re-filtering question and facet columns to cast to category dtype', cols=scols)
    dfz = (dfs
           .apply(lambda x: x.astype('category'))
           .reset_index(drop=True)
           .assign(year=dfs['year'].astype(int),
                   sitecode=dfs['sitecode'].astype('category'),
                   weight=dfs['weight'].astype(float),
                   strata=dfs['strata'].astype(int, errors='ignore'),
                   psu=dfs['psu'].astype(int, errors='ignore'))
           .reset_index(drop=True))
    if g.fpc:
        dfz = (dfz.assign(fpc=dfs['fpc'].astype(int, errors='ignore'),
                          sample_ct=dfs['sample_ct'].astype(int, errors='ignore'))
                  .reset_index(drop=True))
    dfz.visualize()
    lgr.info('merged SAS dfs')
    lgr.unbind('p')
    return dfz
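The pipeline above wraps each stage in dask.delayed before composing them with pipe; a minimal sketch of that pattern with made-up stages and a made-up path (assumes dask is installed):

from dask import delayed
from toolz import pipe

def load(path):
    # stand-in for load_sas_xport_df
    return [1, 2, 3]

def munge(rows):
    # stand-in for sdf.munge_df
    return [r * 2 for r in rows]

task = pipe('s3://bucket/survey.xpt', delayed(load), delayed(munge))
print(task.compute())  # [2, 4, 6]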
Example #17
def add_full_dependencies_to_compiled_contracts(compiled_contracts):
    dependency_graph = compute_direct_dependency_graph(compiled_contracts)
    deploy_order = compute_deploy_order(dependency_graph)

    for contract_data in compiled_contracts:
        full_dependencies = compute_recursive_contract_dependencies(
            contract_data['name'],
            dependency_graph,
        )
        ordered_full_dependencies = tuple(
            contract_name for contract_name in deploy_order
            if contract_name in full_dependencies)
        yield pipe(
            contract_data,
            partial(assoc, key='full_dependencies', value=full_dependencies),
            partial(assoc,
                    key='ordered_full_dependencies',
                    value=ordered_full_dependencies),
        )
Example #18
def upgrade_config(config, config_context, to_version=LATEST_VERSION):
    if config_context == ConfigContext.USER:
        known_versions = KNOWN_USER_VERSIONS
    elif config_context == ConfigContext.LEGACY:
        known_versions = KNOWN_LEGACY_VERSIONS
    else:
        raise ValueError("Unknown config context: {0}".format(config_context))

    try:
        current_version = config['version']
    except KeyError:
        raise KeyError("No version key found in config file:\n\n{0}".format(
            pprint.pformat(config),
        ))

    upgrade_sequence = get_upgrade_sequence(current_version, to_version,
                                            known_versions)
    upgrade_functions = tuple(UPGRADE_FUNCTIONS[version]
                              for version in upgrade_sequence)
    upgraded_config = pipe(config, *upgrade_functions)

    return upgraded_config
Example #19
def upgrade_config(config, config_context, to_version=LATEST_VERSION):
    if config_context == ConfigContext.USER:
        known_versions = KNOWN_USER_VERSIONS
    elif config_context == ConfigContext.LEGACY:
        known_versions = KNOWN_LEGACY_VERSIONS
    else:
        raise ValueError("Unknown config context: {0}".format(config_context))

    try:
        current_version = config['version']
    except KeyError:
        raise KeyError("No version key found in config file:\n\n{0}".format(
            pprint.pformat(config),
        ))

    upgrade_sequence = get_upgrade_sequence(current_version, to_version, known_versions)
    upgrade_functions = tuple(
        UPGRADE_FUNCTIONS[version] for version in upgrade_sequence
    )
    upgraded_config = pipe(config, *upgrade_functions)

    return upgraded_config
Example #20
    def process_text(self, input_text: str, **kwargs) -> str:
        temperature = kwargs.get('temperature', self.temperature or .25)

        result: List[str] = []

        for token in self.nlp(input_text >> self.cleaner):
            if any(x not in string.ascii_lowercase for x in token.orth_):
                result.append(token.orth_)

            else:
                new_token = pipe(
                    token.orth_.lower(),
                    lambda x: self.pin.manipulate(x, temperature=temperature)
                )

                if token.orth_ == token.orth_.capitalize():
                    new_token = new_token.capitalize()

                result.append(new_token)

        return ' '.join(result)
Example #21
def block2dict(lines, repl, to_lower=False):
    f_lwr = str.lower if to_lower else identity
    f_repl = curry(lambda k, r: r[k] if k in r else k)(r=repl)
    rqt = re.compile(r'[\"\']')  # match quote chars
    rws = re.compile(r'\s')      # match whitespace
    # keep only alnum and a few unreserved symbols
    ruri = re.compile(r'(?![\w\s\-\_\.\'\$\-\+\(\)\/]|\.).')
    d = thread_last(
        lines,
        map(lambda x: x.replace('\x92', "'")),
        map(lambda x: rqt.sub('', x.strip()).split('=')),
        map(lambda x: (rws.sub('', x[0].strip()), ruri.sub('', x[1].strip()))),
        filter(lambda x: x[0].find('-') == -1),  # no support for ranges
        (mapcat, lambda x: map(lambda y: (y, x[1]), x[0].split(','))),
        filter(lambda x: x[0].isnumeric()),  # remove non-numeric codes
        map(lambda x: (int(x[0]),  # cat codes are ints
                       pipe(x[1], f_lwr, f_repl))),
        dict
    )
    # d[-1] = np.nan #use NA as a marker for unmapped vals
    return d
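thread_last, used above, threads the value through as the last argument of each (function, args...) form; a tiny self-contained sketch:

from toolz import thread_last

result = thread_last(
    range(6),
    (filter, lambda x: x % 2 == 0),  # filter(pred, range(6))
    (map, lambda x: x * 10),         # map(fn, <filtered>)
    list,
)
print(result)  # [0, 20, 40]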
Example #22
    def get_compiled_contracts(self, source_file_paths, import_remappings):
        self.logger.debug("Import remappings: %s", import_remappings)
        self.logger.debug("Compiler Settings: %s", pprint.pformat(self.compiler_settings))

        if 'import_remappings' in self.compiler_settings and import_remappings is not None:
            self.logger.warning("Import remappings setting will be overridden by backend settings")

        try:
            compilation_result = compile_files(
                source_file_paths,
                import_remappings=import_remappings,
                **self.compiler_settings
            )
        except ContractsNotFound:
            return {}

        compiled_contracts = pipe(
            compilation_result,
            normalize_compilation_result,
            post_process_compiled_contracts,
        )

        return compiled_contracts
Example #23
    def __init__(self, input_text: str, state_size: int = 2):
        nltk.download('brown')
        nltk.download('gutenberg')
        self.nlp = spacy.load('en_core_web_lg')

        self.synonyms: Dict[str, List[str]] = defaultdict(list)
        self.entities: Dict[str, List[str]] = defaultdict(list)

        input_text = pipe(
            input_text,
            # lambda x: x.replace('\n', ' '),
            lambda x: self.clean_pattern.sub(' ', x),
            normalize_hyphenated_words,
            normalize_quotation_marks,
            normalize_unicode,
            normalize_whitespace)

        markovify.Text.__init__(self,
                                input_text,
                                state_size,
                                retain_original=False)

        self.grammar = Grammar({**self.synonyms, **self.entities})
        self.grammar.add_modifiers(base_english)
Example #24
def post_process_compiled_contracts(compiled_contracts):
    return pipe(
        compiled_contracts,
        add_direct_dependencies_to_compiled_contracts,
        add_full_dependencies_to_compiled_contracts,
    )
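Because both stages here are generator functions, pipe composes them lazily; a tiny sketch of the same shape:

from toolz import pipe

def add_one_to_each(items):
    for item in items:
        yield item + 1

def double_each(items):
    for item in items:
        yield item * 2

print(list(pipe(range(3), add_one_to_each, double_each)))  # [2, 4, 6]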
Example #25
def test_pipe():
    assert pipe(1, inc) == 2
    assert pipe(1, inc, inc) == 3
    assert pipe(1, double, inc, iseven) is False
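The test assumes a few tiny helpers; a sketch of the obvious definitions:

from toolz import pipe

def inc(x):
    return x + 1

def double(x):
    return 2 * x

def iseven(x):
    return x % 2 == 0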
Example #26
    def find_mismatched_levels(self):
        return pipe(self.meta.qns[ID_COLUMN], set, map(self.compare_levels),
                    filter(lambda x: set(x['surveys']) != set(x['socrata'])))
Example #27
def _fetch_user_profiles(keyword: str) -> List[str]:
    return pipe(keyword, request_users_from_keyword_search,
                _get_json_from_response, get_users_from_json)