def pipe_strregex(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that replaces text using regexes. Each rule has the
    general format: "In [field] replace [regex pattern] with [text]".
    Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'RULE': [
            {
                'match': {'value': <regex>},
                'replace': {'value': <'replacement'>}
            }
        ]
    }

    Returns
    -------
    _OUTPUT : generator of replaced strings
    """
    rules = conf['RULE']
    merged_opts = cdicts(opts, kwargs)
    rule_splits = get_splits(_INPUT, rules, **merged_opts)

    # convert_func is handed to get_dispatch_funcs as its `first` callable
    dispatchers = get_dispatch_funcs(first=convert_func)
    dispatched = utils.dispatch(rule_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_urlbuilder(context=None, _INPUT=None, conf=None, **kwargs):
    """A url module that builds a url. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'PARAM': [
            {'key': {'value': <'order'>}, 'value': {'value': <'desc'>}},
            {'key': {'value': <'page'>}, 'value': {'value': <'2'>}}
        ],
        'PATH': {'type': 'text', 'value': <''>},
        'BASE': {'type': 'text', 'value': <'http://site.com/feed.xml'>},
    }

    Returns
    -------
    _OUTPUT : generator of urls
    """
    merged_opts = cdicts(opts, kwargs)

    # NOTE(review): PARAM is built from the raw kwargs while PATH and BASE
    # use the merged opts -- presumably deliberate; confirm.
    param_getter = get_funcs(conf.get('PARAM', []), **kwargs)[0]
    path_getter = get_funcs(conf.get('PATH', []), **merged_opts)[0]
    base_getter = get_funcs(conf['BASE'], listize=False, **merged_opts)[0]
    params_parser = utils.parse_params

    getters = [param_getter, path_getter, base_getter]
    url_splits = get_splits(_INPUT, funcs=getters)
    dispatchers = get_dispatch_funcs('pass', params_parser)
    dispatched = utils.dispatch(url_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_exchangerate(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that retrieves the current exchange rate for a given
    currency pair. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings (base currency)
    conf : {
        'quote': {'value': <'USD'>},
        'default': {'value': <'USD'>},
        'offline': {'type': 'bool', 'value': '0'},
    }

    Returns
    -------
    _OUTPUT : generator of parse_result outputs, each computed against the
        rate table parsed from the (online or offline) rate data
    """
    flag = conf.get('offline', {}).get('value')

    # The documented conf passes the flag as text ('0'). A bare truthiness
    # test treats any non-empty string -- including '0' -- as True, which
    # would force offline mode, so string flags are normalised explicitly.
    if hasattr(flag, 'strip'):
        offline = flag.strip().lower() not in ('', '0', 'false')
    else:
        offline = bool(flag)

    if offline:
        rate_data = get_offline_rate_data(err=False)
    else:
        rate_data = get_rate_data()

    rates = parse_request(rate_data)
    splits = get_splits(_INPUT, conf, **cdicts(opts, kwargs))
    parsed = utils.dispatch(splits, *get_dispatch_funcs())
    return starmap(partial(parse_result, rates=rates), parsed)
def pipe_regex(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that replaces text in items using regexes. Each rule has
    the general format: "In [field] replace [match] with [replace]".
    Not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    conf : {
        'RULE': [
            {
                'field': {'value': <'search field'>},
                'match': {'value': <'regex'>},
                'replace': {'value': <'replacement'>},
                'globalmatch': {'value': '1'},
                'singlelinematch': {'value': '2'},
                'multilinematch': {'value': '4'},
                'casematch': {'value': '8'}
            }
        ]
    }

    Returns
    -------
    _OUTPUT : generator of items
    """
    rules = conf['RULE']
    merged_opts = cdicts(opts, kwargs)
    rule_splits = get_splits(_INPUT, rules, **merged_opts)

    dispatchers = get_dispatch_funcs('pass', convert_func)
    dispatched = utils.dispatch(rule_splits, *dispatchers)

    # Unlike the loopable string modules, the whole dispatched stream is
    # handed to parse_results in one call rather than star-mapped per entry.
    return parse_results(dispatched)
def pipe_strreplace(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that replaces text. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'RULE': [
            {
                'param': {'value': <match type: 1=first, 2=last, 3=every>},
                'find': {'value': <text to find>},
                'replace': {'value': <replacement>}
            }
        ]
    }

    Returns
    -------
    _OUTPUT : generator of replaced strings
    """
    rules = conf['RULE']

    # NOTE(review): kwargs are forwarded as-is here (no cdicts(opts, ...)
    # merge as in sibling modules) -- presumably intentional; confirm.
    rule_splits = get_splits(_INPUT, rules, **kwargs)
    dispatchers = get_dispatch_funcs()
    dispatched = utils.dispatch(rule_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_hash(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that hashes the given text. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : passed through unchanged to get_splits

    Returns
    -------
    _OUTPUT : generator of hashed strings
    """
    merged_opts = cdicts(opts, kwargs)
    text_splits = get_splits(_INPUT, conf, **merged_opts)
    dispatchers = get_dispatch_funcs()
    dispatched = utils.dispatch(text_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_currencyformat(context=None, _INPUT=None, conf=None, **kwargs):
    """A number module that formats a number to a given currency string.
    Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or numbers
    conf : {'currency': {'value': <'USD'>}}

    Returns
    -------
    _OUTPUT : generator of formatted currencies
    """
    merged_opts = cdicts(opts, kwargs)
    number_splits = get_splits(_INPUT, conf, **merged_opts)

    # 'num' selects the dispatch functions used for the numeric input
    dispatchers = get_dispatch_funcs('num')
    dispatched = utils.dispatch(number_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_strtransform(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that applies the configured transformation (e.g.
    'swapcase') to a string. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {'transformation': {'value': <'swapcase'>}}

    Returns
    -------
    _OUTPUT : generator of parse_result outputs (presumably the transformed
        strings -- confirm against parse_result)
    """
    merged_opts = cdicts(opts, kwargs)
    text_splits = get_splits(_INPUT, conf, **merged_opts)
    dispatchers = get_dispatch_funcs()
    dispatched = utils.dispatch(text_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_substr(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that returns a substring. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'from': {'type': 'number', 'value': <starting position>},
        'length': {
            'type': 'number', 'value': <count of characters to return>}
    }

    Returns
    -------
    _OUTPUT : generator of substrings

    Notes
    -----
    `conf` is modified in place: the 'from' entry (if present) is removed
    and stored under 'start'.
    """
    # Fall back to any existing 'start' entry when 'from' is absent. The
    # lookup goes through dict.get directly rather than conf.get.
    fallback = dict.get(conf, 'start')
    conf['start'] = conf.pop('from', fallback)

    merged_opts = cdicts(opts, kwargs)
    text_splits = get_splits(_INPUT, conf, **merged_opts)
    dispatchers = get_dispatch_funcs()
    dispatched = utils.dispatch(text_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_simplemath(context=None, _INPUT=None, conf=None, **kwargs):
    """A number module that performs basic arithmetic, such as addition and
    subtraction. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or numbers
    kwargs -- other value, if wired in
    conf : {
        'OTHER': {'type': 'number', 'value': <'5'>},
        'OP': {'type': 'text', 'value': <'modulo'>}
    }

    Returns
    -------
    _OUTPUT : generator of parse_result outputs (documented upstream as
        floats)
    """
    merged_opts = cdicts(opts, kwargs)
    number_splits = get_splits(_INPUT, conf, **merged_opts)

    # 'num' selects the dispatch functions used for the numeric input
    dispatchers = get_dispatch_funcs('num')
    dispatched = utils.dispatch(number_splits, *dispatchers)
    return starmap(parse_result, dispatched)
def pipe_stringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """A string module that splits a string into tokens delimited by
    separators. Loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }

    Returns
    -------
    _OUTPUT : generator of items

    Notes
    -----
    `conf` is modified in place: the 'to-str' entry (if present) is removed
    and stored under 'delimiter'.
    """
    # Fall back to any existing 'delimiter' entry when 'to-str' is absent.
    # The lookup goes through dict.get directly rather than conf.get.
    fallback = dict.get(conf, 'delimiter')
    conf['delimiter'] = conf.pop('to-str', fallback)

    merged_opts = cdicts(opts, kwargs)
    text_splits = get_splits(_INPUT, conf, **merged_opts)
    dispatchers = get_dispatch_funcs()
    dispatched = utils.dispatch(text_splits, *dispatchers)
    token_groups = starmap(parse_result, dispatched)

    # The per-input results are combined into one stream by utils.multiplex
    return utils.multiplex(token_groups)
def pipe_filter(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that filters for source items matching the given rules.
    Not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    conf : {
        'MODE': {'value': <'permit' or 'block'>},
        'COMBINE': {'value': <'and' or 'or'>}
        'RULE': [
            {
                'field': {'value': 'search field'},
                'op': {'value': 'one of SWITCH above'},
                'value': {'value': 'search term'}
            }
        ]
    }

    kwargs : other inputs, e.g., to feed terminals for rule values
        ('pass_if', when given, is removed and handed to utils.get_pass)

    Returns
    -------
    _OUTPUT : generator of filtered items

    Raises
    ------
    Exception : if the COMBINE value is neither 'and' nor 'or'

    Examples
    --------
    >>> import os.path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> from pipe2py.modules.pipefetchdata import pipe_fetchdata
    >>> parent = p.dirname(p.dirname(__file__))
    >>> file_name = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = 'file://%s' % file_name
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> input = pipe_fetchdata(_INPUT=pipe_forever(), conf=conf)
    >>> mode = {'value': 'permit'}
    >>> combine = {'value': 'and'}
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...     'value': {'value': 'web'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> pipe_filter(_INPUT=input, conf=conf).next()['title']
    u'E-Commerce Website Developer | Elance Job'
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...     'value': {'value': 'kjhlked'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> list(pipe_filter(_INPUT=input, conf=conf))
    []
    """
    conf = DotDict(conf)

    # 'pass_if' must leave kwargs before they are forwarded anywhere else
    test = kwargs.pop('pass_if', None)
    permit = conf.get('MODE', **kwargs) == 'permit'
    combine = conf.get('COMBINE', **kwargs)

    if combine not in {'and', 'or'}:
        raise Exception(
            "Invalid combine: %s. (Expected 'and' or 'or')" % combine)

    rule_defs = map(DotDict, utils.listize(conf['RULE']))
    get_pass = partial(utils.get_pass, test=test)
    get_value = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=get_value, **kwargs)

    def get_rules(item):
        # every rule definition is parsed against the same item
        return imap(parse_conf, rule_defs, repeat(item))

    funcs = [COMBINE_BOOLEAN[combine], utils.passthrough, utils.passthrough]
    inputs = imap(DotDict, _INPUT)
    splits = utils.broadcast(inputs, get_rules, utils.passthrough, get_pass)
    outputs = starmap(partial(parse_rules, **kwargs), splits)
    parsed = utils.dispatch(outputs, *funcs)
    gathered = starmap(partial(parse_result, permit=permit), parsed)

    # drop the falsy entries produced by parse_result
    return ifilter(None, gathered)
# NOTE(review): this file contains an earlier pipe_filter definition with
# the same logic; this later one shadows it -- confirm the duplicate is
# intentional.
def pipe_filter(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that filters for source items matching the given rules.
    Not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    conf : {
        'MODE': {'value': <'permit' or 'block'>},
        'COMBINE': {'value': <'and' or 'or'>}
        'RULE': [
            {
                'field': {'value': 'search field'},
                'op': {'value': 'one of SWITCH above'},
                'value': {'value': 'search term'}
            }
        ]
    }

    kwargs : other inputs, e.g., to feed terminals for rule values
        ('pass_if', when given, is removed and handed to utils.get_pass)

    Returns
    -------
    _OUTPUT : generator of filtered items

    Raises
    ------
    Exception : if the COMBINE value is neither 'and' nor 'or'

    Examples
    --------
    >>> import os.path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> from pipe2py.modules.pipefetchdata import pipe_fetchdata
    >>> parent = p.dirname(p.dirname(__file__))
    >>> file_name = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = 'file://%s' % file_name
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> input = pipe_fetchdata(_INPUT=pipe_forever(), conf=conf)
    >>> mode = {'value': 'permit'}
    >>> combine = {'value': 'and'}
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...     'value': {'value': 'web'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> pipe_filter(_INPUT=input, conf=conf).next()['title']
    u'E-Commerce Website Developer | Elance Job'
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...     'value': {'value': 'kjhlked'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> list(pipe_filter(_INPUT=input, conf=conf))
    []
    """
    conf = DotDict(conf)

    # 'pass_if' must leave kwargs before they are forwarded anywhere else
    test = kwargs.pop('pass_if', None)
    permit = conf.get('MODE', **kwargs) == 'permit'
    combine = conf.get('COMBINE', **kwargs)

    if combine not in {'and', 'or'}:
        raise Exception(
            "Invalid combine: %s. (Expected 'and' or 'or')" % combine)

    rule_defs = map(DotDict, utils.listize(conf['RULE']))
    get_pass = partial(utils.get_pass, test=test)
    get_value = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=get_value, **kwargs)

    def get_rules(item):
        # every rule definition is parsed against the same item
        return imap(parse_conf, rule_defs, repeat(item))

    funcs = [COMBINE_BOOLEAN[combine], utils.passthrough, utils.passthrough]
    inputs = imap(DotDict, _INPUT)
    splits = utils.broadcast(inputs, get_rules, utils.passthrough, get_pass)
    outputs = starmap(partial(parse_rules, **kwargs), splits)
    parsed = utils.dispatch(outputs, *funcs)
    gathered = starmap(partial(parse_result, permit=permit), parsed)

    # drop the falsy entries produced by parse_result
    return ifilter(None, gathered)
def asyncDispatch(splits, *asyncCallables):
    """Dispatch `splits` to the given callables via utils.dispatch.

    Parameters
    ----------
    splits : forwarded unchanged as the first argument of utils.dispatch
    asyncCallables : callables forwarded positionally to utils.dispatch

    Returns
    -------
    Whatever utils.dispatch returns when called with this module's
    `_map_func` and `_apply_func` as its `map_func` / `apply_func` keyword
    arguments.
    """
    return utils.dispatch(
        splits, *asyncCallables, map_func=_map_func, apply_func=_apply_func)
def get_parsed(_INPUT, func):
    """Map `func` then `parse_conf` over the finitized input and dispatch.

    Parameters
    ----------
    _INPUT : iterable passed through utils.finitize before mapping
    func : callable applied to each finitized entry; its result is fed to
        parse_conf

    Returns
    -------
    The result of utils.dispatch over the parsed entries, using the
    dispatch functions built from 'pass' and get_element.
    """
    bounded = utils.finitize(_INPUT)
    parsed_confs = imap(parse_conf, imap(func, bounded))
    dispatchers = get_dispatch_funcs('pass', get_element)
    return utils.dispatch(parsed_confs, *dispatchers)