Exemplo n.º 1
0
    def factory(cls, web3, class_name=None, **kwargs):
        """Build and return a new contract class bound to *web3*.

        The normalizers sanitize the 'abi', 'address', 'bytecode' and
        'bytecode_runtime' keyword attributes before the class is created;
        the resulting class is then decorated with functions/events/fallback
        accessors derived from its ABI.
        """
        kwargs['web3'] = web3

        normalizers = {
            'abi': normalize_abi,
            'address': partial(normalize_address, kwargs['web3'].ens),
            'bytecode': normalize_bytecode,
            'bytecode_runtime': normalize_bytecode,
        }

        contract = PropertyCheckingFactory(
            class_name or cls.__name__,
            (cls,),
            kwargs,
            normalizers=normalizers,
        )
        # Attribute assignment is identical to the setattr() calls it replaces.
        contract.functions = ContractFunctions(contract.abi, contract.web3)
        contract.events = ContractEvents(contract.abi, contract.web3)
        contract.fallback = Contract.get_fallback_function(
            contract.abi, contract.web3)

        return contract
def parse_stratagem_results_to_influx(measurement, fs_name,
                                      stratagem_results_json):
    """Convert stratagem group-counter results into influx entries.

    Each group counter is parsed by the parser matching its name
    ("size_distribution" or "user_distribution"); the "warn_fids" and
    "purge_fids" groups are skipped. The per-group results are flattened
    into a single list.
    """
    parse_fns = {
        "size_distribution":
        partial(parse_size_distribution, measurement, fs_name, labels),
        "user_distribution":
        partial(parse_user_distribution, measurement, fs_name),
    }

    # (name, counters) pairs for every reported counter group
    pairs = [(group.get("name"), group.get("counters"))
             for group in stratagem_results_json.get("group_counters")]

    parsed = [parse_fns[name](counters)
              for name, counters in pairs
              if name not in ("warn_fids", "purge_fids")]

    return flatten(parsed)
def printable_train_data(malform_data_dir,
                         okform_data_dir,
                         ids,
                         extractor,
                         feature_names,
                         start,
                         end=None,
                         title_transform_func=make_capitalized_title,
                         exclude_labels=None,
                         exclude_word_positions=set([0])):
    """Yield (document id, feature block) pairs of capitalization training data.

    Python 2 code (`unicode`, `basestring`, eager `map`).

    Adapted to PULS requirement:

    - auxil file is read to get the additional preprocessed features

    Parameters
    ------------
    malform_data_dir: string
        the directory where the malformed data reside
    okform_data_dir: string
        the directory where the correctly formed data reside
        (NOTE(review): used directly with the `/` path operator below, so it
        is presumably already a Path -- confirm callers pass a Path)
    ids: list of string
        document ids
    extractor: FeatureExtractor
        the feature extractor
    feature_names: list of string
        the feature names
        (NOTE(review): mutated in place -- 'y' is appended below)
    start, end: int
        index range of ids to process
    title_transform_func: function
        function that accepts the title and transforms it
        into some badly capitalized version
    exclude_labels: iterable of str
        labels that we don't consider
    exclude_word_positions: set of int
        word positions within the title that are skipped
        (default: the first word; mutable default is shared but never mutated)

    Returns
    ------------
    Generator of (str, str):
        (document id, sentence block); each line in the block is one
        tab-separated token feature row
    """
    # NOTE(review): += mutates the caller's list across calls.
    feature_names += ['y']  # add the label feature name
    malform_data_dir = Path(malform_data_dir)

    # take care of this ["tickerSymbol",["NYSE","SKT"]]
    # /cs/taatto/home/hxiao/capitalization-recovery/corpus/puls-format-capitalized/3987E0BD03749C996A04B881079AD753.auxil
    clean_tag = (lambda t: t[0] if isinstance(t, list) else t)
    # Py2: map() returns lists here, so the len()/== checks below work;
    # under Py3 these would be lazy iterators and break.
    get_tokens = partial(map, partial(get_in, ['token']))
    get_tags = partial(map, compose(clean_tag, partial(get_in, ['pos'])))
    get_lemmas = partial(map, partial(get_in, ['lemma']))

    n_collected = 0

    for i, id_ in enumerate(ids):
        if i < start:
            continue

        # progress logging every 1000 ids
        if i % 1000 == 0:
            logger.info("Collected %d" % n_collected)
            logger.info("Finished %d" % i)

        if end is not None and i >= end:
            logger.info("Reached %d. Terminate." % end)
            break

        try:
            # the last line of the .auxil file holds the malformed-title JSON
            malform_auxil_path = (malform_data_dir /
                                  Path(id_)).with_suffix('.auxil')
            with malform_auxil_path.open(encoding='utf8') as f:
                logger.debug('processing: {}'.format(id_))
                # to get the last line
                lines = f.readlines()
                if len(lines) == 0:
                    raise EmptyFileError(
                        'auxil file empty: {}'.format(malform_auxil_path))

                l = lines[-1]

                data = json.loads(l.strip())

                okform_auxil_path = str(
                    (okform_data_dir / Path(id_)).with_suffix('.auxil'))
                okform_paf_path = str(
                    (okform_data_dir / Path(id_)).with_suffix('.paf'))

                good_title_sents, body_sents = separate_title_from_body(
                    okform_auxil_path, okform_paf_path)

                # extract the tokens
                doc = [[t['token'] for t in sent['features']]
                       for sent in body_sents]

                good_title_sents = list(good_title_sents)

                bad_title_sents = data['sents']
                if not isinstance(bad_title_sents, list):
                    raise InvalidTitleError(
                        'bad_title_sents not a list: {}'.format(
                            bad_title_sents))

                # we only consider headline that contains only ONE sentence
                if (len(good_title_sents) == 1 and len(bad_title_sents) == 1):
                    good_sent = good_title_sents[0]
                    bad_sent = bad_title_sents[0]
                    good_title_tokens = get_tokens(good_sent['features'])
                    bad_title_tokens = get_tokens(bad_sent['features'])

                    # some validity checking: both versions must align token
                    # by token
                    if len(good_title_tokens) != len(bad_title_tokens):
                        raise TitleInconsistencyError('{}\n{}'.format(
                            good_title_tokens, bad_title_tokens))

                    # case-insensitive comparison: the two titles may differ
                    # only in capitalization
                    good_title_tokens_lower = map(lambda s: s.lower(),
                                                  good_title_tokens)
                    bad_title_tokens_lower = map(lambda s: s.lower(),
                                                 bad_title_tokens)
                    if (good_title_tokens_lower != bad_title_tokens_lower):
                        raise TitleInconsistencyError('{}\n{}'.format(
                            good_title_tokens_lower, bad_title_tokens_lower))

                    tags = get_tags(bad_sent['features'])
                    lemmas = get_lemmas(bad_sent['features'])

                    # tag validity checking
                    for tag in tags:
                        if not (tag is None or isinstance(tag, basestring)):
                            raise InvalidTitleError(
                                '{}: tag {} not string'.format(id_, tag))

                    # get malformed title tokens
                    words = convert_to_trainable_format(good_title_tokens,
                                                        title_transform_func,
                                                        extractor,
                                                        doc=doc,
                                                        pos=tags,
                                                        lemma=lemmas)

                    # format the features in the required form
                    # NOTE(review): inner `i` shadows the outer enumerate(ids)
                    # index (harmless -- reassigned next outer iteration --
                    # but confusing).
                    res = unicode()
                    for i, word in enumerate(words):
                        # NOTE(review): with the default exclude_labels=None
                        # this condition is always False, so res stays empty
                        # -- looks like a bug; intent was presumably
                        # `(not exclude_labels or word['y'] not in
                        # exclude_labels)`. Confirm with callers.
                        if (i not in exclude_word_positions and exclude_labels
                                and word['y'] not in exclude_labels):
                            word_feature_str = u'\t'.join([
                                unicode(word[feature_name])
                                for feature_name in feature_names
                            ])
                            res += word_feature_str + '\n'
                    n_collected += 1
                    yield id_, res
                else:
                    # NOTE(review): also raised when either side has 0
                    # sentences, despite the "more than 1" wording
                    raise TitleInconsistencyError(
                        '# of title sentences more than 1: {}'.format(id_))
        except (IOError, TitleInconsistencyError, InvalidTitleError,
                EmptyFileError):
            # expected per-document failures: log at debug level and skip
            logger.debug(traceback.format_exc())
            continue
        except:
            # NOTE(review): bare except swallows everything (including
            # KeyboardInterrupt); unexpected errors are logged and the
            # document is skipped
            logger.error(traceback.format_exc())
            continue
# Maps the human-readable size-bucket names produced by stratagem scans to
# identifier-safe names.
size_distribution_name_table = {
    "size < 1m": "less_than_1m",
    "1m <= size < 1g": "greater_than_equal_1m_less_than_1g",
    "size >= 1g": "greater_than_equal_1g",
    "size >= 1t": "greater_than_equal_1t",
}

# Display labels for each size bucket, with backslash-escaped spaces and
# punctuation -- presumably for influx/grafana label escaping; TODO confirm
# the triple-backslash sequences render as intended downstream.
labels = {
    "less_than_1m": "<\\\ 1\\\ Mib",
    "greater_than_equal_1m_less_than_1g":
    ">\\\=\\\ 1\\\ Mib\\\,\\\ <\\\ 1\\\ GiB",
    "greater_than_equal_1g": ">\\\=\\\ 1\\\ GiB",
    "greater_than_equal_1t": ">\\\=\\\ 1\\\ TiB",
}

# Drops counters whose name is "other" (case-insensitive); returns a lazy
# filter iterator when applied.
filter_out_other_counter = partial(
    filter, lambda counter: counter.get("name").lower() != "other")


def flatten(xs):
    """Flatten one level of nesting: a list of iterables becomes one list."""
    flat = []
    for inner in xs:
        flat.extend(inner)
    return flat


def tuple_to_equals(xs):
    """Render a (key, value) pair as the string "key=value"."""
    return "{0}={1}".format(xs[0], xs[1])


def create_stratagem_influx_point(measurement, tags, fields):
    """Format an influx line-protocol point: "measurement,tags fields".

    *tags* and *fields* are iterables of (key, value) pairs, each rendered
    as "key=value" and comma-joined; a trailing space (from the empty
    timestamp slot) is stripped.
    """
    tag_part = ",".join("{}={}".format(*pair) for pair in tags)
    field_part = ",".join("{}={}".format(*pair) for pair in fields)
    point = "{},{} {} {}".format(measurement, tag_part, field_part, "")
    return point.rstrip()
Exemplo n.º 5
0
from chroma_api.authentication import AnonymousAuthentication, PatchedDjangoAuthorization
from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerClient
from tastypie.validation import Validation
from chroma_api.validation_utils import validate
from chroma_api.utils import custom_response, dehydrate_command, StatefulModelResource
from chroma_core.models import (
    StratagemConfiguration,
    ManagedHost,
    ManagedMdt,
    ManagedTargetMount,
    ManagedFilesystem,
    Command,
    get_fs_id_from_identifier,
)

# Coerce a bundle value to str, then parse it as a base-10 int:
# compose applies right-to-left, and flip(int, 10)(s) == int(s, 10).
# NOTE(review): compose/flip/partial presumably come from toolz, imported
# elsewhere in this module -- confirm.
get_bundle_int_val = compose(partial(flip, int, 10), str)

# Postgres can store numbers up to 8 bytes (+9,223,372,036,854,775,807). These values will ultimately be passed back to the web
# interface, where they will be used by javascript. Therefore, the maximum size of an integer is limited to the
# maximum size allowed by javascript, which is Number.MAX_SAFE_INTEGER (9,007,199,254,740,991).
MAX_SAFE_INTEGER = 9007199254740991


def get_duration_type(duration_key):
    """Return the capitalized segment of *duration_key* before the first '_'."""
    prefix = duration_key.partition("_")[0]
    return prefix.capitalize()


def get_duration(duration_key, bundle):
    try:
        duration = bundle.data.get(duration_key) and get_bundle_int_val(
            bundle.data.get(duration_key))
Exemplo n.º 6
0
from collections import namedtuple
from typing import Iterable
from toolz.functoolz import partial

import premailer
import pystache
from wedding.model import Party

# One invitation email: (recipient address, subject line, rendered message)
Invitation = namedtuple('Invitation', ['recipient', 'subject', 'message'])


def _url_sub(field: str, value: str, url: str) -> str:
    return url.replace(field, urllib.parse.quote(value))


# Specialized substituters: fill the {partyId} / {guestId} placeholder in a URL
# with a quoted id.
_substitute_party = partial(_url_sub, '{partyId}')
_substitute_guest = partial(_url_sub, '{guestId}')


def build_invitations(invitation_template: str, body_template: str,
                      envelope_template: str,
                      party: Party) -> Iterable[Invitation]:
    invitation_url = _substitute_party(party.id, invitation_template)
    envelope_url = _substitute_party(party.id, envelope_template)

    return (Invitation(
        guest.email, 'Jenny and Jesse are Getting Married!',
        premailer.transform(
            pystache.render(
                body_template, {
                    'partyName': party.title,
Exemplo n.º 7
0
def get_latest_testrun(testrun_id):
    """Print every pretty-polarion attribute of the latest test run."""
    test_run = get_latest_test_run(testrun_id)
    is_printable = partial(only_pretty_polarion, test_run)
    for attr_name in filter(is_printable, dir(test_run)):
        print_kv(test_run, attr_name)
Exemplo n.º 8
0
 def __payload(self, event):
     """Decode the JSON payload carried in event['body'].

     If the parsed body has an 'items' key, each item is decoded by the
     codec and collected into a list; otherwise the whole body is decoded
     as a single value.

     NOTE(review): option.cata presumably treats a present 'items' value as
     the "some" case and a missing one as "none" -- confirm against the
     option library's cata signature/argument order.
     """
     body = json.loads(event['body'])
     # cata(some_fn, none_fn)(body.get('items')): map codec.decode over the
     # items (materialized to a list via compose(list, map)) when present,
     # else decode the full body.
     return option.cata(partial(compose(list, map), self.__codec.decode),
                        lambda: self.__codec.decode(body))(
                            body.get('items'))