示例#1
0
    def to_nn(self, shape, scale_pixels=False):
        """Convert the data into neural-network inputs/outputs.

        Every unique ``ImageId`` in ``self.df`` is passed through
        ``self._to_single_nn(shape)``; the per-image results are unzipped,
        optionally augmented, converted to numpy arrays and finally handed
        to ``self._reshape_output``.

        :param shape: forwarded to ``self._to_single_nn``.
        :param scale_pixels: when truthy, element 0 of the array list is
            divided by 255.
        :returns: whatever ``self._reshape_output`` returns.
        """
        # Augmentation only runs when ``self.augment`` is truthy.
        maybe_augment = iffy(constantly(self.augment), self._augment_nn)
        # Pixel scaling touches only the first array of the list.
        maybe_scale = iffy(constantly(scale_pixels),
                           lens[0].modify(lambda pixels: pixels / 255))

        image_ids = self.df.ImageId.unique()
        return pipe(
            image_ids,
            map(self._to_single_nn(shape)),
            list_unzip,
            maybe_augment,
            map(np.array),
            list,
            maybe_scale,
            self._reshape_output,
        )
示例#2
0
def by_topic_nodes(references=False, use_set=False, simple_label=False):
    """Group the init() frame by topic and build one row per topic.

    Resulting columns:

        id, label, reference_count, references (optional),
        simple_label (optional)

    ``references`` adds the list of references of each group; combined with
    ``use_set`` that column is converted to frozensets.  ``simple_label``
    adds the label with parentheses removed (non-string labels are passed
    through as they are).
    """
    def is_text(value):
        return isinstance(value, str)

    def drop_parens(value):
        return value.replace('(', '').replace(')', '')

    # Column name -> function computing the column from a group.  Insertion
    # order is the order in which the columns are handed to the transform.
    column_builders = {
        'id': group_attr(topic_id),
        'label': group_attr(topic_text),
        'reference_count': len,
    }

    if references:
        column_builders['references'] = compose(list, map(first))

    if simple_label:
        column_builders['simple_label'] = compose(
            iffy(is_text, drop_parens), group_attr(topic_text))

    group_key = compose(topic_id, second)
    frame = _by_group_transform(group_key, column_builders.items())

    if references and use_set:
        frame.references = frame.references.apply(frozenset)

    return frame
示例#3
0
def representation(text_list, translation=esv, include_svd=True):
    """Vectorize a list of sentences and optionally reduce it with SVD.

    The sentences are transformed by the vocabulary vectorizer built for
    ``translation``; when ``include_svd`` is truthy the result is then passed
    through the SVD transform built for the same translation.
    """
    vectorize = vocabulary_vectorizer(translation=translation).transform
    # The SVD object is built unconditionally; only its application is gated.
    maybe_reduce = iffy(constantly(include_svd),
                        svd(translation=translation).transform)
    return pipe(text_list, vectorize, maybe_reduce)
示例#4
0
    def default(self, obj):
        """Serialize ``Context`` and ``Binding`` objects as ordered mappings.

        For those two types a fixed list of attribute names is read from
        ``obj``; entries whose value is None, or an empty list/dict, are
        removed, dict values are passed through ``sort_dict`` and the rest is
        returned as an ``OrderedDict``.  Any other object is delegated to the
        parent class' ``default``.
        """
        # Composed callable: attribute names -> OrderedDict of kept values.
        attrs_to_ordered = pipe(
            partial(map, lambda name: (name, getattr(obj, name))),
            partial(remove_values, isnone),
            partial(remove_values, all_fn(isa(list, dict), isempty)),
            partial(walk_values, iffy(isa(dict), sort_dict)),
            OrderedDict)

        if isinstance(obj, Context):
            return attrs_to_ordered(
                ['key', 'operator', 'operand', 'match_all'])

        if isinstance(obj, Binding):
            return attrs_to_ordered(['keys', 'command', 'args', 'context'])

        return super().default(obj)
示例#5
0
def _read_sheet(sheet: Worksheet):
    """Read a worksheet into a list of dicts keyed by the header row.

    The first row supplies the field names.  Every other cell is converted
    to text, typographic quotes are replaced by plain ones and the text is
    passed to ``safe_loads``; when that yields None the raw cell value is
    used instead.
    """
    # Typographic single/double quotes -> their plain ASCII counterparts.
    plain_quotes = str.maketrans({
        "‘": "'",
        "’": "'",
        "´": "'",
        "“": '"',
        "”": '"',
    })

    row_iter = sheet.rows
    header = [cell.value for cell in next(row_iter)]

    records = []
    for row in row_iter:
        values = []
        for cell in row:
            # Falls back to the untouched cell value when parsing gives None.
            parsed_or_raw = F.iffy(F.notnone, F.identity,
                                   identity(cell.value))
            text = (str(cell.value) or "").translate(plain_quotes)
            values.append(parsed_or_raw(safe_loads(text)))
        records.append(dict(zip(header, values)))
    return records
示例#6
0
    def get_data_frame(document_id: str,
                       raw: bool = False,
                       auth_args: Auth = Auth.shared()):
        """Download a document's block file and load it as a DataFrame.

        The document's ``content`` entries are searched for the one whose
        ``format.code`` is ``"ocr-text-file-id"``; the last path segment of
        its attachment URL is used as the file id.  The file is downloaded
        to ``/tmp/``, parsed as line-delimited JSON and then deleted.

        Args:
            document_id: id of the document to look up.
            raw: when true, return the parsed frame without post-processing.
            auth_args: credentials used for the document lookup and the
                file download.

        Returns:
            The parsed frame as-is when ``raw`` is true or the frame is
            empty; otherwise the frame with ``Geometry`` flattened into
            columns, ``Polygon`` expanded, indexed by ``Id`` and passed
            through ``Block.sort``.

        Raises:
            ValueError: if the document has no block file attachment.
        """
        auth = Auth(auth_args)
        document = Document.get(document_id, auth_args=auth_args)

        file_id = pipe(
            document.get("content", []),
            c.filter(lambda item: item.get("format", {}).get("code") ==
                     "ocr-text-file-id"),
            c.first,
            c.get("attachment", default={}),
            c.get("url"),
            # A missing URL stays None and is reported below.
            iffy(isa(str), lambda url: url.split("/")[-1]),
        )

        if file_id is None:
            raise ValueError(
                f"No block file found for document: '{document_id}'")

        files = Files(auth.session())
        filename = files.download(file_id, "/tmp/")

        try:
            frame = pd.read_json(filename, lines=True)
        finally:
            # Remove the downloaded file even when parsing fails, so a bad
            # payload does not leave a stray file behind.
            os.remove(filename)

        if raw or len(frame) == 0:
            return frame

        return Block.sort(
            frame.drop(["Geometry"], axis=1).join(
                pd.json_normalize(frame.Geometry)).pipe(
                    partial(
                        Frame.expand,
                        custom_columns=[
                            Frame.codeable_like_column_expander("Polygon")
                        ],
                    )).set_index("Id"))
示例#7
0
# Names of exception types whose events are dropped rather than reported.
NON_REPORTED_EXCEPTIONS = ["QueryExecutionError"]


def before_send(event, hint):
    """Filter events before they are sent.

    Returns None (dropping the event) when ``hint`` carries ``exc_info``
    and the string form of the exception's type contains one of the names
    in ``NON_REPORTED_EXCEPTIONS``; otherwise returns ``event`` unchanged.
    """
    if "exc_info" not in hint:
        return event

    _, exc_value, _ = hint["exc_info"]
    type_repr = str(type(exc_value))
    for ignored_name in NON_REPORTED_EXCEPTIONS:
        if ignored_name in type_repr:
            return None

    return event


def init():
    """Initialise the Sentry SDK; does nothing when no DSN is configured."""
    dsn = settings.SENTRY_DSN
    if not dsn:
        return

    integrations = [
        FlaskIntegration(),
        SqlalchemyIntegration(),
        RedisIntegration(),
        RqIntegration(),
    ]
    sentry_sdk.init(
        dsn=dsn,
        environment=settings.SENTRY_ENVIRONMENT,
        release=__version__,
        before_send=before_send,
        send_default_pii=True,
        integrations=integrations,
    )


def _sentry_configured(_):
    """Predicate for ``iffy``: the configured DSN, whatever the argument."""
    return settings.SENTRY_DSN


# Forwards to sentry_sdk.capture_message only while a DSN is configured;
# otherwise iffy's default branch applies (identity in funcy -- confirm
# which iffy is imported here).
capture_message = iffy(_sentry_configured, sentry_sdk.capture_message)
示例#8
0
def parse_grid(data=None,
               sep=None,
               strip='udr',
               comment='#',
               empty='',
               ignore_blank=True,
               dtype=None,
               on_empty=None,
               quiet=False):
    """Parse a block of text into a rectangular 2-D numpy array.

    Args:
        data: Text to parse.  When None it is taken from ``clipboard_get()``.
        sep: Regular expression used with ``re.split`` on every line.  None
            asks ``guess_splitter`` for one; an empty (or None) separator
            splits every line into single characters.
        strip: Passed as ``dirs`` to ``subrect`` to trim the grid; a falsy
            value skips trimming.
        comment: Regular-expression prefix; it and the rest of the line are
            removed.  A falsy value disables comment removal.
        empty: Cell text that marks an empty cell.
        ignore_blank: Drop lines that are empty after comment removal.
        dtype: Element type.  When None it is inferred: ``int`` if every
            non-empty cell is all digits, ``float`` if every non-empty cell
            looks like a decimal number, otherwise ``object``.
        on_empty: Replacement for empty cells when the element type is not
            ``object``.
        quiet: Suppress the progress messages.

    Returns:
        A 2-D array.  Short rows are padded with empty cells.  With an
        ``object`` element type empty cells become None.  A ``(0, 0)`` array
        is returned when no lines are left to parse.
    """
    msg = fn.identity if quiet else print
    if data is None:
        data = clipboard_get()
    lines = data.splitlines()
    # Strip leading blank line (generally from pasting something into triple
    # quotes)
    if lines and lines[0] == '':
        lines = lines[1:]
    # Strip comments and blank lines
    if comment:
        lines = [re.sub(comment + '.*$', '', line) for line in lines]
    if ignore_blank:
        lines = [line for line in lines if line]
    # Checked after filtering: input made only of comments/blank lines would
    # otherwise reach max() below with nothing to measure.
    if not lines:
        msg("No data to parse.")
        return np.empty((0, 0), dtype=dtype)
    # Determine splitter and split lines into items
    if sep is None:
        sep = guess_splitter([line for line in lines if line])
    if sep is not None and sep != '':
        msg(f"Separating by {sep!r}")
        rows = [re.split(sep, line) for line in lines]
    else:
        msg("Separating by character")
        rows = [list(line) for line in lines]
    # Pad out shape to rectangular
    width = max(len(row) for row in rows)
    for row in rows:
        if len(row) < width:
            row.extend([''] * (width - len(row)))
    grid = np.array(rows, dtype=str)
    # From here on an empty cell is always the empty string.
    grid[grid == empty] = ''
    if strip:
        grid = subrect(grid, dirs=strip)
    # Determine dtype
    if dtype is None:
        dtype = object
        if all(re.match(r'^\d+$', d) for d in grid.flat if d):
            dtype = int
        elif all(re.match(r'^\d+\.?\d*$', d) for d in grid.flat if d):
            dtype = float
    # xform coerces items to type
    if dtype is object:

        def xform(cell):
            # Cells equal to `empty` were normalised to '' above, so that is
            # the value to test for -- not `empty` itself.
            return cell if cell != '' else None
    else:
        xform = fn.iffy(dtype, default=on_empty)
        # A None placeholder cannot live in a numeric array.
        dtype = object if '' in grid.flat and on_empty is None else dtype
    h, w = grid.shape
    result = np.array([[xform(val) for val in row] for row in grid],
                      dtype=dtype)
    msg(f"Array is {h} rows x {w} cols of type {result.dtype}")
    return result
示例#9
0
def passages_by_uuid(uuid, include_text=False):
    """Build the passage for the references stored under ``uuid``.

    Each row found for ``uuid`` is turned into a reference and the list is
    fed to ``passage.init``; with ``include_text`` truthy the result is
    additionally passed through ``passage.text``.
    """
    rows = find_by_uuid(uuid)
    references = rows.apply(reference.init_raw_row, axis=1).tolist()
    maybe_text = iffy(constantly(include_text), passage.text)
    return pipe(references, passage.init, maybe_text)
示例#10
0
# Names of exception types whose events are dropped rather than reported.
NON_REPORTED_EXCEPTIONS = ["QueryExecutionError"]


def before_send(event, hint):
    """Decide whether an event is sent.

    The event is dropped (None is returned) when ``hint`` contains
    ``exc_info`` and the string form of the exception's type includes any
    name listed in ``NON_REPORTED_EXCEPTIONS``.  In every other case the
    event is returned untouched.
    """
    if "exc_info" in hint:
        _, exc_value, _ = hint["exc_info"]
        exc_type_text = str(type(exc_value))
        for name in NON_REPORTED_EXCEPTIONS:
            if name in exc_type_text:
                return None

    return event


def init():
    """Set up the Sentry SDK, provided a DSN is configured."""
    dsn = settings.SENTRY_DSN
    if not dsn:
        # Nothing to report to.
        return

    sentry_options = dict(
        dsn=dsn,
        environment=settings.SENTRY_ENVIRONMENT,
        release=__version__,
        before_send=before_send,
        send_default_pii=True,
        integrations=[
            FlaskIntegration(),
            SqlalchemyIntegration(),
            RedisIntegration(),
            RqIntegration(),
        ],
    )
    sentry_sdk.init(**sentry_options)


def _sentry_configured(_):
    """Predicate for ``iffy``: the configured DSN, whatever the argument."""
    return settings.SENTRY_DSN


# Forwards to sentry_sdk.capture_exception only while a DSN is configured;
# otherwise iffy's default branch applies (identity in funcy -- confirm
# which iffy is imported here).
capture_exception = iffy(_sentry_configured, sentry_sdk.capture_exception)
示例#11
0
 def map_types(self, types, func):
     """Map ``func`` over the values that are instances of ``types``.

     Values of any other type are handled by iffy's default branch
     (presumably left unchanged -- confirm against the iffy in use).
     """
     convert_matching = iffy(isa(types), func)
     walker = partial(walk_values, convert_matching)
     return self.map(walker)
示例#12
0
    def import_stage(self, harvest_object):
        '''
        The import stage will receive a HarvestObject object and will be
        responsible for:
            - performing any necessary action with the fetched object (e.g
              create a CKAN package).
              Note: if this stage creates or updates a package, a reference
              to the package must be added to the HarvestObject.
              Additionally, the HarvestObject must be flagged as current.
            - creating the HarvestObject - Package relation (if necessary)
            - creating and storing any suitable HarvestObjectErrors that may
              occur.
            - returning True if everything went as expected, False otherwise.

        The package is skipped (and False is returned) when the object with
        the same guid in the second-to-last job of the source carries the
        same 'integrity' value as the new content.

        :param harvest_object: HarvestObject object
        :returns: True if everything went right, False if errors were found
                  or the package was left unchanged
        '''
        # Guard first: nothing on the object may be touched before we know
        # that one was actually passed in.
        if not harvest_object:
            logger.error('No harvest object received')
            self._save_object_error('No harvest object received')
            return False

        logger.debug("in import stage: %s", harvest_object.guid)

        try:
            self._set_config(harvest_object.job.source.config)
            context = {'model': model, 'session': Session, 'user': self.user}

            package_dict = json.loads(harvest_object.content)

            package_dict['id'] = munge_title_to_name(harvest_object.guid)
            package_dict['name'] = package_dict['id']

            # add owner_org
            source_dataset = get_action('package_show')(
                {
                    'ignore_auth': True
                }, {
                    'id': harvest_object.source.id
                })
            owner_org = source_dataset.get('owner_org')
            package_dict['owner_org'] = owner_org

            try:
                # jobs[-2] is the job before the current one; an IndexError
                # means there is nothing to compare against yet.
                prev_dict = iffy(json.loads)(_get_content(
                    some(
                        compose(partial(eq, package_dict['id']),
                                attrgetter('guid')),
                        harvest_object.source.jobs[-2].objects)))
                if prev_dict and prev_dict.get(
                        'integrity') == package_dict['integrity']:
                    logger.info('Package not changed. Skip update')
                    return False
            except IndexError:
                logger.debug('Skip integrity check. No previous data.')

            self._create_or_update_package(package_dict, harvest_object,
                                           'package_show')

            Session.commit()

            logger.debug("Finished record")
        except Exception:
            # Ordinary failures are recorded on the harvest object;
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            logger.exception('Something went wrong!')
            self._save_object_error('Exception in import stage',
                                    harvest_object)
            return False
        return True
示例#13
0
def show_experiments(all_experiments,
                     pager=True,
                     no_timestamp=False,
                     **kwargs):
    """Render the experiments table.

    Args:
        all_experiments: experiments data handed to ``_collect_names`` and
            ``experiments_table``.
        pager: forwarded to the table's ``render``.
        no_timestamp: when true the "Created" column is dropped.
        **kwargs: ``include_metrics``, ``exclude_metrics``,
            ``include_params`` and ``exclude_params`` are consumed as
            filter lists; ``sort_by``, ``sort_order`` and ``precision`` are
            passed on to ``experiments_table``.
    """
    include_metrics = _parse_filter_list(kwargs.pop("include_metrics", []))
    exclude_metrics = _parse_filter_list(kwargs.pop("exclude_metrics", []))
    include_params = _parse_filter_list(kwargs.pop("include_params", []))
    exclude_params = _parse_filter_list(kwargs.pop("exclude_params", []))

    metric_names, param_names = _collect_names(
        all_experiments,
        include_metrics=include_metrics,
        exclude_metrics=exclude_metrics,
        include_params=include_params,
        exclude_params=exclude_params,
    )
    metric_headers = _normalize_headers(metric_names)
    param_headers = _normalize_headers(param_names)

    td = experiments_table(
        all_experiments,
        metric_headers,
        metric_names,
        param_headers,
        param_names,
        kwargs.get("sort_by"),
        kwargs.get("sort_order"),
        kwargs.get("precision"),
    )

    if no_timestamp:
        td.drop("Created")

    # Rows flagged as baseline are rendered bold, all others unstyled.
    baseline_styler = iffy(constantly({"style": "bold"}), default={})
    row_styles = lmap(baseline_styler, td.column("is_baseline"))
    td.drop("is_baseline")

    # Fold the helper columns into the "Experiment" cell, then drop them.
    merge_headers = ["Experiment", "queued", "ident_guide", "parent"]
    td.column("Experiment")[:] = map(prepare_exp_id, td.as_dict(merge_headers))
    td.drop(*merge_headers[1:])

    headers = {"metrics": metric_headers, "params": param_headers}
    styles = {
        "Experiment": {
            "no_wrap": True,
            "header_style": "black on grey93"
        },
        "Created": {
            "header_style": "black on grey93"
        },
    }
    header_bg_colors = {"metrics": "cornsilk1", "params": "light_cyan1"}
    styles.update({
        header: {
            # Metrics are right-aligned, params left-aligned.  The previous
            # fallback value "params" is not a justification at all.
            "justify": "right" if typ == "metrics" else "left",
            "header_style": f"black on {header_bg_colors[typ]}",
            # Only the first column of each group is protected from
            # collapsing.
            "collapse": idx != 0,
            "no_wrap": typ == "metrics",
        }
        for typ, hs in headers.items() for idx, header in enumerate(hs)
    })

    td.render(
        pager=pager,
        borders=True,
        rich_table=True,
        header_styles=styles,
        row_styles=row_styles,
    )
示例#14
0
文件: lang.py 项目: jacob414/micropy
        return fn


def mkclass(name: str, bases: Tuple = (), **clsattrs: Any) -> Any:
    """Create and return a new class named `name`.

    The class derives from `Base` followed by `bases`, and `clsattrs`
    becomes its attribute namespace.
    """
    parents = (Base, ) + bases
    return type(name, parents, clsattrs)


def arity(fn: Callable) -> int:
    """Return how many parameters `fn` declares in its signature.

    Every parameter is counted, including those with defaults and the
    variadic `*args` / `**kwargs` ones.
    """
    signature = inspect.signature(fn)
    return len(signature.parameters)


# Wraps a value in a one-element tuple unless funcy.is_seqcont accepts it;
# values it does accept go through iffy's default branch.
always_tup = funcy.iffy(
    funcy.complement(funcy.is_seqcont),
    lambda value: (value, ),
)


class Piping(pipelib.BasePiping):
    """Piping objects is for (ab)using Python operator overloading to
    build small pipeline-DSL's.

    The most basic one will simply refuse to do anything - you have to
    give it instructions/permissions on everything it's made for ;-).

    """
    class Fresh(object):
        "Marker for Piping instances that never has been run"
        pass

    class Executed(object):
示例#15
0
def show_experiments(all_experiments,
                     pager=True,
                     no_timestamp=False,
                     **kwargs):
    """Render the experiments table.

    ``include_metrics``, ``exclude_metrics``, ``include_params`` and
    ``exclude_params`` are taken out of ``kwargs`` as filter lists;
    ``sort_by``, ``sort_order`` and ``precision`` are read from ``kwargs``
    and passed to ``experiments_table``.  With ``no_timestamp`` the
    "Created" column is left out.  ``pager`` is forwarded to ``render``.
    """
    filter_keys = ("include_metrics", "exclude_metrics", "include_params",
                   "exclude_params")
    filters = {
        key: _parse_filter_list(kwargs.pop(key, []))
        for key in filter_keys
    }

    metric_names, param_names = _collect_names(all_experiments, **filters)
    metric_headers = _normalize_headers(metric_names)
    param_headers = _normalize_headers(param_names)

    td = experiments_table(
        all_experiments,
        metric_headers,
        metric_names,
        param_headers,
        param_names,
        kwargs.get("sort_by"),
        kwargs.get("sort_order"),
        kwargs.get("precision"),
    )

    # Header styles are positional: first column, the optional "Created"
    # column, then one entry per metric and one per param.
    plain_header = "black on grey93"
    metric_styles = [{
        "justify": "right",
        "header_style": "black on cornsilk1",
        "no_wrap": True,
        "collapse": position != 0,
    } for position, _ in enumerate(metric_headers)]
    param_styles = [{
        "justify": "left",
        "header_style": "black on light_cyan1",
        "collapse": position != 0,
    } for position, _ in enumerate(param_headers)]

    styles = [{"no_wrap": True, "header_style": plain_header}]
    if no_timestamp:
        td.drop("Created")
    else:
        styles.append({"header_style": plain_header})
    styles.extend(metric_styles)
    styles.extend(param_styles)

    # Rows flagged as baseline are rendered bold, all others unstyled.
    baseline_styler = iffy(constantly({"style": "bold"}), default={})
    row_styles = lmap(baseline_styler, td.column("is_baseline"))
    td.drop("is_baseline")

    td.render(
        pager=pager,
        borders=True,
        rich_table=True,
        header_styles=styles,
        row_styles=row_styles,
    )