Example #1
def compute_down(expr,
                 ec,
                 profiler_output=None,
                 compute_kwargs=None,
                 odo_kwargs=None,
                 **kwargs):
    """Compute down for blaze clients.

    Parameters
    ----------
    expr : Expr
        The expression to send to the server.
    ec : Client
        The blaze client to compute against.
    namespace : dict[Symbol -> any], optional
        The namespace to compute the expression in. This will be amended to
        include the data for the server. By default this will just be the
        client mapping to the server's data.
    compute_kwargs : dict, optional
        Extra kwargs to pass to compute on the server.
    odo_kwargs : dict, optional
        Extra kwargs to pass to odo on the server.
    profile : bool, optional
        Whether the blaze server should run cProfile over the computation of
        the expression and the serialization of the response.
    profiler_output : file-like object, optional
        A file-like object to hold the profiling output from the server.
        If this is not passed then the server will write the data to the
        server's filesystem.
    """
    from .server import to_tree

    kwargs = keymap(u8, kwargs)

    tree = to_tree(expr)
    serial = ec.serial
    if profiler_output is not None:
        kwargs[u'profile'] = True
        kwargs[u'profiler_output'] = ':response'

    kwargs[u'compute_kwargs'] = keymap(u8, compute_kwargs or {})
    kwargs[u'odo_kwargs'] = keymap(u8, odo_kwargs or {})

    r = post(
        ec,
        '/compute',
        data=serial.dumps(assoc(kwargs, u'expr', tree)),
        auth=ec.auth,
        headers=mimetype(serial),
    )

    if not ok(r):
        raise ValueError("Bad response: %s" % reason(r))
    response = serial.loads(content(r))
    if profiler_output is not None:
        profiler_output.write(response[u'profiler_output'])
    return serial.data_loads(response[u'data'])
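For reference, the keymap(u8, ...) calls above only normalize the keyword names to unicode text before the payload is serialized; a minimal sketch, assuming u8 behaves like a bytes-to-text coercion helper (the stand-in below is hypothetical):

from toolz import keymap

def u8(s):
    # hypothetical stand-in for blaze's u8: decode bytes, pass text through
    return s.decode('utf-8') if isinstance(s, bytes) else s

kwargs = {b'profile': True, 'profiler_output': ':response'}
print(keymap(u8, kwargs))  # {'profile': True, 'profiler_output': ':response'}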
Example #2
 def to_dict(self):
     return {
         'is_started':
         self.is_started,
         'title':
         self.title,
         'grid': {
             str(i):
             [chain.to_dict() if chain else None for chain in column]
             for i, column in enumerate(self.grid)
         },
         'player_order':
         self.player_order,
         'current_turn_player':
         self.current_turn_player,
         'current_action_player':
         self.current_action_player,
         'current_action_type':
         self.current_action_type.value,
         'current_action_details':
         self.current_action_details,
         'stock_availability':
         toolz.keymap(lambda b: b.value, self.stock_availability),
         'money_by_player':
         self.money_by_player,
         'stock_by_player':
         toolz.valmap(
             lambda stock_map: toolz.keymap(lambda brand: brand.value,
                                            stock_map),
             self.stock_by_player),
         'user_data_by_id':
         self.user_data_by_id,
         'tiles_remaining':
         self.tiles_remaining,
         'cost_by_brand':
         toolz.keymap(lambda b: b.value, self.cost_by_brand),
         'inactive_brands': [brand.value for brand in self.inactive_brands],
         'active_brands': [brand.value for brand in self.active_brands],
         'most_recently_placed_tile':
         None if not self.most_recently_placed_tile else
         self.most_recently_placed_tile.to_dict(),
         'most_recent_actions':
         self.most_recent_actions,
         'acquisition_resolution_queue': [{
             'player_id':
             details['player_id'],
             'acquirer':
             details['acquirer'].value,
             'acquiree':
             details['acquiree'].value,
             'acquiree_cost_at_acquisition_time':
             details['acquiree_cost_at_acquisition_time']
         } for details in self.acquisition_resolution_queue]
     }
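The keymap calls in to_dict swap Enum keys for their plain .value so the structure is serializable; a small illustration with a toy Brand enum (hypothetical, for demonstration only):

import enum
import toolz

class Brand(enum.Enum):
    LUXOR = 'luxor'
    TOWER = 'tower'

stock_availability = {Brand.LUXOR: 5, Brand.TOWER: 3}
print(toolz.keymap(lambda b: b.value, stock_availability))  # {'luxor': 5, 'tower': 3}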
Example #3
    def _scatter(self, data, workers=None, broadcast=False):
        """ Scatter data to local data dictionary

        Rather than send data out to the cluster we keep data local.  However
        we do report to the scheduler that the local worker has the scattered
        data.  This allows other workers to come by and steal this data if
        desired.

        Keywords like ``broadcast=`` do not work, however operations like
        ``.replicate`` work fine after calling scatter, which can fill in for
        this functionality.
        """
        with log_errors():
            if not (workers is None and broadcast is False):
                raise NotImplementedError(
                    "Scatter from worker doesn't support workers or broadcast keywords"
                )

            if isinstance(data, dict) and not all(
                    isinstance(k, (bytes, str)) for k in data):
                d = yield self._scatter(keymap(tokey, data), workers,
                                        broadcast)
                raise gen.Return({k: d[tokey(k)] for k in data})

            if isinstance(data, (list, tuple, set, frozenset)):
                keys = []
                for x in data:
                    try:
                        keys.append(tokenize(x))
                    except Exception:
                        keys.append(str(uuid.uuid1()))
                data2 = dict(zip(keys, data))
            elif isinstance(data, dict):
                keys = set(data)
                data2 = data
            else:
                raise TypeError("Don't know how to scatter %s" % type(data))

            nbytes = valmap(sizeof, data2)

            # self.worker.data.update(data2)  # thread safety matters
            self.worker.loop.add_callback(self.worker.data.update, data2)

            yield self.scheduler.update_data(
                who_has={key: [self.worker.address]
                         for key in data2},
                nbytes=valmap(sizeof, data2),
                client=self.id)

            if isinstance(data, dict):
                out = {k: Future(k, self) for k in data}
            elif isinstance(data, (tuple, list, set, frozenset)):
                out = type(data)([Future(k, self) for k in keys])
            else:
                raise TypeError("Input to scatter must be a list or dict")

            for key in keys:
                self.futures[key].finish(type=None)

            raise gen.Return(out)
Example #4
def loads(b):
    """ Transform bytestream back into Python value """
    header_length, = struct.unpack('I', b[:4])
    if header_length:
        header = msgpack.loads(b[4: header_length + 4], encoding='utf8')
    else:
        header = {}
    payload = b[header_length + 4:]

    if header.get('compression'):
        try:
            decompress = compressions[header['compression']]['decompress']
            payload = decompress(payload)
        except KeyError:
            raise ValueError("Data is compressed as %s but we don't have this"
                    " installed" % header['compression'].decode())

    msg = msgpack.loads(payload, encoding='utf8')

    if header.get('decode'):
        if isinstance(msg, dict) and msg:
            msg = keymap(bytes.decode, msg)
        elif isinstance(msg, bytes):
            msg = msg.decode()
        else:
            raise TypeError("Asked to decode a %s" % type(msg).__name__)

    return msg
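In the decode branch above, keymap(bytes.decode, msg) rebuilds the mapping with text keys while leaving the values untouched; for example:

from toolz import keymap

msg = {b'op': b'ping', b'id': b'1'}
print(keymap(bytes.decode, msg))  # {'op': b'ping', 'id': b'1'}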
Example #5
def loads(b):
    """ Transform bytestream back into Python value """
    header_length, = struct.unpack('I', b[:4])
    if header_length:
        header = msgpack.loads(b[4:header_length + 4], encoding='utf8')
    else:
        header = {}
    payload = b[header_length + 4:]

    if header.get('compression'):
        try:
            decompress = compressions[header['compression']]['decompress']
            payload = decompress(payload)
        except KeyError:
            raise ValueError("Data is compressed as %s but we don't have this"
                             " installed" % header['compression'].decode())

    msg = msgpack.loads(payload, encoding='utf8')

    if header.get('decode'):
        if isinstance(msg, dict) and msg:
            msg = keymap(bytes.decode, msg)
        elif isinstance(msg, bytes):
            msg = msg.decode()
        else:
            raise TypeError("Asked to decode a %s" % type(msg).__name__)

    return msg
Example #6
    def create_branch(self,
                      branch_name,
                      files,
                      parents=[],
                      message='',
                      signature=None):
        # 1. create tree
        files = toolz.keymap(lambda path: tuple(path.split('/')), files)
        files = unflatten(files)
        tree_id = self.create_tree(files)

        # 2. create commit with the tree created above
        # TODO(kszucs): pass signature explicitly
        author = committer = self.signature
        commit_id = self.repo.create_commit(None, author, committer, message,
                                            tree_id, parents)
        commit = self.repo[commit_id]

        # 3. create branch pointing to the previously created commit
        branch = self.repo.create_branch(branch_name, commit)

        # append to the pushable references
        self._updated_refs.append('refs/heads/{}'.format(branch_name))

        return branch
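Step 1 uses keymap to turn flat 'dir/file' paths into tuple keys so they can be nested into a tree; roughly as below, where unflatten is a simplified stand-in for the project's helper:

import toolz

files = {'docs/readme.md': b'# hi', 'src/main.py': b'print(1)'}
files = toolz.keymap(lambda path: tuple(path.split('/')), files)
# {('docs', 'readme.md'): b'# hi', ('src', 'main.py'): b'print(1)'}

def unflatten(flat):
    # simplified stand-in: nest tuple-keyed entries into dicts of dicts
    tree = {}
    for path, content in flat.items():
        node = tree
        for part in path[:-1]:
            node = node.setdefault(part, {})
        node[path[-1]] = content
    return tree

print(unflatten(files))
# {'docs': {'readme.md': b'# hi'}, 'src': {'main.py': b'print(1)'}}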
Example #7
 def requirejson_wrapper(*args, **kwargs):
   
   # TODO(vishesh): malformed JSON gives 500 error, should give 400,
   # can't seem to catch the ValueError from json.loads
   try:
     # GET/DELETE have no body. PUT/PATCH/POST have bodies.
     r = None
     if (request.method in ['GET', 'DELETE'] or
           (request.method in ['POST', 'PUT', 'PATCH'] and
                'json' not in request.content_type)):
       r = {k: request.params[k] for k in request.params}
     else:
       r = request.json
   except ValueError as e:
     jsonabort(400, ('Request should be parseable json, got error: '
                     '' + str(e.args)))
   
   if r is None:
     # the only time that r will be None is if the json part fails.
     # request.params being empty will give an empty dictionary instead,
     # so this logic is okay (don't need to change the expected
     # content-type based on the request method).
     jsonabort(400, ('Content-Type should be application/json, got '
                     '' + str(request.content_type)))
   
   if type(r) is not dict:
     jsonabort(400, 'Request must be a JSON object, not {}'.format(
       typename(r)))
   
   if not all(k in r for k in keys):
     jsonabort(400, 'Request is missing keys: ' +
               str(list(set(keys) - r.keys())))
   
   if strict and not all(p in keys or p in opts for p in r):
     # since we know that all k in keys is present in r
     # if the lengths are unequal then for sure there are extra keys.
     jsonabort(400, 'Strict mode: request has unrecognized keys: ' +
               str(list(r.keys() - set(keys))))
   
   p = t.keymap(lambda k: k.replace('-', '_'), t.merge(opts, r))
   
   # python 3.5+ type checking, replace known types with variables.
   if sys.version_info >= (3, 5):
     ann = req_fun.__annotations__
     for (k, v) in p.items():
       if k in ann:
         try:
           p[k] = ann[k](v)
         except Exception:
           jsonabort(400,
                     'Parameter {} should be type {}, got {}'.format(
                       k, ann[k], type(v)))
   
   overlap = set(kwargs) & set(p)
   if len(overlap) > 0:
     raise ValueError(
       'keyword args being clobbered by json params: ' + str(overlap))
   
   return req_fun(*args, **t.merge(kwargs, p))
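The t.keymap(lambda k: k.replace('-', '_'), ...) call rewrites dashed request keys into underscore form so they can be splatted as Python keyword arguments; e.g.:

import toolz as t

params = t.merge({'page-size': 10}, {'sort-by': 'name'})
print(t.keymap(lambda k: k.replace('-', '_'), params))  # {'page_size': 10, 'sort_by': 'name'}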
Example #8
    def test_novel_deltas_macro(self):
        asset_info = asset_infos[0][0]
        base_dates = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-04')
        ])
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
        deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': repeat_last_axis(
                np.array([10.0, 10.0, 10.0]),
                nassets,
            ),
            '2014-01-06': repeat_last_axis(
                np.array([10.0, 10.0, 11.0]),
                nassets,
            ),
        })

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
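In these pipeline tests keymap(pd.Timestamp, ...) simply promotes the readable date strings used as expected_views keys to pandas Timestamps so they line up with the calendar; for instance:

import pandas as pd
from toolz import keymap

views = keymap(pd.Timestamp, {'2014-01-03': 'view_a', '2014-01-06': 'view_b'})
print(sorted(views))  # [Timestamp('2014-01-03 00:00:00'), Timestamp('2014-01-06 00:00:00')]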
Example #9
 def _sniff_dialect(self, path):
     kwargs = self._kwargs
     dialect = sniff_dialect(path, self._sniff_nbytes,
                             encoding=self.encoding)
     kwargs = merge(dialect, keymap(alias, kwargs))
     return valfilter(lambda x: x is not None,
                      dict((d, kwargs[d])
                           for d in dialect_terms if d in kwargs))
Example #10
 def _sniff_dialect(self, path):
     kwargs = self._kwargs
     dialect = sniff_dialect(path, self._sniff_nbytes,
                             encoding=self.encoding)
     kwargs = merge(dialect, keymap(alias, kwargs))
     return valfilter(lambda x: x is not None,
                      dict((d, kwargs[d])
                           for d in dialect_terms if d in kwargs))
Example #11
    def _scatter(self, data, workers=None, broadcast=False):
        """ Scatter data to local data dictionary

        Rather than send data out to the cluster we keep data local.  However
        we do report to the scheduler that the local worker has the scattered
        data.  This allows other workers to come by and steal this data if
        desired.

        Keywords like ``broadcast=`` do not work, however operations like
        ``.replicate`` work fine after calling scatter, which can fill in for
        this functionality.
        """
        with log_errors():
            if not (workers is None and broadcast is False):
                raise NotImplementedError("Scatter from worker doesn't support workers or broadcast keywords")

            if isinstance(data, dict) and not all(isinstance(k, (bytes, str))
                                                   for k in data):
                d = yield self._scatter(keymap(tokey, data), workers, broadcast)
                raise gen.Return({k: d[tokey(k)] for k in data})

            if isinstance(data, (list, tuple, set, frozenset)):
                keys = []
                for x in data:
                    try:
                        keys.append(tokenize(x))
                    except Exception:
                        keys.append(str(uuid.uuid1()))
                data2 = dict(zip(keys, data))
            elif isinstance(data, dict):
                keys = set(data)
                data2 = data
            else:
                raise TypeError("Don't know how to scatter %s" % type(data))

            nbytes = valmap(sizeof, data2)

            # self.worker.data.update(data2)  # thread safety matters
            self.worker.loop.add_callback(self.worker.data.update, data2)

            yield self.scheduler.update_data(
                    who_has={key: [self.worker.address] for key in data2},
                    nbytes=valmap(sizeof, data2),
                    client=self.id)

            if isinstance(data, dict):
                out = {k: Future(k, self) for k in data}
            elif isinstance(data, (tuple, list, set, frozenset)):
                out = type(data)([Future(k, self) for k in keys])
            else:
                raise TypeError(
                        "Input to scatter must be a list or dict")

            for key in keys:
                self.futures[key]['status'] = 'finished'
                self.futures[key]['event'].set()

            raise gen.Return(out)
Example #12
    def test_novel_deltas(self, asset_info):
        base_dates = pd.DatetimeIndex([pd.Timestamp("2014-01-01"), pd.Timestamp("2014-01-04")])
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame(
            {
                "sid": self.sids * 2,
                "value": (0, 1, 2, 1, 2, 3),
                "asof_date": repeated_dates,
                "timestamp": repeated_dates,
            }
        )
        expr = bz.Data(baseline, name="expr", dshape=self.dshape)
        deltas = bz.Data(baseline, name="deltas", dshape=self.dshape)
        deltas = bz.transform(deltas, value=deltas.value + 10, timestamp=deltas.timestamp + timedelta(days=1))
        expected_views = keymap(
            pd.Timestamp,
            {
                "2014-01-03": np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [10.0, 11.0, 12.0]]),
                "2014-01-06": np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0], [11.0, 12.0, 13.0]]),
            },
        )
        if len(asset_info) == 4:
            expected_views = valmap(lambda view: np.c_[view, [np.nan, np.nan, np.nan]], expected_views)
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex(
            [
                pd.Timestamp("2014-01-01"),
                pd.Timestamp("2014-01-02"),
                pd.Timestamp("2014-01-03"),
                # omitting the 4th and 5th to simulate a weekend
                pd.Timestamp("2014-01-06"),
            ]
        )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product(
                    (sorted(expected_views.keys()), finder.retrieve_all(asset_info.index))
                ),
                columns=("value",),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example #13
    def _scatter(self, data, workers=None, broadcast=False, direct=None):
        """ Scatter data to local data dictionary

        Rather than send data out to the cluster we keep data local.  However
        we do report to the scheduler that the local worker has the scattered
        data.  This allows other workers to come by and steal this data if
        desired.

        Keywords like ``broadcast=`` do not work, however operations like
        ``.replicate`` work fine after calling scatter, which can fill in for
        this functionality.
        """
        with log_errors():
            if not (workers is None and broadcast is False):
                raise NotImplementedError("Scatter from worker doesn't support workers or broadcast keywords")

            if isinstance(data, dict) and not all(isinstance(k, (bytes, str))
                                                   for k in data):
                d = yield self._scatter(keymap(tokey, data), workers, broadcast)
                raise gen.Return({k: d[tokey(k)] for k in data})

            if isinstance(data, type(range(0))):
                data = list(data)
            input_type = type(data)
            names = False
            unpack = False
            if isinstance(data, (set, frozenset)):
                data = list(data)
            if not isinstance(data, (dict, list, tuple, set, frozenset)):
                unpack = True
                data = [data]
            if isinstance(data, (list, tuple)):
                names = list(map(tokenize, data))
                data = dict(zip(names, data))

            types = valmap(type, data)
            assert isinstance(data, dict)

            self.worker.update_data(data=data, report=False)

            yield self.scheduler.update_data(
                    who_has={key: [self.worker.address] for key in data},
                    nbytes=valmap(sizeof, data),
                    client=self.id)

            out = {k: self._Future(k, self) for k in data}
            for key, typ in types.items():
                self.futures[key].finish(type=typ)

            if issubclass(input_type, (list, tuple, set, frozenset)):
                out = input_type(out[k] for k in names)

            if unpack:
                assert len(out) == 1
                out = list(out.values())[0]
            raise gen.Return(out)
Example #14
    def test_deltas(self, asset_info):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        deltas = bz.Data(self.df, dshape=self.dshape)
        deltas = bz.Data(
            odo(
                bz.transform(
                    deltas,
                    value=deltas.value + 10,
                    timestamp=deltas.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )

        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                    [2.0, 3.0, 4.0]]),
            '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                    [12.0, 13.0, 14.0]]),
        })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #15
    def read(cls, rootdir):
        path = cls.metadata_path(rootdir)
        with open(path) as fp:
            raw_data = json.load(fp)

            try:
                version = raw_data['version']
            except KeyError:
                # Version was first written with version 1, assume 0,
                # if version does not match.
                version = 0

            default_ohlc_ratio = raw_data['ohlc_ratio']

            if version >= 1:
                minutes_per_day = raw_data['minutes_per_day']
            else:
                # version 0 always assumed US equities.
                minutes_per_day = US_EQUITIES_MINUTES_PER_DAY

            if version >= 2:
                calendar = get_calendar(raw_data['calendar_name'])
                start_session = pd.Timestamp(raw_data['start_session'],
                                             tz='UTC')
                end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
            else:
                # No calendar info included in older versions, so
                # default to SZSH.
                calendar = get_calendar()

                start_session = pd.Timestamp(raw_data['first_trading_day'],
                                             tz='UTC')
                end_session = calendar.minute_to_session_label(
                    pd.Timestamp(raw_data['market_closes'][-1],
                                 unit='m',
                                 tz='UTC'))

            if version >= 3:
                ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid']
                if ohlc_ratios_per_sid is not None:
                    ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
            else:
                ohlc_ratios_per_sid = None

            return cls(
                default_ohlc_ratio,
                ohlc_ratios_per_sid,
                calendar,
                start_session,
                end_session,
                minutes_per_day,
                version=version,
            )
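Because JSON object keys are always strings, the keymap(int, ohlc_ratios_per_sid) call restores the integer sids after json.load; e.g.:

import json
from toolz import keymap

raw = json.loads('{"1": 1000, "2": 100}')
print(keymap(int, raw))  # {1: 1000, 2: 100}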
Example #16
    def test_deltas(self, asset_info):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        deltas = bz.Data(self.df, dshape=self.dshape)
        deltas = bz.Data(
            odo(
                bz.transform(
                    deltas,
                    value=deltas.value + 10,
                    timestamp=deltas.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )

        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': np.array([[10.0, 11.0, 12.0], [1.0, 2.0, 3.0]]),
                '2014-01-03': np.array([[11.0, 12.0, 13.0], [2.0, 3.0, 4.0]]),
                '2014-01-04': np.array([[12.0, 13.0, 14.0], [12.0, 13.0, 14.0]
                                        ]),
            })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([12] * nassets, [13] * nassets, [14] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #17
 def probabilities(self, probabilities: dict):
     if probabilities is None:
         self._probabilities = None
     elif isinstance(probabilities, str):
         self._probabilities = {validate_str(probabilities): 1.0}
     elif isinstance(probabilities, dict):
         if abs(sum(probabilities.values()) - 1.0) > 1.0e-9:
             raise ValueError("probabilities must sum to 1.0")
         self._probabilities = keymap(validate_str, probabilities)
     else:
         raise TypeError("probabilities must be dict or single value")
Example #18
    def read(cls, rootdir):
        path = cls.metadata_path(rootdir)
        with open(path) as fp:
            raw_data = json.load(fp)

            try:
                version = raw_data["version"]
            except KeyError:
                # Version was first written with version 1, assume 0,
                # if version does not match.
                version = 0

            default_ohlc_ratio = raw_data["ohlc_ratio"]

            if version >= 1:
                minutes_per_day = raw_data["minutes_per_day"]
            else:
                # version 0 always assumed US equities.
                minutes_per_day = US_EQUITIES_MINUTES_PER_DAY

            if version >= 2:
                calendar = get_calendar(raw_data["calendar_name"])
                start_session = pd.Timestamp(raw_data["start_session"],
                                             tz="UTC")
                end_session = pd.Timestamp(raw_data["end_session"], tz="UTC")
            else:
                # No calendar info included in older versions, so
                # default to NYSE.
                calendar = get_calendar("XNYS")

                start_session = pd.Timestamp(raw_data["first_trading_day"],
                                             tz="UTC")
                end_session = calendar.minute_to_session_label(
                    pd.Timestamp(raw_data["market_closes"][-1],
                                 unit="m",
                                 tz="UTC"))

            if version >= 3:
                ohlc_ratios_per_sid = raw_data["ohlc_ratios_per_sid"]
                if ohlc_ratios_per_sid is not None:
                    ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
            else:
                ohlc_ratios_per_sid = None

            return cls(
                default_ohlc_ratio,
                ohlc_ratios_per_sid,
                calendar,
                start_session,
                end_session,
                minutes_per_day,
                version=version,
            )
Example #19
    def read(cls, rootdir):
        path = cls.metadata_path(rootdir)
        with open(path) as fp:
            raw_data = json.load(fp)

            try:
                version = raw_data['version']
            except KeyError:
                # Version was first written with version 1, assume 0,
                # if version does not match.
                version = 0

            default_ohlc_ratio = raw_data['ohlc_ratio']

            if version >= 1:
                minutes_per_day = raw_data['minutes_per_day']
            else:
                # version 0 always assumed US equities.
                minutes_per_day = US_EQUITIES_MINUTES_PER_DAY

            if version >= 2:
                calendar = get_calendar(raw_data['calendar_name'])
                start_session = pd.Timestamp(
                    raw_data['start_session'], tz='UTC')
                end_session = pd.Timestamp(raw_data['end_session'], tz='UTC')
            else:
                # No calendar info included in older versions, so
                # default to NYSE.
                calendar = get_calendar('NYSE')

                start_session = pd.Timestamp(
                    raw_data['first_trading_day'], tz='UTC')
                end_session = calendar.minute_to_session_label(
                    pd.Timestamp(
                        raw_data['market_closes'][-1], unit='m', tz='UTC')
                )

            if version >= 3:
                ohlc_ratios_per_sid = raw_data['ohlc_ratios_per_sid']
                if ohlc_ratios_per_sid is not None:
                    ohlc_ratios_per_sid = keymap(int, ohlc_ratios_per_sid)
            else:
                ohlc_ratios_per_sid = None

            return cls(
                default_ohlc_ratio,
                ohlc_ratios_per_sid,
                calendar,
                start_session,
                end_session,
                minutes_per_day,
                version=version,
            )
Example #20
File: csv.py Project: Will-So/odo
 def __init__(self, path, has_header='no-input', encoding='utf-8', **kwargs):
     self.path = path
     if has_header == 'no-input':
         if not os.path.exists(path):
             self.has_header = True
         else:
             self.has_header = None
     else:
         self.has_header = has_header
     self.encoding = encoding
     kwargs = keymap(alias, kwargs)
     self.dialect = dict((d, kwargs[d]) for d in dialect_terms
                                        if d in kwargs)
Example #21
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        deltas = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [self.dates[1], self.dates[0]],
            'timestamp': [self.dates[2], self.dates[1]],
            'value': [10, 11],
        })
        deltas = bz.Data(deltas, name='deltas', dshape=self.dshape)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': np.array([[0.0, 11.0, 2.0],
                                    [1.0, 2.0, 3.0]]),
            '2014-01-03': np.array([[10.0, 2.0, 3.0],
                                    [2.0, 3.0, 4.0]]),
            '2014-01-04': np.array([[2.0, 3.0, 4.0],
                                    [2.0, 3.0, 4.0]]),
        })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                columns=[
                    'value',
                ],
                data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #22
File: csv.py Project: gwulfs/odo
 def __init__(self, path, has_header=None, encoding='utf-8',
              sniff_nbytes=10000, **kwargs):
     self.path = path
     if has_header is None:
         self.has_header = (not os.path.exists(path) or
                            infer_header(path, sniff_nbytes))
     else:
         self.has_header = has_header
     self.encoding = encoding if encoding is not None else 'utf-8'
     kwargs = merge(sniff_dialect(path, sniff_nbytes, encoding=encoding),
                    keymap(alias, kwargs))
     self.dialect = valfilter(bool,
                              dict((d, kwargs[d])
                                   for d in dialect_terms if d in kwargs))
Example #23
    def from_dict(state_data):
        new_state = GameState(state_data['title'])

        new_state.is_started = state_data['is_started']
        new_state.grid = GameState._build_grid_from_firestore_map(
            state_data['grid'])
        new_state.player_order = state_data['player_order']
        new_state.current_turn_player = state_data['current_turn_player']
        new_state.current_action_player = state_data['current_action_player']
        new_state.current_action_type = ActionType(
            state_data['current_action_type'])
        new_state.current_action_details = state_data['current_action_details']
        new_state.stock_availability = {
            Brand(brand): stock_count
            for brand, stock_count in state_data['stock_availability'].items()
        }
        new_state.money_by_player = state_data['money_by_player']
        new_state.stock_by_player = {
            player: {
                Brand(brand_value): amount
                for brand_value, amount in stock_map.items()
            }
            for player, stock_map in state_data['stock_by_player'].items()
        }
        new_state.user_data_by_id = state_data['user_data_by_id']
        new_state.tiles_remaining = state_data['tiles_remaining']
        new_state.cost_by_brand = toolz.keymap(Brand,
                                               state_data['cost_by_brand'])
        new_state.inactive_brands = [
            Brand(brand_value) for brand_value in state_data['inactive_brands']
        ]
        new_state.active_brands = [
            Brand(brand_value) for brand_value in state_data['active_brands']
        ]
        new_state.most_recently_placed_tile = None if not state_data[
            'most_recently_placed_tile'] else Tile(
                **state_data['most_recently_placed_tile'])
        new_state.most_recent_actions = state_data['most_recent_actions']
        new_state.acquisition_resolution_queue = [{
            'player_id':
            details['player_id'],
            'acquirer':
            Brand(details['acquirer']),
            'acquiree':
            Brand(details['acquiree']),
            'acquiree_cost_at_acquisition_time':
            details['acquiree_cost_at_acquisition_time']
        } for details in state_data['acquisition_resolution_queue']]

        return new_state
Example #24
    def test_deltas_only_one_delta_in_universe(self, asset_info):
        expr = bz.Data(self.df, name='expr', dshape=self.dshape)
        deltas = pd.DataFrame({
            'sid': [65, 66],
            'asof_date': [self.dates[1], self.dates[0]],
            'timestamp': [self.dates[2], self.dates[1]],
            'value': [10, 11],
        })
        deltas = bz.Data(deltas, name='deltas', dshape=self.dshape)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': np.array([[0.0, 11.0, 2.0], [1.0, 2.0, 3.0]]),
                '2014-01-03': np.array([[10.0, 2.0, 3.0], [2.0, 3.0, 4.0]]),
                '2014-01-04': np.array([[2.0, 3.0, 4.0], [2.0, 3.0, 4.0]]),
            })

        nassets = len(asset_info)
        if nassets == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan]],
                expected_views,
            )

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                columns=[
                    'value',
                ],
                data=np.array([11, 10, 4]).repeat(len(asset_info.index)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
            )
            dates = self.dates
            dates = dates.insert(len(dates), dates[-1] + timedelta(days=1))
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #25
 def __init__(self,
              path,
              has_header='no-input',
              encoding='utf-8',
              **kwargs):
     self.path = path
     if has_header == 'no-input':
         if not os.path.exists(path):
             self.has_header = True
         else:
             self.has_header = None
     else:
         self.has_header = has_header
     self.encoding = encoding
     kwargs = keymap(alias, kwargs)
     self.dialect = dict(
         (d, kwargs[d]) for d in dialect_terms if d in kwargs)
Example #26
 def __init__(self,
              path,
              has_header=None,
              encoding='utf-8',
              sniff_nbytes=10000,
              **kwargs):
     self.path = path
     if has_header is None:
         self.has_header = (not os.path.exists(path)
                            or infer_header(path, sniff_nbytes))
     else:
         self.has_header = has_header
     self.encoding = encoding if encoding is not None else 'utf-8'
     kwargs = merge(sniff_dialect(path, sniff_nbytes, encoding=encoding),
                    keymap(alias, kwargs))
     self.dialect = valfilter(
         bool, dict((d, kwargs[d]) for d in dialect_terms if d in kwargs))
Example #27
 def get_rights_positions(self, dts):
     # Get the positions whose rights-issue record date falls on dts --- sell them, since the trading halt would otherwise create an opportunity cost
     assets = set(self.positions)
     # print('ledger assets', assets)
     rights = self.position_tracker.retrieve_equity_rights(assets, dts)
     # print('ledger rights', rights)
     mapping_protocol = keymap(lambda x: x.sid, self.positions)
     # print('ledger mapping_protocol', mapping_protocol)
     union_assets = set(mapping_protocol) & set(rights.index)
     # print('ledger union_assets', union_assets)
     union_positions = keyfilter(lambda x: x in union_assets,
                                 mapping_protocol) if union_assets else None
     # print('ledger union_positions', union_positions)
     right_positions = list(
         union_positions.values()) if union_positions else []
     # print('right_positions', right_positions)
     return right_positions
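The keymap/keyfilter pair above re-keys the positions by sid and then keeps only the sids that also appear in rights.index; schematically, with a hypothetical Asset stand-in:

from collections import namedtuple
from toolz import keymap, keyfilter

Asset = namedtuple('Asset', 'sid')  # hypothetical stand-in for the real asset type
positions = {Asset(65): 'position-65', Asset(66): 'position-66'}
by_sid = keymap(lambda a: a.sid, positions)
rights_index = {66}
print(keyfilter(lambda sid: sid in rights_index, by_sid))  # {66: 'position-66'}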
Example #28
def get_teams(event_comment: str) -> Teams:
    """
    Parse the event comment to figure out who was playing on which team.

    :param event_comment: The comment from meetup saying what the teams are.

    :return: Teams dict with 'Red' and 'Blue' keys, the values are lists of
             player names.
    """
    pattern = re.compile(r'\s*(red|blue):([^$]*)$', flags=re.IGNORECASE)
    teams = dict(
        pattern.findall(line) | to(list) | to(first)
        for line in event_comment.split('\n') if pattern.match(line))
    teams = keymap(str.title, teams)
    teams = valmap(lambda x: [y.strip() for y in x.split(',')], teams)

    return teams
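Here keymap(str.title, teams) normalizes however the colour was typed ('red', 'BLUE', ...) to 'Red'/'Blue', and valmap then splits the comma-separated player lists; roughly:

from toolz import keymap, valmap

teams = {'red': 'Ann, Bob', 'BLUE': 'Cat , Dan'}
teams = keymap(str.title, teams)
teams = valmap(lambda x: [y.strip() for y in x.split(',')], teams)
print(teams)  # {'Red': ['Ann', 'Bob'], 'Blue': ['Cat', 'Dan']}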
Example #29
def patch_cacheops(g):
    REDIS_URL = g.get('REDIS_URL')
    if not REDIS_URL:
        return

    log_setting('CACHEOPS', 'is enabled')

    g['CACHEOPS_REDIS'] = keymap(str.lower, dj_redis_url.parse(REDIS_URL))

    g['INSTALLED_APPS'].append('cacheops')

    g['CACHEOPS_DEGRADE_ON_FAILURE'] = True

    g['CACHEOPS_DEFAULTS'] = {'timeout': IN_SECONDS.FIFTEEN_MINUTES}
    g['CACHEOPS'] = {
        # Automatically cache any User.objects.get() calls for 15 minutes
        # This includes request.user or post.author access,
        # where Post.author is a foreign key to auth.User
        'auth.user': {'ops': 'get'},
        'core.user': {'ops': 'get'},

        # Automatically cache all gets and queryset fetches
        # to other django.contrib.auth models for an hour
        'auth.*': {'ops': ('fetch', 'get'), 'timeout': IN_SECONDS.ONE_HOUR},

        # Cache gets, fetches, counts and exists to Permission
        # 'all' is just an alias for ('get', 'fetch', 'count', 'exists')
        'auth.permission': {'ops': 'all', 'timeout': IN_SECONDS.ONE_HOUR},

        # Basically Never changing objects. Allow local_get (in memory)
        'event.event': {'ops': 'all', 'local_get': True},
        'ticket.tickettype': {'ops': 'all', 'local_get': True},
        'ticket.tickettier': {'ops': 'all', 'local_get': True},
        'ticket.ticketaddontype': {'ops': 'all', 'local_get': False},

        # Enable manual caching on all other models with default timeout of an hour
        # Use Post.objects.cache().get(...)
        #  or Tags.objects.filter(...).order_by(...).cache()
        # to cache particular ORM request.
        # Invalidation is still automatic
        '*.*': {'ops': (), 'timeout': IN_SECONDS.ONE_HOUR},

        # And since ops is empty by default you can rewrite last line as:
        '*.*': {'timeout': IN_SECONDS.ONE_HOUR},
    }
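dj_redis_url.parse returns Django-style upper-case keys, while the redis client behind CACHEOPS_REDIS takes lower-case keyword arguments, so keymap(str.lower, ...) bridges the two; a sketch with a hand-written dict standing in for the parse result:

from toolz import keymap

parsed = {'HOST': 'localhost', 'PORT': 6379, 'DB': 0}  # stand-in for dj_redis_url.parse(REDIS_URL)
print(keymap(str.lower, parsed))  # {'host': 'localhost', 'port': 6379, 'db': 0}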
Example #30
def validated_payouts(payouts_in):
    """
    This method validates json transactions.
    It ensures `recipient` addresses are valid ETH addresses,
    and expands `bucket` aliases into proper bucket_ids.
    """
    # swap bucket name with matching ID
    payouts = [{
        **x,
        'bucket': buckets[x['bucket']],
        'amount': float(x['amount'].replace(',', '')),
    } for x in (keymap(rename_field, y) for y in payouts_in)]

    # validate addresses
    for payout in payouts:
        validate_address(payout['recipient'])

    return payouts
Example #31
def update_matches(date: datetime.date, teams: Teams,
                   winner: typing.Literal['r', 'b', 'd']) -> List[PlayerMatch]:
    """
    Update the matches json file containing a history of the matches that were
    played. If the match for a particular date already exists those values are
    overwritten with the new values.

    :param date: Date that the match took place.
    :param teams: Which players played on which colour team.
    :param winner: Which team was the winner (or draw)

    :return: The records from all matches that have been played.
    """
    winner = {'b': 'Blue', 'r': 'Red', 'd': 'Draw'}[winner]

    with open(DIR / '../data/matches.json') as f:
        matches = json.load(f)
        matches_by_date = groupby(lambda x: x['date'], matches | to(list))
        matches_by_date = keymap(
            lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').date(),
            matches_by_date,
        )

    matches_by_date[date] = [{
        'date': date.strftime('%Y-%m-%d'),
        'name': name,
        'team': team,
        'points': {
            team: 3,
            'Draw': 1
        }.get(winner, 0),
    } for team, names in teams.items() for name in names]

    with open(DIR / '../data/matches.json', 'w') as f:
        matches = [
            match for date, matches in matches_by_date.items()
            for match in matches
        ]
        json.dump(sorted(matches, key=lambda x: x['date'], reverse=True),
                  f,
                  indent=2)

    return matches
Example #32
    def current_portfolio_weights(self):
        """
        Compute each asset's weight in the portfolio by calculating its held
        value divided by the total value of all positions.

        Each equity's value is its price times the number of shares held. Each
        futures contract's value is its unit price times number of shares held
        times the multiplier.
        """
        if self.positions:
            # assets can vary by tag name --- different pipelines may share the same sid
            p_values = valmap(lambda x: x.last_sync_price * x.amount,
                              self.positions)
            p_values = keymap(lambda x: x.sid, p_values)
            aggregate = merge_with(sum, p_values)
            weights = pd.Series(aggregate) / self.portfolio_value
        else:
            weights = pd.Series(dtype='float')
        return weights.to_dict()
Example #33
    def test_deltas_macro(self):
        asset_info = asset_infos[0][0]
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        deltas = bz.Data(
            self.macro_df.iloc[:-1],
            name='deltas',
            dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
                '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
            })

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            dates = self.dates
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #34
    def test_deltas_macro(self):
        asset_info = asset_infos[0][0]
        expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
        deltas = bz.Data(
            self.macro_df.iloc[:-1],
            name='deltas',
            dshape=self.macro_dshape,
        )
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(pd.Timestamp, {
            '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
            '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
        })

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            dates = self.dates
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=dates,
                start=dates[1],
                end=dates[-1],
                window_length=2,
                compute_fn=np.nanmax,
            )
Example #35
def create_sentiment():
    sa = vader.SentimentIntensityAnalyzer()
    import unicodecsv as csv

    comments_with_sentiment = (
        toolz.merge(
            dict(
                id=d["id"],
                time=dt.datetime.utcfromtimestamp(float(d["created_utc"])).strftime("%Y-%m-%d %H:%M:%S"),
                ups=d["ups"],
                contr=d["controversiality"]
            ),
            toolz.keymap(
                lambda x: "vader_" + x,
                sa.polarity_scores(d["body"])
            ),
            dict(zip(
                ["pattern_polarity", "pattern_subjectivity"],
                pattern.sentiment(d["body"])
            ))
        )
        for d in corpus.load_json(
                p=None,  # just do all
                include_is=True,
                include_oos=True,
                #filter_deleted=False
        )
    )

    with open("data-sentiment/sentiment.csv", "w") as o:
        c = next(comments_with_sentiment)
        writer = csv.DictWriter(o, c.keys())
        writer.writeheader()
        writer.writerow(c)
        for c in comments_with_sentiment:
            writer.writerow(c)
Example #36
    def test_novel_deltas_macro(self):
        asset_info = asset_infos[0][0]
        base_dates = pd.DatetimeIndex(
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-04')])
        baseline = pd.DataFrame({
            'value': (0, 1),
            'asof_date': base_dates,
            'timestamp': base_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
        deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )

        nassets = len(asset_info)
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-03':
                repeat_last_axis(
                    np.array([10.0, 10.0, 10.0]),
                    nassets,
                ),
                '2014-01-06':
                repeat_last_axis(
                    np.array([10.0, 10.0, 11.0]),
                    nassets,
                ),
            })

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])
        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                list(concatv([10] * nassets, [11] * nassets)),
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example #37
    def schema(self):
        """

        Examples
        --------
        >>> from blaze import symbol
        >>> t = symbol('t', 'var * {name: string, amount: int}')
        >>> s = symbol('t', 'var * {name: string, id: int}')

        >>> join(t, s).schema
        dshape("{name: string, amount: int32, id: int32}")

        >>> join(t, s, how='left').schema
        dshape("{name: string, amount: int32, id: ?int32}")

        Overlapping but non-joined fields append _left, _right

        >>> a = symbol('a', 'var * {x: int, y: int}')
        >>> b = symbol('b', 'var * {x: int, y: int}')
        >>> join(a, b, 'x').fields
        ['x', 'y_left', 'y_right']
        """
        option = lambda dt: dt if isinstance(dt, Option) else Option(dt)

        on_left = self.on_left
        if not isinstance(on_left, list):
            on_left = on_left,

        on_right = self.on_right
        if not isinstance(on_right, list):
            on_right = on_right,

        right_types = keymap(
            dict(zip(on_right, on_left)).get,
            self.rhs.dshape.measure.dict,
        )
        joined = (
            (name, promote(dt, right_types[name], promote_option=False))
            for n, (name, dt) in enumerate(filter(
                compose(op.contains(on_left), first),
                self.lhs.dshape.measure.fields,
            ))
        )

        left = [
            (name, dt) for name, dt in zip(
                self.lhs.fields,
                types_of_fields(self.lhs.fields, self.lhs)
            ) if name not in on_left
        ]

        right = [
            (name, dt) for name, dt in zip(
                self.rhs.fields,
                types_of_fields(self.rhs.fields, self.rhs)
            ) if name not in on_right
        ]

        # Handle overlapping but non-joined fields
        left_other = set(name for name, dt in left if name not in on_left)
        right_other = set(name for name, dt in right if name not in on_right)
        overlap = left_other & right_other

        left_suffix, right_suffix = self.suffixes
        left = ((name + left_suffix if name in overlap else name, dt)
                for name, dt in left)
        right = ((name + right_suffix if name in overlap else name, dt)
                 for name, dt in right)

        if self.how in ('right', 'outer'):
            left = ((name, option(dt)) for name, dt in left)
        if self.how in ('left', 'outer'):
            right = ((name, option(dt)) for name, dt in right)

        return dshape(Record(chain(joined, left, right)))
Example #38
File: zmq.py Project: mindw/partd
 def append(self, data, lock=None):
     log('Client appends', self.address, str(len(data)) + ' keys')
     data = keymap(serialize_key, data)
     payload = list(chain.from_iterable(data.items()))
     self.send(b'append', payload)
Example #39
        dt.Struct: np.object_,
    },
)

_numpy_dtypes = toolz.keymap(
    np.dtype,
    {
        'bool': dt.boolean,
        'int8': dt.int8,
        'int16': dt.int16,
        'int32': dt.int32,
        'int64': dt.int64,
        'uint8': dt.uint8,
        'uint16': dt.uint16,
        'uint32': dt.uint32,
        'uint64': dt.uint64,
        'float16': dt.float16,
        'float32': dt.float32,
        'float64': dt.float64,
        'double': dt.double,
        'unicode': dt.string,
        'str': dt.string,
        'datetime64': dt.timestamp,
        'datetime64[ns]': dt.timestamp,
        'timedelta64': dt.interval,
        'timedelta64[ns]': dt.Interval('ns'),
    },
)

_inferable_pandas_dtypes = {
    'boolean': dt.boolean,
    'string': dt.string,
Example #40
0
# Imports required by this fragment.
import sys

import numpy as np
from toolz import itemmap, keymap

MAPPING = {'object': 'String',
           'uint64': 'UInt64',
           'uint32': 'UInt32',
           'uint16': 'UInt16',
           'uint8': 'UInt8',
           'float64': 'Float64',
           'float32': 'Float32',
           'int64': 'Int64',
           'int32': 'Int32',
           'int16': 'Int16',
           'int8': 'Int8',
           'datetime64[D]': 'Date',
           'datetime64[ns]': 'DateTime'}

PD2CH = keymap(np.dtype, MAPPING)
CH2PD = itemmap(reversed, MAPPING)
CH2PD['Null'] = 'object'
CH2PD['Nothing'] = 'object'

NULLABLE_COLS = ['UInt64', 'UInt32', 'UInt16', 'UInt8', 'Float64', 'Float32',
                 'Int64', 'Int32', 'Int16', 'Int8', 'String', 'DateTime']

for col in NULLABLE_COLS:
    CH2PD['Nullable({})'.format(col)] = CH2PD[col]
PY3 = sys.version_info[0] == 3


def normalize(df, index=True):
    if index:
        df = df.reset_index()
Example #41
0
    def test_novel_deltas(self, asset_info):
        base_dates = pd.DatetimeIndex(
            [pd.Timestamp('2014-01-01'),
             pd.Timestamp('2014-01-04')])
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.sids * 2,
            'value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.Data(baseline, name='expr', dshape=self.dshape)
        deltas = bz.Data(baseline, name='deltas', dshape=self.dshape)
        deltas = bz.transform(
            deltas,
            value=deltas.value + 10,
            timestamp=deltas.timestamp + timedelta(days=1),
        )
        expected_views = keymap(
            pd.Timestamp, {
                '2014-01-03':
                np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0],
                          [10.0, 11.0, 12.0]]),
                '2014-01-06':
                np.array([[10.0, 11.0, 12.0], [10.0, 11.0, 12.0],
                          [11.0, 12.0, 13.0]]),
            })
        if len(asset_info) == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
                expected_views,
            )
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value', ),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )
Example #42
0
    },
)


_numpy_dtypes = toolz.keymap(
    np.dtype,
    {
        'bool': dt.boolean,
        'int8': dt.int8,
        'int16': dt.int16,
        'int32': dt.int32,
        'int64': dt.int64,
        'uint8': dt.uint8,
        'uint16': dt.uint16,
        'uint32': dt.uint32,
        'uint64': dt.uint64,
        'float16': dt.float16,
        'float32': dt.float32,
        'float64': dt.float64,
        'double': dt.double,
        'unicode': dt.string,
        'str': dt.string,
        'datetime64': dt.timestamp,
        'datetime64[ns]': dt.timestamp,
        'timedelta64': dt.interval,
        'timedelta64[ns]': dt.Interval('ns'),
    },
)


_inferable_pandas_dtypes = {
    'boolean': dt.boolean,
Example #43
0
File: zmq.py Project: dask/partd
    def append(self, data, lock=None):
        logger.debug('Client appends %s %s', self.address, str(len(data)) + ' keys')
        data = keymap(serialize_key, data)
        payload = list(chain.from_iterable(data.items()))
        self.send(b'append', payload)
Example #44
0
def scalar_type(t):
    # compatibility
    return dtype(t).scalar_type()


_numpy_to_ibis = toolz.keymap(
    np.dtype, {
        'bool': boolean,
        'int8': int8,
        'int16': int16,
        'int32': int32,
        'int64': int64,
        'uint8': uint8,
        'uint16': uint16,
        'uint32': uint32,
        'uint64': uint64,
        'float16': float16,
        'float32': float32,
        'float64': float64,
        'double': double,
        'str': string,
        'datetime64': timestamp,
        'datetime64[ns]': timestamp,
        'timedelta64': interval,
        'timedelta64[ns]': Interval('ns')
    })

dtype = Dispatcher('dtype')

validate_type = dtype
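The reason the keys above are wrapped in np.dtype is canonicalization: several
spellings of the same NumPy type normalize to one dtype object, so a lookup
only has to pass whatever spelling the caller supplied through np.dtype. A
small self-contained sketch (plain strings as values, purely for illustration):

import numpy as np
from toolz import keymap

table = keymap(np.dtype, {'int64': 'an int64 column', 'float64': 'a float64 column'})

# 'double', np.float64 and 'float64' all normalize to the same np.dtype key.
assert table[np.dtype('double')] == table[np.dtype(np.float64)] == 'a float64 column'

One side effect is that aliases in the source mapping, such as 'double' and
'float64', collapse onto a single np.dtype key after keymap, with the later
entry winning.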
Example #45
0
    def schema(self):
        """

        Examples
        --------
        >>> from blaze import symbol
        >>> t = symbol('t', 'var * {name: string, amount: int}')
        >>> s = symbol('t', 'var * {name: string, id: int}')

        >>> join(t, s).schema
        dshape("{name: string, amount: int32, id: int32}")

        >>> join(t, s, how='left').schema
        dshape("{name: string, amount: int32, id: ?int32}")

        Overlapping but non-joined fields append _left, _right

        >>> a = symbol('a', 'var * {x: int, y: int}')
        >>> b = symbol('b', 'var * {x: int, y: int}')
        >>> join(a, b, 'x').fields
        ['x', 'y_left', 'y_right']
        """

        option = lambda dt: dt if isinstance(dt, Option) else Option(dt)

        on_left = self.on_left
        if not isinstance(on_left, list):
            on_left = on_left,

        on_right = self.on_right
        if not isinstance(on_right, list):
            on_right = on_right,

        right_types = keymap(
            dict(zip(on_right, on_left)).get,
            self.rhs.dshape.measure.dict,
        )
        joined = ((name, promote(dt, right_types[name], promote_option=False))
                  for n, (name, dt) in enumerate(
                      filter(
                          compose(op.contains(on_left), first),
                          self.lhs.dshape.measure.fields,
                      )))

        left = [(name, dt) for name, dt in zip(
            self.lhs.fields, types_of_fields(self.lhs.fields, self.lhs))
                if name not in on_left]

        right = [(name, dt) for name, dt in zip(
            self.rhs.fields, types_of_fields(self.rhs.fields, self.rhs))
                 if name not in on_right]

        # Handle overlapping but non-joined fields, e.g. both tables having a
        # 'y' column that is not a join key; those get the _left/_right suffixes.
        left_other = set(name for name, dt in left if name not in on_left)
        right_other = set(name for name, dt in right if name not in on_right)
        overlap = left_other & right_other

        left_suffix, right_suffix = self.suffixes
        left = ((name + left_suffix if name in overlap else name, dt)
                for name, dt in left)
        right = ((name + right_suffix if name in overlap else name, dt)
                 for name, dt in right)

        if self.how in ('right', 'outer'):
            left = ((name, option(dt)) for name, dt in left)
        if self.how in ('left', 'outer'):
            right = ((name, option(dt)) for name, dt in right)

        return dshape(Record(chain(joined, left, right)))
Example #46
0
    def get(self):
        resp = keymap(str, valmap(sizeof, self.server.data))
        self.write(resp)
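The handler above combines keymap and valmap to turn an in-memory data mapping
into something JSON-serializable: values are replaced by their estimated sizes
and keys by their string form. A self-contained sketch of the same pattern,
using sys.getsizeof as a stand-in for the sizeof used above:

import sys
from toolz import keymap, valmap

data = {('x', 0): [1, 2, 3], ('x', 1): 'hello'}  # made-up task keys and results

# Keys become strings, values become byte counts, so the result can be
# written out as JSON.
resp = keymap(str, valmap(sys.getsizeof, data))
print(resp)  # {"('x', 0)": <size in bytes>, "('x', 1)": <size in bytes>}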
Example #47
0
    def test_novel_deltas(self, asset_info):
        base_dates = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-04')
        ])
        repeated_dates = base_dates.repeat(3)
        baseline = pd.DataFrame({
            'sid': self.sids * 2,
            'value': (0., 1., 2., 1., 2., 3.),
            'int_value': (0, 1, 2, 1, 2, 3),
            'asof_date': repeated_dates,
            'timestamp': repeated_dates,
        })
        expr = bz.data(baseline, name='expr', dshape=self.dshape)
        deltas = bz.data(
            odo(
                bz.transform(
                    expr,
                    value=expr.value + 10,
                    timestamp=expr.timestamp + timedelta(days=1),
                ),
                pd.DataFrame,
            ),
            name='delta',
            dshape=self.dshape,
        )
        expected_views = keymap(pd.Timestamp, {
            '2014-01-03': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0]]),
            '2014-01-06': np.array([[10.0, 11.0, 12.0],
                                    [10.0, 11.0, 12.0],
                                    [11.0, 12.0, 13.0]]),
        })
        if len(asset_info) == 4:
            expected_views = valmap(
                lambda view: np.c_[view, [np.nan, np.nan, np.nan]],
                expected_views,
            )
            expected_output_buffer = [10, 11, 12, np.nan, 11, 12, 13, np.nan]
        else:
            expected_output_buffer = [10, 11, 12, 11, 12, 13]

        cal = pd.DatetimeIndex([
            pd.Timestamp('2014-01-01'),
            pd.Timestamp('2014-01-02'),
            pd.Timestamp('2014-01-03'),
            # omitting the 4th and 5th to simulate a weekend
            pd.Timestamp('2014-01-06'),
        ])

        with tmp_asset_finder(equities=asset_info) as finder:
            expected_output = pd.DataFrame(
                expected_output_buffer,
                index=pd.MultiIndex.from_product((
                    sorted(expected_views.keys()),
                    finder.retrieve_all(asset_info.index),
                )),
                columns=('value',),
            )
            self._run_pipeline(
                expr,
                deltas,
                expected_views,
                expected_output,
                finder,
                calendar=cal,
                start=cal[2],
                end=cal[-1],
                window_length=3,
                compute_fn=op.itemgetter(-1),
            )