Example No. 1
    def __iter__(self):
        offset = self.offset
        cur = self.start
        if offset._normalizeFirst:
            cur = datetools.normalize_date(cur)
        while cur <= self.end:
            yield cur
            cur = cur + offset
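The iterator above steps from a normalized start date up to an inclusive end, adding a pandas DateOffset on each pass. A minimal self-contained sketch of the same idea, using a plain timedelta as a stand-in for the DateOffset (the names here are illustrative, not part of the library):

from datetime import datetime, timedelta

def iter_dates(start, end, step=timedelta(days=1)):
    # Drop the time-of-day first, as datetools.normalize_date does above,
    # then yield dates until the inclusive end is passed.
    cur = start.replace(hour=0, minute=0, second=0, microsecond=0)
    while cur <= end:
        yield cur
        cur = cur + step

for d in iter_dates(datetime(2010, 1, 1, 9, 30), datetime(2010, 1, 4)):
    print(d)  # 2010-01-01, 2010-01-02, 2010-01-03, 2010-01-04 (all at midnight)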
Example No. 2
def load_ipython_extension(ip):
    global _loaded
    if not _loaded:
        ip.register_magics(MDFMagics)

        # create the ambient context
        today = datetools.normalize_date(datetime.now())
        ctx = MDFContext(today)
        ctx._activate_ctx()

        _loaded = True

        print("""Use the magic function %mdf_help for a list of commands""")
Example No. 3
    def mdf_reset(self, parameter_s=""):
        """
        Resets the current mdf context, and optionally sets the current date.

        %mdf_reset [date]

        eg: %mdf_reset
        or: %mdf_reset 2010-01-01
        """
        if parameter_s:
            now = _parse_datetime(parameter_s, self.shell.user_global_ns, self.shell.user_ns)
        else:
            now = datetools.normalize_date(datetime.now())
        ctx = MDFContext(now)
        ctx._activate_ctx()
Example No. 4
    def mdf_reset(self, parameter_s=""):
        """
        Resets the current mdf context, and optionally sets the current date.

        %mdf_reset [date]

        eg: %mdf_reset
        or: %mdf_reset 2010-01-01
        """
        if parameter_s:
            now = _parse_datetime(parameter_s, self.shell.user_global_ns,
                                  self.shell.user_ns)
        else:
            now = datetools.normalize_date(datetime.now())
        ctx = MDFContext(now)
        ctx._activate_ctx()
Example No. 5
def generate_range(start=None, end=None, periods=None,
                   offset=datetools.BDay(), time_rule=None):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments

    Parameters
    ----------
    start : datetime (default None)
    end : datetime (default None)
    periods : int, optional

    Note
    ----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end.

    Returns
    -------
    dates : generator object

    See also
    --------
    DateRange, dateutil.rrule
    """

    if time_rule is not None:
        offset = datetools.getOffset(time_rule)

    if time_rule is None:
        if offset in datetools._offsetNames:
            time_rule = datetools._offsetNames[offset]

    start = datetools.to_datetime(start)
    end = datetools.to_datetime(end)

    if start and not offset.onOffset(start):
        start = offset.rollforward(start)

    if end and not offset.onOffset(end):
        end = offset.rollback(end)

        if periods is None and end < start:
            end = None
            periods = 0

    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    cur = start
    if offset._normalizeFirst:
        cur = datetools.normalize_date(cur)

    while cur <= end:
        yield cur

        # faster than cur + offset
        cur = offset.apply(cur)
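If this vintage of pandas is on hand, the generator can be consumed like any other iterable. A hedged usage sketch; the import path below is an assumption (it moved between pandas releases), while the keyword arguments match the signature shown above:

from datetime import datetime

from pandas.core import datetools
from pandas.core.daterange import generate_range  # assumed location in old pandas

# Business days from Monday 2009-03-02 through Friday 2009-03-06, inclusive.
dates = list(generate_range(start=datetime(2009, 3, 2),
                            end=datetime(2009, 3, 6),
                            offset=datetools.BDay()))
print(dates)  # five timestamps, one per weekday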
Example No. 6
    def write(self, sid, df):
        """
        Write the OHLCV data for the given sid.

        If there is no bcolz ctable yet created for the sid, create it.

        If the bcolz ctable does not yet extend to the date before the
        first day provided, fill the ctable with 0s up to that date.

        Writes in blocks sized as the number of days times minutes per day.

        Parameters
        ----------
        sid : int
            The asset identifier for the data being written.
        df : pd.DataFrame
            DataFrame of market data with the following characteristics.
            columns : ('open', 'high', 'low', 'close', 'volume')
                open : float64
                high : float64
                low  : float64
                close : float64
                volume : float64|int64
            index : DatetimeIndex of market minutes.
        """
        table = self._ensure_ctable(sid)

        last_date = self.last_date_in_output_for_sid(sid)
        tds = self._trading_days
        days = tds[tds.slice_indexer(start=normalize_date(df.index[0]),
                                     end=normalize_date(df.index[-1]))]
        input_first_day = days[0]

        if last_date is pd.NaT:
            # If there is no data, determine how many days to add so that
            # desired days are written to the correct slots.
            days_to_zerofill = tds[tds.slice_indexer(end=input_first_day)]
            # Chop off the input first day.
            days_to_zerofill = days_to_zerofill[:-1]
        else:
            next_date = last_date + 1
            if next_date < input_first_day:
                # If last_date and input_first_day are not adjacent need to
                # fill in between.
                days_to_zerofill = tds[tds.slice_indexer(start=last_date + 1,
                                                         end=input_first_day)]
                # Chop off the input first day.
                days_to_zerofill = days_to_zerofill[:-1]
            elif next_date > input_first_day:
                raise BcolzMinuteOverlappingData(
                    dedent("""
                window start={0} is before expected write date={1} for
                sid={2}""".strip()).format(days[0], input_first_day, sid))
            else:
                days_to_zerofill = None

        if days_to_zerofill is not None and len(days_to_zerofill):
            self._zerofill(table, len(days_to_zerofill))

        days_to_write = tds[tds.slice_indexer(start=input_first_day,
                                              end=days[-1])]
        minutes_count = len(days_to_write) * self._minutes_per_day

        all_minutes = self._minute_index
        indexer = all_minutes.slice_indexer(start=days_to_write[0])
        all_minutes_in_window = all_minutes[indexer]

        open_col = np.zeros(minutes_count, dtype=np.uint32)
        high_col = np.zeros(minutes_count, dtype=np.uint32)
        low_col = np.zeros(minutes_count, dtype=np.uint32)
        close_col = np.zeros(minutes_count, dtype=np.uint32)
        vol_col = np.zeros(minutes_count, dtype=np.uint32)

        dt_ixs = np.searchsorted(all_minutes_in_window.values, df.index.values)

        ohlc_ratio = self._ohlc_ratio
        open_col[dt_ixs] = (df.open.values * ohlc_ratio).astype(np.uint32)
        high_col[dt_ixs] = (df.high.values * ohlc_ratio).astype(np.uint32)
        low_col[dt_ixs] = (df.low.values * ohlc_ratio).astype(np.uint32)
        close_col[dt_ixs] = (df.close.values * ohlc_ratio).astype(np.uint32)
        vol_col[dt_ixs] = df.volume.values.astype(np.uint32)

        table.append([open_col, high_col, low_col, close_col, vol_col])
        table.flush()
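The docstring pins down the expected input: a float OHLCV frame indexed by market minutes. A hedged sketch of assembling such a frame with pandas; the two minutes below are illustrative rather than taken from a real exchange calendar, and `writer` stands in for an instance of the class this method belongs to:

import numpy as np
import pandas as pd

minutes = pd.DatetimeIndex(['2015-06-01 14:31', '2015-06-01 14:32'])

df = pd.DataFrame(
    {
        'open':   np.array([10.00, 10.10]),
        'high':   np.array([10.20, 10.30]),
        'low':    np.array([ 9.90, 10.00]),
        'close':  np.array([10.10, 10.20]),
        'volume': np.array([1000, 1500], dtype=np.int64),
    },
    index=minutes,
)

# writer.write(1, df)  # sid=1; uncomment once a writer instance exists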
Example No. 7
def test_normalize_date():
    actual = normalize_date(datetime(2007, 10, 1, 1, 12, 5, 10))
    assert actual == datetime(2007, 10, 1)
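The test states the whole contract of normalize_date: the time-of-day is discarded and the calendar date is kept. A standard-library equivalent, offered as a sketch rather than the pandas implementation (which also handles other timestamp types):

from datetime import datetime

def normalize_date_sketch(dt):
    # Zero out hours, minutes, seconds, and microseconds.
    return dt.replace(hour=0, minute=0, second=0, microsecond=0)

assert normalize_date_sketch(datetime(2007, 10, 1, 1, 12, 5, 10)) == datetime(2007, 10, 1)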
Example No. 8
    def write(self, sid, df):
        """
        Write the OHLCV data for the given sid.

        If there is no bcolz ctable yet created for the sid, create it.

        If the bcolz ctable does not yet extend to the date before the
        first day provided, fill the ctable with 0s up to that date.

        Writes in blocks sized as the number of days times minutes per day.

        Parameters
        ----------
        sid : int
            The asset identifier for the data being written.
        df : pd.DataFrame
            DataFrame of market data with the following characteristics.
            columns : ('open', 'high', 'low', 'close', 'volume')
                open : float64
                high : float64
                low  : float64
                close : float64
                volume : float64|int64
            index : DatetimeIndex of market minutes.
        """
        table = self._ensure_ctable(sid)

        last_date = self.last_date_in_output_for_sid(sid)
        tds = self._trading_days
        days = tds[tds.slice_indexer(start=normalize_date(df.index[0]),
                                     end=normalize_date(df.index[-1]))]
        input_first_day = days[0]

        if last_date is pd.NaT:
            # If there is no data, determine how many days to add so that
            # desired days are written to the correct slots.
            days_to_zerofill = tds[tds.slice_indexer(end=input_first_day)]
            # Chop off the input first day.
            days_to_zerofill = days_to_zerofill[:-1]
        else:
            next_date = last_date + 1
            if next_date < input_first_day:
                # If last_date and input_first_day are not adjacent need to
                # fill in between.
                days_to_zerofill = tds[tds.slice_indexer(
                    start=last_date + 1,
                    end=input_first_day)]
                # Chop off the input first day.
                days_to_zerofill = days_to_zerofill[:-1]
            elif next_date > input_first_day:
                raise BcolzMinuteOverlappingData(dedent("""
                window start={0} is before expected write date={1} for
                sid={2}""".strip()).format(days[0], input_first_day, sid))
            else:
                days_to_zerofill = None

        if days_to_zerofill is not None and len(days_to_zerofill):
            self._zerofill(table, len(days_to_zerofill))

        days_to_write = tds[tds.slice_indexer(start=input_first_day,
                                              end=days[-1])]
        minutes_count = len(days_to_write) * self._minutes_per_day

        all_minutes = self._minute_index
        indexer = all_minutes.slice_indexer(start=days_to_write[0])
        all_minutes_in_window = all_minutes[indexer]

        open_col = np.zeros(minutes_count, dtype=np.uint32)
        high_col = np.zeros(minutes_count, dtype=np.uint32)
        low_col = np.zeros(minutes_count, dtype=np.uint32)
        close_col = np.zeros(minutes_count, dtype=np.uint32)
        vol_col = np.zeros(minutes_count, dtype=np.uint32)

        dt_ixs = np.searchsorted(all_minutes_in_window.values,
                                 df.index.values)

        ohlc_ratio = self._ohlc_ratio
        open_col[dt_ixs] = (df.open.values * ohlc_ratio).astype(np.uint32)
        high_col[dt_ixs] = (df.high.values * ohlc_ratio).astype(np.uint32)
        low_col[dt_ixs] = (df.low.values * ohlc_ratio).astype(np.uint32)
        close_col[dt_ixs] = (df.close.values * ohlc_ratio).astype(
            np.uint32)
        vol_col[dt_ixs] = df.volume.values.astype(np.uint32)

        table.append([
            open_col,
            high_col,
            low_col,
            close_col,
            vol_col
        ])
        table.flush()
Example No. 9
    def write(self, sid, df):
        """
        Write the OHLCV data for the given sid.

        If there is no bcolz ctable yet created for the sid, create it.

        If the bcolz ctable does not yet extend to the date before the
        first day provided, fill the ctable with 0s up to that date.

        Writes in blocks sized as the number of days times minutes per day.

        Parameters
        ----------
        sid : int
            The asset identifier for the data being written.
        df : pd.DataFrame
            DataFrame of market data with the following characteristics.
            columns : ('open', 'high', 'low', 'close', 'volume')
                open : float64
                high : float64
                low  : float64
                close : float64
                volume : float64|int64
            index : DatetimeIndex of market minutes.
        """
        table = self._ensure_ctable(sid)

        tds = self._trading_days
        input_first_day = normalize_date(df.index[0])
        input_last_day = normalize_date(df.index[-1])

        last_date = self.last_date_in_output_for_sid(sid)

        if last_date >= input_first_day:
            raise BcolzMinuteOverlappingData(dedent("""
            Data with last_date={0} already includes input start={1} for
            sid={2}""".strip()).format(last_date, input_first_day, sid))

        day_before_input = input_first_day - tds.freq

        self.pad(sid, day_before_input)
        table = self._ensure_ctable(sid)

        days_to_write = tds[tds.slice_indexer(start=input_first_day,
                                              end=input_last_day)]

        minutes_count = len(days_to_write) * self._minutes_per_day

        all_minutes = self._minute_index
        indexer = all_minutes.slice_indexer(start=days_to_write[0])
        all_minutes_in_window = all_minutes[indexer]

        open_col = np.zeros(minutes_count, dtype=np.uint32)
        high_col = np.zeros(minutes_count, dtype=np.uint32)
        low_col = np.zeros(minutes_count, dtype=np.uint32)
        close_col = np.zeros(minutes_count, dtype=np.uint32)
        vol_col = np.zeros(minutes_count, dtype=np.uint32)

        dt_ixs = np.searchsorted(all_minutes_in_window.values,
                                 df.index.values)

        ohlc_ratio = self._ohlc_ratio
        open_col[dt_ixs] = (df.open.values * ohlc_ratio).astype(np.uint32)
        high_col[dt_ixs] = (df.high.values * ohlc_ratio).astype(np.uint32)
        low_col[dt_ixs] = (df.low.values * ohlc_ratio).astype(np.uint32)
        close_col[dt_ixs] = (df.close.values * ohlc_ratio).astype(
            np.uint32)
        vol_col[dt_ixs] = df.volume.values.astype(np.uint32)

        table.append([
            open_col,
            high_col,
            low_col,
            close_col,
            vol_col
        ])
        table.flush()
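Both write variants store prices as unsigned integers scaled by self._ohlc_ratio and leave untraded minutes at zero. A hedged numeric sketch of that round trip, assuming a ratio of 1000 (the real ratio is whatever the writer was configured with):

import numpy as np

ohlc_ratio = 1000  # assumed for illustration

prices = np.array([10.0, 10.15, 0.0])             # float64 closes; 0.0 marks an empty minute
stored = (prices * ohlc_ratio).astype(np.uint32)  # array([10000, 10150, 0], dtype=uint32)
recovered = stored / ohlc_ratio                   # array([10.  , 10.15,  0.  ])

print(stored, recovered)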
Example No. 10
    def __new__(
        cls,
        data=None,
        freq=None,
        start=None,
        end=None,
        periods=None,
        dtype=None,
        copy=False,
        name=None,
        tz=None,
        verify_integrity=True,
        normalize=False,
        **kwds
    ):

        warn = False
        if "offset" in kwds and kwds["offset"]:
            freq = kwds["offset"]
            warn = True

        if not isinstance(freq, datetools.DateOffset):
            freq = datetools.to_offset(freq)

        if warn:
            import warnings

            warnings.warn(
                "parameter 'offset' is deprecated, please use 'freq' instead",
                FutureWarning,
            )
            if isinstance(freq, basestring):
                freq = datetools.get_offset(freq)
        else:
            if isinstance(freq, basestring):
                freq = datetools.to_offset(freq)

        offset = freq

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is " "supplied")

        if data is None:
            _normalized = True

            if start is not None:
                start = Timestamp(start)
                if not isinstance(start, Timestamp):
                    raise ValueError("Failed to convert %s to timestamp" % start)

                if normalize:
                    start = datetools.normalize_date(start)
                    _normalized = True
                else:
                    _normalized = _normalized and start.time() == _midnight

            if end is not None:
                end = Timestamp(end)
                if not isinstance(end, Timestamp):
                    raise ValueError("Failed to convert %s to timestamp" % end)

                if normalize:
                    end = datetools.normalize_date(end)
                    _normalized = True
                else:
                    _normalized = _normalized and end.time() == _midnight

            start, end, tz = tools._figure_out_timezone(start, end, tz)

            if (
                offset._should_cache()
                and not (offset._normalize_cache and not _normalized)
                and datetools._naive_in_cache_range(start, end)
            ):
                index = cls._cached_range(start, end, periods=periods, offset=offset, name=name)
            else:
                index = _generate_regular_range(start, end, periods, offset)

            index = index.view(cls)
            index.name = name
            index.offset = offset
            index.tz = tz

            return index

        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError(
                    "DatetimeIndex() must be called with a " "collection of some kind, %s was passed" % repr(data)
                )

            if isinstance(data, datetime):
                data = [data]

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype="O")

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data)
            else:
                data = np.asarray(data, dtype="M8[us]")

        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data)
        elif issubclass(data.dtype.type, np.integer):
            subarr = np.array(data, dtype="M8[us]", copy=copy)
        elif issubclass(data.dtype.type, np.datetime64):
            subarr = np.array(data, dtype="M8[us]", copy=copy)
        else:
            subarr = np.array(data, dtype="M8[us]", copy=copy)

        # TODO: this is horribly inefficient. If user passes data + offset, we
        # need to make sure data points conform. Punting on this

        if verify_integrity:
            if offset is not None:
                for i, ts in enumerate(subarr):
                    if not offset.onOffset(Timestamp(ts)):
                        val = Timestamp(offset.rollforward(ts)).value
                        subarr[i] = val

        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        return subarr
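In current pandas the normalize=True code path above is reached through pd.date_range rather than the DatetimeIndex constructor; a short sketch of the modern equivalent:

import pandas as pd

# normalize=True snaps start/end to midnight before the range is generated,
# so the 09:30 time-of-day below is dropped.
idx = pd.date_range(start='2011-01-01 09:30', periods=3, freq='D', normalize=True)
print(idx)  # DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D')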