Exemplo n.º 1
0
    def test_setitem_corner(self):
        """Column assignment corner cases on frames holding mixed data."""
        # frame built from one float column and one string column
        frame = self.klass({'B': [1., 2., 3.],
                            'C': ['a', 'b', 'c']},
                           index=np.arange(3))

        # drop the float column and put it straight back
        del frame['B']
        frame['B'] = [1., 2., 3.]
        self.assert_('B' in frame)
        self.assertEqual(len(frame.columns), 1)

        # broadcast scalars into new columns: string keys first ...
        for key, value in [('A', 'beginning'), ('E', 'foo'), ('D', 'bar')]:
            frame[key] = value
        # ... then datetime keys
        frame[datetime.now()] = 'date'
        frame[datetime.now()] = 5.

        # frame that starts out with an index but no columns
        matrix = DataMatrix(index=self.frame.index)
        matrix['A'] = 'foo'
        matrix['B'] = 'bar'
        self.assertEqual(len(matrix.objects.columns), 2)

        matrix['C'] = 1
        self.assertEqual(len(matrix.columns), 1)

        # overwrite a column that already exists
        matrix['A'] = 'bar'
        self.assertEqual('bar', matrix['A'][0])

        # delete a string column and create it again
        matrix = DataMatrix(index=np.arange(3))
        matrix['A'] = 1
        matrix['foo'] = 'bar'
        del matrix['foo']
        matrix['foo'] = 'bar'
        self.assertEqual(len(matrix.objects.columns), 1)
Exemplo n.º 2
0
    def createData2(self):
        """Build the ``panel_y2`` / ``panel_x2`` fixtures (data with NaNs)."""
        def january(ndays):
            # fresh list of the first ``ndays`` days of January 2000
            return [datetime(2000, 1, day) for day in range(1, ndays + 1)]

        # dependent variable: three dates, columns A and B
        self.panel_y2 = DataMatrix(
            np.array([[1, np.NaN], [2, 3], [4, 5]]),
            index=january(3),
            columns=['A', 'B'])

        # first regressor: four dates, columns A and B
        x1 = DataMatrix(
            np.array([[6, np.NaN], [7, 8], [9, 30], [11, 12]]),
            index=january(4),
            columns=['A', 'B'])

        # second regressor: five dates, columns C, A and B
        x2 = DataMatrix(
            np.array([[13, 14, np.NaN],
                      [15, np.NaN, np.NaN],
                      [16, 17, 48],
                      [19, 20, 21],
                      [22, 23, 24]]),
            index=january(5),
            columns=['C', 'A', 'B'])

        self.panel_x2 = {'x1': x1, 'x2': x2}
Exemplo n.º 3
0
    def createData3(self):
        """Build the ``panel_y3`` / ``panel_x3`` fixtures (string regressors)."""
        def two_days():
            # a new list per frame, so no index list is shared between them
            return [datetime(2000, 1, 1), datetime(2000, 1, 2)]

        def make(rows):
            # every frame here has the same two dates and columns A, B
            return DataMatrix(np.array(rows), index=two_days(),
                              columns=['A', 'B'])

        # numeric dependent variable
        self.panel_y3 = make([[1, 2],
                              [3, 4]])

        # regressor holding category-like letters
        x1 = make([['A', 'B'],
                   ['C', 'A']])

        # regressor holding numbers stored as strings
        x2 = make([['3.14', '1.59'],
                   ['2.65', '3.14']])

        self.panel_x3 = {'x1': x1, 'x2': x2}
Exemplo n.º 4
0
    def test_combineFirst_mixed(self):
        """Smoke test: combineFirst on two frames mixing strings and ints."""
        def build(start, stop):
            # one string column and one integer column over start..stop-1
            letters = Series(['a', 'b'], index=range(start, stop))
            numbers = Series(range(2), index=range(start, stop))
            return DataMatrix({'A': letters, 'B': numbers})

        first = build(0, 2)
        second = build(5, 7)

        # nothing is asserted on the result; the call only has to succeed
        first.combineFirst(second)
Exemplo n.º 5
0
    def test_reindex_bool(self):
        """Reindexing a boolean frame onto missing labels gives float data."""
        bool_frame = DataMatrix(np.ones((10, 2), dtype=bool),
                                index=np.arange(0, 20, 2),
                                columns=[0, 2])

        # new row labels: row 1 is absent from the even-valued index
        by_rows = bool_frame.reindex(np.arange(10))
        self.assert_(by_rows.values.dtype == np.float_)
        self.assert_(np.isnan(by_rows[0][1]))

        # new column labels: column 1 was not in the original frame
        by_cols = bool_frame.reindex(columns=range(3))
        self.assert_(by_cols.values.dtype == np.float_)
        self.assert_(isnull(by_cols[1]).all())
Exemplo n.º 6
0
    def test_constructor_objects_corner(self):
        """Construct DataMatrix with ``objects`` given as a frame or a dict."""
        raw_objects = {'A': {1: '1', 2: '2'}}
        objects_frame = DataMatrix(raw_objects)
        zeros = np.zeros((3, 3), dtype=float)
        labels = ['B', 'C', 'D']

        # numeric data plus objects passed as a DataMatrix
        dm = DataMatrix(zeros, index=[1, 2, 3], columns=labels,
                        objects=objects_frame)
        assert dm.index is not objects_frame.index

        # numeric data plus objects passed as a plain dict
        dm = DataMatrix(zeros, index=[1, 2, 3], columns=labels,
                        objects=raw_objects)

        # index only, with each kind of objects argument
        dm = DataMatrix(index=[1, 2, 3], objects=objects_frame)
        dm = DataMatrix(index=[1, 2, 3], objects=raw_objects)
Exemplo n.º 7
0
def frame_query(sql, con, indexField='Time', asDataMatrix=False):
    """
    Run a SQL query and return its result set as a DataFrame or DataMatrix.

    Parameters
    ----------
    sql : string
        SQL query to be executed
    con : DB connection object
        Passed through to ``array_query`` together with ``sql``
    indexField : string or None, default 'Time'
        Name of the result column to use as the index of the returned
        object; that column is removed from the data. If None, the index
        is 0 to len(results) - 1.
    asDataMatrix : bool, default False
        Return a DataMatrix instead of a DataFrame

    Returns
    -------
    DataFrame or DataMatrix

    Raises
    ------
    KeyError
        If ``indexField`` is not one of the result columns
    """
    data = array_query(sql, con)
    if indexField is not None:
        try:
            idx = Index(data.pop(indexField))
        except KeyError:
            raise KeyError('indexField %s not found! %s' % (indexField, sql))
    else:
        # ``data.values()[0]`` is not subscriptable on Python 3 and raised
        # IndexError when the result had no columns; take the first column
        # through an iterator and treat "no columns" as zero rows.
        first_column = next(iter(data.values()), ())
        idx = Index(np.arange(len(first_column)))

    if asDataMatrix:
        return DataMatrix(data, index=idx)
    else:
        return DataFrame(data=data, index=idx)
Exemplo n.º 8
0
def _combine_rhs(rhs):
    """
    Assemble the right-hand-side (X) variables into a single frame.

    A Series is stored under the name 'x', a DataFrame is copied, and a
    dict has each entry merged through ``_safe_update`` (presumably where
    duplicate names are detected). Anything that is not already a
    DataFrame at the end is wrapped in a DataMatrix.

    Raises
    ------
    Exception
        If ``rhs`` or one of the values of a dict ``rhs`` has an
        unsupported type
    """
    if isinstance(rhs, Series):
        combined = {'x': rhs}
    elif isinstance(rhs, DataFrame):
        combined = rhs.copy()
    elif isinstance(rhs, dict):
        combined = {}
        for name, value in rhs.iteritems():
            if isinstance(value, Series):
                _safe_update(combined, {name: value})
            elif isinstance(value, (dict, DataFrame)):
                _safe_update(combined, value)
            else:
                raise Exception('Invalid RHS data type: %s' % type(value))
    else:
        raise Exception('Invalid RHS type: %s' % type(rhs))

    if isinstance(combined, DataFrame):
        return combined
    return DataMatrix(combined)
Exemplo n.º 9
0
def bucketcat(series, cats):
    """
    Split a Series into one DataMatrix column per distinct category label

    Parameters
    ----------
    series : Series or array-like
        Values to bucket; anything that is not a Series is wrapped in a
        Series indexed 0 to len(series) - 1
    cats : Series or same-length array
        Category label of each observation; null labels are ignored

    Returns
    -------
    DataMatrix
        One column per unique non-null label, holding the entries of
        `series` whose category equals that label
    """
    if not isinstance(series, Series):
        series = Series(series, index=np.arange(len(series)))

    cats = np.asarray(cats)

    unique_labels = np.unique(cats)
    unique_labels = unique_labels[notnull(unique_labels)]

    # group by label
    data = {}
    for label in unique_labels:
        data[label] = series[cats == label]

    return DataMatrix(data, columns=unique_labels)
Exemplo n.º 10
0
def bucket(series, k, by=None):
    """
    Spread a Series over k columns, one per group from `_split_quantile`

    Parameters
    ----------
    series : Series
    k : int
        number of quantiles (and of output columns)
    by : Series, optional
        values to split on; reindexed to `series.index`. Defaults to
        `series` itself.

    Returns
    -------
    DataMatrix
        Indexed like `series` with columns labelled 1 to k; column i holds
        the values of `series` at the positions of the i-th group and NaN
        everywhere else
    """
    sort_key = series if by is None else by.reindex(series.index)
    groups = _split_quantile(sort_key, k)

    # all-NaN canvas, filled in group by group
    result = np.empty((len(series), k), dtype=float)
    result.fill(np.NaN)

    for col, positions in enumerate(groups):
        result[:, col][positions] = series.take(positions)

    return DataMatrix(result, index=series.index, columns=np.arange(k) + 1)
Exemplo n.º 11
0
    def testWithWeights(self):
        """Weighted OLS on the ``panel_y2`` / ``panel_x2`` fixtures."""
        # weights 0..9 laid out over five dates and columns A, B
        dates = [datetime(2000, 1, day) for day in range(1, 6)]
        weights = DataMatrix(np.arange(10).reshape((5, 2)),
                             index=dates, columns=['A', 'B'])

        result = ols(y=self.panel_y2, x=self.panel_x2, weights=weights)

        # transformed y and x
        assert_almost_equal(result._y_trans.values.flat, [0, 16, 25])

        expected_x = [[0, 0, 0], [36, 68, 4], [150, 240, 5]]
        assert_almost_equal(result._x_trans.values, expected_x)

        # NOTE(review): an alternative expectation for ``_x_filtered``
        # ([0, 0, 0], [36, 68, 4], [150, 240, 5], [66, 120, 6],
        # [84, 147, 7]) was commented out in the original; only the
        # values below are asserted.
        expected_x_filtered = [[6, 14, 1],
                               [9, 17, 1],
                               [30, 48, 1],
                               [11, 20, 1],
                               [12, 21, 1]]
        assert_almost_equal(result._x_filtered.values, expected_x_filtered)
Exemplo n.º 12
0
    def var_beta(self):
        """
        Return the covariance of beta as a WidePanel.

        Each entry of ``_var_beta_raw`` becomes a DataMatrix whose rows and
        columns are both labelled by the beta columns; it is stored under
        the ``_result_index`` label at the same position.
        """
        labels = self._result_index
        frames = {}
        for pos, raw in enumerate(self._var_beta_raw):
            frames[labels[pos]] = DataMatrix(raw,
                                             columns=self.beta.cols(),
                                             index=self.beta.cols())

        return WidePanel.fromDict(frames, intersect=False)
Exemplo n.º 13
0
def _cat_labels(labels, series=None):
    """
    Split observations into one DataMatrix column per distinct label

    NOTE(review): the previous body used two names, ``stringified`` and
    ``series``, that are not defined in this function, so it could not run.
    The fix below follows ``bucketcat``: the column key is ``str(label)``
    and the values come from the optional ``series`` argument. Confirm
    this matches the intended behaviour.

    Parameters
    ----------
    labels : Series or array
        Label of each observation; null labels are ignored
    series : Series or same-length array, optional
        Values to group. Defaults to `labels` itself.

    Returns
    -------
    DataMatrix
        Indexed like `series`, one column per unique non-null label
    """
    if series is None:
        series = labels
    if not isinstance(series, Series):
        series = Series(series, index=np.arange(len(series)))

    labels = np.asarray(labels)

    unique_labels = np.unique(labels)
    unique_labels = unique_labels[notnull(unique_labels)]

    # group by label
    data = {}
    for label in unique_labels:
        data[str(label)] = series[labels == label]

    return DataMatrix(data, index=series.index)
Exemplo n.º 14
0
    def test_constructor_with_objects(self):
        """Exercise the ``objects`` argument of the DataMatrix constructor."""
        # explicit index made of the first five labels, objects as a frame
        index = self.mixed_frame.index[:5]

        dm = DataMatrix(data=None, index=index,
                        objects=self.mixed_frame.objects)
        self.assert_(dm.index is index)
        self.assert_(dm.objects.index is index)

        # same index, objects passed through their ``_series`` attribute
        dm = DataMatrix(data=None, index=index,
                        objects=self.mixed_frame.objects._series)
        self.assert_(dm.index is index)
        self.assert_(dm.objects.index is index)

        # explicit full index
        index = self.mixed_frame.index
        dm = DataMatrix(data=None, index=index,
                        objects=self.mixed_frame.objects)
        self.assert_(dm.index is index)
        self.assert_(dm.objects.index is index)

        # no index given: it is taken from the objects
        dm = DataMatrix(objects=self.mixed_frame.objects)
        self.assert_(dm.index is self.mixed_frame.index)

        # take dict of objects
        dm = DataMatrix(data={}, objects=self.mixed_frame.objects._series)
        self.assert_(isinstance(dm.objects, DataMatrix))
        self.assert_(dm.index is dm.objects.index)

        dm = DataMatrix(objects=self.mixed_frame.objects._series)
        self.assert_(isinstance(dm.objects, DataMatrix))
        self.assert_(dm.index is dm.objects.index)

        # data and objects both passed through ``_series``
        dm = DataMatrix(data=self.frame._series,
                        objects=self.mixed_frame.objects._series)
        self.assert_(isinstance(dm.objects, DataMatrix))
        self.assert_(dm.objects.columns.equals(
                self.mixed_frame.objects.columns))

        # separate objects frame alongside the mixed data: 'foo' must
        # still show up among the objects
        objs = DataMatrix({'bar' : ['bar'] * len(self.mixed_frame)})
        dm = DataMatrix(self.mixed_frame._series, objects=objs)
        self.assert_('foo' in dm.objects)
Exemplo n.º 15
0
    def test_more_constructor(self):
        """Constructor corner cases: 1-d input, missing data, dtype casts."""
        # 1-d input is stored as a 2-d block
        arr = randn(10)
        dm = self.klass(arr, columns=['A'], index=np.arange(10))
        self.assertEqual(dm.values.ndim, 2)

        # same for empty 1-d input (a duplicated copy of this assertion
        # was removed)
        arr = randn(0)
        dm = self.klass(arr)
        self.assertEqual(dm.values.ndim, 2)

        # no data specified
        dm = self.klass(columns=['A', 'B'], index=np.arange(10))
        self.assertEqual(dm.values.shape, (10, 2))

        dm = self.klass(columns=['A', 'B'])
        self.assertEqual(dm.values.shape, (0, 2))

        dm = self.klass(index=np.arange(10))
        self.assertEqual(dm.values.shape, (10, 0))

        # corner, silly
        self.assertRaises(Exception, self.klass, (1, 2, 3))

        # can't cast: strings stay object dtype even when float is requested
        mat = np.array(['foo', 'bar'], dtype=object).reshape(2, 1)
        df = DataMatrix(mat, index=[0, 1], columns=[0], dtype=float)
        self.assert_(df.values.dtype == np.object_)

        # construction from a DataFrame
        dm = self.klass(DataFrame(self.frame._series))
        common.assert_frame_equal(dm, self.frame)

        # int cast: an int column next to a float column ends up float
        dm = DataMatrix({'A' : np.ones(10, dtype=int),
                         'B' : np.ones(10, dtype=float)},
                        index=np.arange(10))

        self.assertEqual(len(dm.columns), 2)
        self.assert_(dm.values.dtype == np.float_)
Exemplo n.º 16
0
    def test_setitem_ambig(self):
        """Assigning float, Decimal and string columns to a float matrix."""
        from decimal import Decimal

        # starts out as a 3x3 frame created as float type
        matrix = DataMatrix(index=range(3), columns=range(3))

        decimal_series = Series([Decimal(1) for _ in range(3)],
                                index=range(3))
        string_series = Series(['foo', 'bzr', 'baz'], index=range(3))

        # plain floats: nothing moves to ``objects``
        matrix[0] = np.ones(3)
        self.assertEqual(len(matrix.cols()), 3)
        self.assert_(matrix.objects is None)

        # Decimals: still no ``objects``
        matrix[1] = decimal_series
        self.assertEqual(len(matrix.cols()), 3)
        self.assert_(matrix.objects is None)

        # strings: column 2 leaves ``columns`` and appears in ``objects``
        matrix[2] = string_series
        self.assertEqual(len(matrix.cols()), 3)
        self.assert_(matrix.objects is not None)
        self.assert_(2 in matrix.objects)
        self.assert_(2 not in matrix.columns)
Exemplo n.º 17
0
    def setUp(self):
        """Create the random array, series, frame and matrix fixtures."""
        # random vector with NaNs planted at ``_nan_locs``
        self.arr = randn(N)
        self.arr[self._nan_locs] = np.NaN

        # N periods starting 2009-01-01
        self.rng = DateRange(datetime(2009, 1, 1), periods=N)

        # the series works on its own copy of the array
        self.series = Series(self.arr.copy(), index=self.rng)

        # N x K random frame and matrix, columns labelled 0..K-1
        column_labels = np.arange(K)
        self.frame = DataFrame(randn(N, K), index=self.rng,
                               columns=column_labels)

        column_labels = np.arange(K)
        self.matrix = DataMatrix(randn(N, K), index=self.rng,
                                 columns=column_labels)
Exemplo n.º 18
0
    def checkDataSet(self, dataset, start=None, end=None, skip_moving=False):
        """
        Run the OLS checks (and, unless skipped, the moving-window checks)
        on the ``start:end`` slice of a dataset.
        """
        exog = dataset.exog[start:end]
        endog = dataset.endog[start:end]

        # integer-labelled frame / series versions of the raw arrays
        nrows, ncols = exog.shape[0], exog.shape[1]
        x = DataMatrix(exog, index=np.arange(nrows), columns=np.arange(ncols))
        y = Series(endog, index=np.arange(len(endog)))

        self.checkOLS(exog, endog, x, y)

        if skip_moving:
            return

        # (window type, extra keyword arguments), in the order to be run
        moving_cases = [
            ('rolling', {}),
            ('rolling', {'nw_lags': 0}),
            ('expanding', {'nw_lags': 0}),
            ('rolling', {'nw_lags': 1}),
            ('expanding', {'nw_lags': 1}),
            ('expanding', {'nw_lags': 1, 'nw_overlap': True}),
        ]
        for window_type, options in moving_cases:
            self.checkMovingOLS(window_type, x, y, **options)
Exemplo n.º 19
0
def _rollingMoment(arg, window, func, minp, time_rule=None):
    """
    Rolling statistical measure using supplied function. Designed to be
    used with passed-in Cython array-based functions.

    Infinite values are replaced by NaN before `func` is applied. The
    replacement is done on a copy, so the caller's data is left untouched.

    Parameters
    ----------
    arg :  DataFrame or numpy ndarray-like
    window : Number of observations used for calculating statistic
    func : Cython function to compute rolling statistic on raw series
        Called as ``func(values, window, minp=minp)``
    minp : int
        Minimum number of observations required to have a value
    time_rule : optional
        If not None and `arg` is a DataFrame, DataMatrix or Series, `arg`
        is first converted with ``arg.asfreq(time_rule)``

    Returns
    -------
    Same kind of object as `arg` (DataMatrix, DataFrame or Series); for
    any other input, whatever `func` returns

    Raises
    ------
    AssertionError
        If `arg` is none of the above and has no ``__iter__`` attribute
    """
    types = (DataFrame, DataMatrix, Series)
    if time_rule is not None and isinstance(arg, types):
        # Conform to whatever frequency needed.
        arg = arg.asfreq(time_rule)

    if isinstance(arg, DataMatrix):
        # copy first: blanking infinities must not modify the input
        values = arg.values.copy()
        values[np.isinf(values)] = NaN
        T, N = values.shape
        resultMatrix = np.empty((T, N), dtype=values.dtype)
        for i in range(N):
            resultMatrix[:, i] = func(values[:, i], window, minp=minp)
        output = DataMatrix(resultMatrix, index=arg.index, columns=arg.columns)

    elif isinstance(arg, DataFrame):
        output = DataFrame(index=arg.index)
        for col, series in arg.iteritems():
            series = series.copy()
            series[np.isinf(series)] = NaN
            output[col] = Series(func(series, window, minp=minp),
                                 index=series.index)
    elif isinstance(arg, Series):
        arg = arg.copy()
        arg[np.isinf(arg)] = NaN
        output = Series(func(arg, window, minp=minp), index=arg.index)
    else:
        # explicit check instead of ``assert`` so it survives ``python -O``;
        # the exception type is kept for callers that catch it
        if not hasattr(arg, '__iter__'):
            raise AssertionError('Expected DataFrame or array-like argument')
        # np.array copies, which also lets plain sequences be masked
        arg = np.array(arg)
        arg[np.isinf(arg)] = NaN
        output = func(arg, window, minp=minp)
    return output
Exemplo n.º 20
0
    def _forecast_mean_raw(self):
        """
        Return, for each valid index, the windowed sum of y divided by the
        number of observations, as an ndarray.
        """
        nobs = self._nobs
        window = self._window

        # regress on a single column of ones so that ``_cum_xy`` yields
        # (presumably cumulative) sums of y
        ones = DataMatrix(index=self._y.index)
        ones['y'] = 1

        cum_xy = self._cum_xy(ones, self._y)

        means = []
        for pos, idx in enumerate(self._valid_indices):
            total = cum_xy[idx]

            # rolling windows drop whatever was accumulated before the window
            if self._is_rolling and idx >= window:
                total = total - cum_xy[idx - window]

            means.append(total[0] / nobs[pos])

        return np.array(means)
Exemplo n.º 21
0
def _process_data_structure(arg, kill_inf=True):
    """
    Pull the raw values out of `arg` and build a function that re-wraps
    results in the same kind of container.

    Parameters
    ----------
    arg : DataMatrix, DataFrame, Series or array
    kill_inf : bool, default True
        Replace infinite values by NaN (on a copy of the values)

    Returns
    -------
    (return_hook, values)
        `values` is the data as a float array; ``return_hook(v)`` wraps `v`
        with the index / columns (and objects, for a DataMatrix) of `arg`,
        or returns `v` unchanged when `arg` is a plain array
    """
    if isinstance(arg, DataFrame):
        values = arg.values
        if isinstance(arg, DataMatrix):
            def return_hook(v):
                return DataMatrix(v, index=arg.index, columns=arg.columns,
                                  objects=arg.objects)
        else:
            def return_hook(v):
                return DataFrame(v, index=arg.index, columns=arg.columns)
    elif isinstance(arg, Series):
        values = arg.values

        def return_hook(v):
            return Series(v, arg.index)
    else:
        values = arg

        def return_hook(v):
            return v

    # everything downstream works on floats
    is_float = issubclass(values.dtype.type, float)
    if not is_float:
        values = values.astype(float)

    if kill_inf:
        # copy so the input is not modified when infinities are blanked
        values = values.copy()
        values[np.isinf(values)] = np.NaN

    return return_hook, values
Exemplo n.º 22
0
    def test_count_objects(self):
        """count() on a DataMatrix must agree with the DataFrame result."""
        matrix = DataMatrix(self.mixed_frame._series)
        frame = DataFrame(self.mixed_frame._series)

        # default axis first, then axis 1
        common.assert_series_equal(matrix.count(), frame.count())
        common.assert_series_equal(matrix.count(1), frame.count(1))
Exemplo n.º 23
0
def makeTimeDataMatrix():
    """Return a DataMatrix built from ``getTimeSeriesData()``."""
    return DataMatrix(getTimeSeriesData())
Exemplo n.º 24
0
def makeDataMatrix():
    """Return a DataMatrix built from ``getSeriesData()``."""
    return DataMatrix(getSeriesData())
Exemplo n.º 25
0
 def test_cumsum_corner(self):
     """Smoke test: cumsum on a 4x5 matrix built from integers."""
     values = np.arange(20).reshape(4, 5)
     matrix = DataMatrix(values, index=range(4), columns=range(5))
     # nothing is asserted on the result; the call only has to succeed
     matrix.cumsum()
Exemplo n.º 26
0
 def t_stat(self):
     """Return the t-statistics as a DataMatrix.

     Rows are labelled by ``_result_index``, columns by the beta columns.
     """
     raw = self._t_stat_raw
     column_labels = self.beta.cols()
     row_labels = self._result_index
     return DataMatrix(raw, columns=column_labels, index=row_labels)
Exemplo n.º 27
0
 def std_err(self):
     """Return the standard errors as a DataMatrix.

     Rows are labelled by ``_result_index``, columns by the beta columns.
     """
     raw = self._std_err_raw
     column_labels = self.beta.cols()
     row_labels = self._result_index
     return DataMatrix(raw, columns=column_labels, index=row_labels)
Exemplo n.º 28
0
 def p_value(self):
     """Return the p-values as a DataMatrix.

     Rows are labelled by ``_result_index``, columns by the beta columns.
     """
     column_labels = self.beta.cols()
     raw = self._p_value_raw
     row_labels = self._result_index
     return DataMatrix(raw, columns=column_labels, index=row_labels)
Exemplo n.º 29
0
 def beta(self):
     """Return the beta coefficients as a DataMatrix.

     Rows are labelled by ``_result_index``, columns by the columns of
     ``_x``.
     """
     raw = self._beta_raw
     row_labels = self._result_index
     column_labels = self._x.cols()
     return DataMatrix(raw, index=row_labels, columns=column_labels)
Exemplo n.º 30
0
 def var_beta(self):
     """Return the variance-covariance matrix of beta as a DataMatrix.

     Both rows and columns are labelled by the index of ``beta``.
     """
     raw = self._var_beta_raw
     row_labels = self.beta.index
     column_labels = self.beta.index
     return DataMatrix(raw, index=row_labels, columns=column_labels)