Example No. 1
 def test_categorical_zeroes(self):
     # keep the `d` category with 0
     s = Series(pd.Categorical(
         list('bbbaac'), categories=list('abcd'), ordered=True))
     result = s.value_counts()
     expected = Series([3, 2, 1, 0], index=pd.Categorical(
         ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True))
     tm.assert_series_equal(result, expected, check_index_type=True)
Example No. 2
def percentileRank(frame, column=None, kind='mean'):
    """
    Return score at percentile for each point in time (cross-section)

    Parameters
    ----------
    frame: DataFrame
    column: string or Series, optional
       Column name or specific Series to compute percentiles for.
       If not provided, percentiles are computed for all values at each
       point in time. Note that this can take a LONG time.
    kind: {'rank', 'weak', 'strict', 'mean'}, optional
        This optional parameter specifies the interpretation of the
        resulting score:

        - "rank": Average percentage ranking of score.  In case of
                  multiple matches, average the percentage rankings of
                  all matching scores.
        - "weak": This kind corresponds to the definition of a cumulative
                  distribution function.  A percentileofscore of 80%
                  means that 80% of values are less than or equal
                  to the provided score.
        - "strict": Similar to "weak", except that only values that are
                    strictly less than the given score are counted.
        - "mean": The average of the "weak" and "strict" scores, often used in
                  testing.  See

                  http://en.wikipedia.org/wiki/Percentile_rank

    See also
    --------
    scipy.stats.percentileofscore

    Returns
    -------
    TimeSeries or DataFrame, depending on input
    """
    from scipy.stats import percentileofscore
    fun = lambda xs, score: percentileofscore(remove_na(xs),
                                              score, kind=kind)

    results = {}
    framet = frame.T
    if column is not None:
        if isinstance(column, Series):
            for date, xs in framet.iteritems():
                results[date] = fun(xs, column.get(date, NaN))
        else:
            for date, xs in framet.iteritems():
                results[date] = fun(xs, xs[column])
        results = Series(results)
    else:
        for column in frame.columns:
            for date, xs in framet.iteritems():
                results.setdefault(date, {})[column] = fun(xs, xs[column])
        results = DataFrame(results).T
    return results
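A minimal usage sketch, not part of the original module: it assumes the legacy pandas environment this snippet targets, where Series, DataFrame and remove_na are importable:

    import numpy as np
    from pandas import DataFrame

    frame = DataFrame({'A': np.random.randn(6),
                       'B': np.random.randn(6),
                       'C': np.random.randn(6)})
    # percentile rank of column 'A' within each cross-section (row)
    ranks = percentileRank(frame, column='A', kind='mean')
    # omitting `column` ranks every value against its own row (slow)
    all_ranks = percentileRank(frame)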
Example No. 3
 def test_save_load(self):
     self.series.save('tmp1')
     self.ts.save('tmp3')
     unp_series = Series.load('tmp1')
     unp_ts = Series.load('tmp3')
     os.remove('tmp1')
     os.remove('tmp3')
     assert_series_equal(unp_series, self.series)
     assert_series_equal(unp_ts, self.ts)
Example No. 4
    def test_fromValue(self):
        nans = Series.fromValue(np.NaN, index=self.ts.index)
        self.assert_(nans.dtype == np.float_)

        strings = Series.fromValue('foo', index=self.ts.index)
        self.assert_(strings.dtype == np.object_)

        d = datetime.now()
        dates = Series.fromValue(d, index=self.ts.index)
        self.assert_(dates.dtype == np.object_)
Example No. 5
    def _nobs_raw(self):
        if self._is_rolling:
            window = self._window
        else:
            # expanding case
            window = len(self._index)

        result = Series(self._time_obs_count).rolling(window, min_periods=1).sum().values

        return result.astype(int)
Example No. 6
    def test_merge_int(self):
        left = Series({'a' : 1., 'b' : 2., 'c' : 3., 'd' : 4})
        right = Series({1 : 11, 2 : 22, 3 : 33})

        self.assert_(left.dtype == np.float_)
        self.assert_(issubclass(right.dtype.type, np.integer))

        merged = left.merge(right)
        self.assert_(merged.dtype == np.float_)
        self.assert_(isnull(merged['d']))
        self.assert_(not isnull(merged['c']))
Example No. 7
    def test_fill(self):
        ts = Series([0., 1., 2., 3., 4.], index=common.makeDateIndex(5))

        self.assert_(np.array_equal(ts, ts.fill()))

        ts[2] = np.NaN

        self.assert_(np.array_equal(ts.fill(), [0., 1., 1., 3., 4.]))
        self.assert_(np.array_equal(ts.fill(method='backfill'), [0., 1., 3., 3., 4.]))

        self.assert_(np.array_equal(ts.fill(value=5), [0., 1., 5., 3., 4.]))
Example No. 8
    def test_categorical(self):
        s = Series(pd.Categorical(list('aaabbc')))
        result = s.value_counts()
        expected = pd.Series([3, 2, 1], index=pd.CategoricalIndex(['a', 'b', 'c']))
        tm.assert_series_equal(result, expected, check_index_type=True)

        # preserve order?
        s = s.cat.as_ordered()
        result = s.value_counts()
        expected.index = expected.index.as_ordered()
        tm.assert_series_equal(result, expected, check_index_type=True)
Example No. 9
    def test_iloc_setitem_series(self):
        s = Series(np.random.randn(10), index=range(0,20,2))

        s.iloc[1] = 1
        result = s.iloc[1]
        self.assert_(result == 1)

        s.iloc[:4] = 0
        expected = s.iloc[:4]
        result = s.iloc[:4]
        assert_series_equal(result, expected)
Example No. 10
    def y_fitted(self):
        """Returns the fitted y values.  This equals BX."""
        if self._weights is None:
            index = self._x_filtered.index
            orig_index = index
        else:
            index = self._y.index
            orig_index = self._y_orig.index

        result = Series(self._y_fitted_raw, index=index)
        return result.reindex(orig_index)
Example No. 11
    def test_reindex_int(self):
        ts = self.ts[::2]
        int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)

        # this should work fine
        reindexed_int = int_ts.reindex(self.ts.index)

        # if NaNs introduced
        self.assert_(reindexed_int.dtype == np.float_)

        # NO NaNs introduced
        reindexed_int = int_ts.reindex(int_ts.index[::2])
        self.assert_(reindexed_int.dtype == np.int_)
Example No. 12
    def test_value_counts(self):
        np.random.seed(1234)
        from pandas.tools.tile import cut

        arr = np.random.randn(4)
        factor = cut(arr, 4)

        tm.assertIsInstance(factor, Categorical)
        result = algos.value_counts(factor)
        cats = ['(-1.194, -0.535]', '(-0.535, 0.121]', '(0.121, 0.777]',
                '(0.777, 1.433]']
        expected_index = CategoricalIndex(cats, cats, ordered=True)
        expected = Series([1, 1, 1, 1], index=expected_index)
        tm.assert_series_equal(result.sort_index(), expected.sort_index())
Example No. 13
    def test_firstValid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts._firstTimeWithValue()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.NaN
        index = ts._lastTimeWithValue()
        self.assertEqual(index, ts.index[-6])

        ser = Series([], index=[])
        self.assert_(ser._lastTimeWithValue() is None)
        self.assert_(ser._firstTimeWithValue() is None)
Example No. 14
    def test_categorical_nans(self):
        s = Series(pd.Categorical(list('aaaaabbbcc')))  # 4,3,2,1 (nan)
        s.iloc[1] = np.nan
        result = s.value_counts()
        expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex(
            ['a', 'b', 'c'], categories=['a', 'b', 'c']))
        tm.assert_series_equal(result, expected, check_index_type=True)
        result = s.value_counts(dropna=False)
        expected = pd.Series([
            4, 3, 2, 1
        ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan]))
        tm.assert_series_equal(result, expected, check_index_type=True)

        # out of order
        s = Series(pd.Categorical(
            list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c']))
        s.iloc[1] = np.nan
        result = s.value_counts()
        expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex(
            ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True))
        tm.assert_series_equal(result, expected, check_index_type=True)

        result = s.value_counts(dropna=False)
        expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex(
            ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True))
        tm.assert_series_equal(result, expected, check_index_type=True)
Example No. 15
    def test_value_counts_normalized(self):
        # GH12558
        s = Series([1, 2, np.nan, np.nan, np.nan])
        dtypes = (np.float64, np.object, 'M8[ns]')
        for t in dtypes:
            s_typed = s.astype(t)
            result = s_typed.value_counts(normalize=True, dropna=False)
            expected = Series([0.6, 0.2, 0.2],
                              index=Series([np.nan, 2.0, 1.0], dtype=t))
            tm.assert_series_equal(result, expected)

            result = s_typed.value_counts(normalize=True, dropna=True)
            expected = Series([0.5, 0.5],
                              index=Series([2.0, 1.0], dtype=t))
            tm.assert_series_equal(result, expected)
Example No. 16
    def test_reindex_bool(self):

        # A series other than float, int, string, or object
        ts = self.ts[::2]
        bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)

        # this should work fine
        reindexed_bool = bool_ts.reindex(self.ts.index)

        # if NaNs introduced
        self.assert_(reindexed_bool.dtype == np.object_)

        # NO NaNs introduced
        reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
        self.assert_(reindexed_bool.dtype == np.bool_)
Example No. 17
    def test_groupby_transform(self):
        data = Series(np.arange(9) / 3, index=np.arange(9))

        index = np.arange(9)
        np.random.shuffle(index)
        data = data.reindex(index)

        grouped = data.groupby(lambda x: x // 3)

        transformed = grouped.transform(lambda x: x * x.sum())
        self.assertEqual(transformed[7], 12)

        # corner cases
        self.assertRaises(Exception, grouped.transform,
                          lambda x: x.mean())
Example No. 18
    def test_asfreq(self):
        ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30),
                                         datetime(2009, 11, 30),
                                         datetime(2009, 12, 31)])

        daily_ts = ts.asfreq('WEEKDAY')
        monthly_ts = daily_ts.asfreq('EOM')
        self.assert_(np.array_equal(monthly_ts, ts))

        daily_ts = ts.asfreq('WEEKDAY', fillMethod='pad')
        monthly_ts = daily_ts.asfreq('EOM')
        self.assert_(np.array_equal(monthly_ts, ts))

        daily_ts = ts.asfreq(datetools.bday)
        monthly_ts = daily_ts.asfreq(datetools.bmonthEnd)
        self.assert_(np.array_equal(monthly_ts, ts))
Example No. 19
    def test_merge(self):
        index, data = common.getMixedTypeDict()

        source = Series(data['B'], index=data['C'])
        target = Series(data['C'][:4], index=data['D'][:4])

        merged = target.merge(source)

        for k, v in merged.iteritems():
            self.assertEqual(v, source[target[k]])

        # input could be a dict
        merged = target.merge(source.toDict())

        for k, v in merged.iteritems():
            self.assertEqual(v, source[target[k]])
Example No. 20
def _filter_data(lhs, rhs):
    """
    Cleans the input for single OLS.

    Parameters
    ----------
    lhs: Series
        Dependent variable in the regression.
    rhs: dict, whose values are Series, DataFrame, or dict
        Explanatory variables of the regression.

    Returns
    -------
    Series, DataFrame
        Cleaned lhs and rhs
    """
    if not isinstance(lhs, Series):
        assert(len(lhs) == len(rhs))
        lhs = Series(lhs, index=rhs.index)

    rhs = _combine_rhs(rhs)

    rhs_valid = np.isfinite(rhs.values).sum(1) == len(rhs.columns)

    if not rhs_valid.all():
        pre_filtered_rhs = rhs[rhs_valid]
    else:
        pre_filtered_rhs = rhs

    index = lhs.index + rhs.index
    if not index.equals(rhs.index) or not index.equals(lhs.index):
        rhs = rhs.reindex(index)
        lhs = lhs.reindex(index)

        rhs_valid = np.isfinite(rhs.values).sum(1) == len(rhs.columns)

    lhs_valid = np.isfinite(lhs.values)
    valid = rhs_valid & lhs_valid

    if not valid.all():
        filt_index = rhs.index[valid]
        filtered_rhs = rhs.reindex(filt_index)
        filtered_lhs = lhs.reindex(filt_index)
    else:
        filtered_rhs, filtered_lhs = rhs, lhs

    return filtered_lhs, filtered_rhs, pre_filtered_rhs, index, valid
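A hedged usage sketch; it assumes the surrounding OLS module, in particular its _combine_rhs helper that turns the dict of explanatory variables into a DataFrame:

    import numpy as np
    from pandas import Series

    lhs = Series([1., 2., np.nan, 4.], index=range(4))
    rhs = {'x': Series([1., np.nan, 3., 4.], index=range(4))}
    lhs_f, rhs_f, pre_rhs, index, valid = _filter_data(lhs, rhs)
    # rows 1 and 2 are dropped from lhs_f/rhs_f: each has a
    # non-finite value on one side of the regression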
Example No. 21
    def test_groupby_transform(self):
        data = Series(np.arange(9) / 3, index=np.arange(9))

        index = np.arange(9)
        np.random.shuffle(index)
        data = data.reindex(index)

        grouped = data.groupby(lambda x: x // 3)

        transformed = grouped.transform(lambda x: x * x.sum())
        self.assertEqual(transformed[7], 12)

        transformed = grouped.transform(np.mean)
        for name, group in grouped:
            mean = group.mean()
            for idx in group.index:
                self.assertEqual(transformed[idx], mean)
Example No. 22
    def test_groupby(self):
        data = Series(np.arange(9) / 3, index=np.arange(9))

        index = np.arange(9)
        np.random.shuffle(index)
        data = data.reindex(index)

        grouped = data.groupby(lambda x: x // 3)

        repr(grouped.groups) # nothing else here

        for k, v in grouped:
            self.assertEqual(len(v), 3)

        agged = grouped.aggregate(np.mean)
        self.assertEqual(agged[1], 1)

        assert_series_equal(agged, grouped.agg(np.mean)) # shorthand
        assert_series_equal(agged, grouped.mean())

        assert_series_equal(grouped.agg(np.sum), grouped.sum())


        transformed = grouped.transform(lambda x: x * x.sum())
        self.assertEqual(transformed[7], 12)

        value_grouped = data.groupby(data)
        assert_series_equal(value_grouped.aggregate(np.mean), agged)

        # complex agg
        agged = grouped.aggregate([np.mean, np.std])
        agged = grouped.aggregate({'one' : np.mean,
                                   'two' : np.std})

        group_constants = {
            0 : 10,
            1 : 20,
            2 : 30
        }
        agged = grouped.agg(lambda x: group_constants[x.groupName] + x.mean())
        self.assertEqual(agged[1], 21)

        # corner cases
        self.assertRaises(Exception, grouped._aggregate_named,
                          lambda x: x * 2)
Example No. 23
    def test_order(self):

        ts = self.ts.copy()
        ts[:5] = np.NaN
        vals = ts.values

        result = ts.order()
        self.assert_(np.isnan(result[-5:]).all())
        self.assert_(np.array_equal(result[:-5], np.sort(vals[5:])))

        result = ts.order(missingAtEnd=False)
        self.assert_(np.isnan(result[:5]).all())
        self.assert_(np.array_equal(result[5:], np.sort(vals[5:])))

        # something object-type
        ser = Series(['A', 'B'], [1, 2])
        # no failure
        ser.order()
Example No. 24
    def test_interpolate(self):
        ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index)

        ts_copy = ts.copy()
        ts_copy[5:10] = np.NaN

        linear_interp = ts_copy.interpolate(method='linear')
        self.assert_(np.array_equal(linear_interp, ts))

        ord_ts = Series([d.toordinal() for d in self.ts.index],
                        index=self.ts.index).astype(float)

        ord_ts_copy = ord_ts.copy()
        ord_ts_copy[5:10] = np.NaN

        time_interp = ord_ts_copy.interpolate(method='time')
        self.assert_(np.array_equal(time_interp, ord_ts))

        # try time interpolation on a non-TimeSeries
        self.assertRaises(Exception, self.series.interpolate, method='time')
Example No. 25
    def test_combineFirst(self):
        series = Series(common.makeIntIndex(20).astype(float),
                        index=common.makeIntIndex(20))

        series_copy = series * 2
        series_copy[::2] = np.NaN

        # nothing used from the input
        combined = series.combineFirst(series_copy)

        self.assert_(np.array_equal(combined, series))

        # Holes filled from input
        combined = series_copy.combineFirst(series)
        self.assert_(np.isfinite(combined).all())

        self.assert_(np.array_equal(combined[::2], series[::2]))
        self.assert_(np.array_equal(combined[1::2], series_copy[1::2]))

        # mixed types
        index = common.makeStringIndex(20)
        floats = Series(common.randn(20), index=index)
        strings = Series(common.makeStringIndex(10), index=index[::2])

        combined = strings.combineFirst(floats)

        common.assert_dict_equal(strings, combined, compare_keys=False)
        common.assert_dict_equal(floats[1::2], combined, compare_keys=False)

        # corner case
        s = Series([1., 2, 3], index=[0, 1, 2])
        result = s.combineFirst(Series([], index=[]))
        assert_series_equal(s, result)
Example No. 26
 def p_value(self):
     """Returns the p values."""
     return Series(self._p_value_raw, index=self.beta.index)
Example No. 27
 def test_rank_1d(self):
     self.assertEqual(1, pmath.rank(self.series))
     self.assertEqual(0, pmath.rank(Series(0, self.series.index)))
Example No. 28
 def rank(self):
     return Series(self._rank_raw, index=self._result_index)
Example No. 29
 def nobs(self):
     return Series(self._nobs, index=self._result_index)
Example No. 30
 def test_reindex_bool_pad(self):
     # fail
     ts = self.ts[5:]
     bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
     filled_bool = bool_ts.reindex(self.ts.index, fillMethod='pad')
     self.assert_(isnull(filled_bool[:5]).all())
Example No. 31
def _bins_to_cuts_new(
    x,
    bins,
    right=True,
    labels=None,
    retbins=False,
    precision=3,
    name=None,
    include_lowest=False,
):
    x_is_series = isinstance(x, Series)
    series_index = None

    # Added this line to the original code
    bins = np.array(sorted(list(set(bins))))

    if x_is_series:
        series_index = x.index
        if name is None:
            name = x.name

    x = np.asarray(x)

    side = "left" if right else "right"
    ids = bins.searchsorted(x, side=side)

    if len(algos.unique(bins)) < len(bins):
        raise ValueError("Bin edges must be unique: %s" % repr(bins))

    if include_lowest:
        ids[x == bins[0]] = 1

    na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is not False:
        if labels is None:
            increases = 0
            while True:
                try:
                    levels = _format_levels(bins,
                                            precision,
                                            right=right,
                                            include_lowest=include_lowest)
                except ValueError:
                    increases += 1
                    precision += 1
                    if increases >= 20:
                        raise
                else:
                    break

        else:
            if len(labels) != len(bins) - 1:
                raise ValueError("Bin labels must be one fewer than "
                                 "the number of bin edges")
            levels = labels

        levels = np.asarray(levels, dtype=object)
        np.putmask(ids, na_mask, 0)
        fac = Categorical(ids - 1, levels, ordered=True, fastpath=True)
    else:
        fac = ids - 1
        if has_nas:
            fac = fac.astype(np.float64)
            np.putmask(fac, na_mask, np.nan)

    if x_is_series:
        fac = Series(fac, index=series_index, name=name)

    if not retbins:
        return fac

    return fac, bins
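A hedged usage sketch (it assumes the pandas internals this function relies on in its module: algos, com, Categorical and _format_levels):

    import numpy as np
    from pandas import Series

    x = Series([0.1, 0.5, 1.5, 2.5, np.nan])
    bins = np.array([0., 1., 2., 3.])
    fac, edges = _bins_to_cuts_new(x, bins, retbins=True, include_lowest=True)
    # fac is a Series of interval labels aligned to x's index;
    # the NaN input stays NaN in the result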
Example No. 32
 def resid(self):
     """Returns the residuals."""
     return Series(self._resid_raw[self._valid_obs_labels],
                   index=self._result_index)
Example No. 33
 def r2(self):
     """Returns the r-squared values."""
     return Series(self._r2_raw, index=self._result_index)
Example No. 34
 def forecast_vol(self):
     return Series(self._forecast_vol_raw, index=self._result_index)
Example No. 35
 def forecast_mean(self):
     return Series(self._forecast_mean_raw, index=self._result_index)
Example No. 36
 def df_resid(self):
     """Returns the residual degrees of freedom."""
     return Series(self._df_resid_raw, index=self._result_index)
Example No. 37
 def df_model(self):
     """Returns the model degrees of freedom."""
     return Series(self._df_model_raw, index=self._result_index)
Example No. 38
    def test_warn(self):

        s = Series([1, 2, 3])
        with tm.assert_produces_warning(FutureWarning):
            algos.factorize(s, order='A')
Example No. 39
 def beta(self):
     """Returns the betas in Series form."""
     return Series(self._beta_raw, index=self._x.columns)
Example No. 40
 def y_predict(self):
     """Returns the predicted y values."""
     return Series(self._y_predict_raw[self._valid_obs_labels],
                   index=self._result_index)
Example No. 41
    def test_iloc_getitem_frame(self):
        """ originally from test_frame.py"""
        df = DataFrame(np.random.randn(10, 4),
                       index=range(0, 20, 2),
                       columns=range(0, 8, 2))

        result = df.iloc[2]
        exp = df.ix[4]
        assert_series_equal(result, exp)

        result = df.iloc[2, 2]
        exp = df.ix[4, 4]
        self.assert_(result == exp)

        # slice
        result = df.iloc[4:8]
        expected = df.ix[8:14]
        assert_frame_equal(result, expected)

        result = df.iloc[:, 2:3]
        expected = df.ix[:, 4:5]
        assert_frame_equal(result, expected)

        # list of integers
        result = df.iloc[[0, 1, 3]]
        expected = df.ix[[0, 2, 6]]
        assert_frame_equal(result, expected)

        result = df.iloc[[0, 1, 3], [0, 1]]
        expected = df.ix[[0, 2, 6], [0, 2]]
        assert_frame_equal(result, expected)

        # negative indices
        result = df.iloc[[-1, 1, 3], [-1, 1]]
        expected = df.ix[[18, 2, 6], [6, 2]]
        assert_frame_equal(result, expected)

        # duplicate indices
        result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
        expected = df.ix[[18, 18, 2, 6], [6, 2]]
        assert_frame_equal(result, expected)

        # with index-like
        s = Series(index=range(1, 5))
        result = df.iloc[s.index]
        expected = df.ix[[2, 4, 6, 8]]
        assert_frame_equal(result, expected)

        # out-of-bounds slice
        self.assertRaises(IndexError, df.iloc.__getitem__,
                          tuple([slice(None), slice(1, 5, None)]))
        self.assertRaises(IndexError, df.iloc.__getitem__,
                          tuple([slice(None), slice(-5, 3, None)]))
        self.assertRaises(IndexError, df.iloc.__getitem__,
                          tuple([slice(1, 11, None)]))
        self.assertRaises(IndexError, df.iloc.__getitem__,
                          tuple([slice(-11, 3, None)]))

        # try with labelled frame
        df = DataFrame(np.random.randn(10, 4),
                       index=list('abcdefghij'),
                       columns=list('ABCD'))

        result = df.iloc[1, 1]
        exp = df.ix['b', 'B']
        self.assert_(result == exp)

        result = df.iloc[:, 2:3]
        expected = df.ix[:, ['C']]
        assert_frame_equal(result, expected)

        # negative indexing
        result = df.iloc[-1, -1]
        exp = df.ix['j', 'D']
        self.assert_(result == exp)

        # out-of-bounds exception
        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5]))

        # trying to use a label
        self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D']))
Example No. 42
def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs):
    """
    wrapper function to dispatch to the appropriate window functions
    wraps/unwraps ndarrays for compat

    can be removed when ndarray support is removed
    """
    is_ndarray = isinstance(arg, np.ndarray)
    if is_ndarray:
        if arg.ndim == 1:
            arg = Series(arg)
        elif arg.ndim == 2:
            arg = DataFrame(arg)
        else:
            raise AssertionError("cannot support ndim > 2 for ndarray compat")

        warnings.warn("pd.{dispatch}_{name} is deprecated for ndarrays and "
                      "will be removed "
                      "in a future version".format(dispatch=dispatch,
                                                   name=name),
                      FutureWarning,
                      stacklevel=3)

    # get the functional keywords here
    if func_kw is None:
        func_kw = []
    kwds = {}
    for k in func_kw:
        value = kwargs.pop(k, None)
        if value is not None:
            kwds[k] = value

    # 'how' is a keyword that, if not None, should be in kwds
    how = kwargs.pop('how', None)
    if how is not None:
        kwds['how'] = how

    r = getattr(arg, dispatch)(**kwargs)

    if not is_ndarray:

        # give a helpful deprecation message
        # with copy-pastable arguments
        pargs = ','.join([
            "{a}={b}".format(a=a, b=b) for a, b in kwargs.items()
            if b is not None
        ])
        aargs = ','.join(args)
        if len(aargs):
            aargs += ','

        def f(a, b):
            if is_scalar(b):
                return "{a}={b}".format(a=a, b=b)
            return "{a}=<{b}>".format(a=a, b=type(b).__name__)

        aargs = ','.join([f(a, b) for a, b in kwds.items() if b is not None])
        warnings.warn("pd.{dispatch}_{name} is deprecated for {klass} "
                      "and will be removed in a future version, replace with "
                      "\n\t{klass}.{dispatch}({pargs}).{name}({aargs})".format(
                          klass=type(arg).__name__,
                          pargs=pargs,
                          aargs=aargs,
                          dispatch=dispatch,
                          name=name),
                      FutureWarning,
                      stacklevel=3)

    result = getattr(r, name)(*args, **kwds)

    if is_ndarray:
        result = result.values
    return result
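A hedged usage sketch, assuming a pandas version that still ships this compat shim; for ndarray input it dispatches through Series/DataFrame and emits the FutureWarning built above:

    import numpy as np

    arr = np.arange(10, dtype=float)
    # roughly what the deprecated pd.rolling_mean(arr, window=3) did
    result = ensure_compat('rolling', 'mean', arr, window=3)
    # result is again an ndarray, with NaN in the first two slots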
Example No. 43
 def std_err(self):
     """Returns the standard err values of the betas."""
     return Series(self._std_err_raw, index=self.beta.index)
Example No. 44
 def t_stat(self):
     """Returns the t-stat values of the betas."""
     return Series(self._t_stat_raw, index=self.beta.index)
Example No. 45
    def test_loc_getitem_label_list(self):

        # list of labels
        self.check_result('list lbl',
                          'loc', [0, 2, 4],
                          'ix', [0, 2, 4],
                          typs=['ints'],
                          axes=0)
        self.check_result('list lbl',
                          'loc', [3, 6, 9],
                          'ix', [3, 6, 9],
                          typs=['ints'],
                          axes=1)
        self.check_result('list lbl',
                          'loc', [4, 8, 12],
                          'ix', [4, 8, 12],
                          typs=['ints'],
                          axes=2)
        self.check_result('list lbl',
                          'loc', ['a', 'b', 'd'],
                          'ix', ['a', 'b', 'd'],
                          typs=['labels'],
                          axes=0)
        self.check_result('list lbl',
                          'loc', ['A', 'B', 'C'],
                          'ix', ['A', 'B', 'C'],
                          typs=['labels'],
                          axes=1)
        self.check_result('list lbl',
                          'loc', ['Z', 'Y', 'W'],
                          'ix', ['Z', 'Y', 'W'],
                          typs=['labels'],
                          axes=2)
        self.check_result('list lbl',
                          'loc', [2, 8, 'null'],
                          'ix', [2, 8, 'null'],
                          typs=['mixed'],
                          axes=0)
        self.check_result('list lbl',
                          'loc',
                          [Timestamp('20130102'),
                           Timestamp('20130103')],
                          'ix', [Timestamp('20130102'),
                                 Timestamp('20130103')],
                          typs=['ts'],
                          axes=0)

        # fails
        self.check_result('list lbl',
                          'loc', [0, 1, 2],
                          'indexer', [0, 1, 2],
                          typs=['empty'],
                          fails=KeyError)
        self.check_result('list lbl',
                          'loc', [0, 2, 3],
                          'ix', [0, 2, 3],
                          typs=['ints'],
                          axes=0,
                          fails=KeyError)
        self.check_result('list lbl',
                          'loc', [3, 6, 7],
                          'ix', [3, 6, 9],
                          typs=['ints'],
                          axes=1,
                          fails=KeyError)
        self.check_result('list lbl',
                          'loc', [4, 8, 10],
                          'ix', [4, 8, 12],
                          typs=['ints'],
                          axes=2,
                          fails=KeyError)

        # array like
        self.check_result('array like',
                          'loc',
                          Series(index=[0, 2, 4]).index,
                          'ix', [0, 2, 4],
                          typs=['ints'],
                          axes=0)
        self.check_result('array like',
                          'loc',
                          Series(index=[3, 6, 9]).index,
                          'ix', [3, 6, 9],
                          typs=['ints'],
                          axes=1)
        self.check_result('array like',
                          'loc',
                          Series(index=[4, 8, 12]).index,
                          'ix', [4, 8, 12],
                          typs=['ints'],
                          axes=2)
Example No. 46
 def resid(self):
     """Returns the residuals."""
     return Series(self._resid_raw, index=self._x.index)
Example No. 47
    def _window_time_obs(self):
        window_obs = Series(self._time_obs_count > 0).rolling(self._window, min_periods=1).sum().values

        window_obs[np.isnan(window_obs)] = 0
        return window_obs.astype(int)
Example No. 48
 def y_fitted(self):
     """Returns the fitted y values.  This equals BX."""
     result = Series(self._y_fitted_raw, index=self._y.index)
     return result.reindex(self._y_orig.index)
Example No. 49
 def y_fitted(self):
     """Returns the fitted y values.  This equals BX."""
     result = Series(self._y_fitted_raw, index=self._y.index)
     return result.reindex(self._y_orig.index)
Example No. 50
def test_quantile():
    s = Series(np.random.randn(100))

    result = algos.quantile(s, [0, .25, .5, .75, 1.])
    expected = algos.quantile(s.values, [0, .25, .5, .75, 1.])
    tm.assert_almost_equal(result, expected)
Example No. 51
class IndicatorAnalyst(object):
    """
    指标的统计分析类:
    1)原始数据情况分析
    2)添加不同指标后,原始数据被分成了长度为window的数据集,一种是不重复地分组,另一组是移动分组
    3)对组内的数据进行描述统计分析和整个品种的描述统计分析
    """
    def __init__(self, data_set, indicator=None):
        """
        Args:
            data_set: dict(symbol=DataFrame)或DataFrame, 待分析的数据集是一个以品种名为key,value是DataFrame或者是一个DataFrame
            indicator: Series,指标序列,默认是一个空的Series,可以通过直接设置indicator属性设置,或者在类内编写指标获得
        Notes:
            数据集的长度应当与indicator长度相同,否则会报错
        """
        self.__identify = None  # 识别标签函数对象,目前主要是_group_identify 和 _rolling_identify
        self.__indicator = None  # 当前处理的指标对象
        self.__data = None  # 当前处理的数据集对象
        self.__group = None  # 当前处理的分组对象
        self.__symbol = None  # 当前品种对象
        self.__profit = None  # 当前品种的盈亏序列
        self._data_set = data_set.copy()  # 总体数据集
        self._indicator = Series() if indicator is None else indicator
        self._ind_len = 0  # 当前处理的指标数据行数
        self._group = None
        self._profit_func = {
            FOREX_TYPE: self._forex_profit,
            STOCK_TYPE: self._stock_profit,
            FUTURE_TYPE: self._future_profit
        }

    @property
    def data_set(self):
        return self._data_set

    @property
    def group(self):
        """
        按照条件后分组的对象集合,若输入的数据集是dict,则返回dict,若是DataFrame则返回DataFrame
        """
        return self._group

    @property
    def indicator(self):
        """指标序列"""
        return self._indicator

    @indicator.setter
    def indicator(self, ind):
        """设置指标序列"""
        self._indicator = ind

    def interval_analyst(
        self,
        condition,
        symbol,
        window=200,
        rolling=False,
        profit_mode=True,
        direction=1,
        group_plot=False,
        applied_price="open",
        fig_save_path=None,
    ):
        """
         分析指标满足条件下,在之后的窗口内价格的统计信息
        Args:
            condition: [func]返回, True或False的函数对象;
            symbol:[dict or Symbol], 统计的品种对象;
            window:[int, -1, default 200] 观察窗口的大小,默认是200个bar,当rolling为True时,window取-1表示将满足条件开始的
                点直到最后一个数据归为一组;当rolling为False,window取-1时,表示每一次满足条件区间内的数据分为一组,这种情况下
                每一组的长度不相等。
            rolling: [True, False],窗口是采用滚动模式还是截断分组,默认是每组数据重叠的截断分组;
            profit_mode: [True, False],计算盈利模式
            direction: [1, -1],计算盈利时多空的方向
            group_plot: [bool, default False], 绘制每一组数据的价格,当组数很大时将会绘制的很密集
            applied_price: ["open", "low", "high", "close", default "open"],分析采用的价格
            fig_save_path: [list, str, path] ,保存图片的路径,默认存储在
        Returns:

        """
        if isinstance(self._data_set, dict):
            self._group = {}
            for key in self._data_set:
                self.__data = self._data_set[key]
                self.__indicator = self._indicator[key]
                print(u"{}的{}指标描述性统计:".format(key, self.__indicator.name))
                print(self.__indicator.describe())
                self.__group = self._interval_analyst(condition, window,
                                                      rolling)
                self.__symbol = symbol[key]
                self.group_analyst(profit_mode,
                                   direction=direction,
                                   fig_save_path=self.check_fig_path(
                                       fig_save_path, key),
                                   group_plot=group_plot,
                                   applied_price=applied_price)
                self._group[key] = self.__group

        elif isinstance(self._data_set, DataFrame):
            self.__data = self._data_set
            key = self.__data["symbol"].iat[0]
            self.__indicator = self._indicator
            print(u"{}的{}指标描述性统计:".format(self.__data.iat[0, 5],
                                          self.__indicator.name))
            print(self.__indicator.describe())
            self.__group = self._interval_analyst(condition, window, rolling)
            self.__symbol = symbol
            self.group_analyst(profit_mode,
                               direction=direction,
                               fig_save_path=self.check_fig_path(
                                   fig_save_path, key),
                               group_plot=group_plot,
                               applied_price=applied_price)
            self._group = self.__group

    def _interval_analyst(self, condition, window, rolling):
        """
        分析指标满足条件下,在之后的窗口内价格的统计信息
        Args:
            condition: 返回True或False的函数对象
            window: 观察窗口的大小,默认是200个bar
            rolling: 窗口是采用滚动模式还是截断分组,默认是每组数据重叠的截断分组
        """
        if rolling:
            self.__identify = self._roll_identify
        else:
            self.__identify = self._group_identify

        if isinstance(self.__indicator, Series):
            self._ind_len = len(self.__indicator)
            assert self._ind_len == len(self.__data), u"The indicator must have the same length as the dataset"
        else:
            raise ValueError(u"指标类型输入错误!")
        return self.__identify(condition, window=window)

    def _roll_identify(self, condition, window):
        """将满足条件的行及随后的window个数据识别成一类,并将其下标存储在groups中"""
        groups = {}
        count = 0
        on_state = False
        for i, ind in enumerate(self.__indicator):
            if on_state:
                if condition(ind):
                    continue
                else:
                    on_state = False

            if condition(ind):
                count += 1
                on_state = True
                # unbounded window, i.e. up to the end of the data
                if window == -1:
                    groups[count] = np.arange(i, self._ind_len)
                else:
                    if i + window < self._ind_len:
                        groups[count] = np.arange(i, i + window)
                    else:
                        groups[count] = np.arange(i, self._ind_len)
        return groups

    def _group_identify(self, condition, window, less_drop_num=10):
        """
        识别满足指标条件的行,并按照1到n的标志分组,原始数据添加一列name为指标name的标志数据,当window=-1时,为取满足
        区间内的数据分为一组,且组内数据小于less_drop_num的剔除
        Args:
            condition: 指标的条件
            window: 窗口的大小
            less_drop_num: 窗口内数据太小需要剔除的临界值

        Returns:

        """
        flag_list = [np.nan] * self._ind_len
        flag = 0
        count = 0
        last_position = 0
        on_state = False
        if isinstance(self.__data, DataFrame):
            for i, _ind in enumerate(self.__indicator.values):
                if on_state:

                    # when the window is the span satisfying the condition
                    if window == -1:
                        if not condition(_ind):
                            # drop this sample when the group has fewer than less_drop_num rows
                            if i - last_position < less_drop_num:
                                flag_list[last_position:i] = [np.nan] * (
                                    i - last_position)
                                flag -= 1
                            on_state = False
                        else:
                            flag_list[i] = flag
                    else:
                        if count < window:
                            count += 1
                        else:
                            count = 0
                            on_state = False
                    continue

                if condition(_ind):
                    on_state = True
                    flag += 1
                    # when the window is the span satisfying the condition
                    if window == -1:
                        last_position = i
                        flag_list[i] = flag
                    else:
                        if (i + window) < self._ind_len:
                            flag_list[i:(i + window)] = [flag] * window

        else:
            raise ValueError("数据集的结构必须是DataFrame")
        self.__data.loc[:, self.__indicator.name] = self.__indicator
        self.__data.loc[:, "group_flag"] = flag_list
        return self.__data.groupby("group_flag", as_index=False)

    def group_analyst(self,
                      profit_mode,
                      direction=1,
                      fig_save_path=None,
                      group_plot=False,
                      applied_price="open"):
        """
        数据分组分析,默认分析的是开盘价
        Args:
            profit_mode: [bool, default False],分析盈亏或价格
            direction: [1, -1],计算盈利时多空的方向
            fig_save_path:[list, str, path] ,保存图片的路径
            group_plot: [bool, default False], 绘制每一组数据的价格,当组数很大时将会绘制的很密集
            applied_price: ["open", "low", "high", "close", default "open"],分析采用的价格
        """
        # start the analysis and draw the charts
        print(u"Number of groups: {}".format(len(self.__group)))
        group_analyst = None
        fig, axe = plt.subplots(2, 2)

        g_fig, g_axe = None, None
        if group_plot:
            g_fig, g_axe = plt.subplots()

        fig3 = None
        if isinstance(self.__group, DataFrameGroupBy):
            group_analyst = self._frame_group_analyst(profit_mode, direction,
                                                      group_plot,
                                                      applied_price, g_axe)
            fig3, fig4 = self.group_density()
        elif isinstance(self.__group, dict):
            group_analyst = self._dict_group_analyst(profit_mode, direction,
                                                     group_plot, applied_price,
                                                     g_axe)
            fig3, fig4 = self.group_density()
        group_analyst["max"].plot.hist(ax=axe[(0, 0)],
                                       title=u"最大值分布",
                                       bins=60,
                                       legend=False)
        group_analyst["min"].plot.hist(ax=axe[(0, 1)],
                                       title=u"最小值分布",
                                       bins=60,
                                       legend=False)
        group_analyst["mean"].plot.hist(ax=axe[(1, 0)],
                                        title=u"平均值分布",
                                        bins=60,
                                        legend=False)
        group_analyst["std"].plot.hist(ax=axe[(1, 1)],
                                       title=u"标准差分布",
                                       bins=60,
                                       legend=False)

        if profit_mode:
            fig1, axe1 = plt.subplots(3, 2)
            group_analyst["max"].cumsum().plot(ax=axe1[0, 0],
                                               title=u"潜在的最大盈利变动",
                                               legend=False)
            group_analyst["max"].plot(ax=axe1[0, 1],
                                      title=u"每个样本的最大盈利",
                                      legend=False)
            group_analyst["min"].cumsum().plot(ax=axe1[1, 0],
                                               title=u"潜在的最大亏损变动",
                                               legend=False)
            group_analyst["min"].plot(ax=axe1[1, 1],
                                      title=u"每个样本的最大亏损",
                                      legend=False)
            group_analyst["mean"].cumsum().plot(ax=axe1[2, 0],
                                                title=u"潜在的平均盈亏变动",
                                                legend=False)
            group_analyst["mean"].plot(ax=axe1[2, 1],
                                       title=u"每个样本的平均盈亏",
                                       legend=False)
            fig1.savefig(os.path.join(fig_save_path, u"潜在盈亏分析图.png"))

        fig2, axe2 = plt.subplots(2)
        print(u"达到最大值的所需分钟数的描述统计")
        print(group_analyst["max_arg"].describe())
        group_analyst["max_arg"].plot.hist(ax=axe2[0],
                                           title=u"达到最大值的所需时间(minute)的分布",
                                           bins=60,
                                           legend=False)
        print(u"达到最小值的所需分钟数的描述统计")
        print(group_analyst["min_arg"].describe())
        group_analyst["min_arg"].plot.hist(ax=axe2[1],
                                           title=u"达到最小值的所需时间(minute)的分布",
                                           bins=60,
                                           legend=False)

        self.save_figure(fig_obj=[fig, fig2, fig3, fig4],
                         save_path=fig_save_path,
                         fig_name=[
                             u"per_group_statistics.png",
                             u"time_to_extrema_distribution.png",
                             u"density_evolution_over_time.png",
                             u"statistics_evolution_over_time.png"
                         ])
        if g_fig is not None:
            g_fig.savefig(os.path.join(fig_save_path, u"window_pnl_changes.png"))
        plt.show()

    def _group_apply_func(self,
                          x,
                          _direction=1,
                          my_func=None,
                          arg_func=None,
                          _profit_mode=True,
                          apply_price="open",
                          in_position=1,
                          symbol=None):
        """
        DataFrameGroupBy的具体的apply函数
        Args:
            x: [Series],每一组数据
            _direction: [1, -1],方向
            my_func: [func],Series自带一些统计函数
            arg_func: [func],numpy中的函数
            _profit_mode: [bool, default True],选择分析盈亏还是价格
            apply_price: ["open", "low", "high", "close", default "open"],分析采用的价格
            in_position: [int],计算盈亏时,进场点的位置
            symbol: [Symbol], 品种对象
        Returns:
           返回一个Series
        """
        assert _direction in (1, -1), u"direction must be 1 or -1"
        assert len(x) > in_position, u"each group must be longer than {}".format(in_position)
        # choice of the data to analyse
        if _profit_mode:
            group_data = self._profit_func[symbol.symbol_type](x, symbol,
                                                               _direction,
                                                               apply_price,
                                                               in_position)
        else:
            group_data = x[apply_price]

        if arg_func is None and my_func is not None:
            return Series(my_func(group_data))
        elif arg_func is not None:
            return Series((arg_func(group_data) -
                           x[apply_price].index[in_position]).seconds / 60)
        else:
            return group_data

    @staticmethod
    def _forex_profit(x, symbol, _direction, apply_price, in_position):
        """外汇盈亏的计算"""
        if symbol.exchange_kind is USD_CURRENCY:
            return symbol.size_value * (
                _direction * (x[apply_price] - x[apply_price][in_position]) -
                symbol.slippage)
        elif symbol.exchange_kind is NO_USD_CURRENCY:
            return symbol.size_value * (
                _direction * (x[apply_price] - x[apply_price][in_position]) -
                symbol.slippage) / x[apply_price]
        else:
            # cross-currency pairs are not supported yet
            return None

    @staticmethod
    def _future_profit(x, symbol, _direction, apply_price, in_position):
        """期货盈亏计算"""
        # default both costs to scalars so the subtraction below also
        # works when the corresponding cost rate is zero
        open_cost, close_cost = 0.0, 0.0
        if symbol.open_cost_rate != 0.0:
            open_cost = symbol.open_cost_rate * x[apply_price][
                in_position] * symbol.size_value
        if symbol.close_cost_rate != 0.0:
            close_cost = [
                symbol.close_cost_rate * price * symbol.size_value
                for price in x[apply_price]
            ]
        return symbol.size_value * (
            _direction * (x[apply_price] - x[apply_price][in_position]) -
            symbol.slippage) - close_cost - open_cost

    @staticmethod
    def _stock_profit(x, symbol, _direction, apply_price, in_position):
        close_cost = 0.0  # scalar default so the subtraction below works when close_cost_rate is zero
        if symbol.close_cost_rate != 0.0:
            close_cost = [
                symbol.close_cost_rate * price * symbol.size_value
                for price in x[apply_price]
            ]
        return symbol.size_value * (
            _direction * (x[apply_price] - x[apply_price][in_position]) -
            symbol.slippage) - close_cost

    def _frame_group_analyst(self, _profit_mode, _direction, _group_plot,
                             _applied_price, _axe):
        """使用DataFrameGroupBy类的分组分析"""
        if _profit_mode:
            max_plot = self.__group.apply(self._group_apply_func,
                                          symbol=self.__symbol,
                                          my_func=np.max,
                                          _direction=_direction,
                                          apply_price=_applied_price)
            max_arg = self.__group.apply(self._group_apply_func,
                                         symbol=self.__symbol,
                                         arg_func=np.argmax,
                                         _direction=_direction,
                                         apply_price=_applied_price)
            min_plot = self.__group.apply(self._group_apply_func,
                                          symbol=self.__symbol,
                                          my_func=np.min,
                                          _direction=_direction,
                                          apply_price=_applied_price)
            min_arg = self.__group.apply(self._group_apply_func,
                                         symbol=self.__symbol,
                                         arg_func=np.argmin,
                                         _direction=_direction,
                                         apply_price=_applied_price)
            mean_plot = self.__group.apply(self._group_apply_func,
                                           symbol=self.__symbol,
                                           my_func=np.mean,
                                           _direction=_direction,
                                           apply_price=_applied_price)
            std_plot = self.__group.apply(self._group_apply_func,
                                          symbol=self.__symbol,
                                          my_func=np.std,
                                          _direction=_direction,
                                          apply_price=_applied_price)

            # plot each group's data
            if _group_plot:
                self.__profit = self.__group.apply(self._group_apply_func,
                                                   symbol=self.__symbol,
                                                   _direction=_direction,
                                                   apply_price=_applied_price)
                for g in self.__profit.index.levels[0]:
                    _axe.plot(self.__profit[g].values)
        else:
            max_plot = self.__group.max()[_applied_price]
            max_arg = self.__group.apply(self._group_apply_func,
                                         arg_func=np.argmax,
                                         _profit_mode=False,
                                         _direction=_direction,
                                         apply_price=_applied_price)
            min_plot = self.__group.min()[_applied_price]
            min_arg = self.__group.apply(self._group_apply_func,
                                         arg_func=np.argmin,
                                         _profit_mode=False,
                                         _direction=_direction,
                                         apply_price=_applied_price)
            mean_plot = self.__group.mean()[_applied_price]
            std_plot = self.__group.std()[_applied_price]

        return pd.concat(
            [max_plot, max_arg, min_plot, min_arg, mean_plot, std_plot],
            axis=1,
            keys=["max", "max_arg", "min", "min_arg", "mean", "std"])

    def _dict_group_analyst(self, _profit_mode, _direction, _group_plot,
                            _applied_price, _axe):
        """字典形式的分组分析"""
        max_list, max_arg_list, min_list, min_arg_list, mean_list, std_list = [], [], [], [], [], []
        index_map = {"open": 0, "high": 1, "low": 2, "close": 3}
        index = index_map[_applied_price]
        in_position = 1
        profit_list = []
        top_index = []
        bottom_index = []
        for key in self.__group:
            data_ = self.__data.iloc[self.__group[key], index]
            if _profit_mode:
                profit = self._profit_func[self.__symbol.symbol_type](
                    data_, self.__symbol, _direction, _applied_price,
                    in_position)
                profit_list.extend(profit.values)
                top_index.extend([key] * len(profit.index))
                bottom_index.extend(profit.index.values)
                max_list.append(profit.max())
                max_arg_list.append(
                    (profit.argmax() -
                     profit.index[in_position]).total_seconds() / 60)
                min_list.append(profit.min())
                min_arg_list.append(
                    (profit.argmin() -
                     profit.index[in_position]).total_seconds() / 60)
                mean_list.append(profit.mean())
                std_list.append(profit.std())
                if _group_plot:
                    _axe.plot(profit.values)
            else:
                max_list.append(data_.max())
                max_arg_list.append(
                    (data_.argmax() -
                     data_.index[in_position]).total_seconds() / 60)
                min_list.append(data_.min())
                min_arg_list.append(
                    (data_.argmin() -
                     data_.index[in_position]).total_seconds() / 60)
                mean_list.append(data_.mean())
                std_list.append(data_.std())
        index = pd.MultiIndex.from_arrays([top_index, bottom_index],
                                          names=[None, 'date'])
        self.__profit = Series(profit_list, index=index)
        return DataFrame({
            "max": max_list,
            "max_arg": max_arg_list,
            "min": min_list,
            "min_arg": min_arg_list,
            "mean": mean_list,
            "std": std_list
        })

    def group_density(self, bin_num=40, window=200, plot_surface=True):
        """绘制每组数据的概率密度随时间的变化图"""
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        fig_1, ax_1 = plt.subplots(3)
        max_profit = self.__profit.max()
        min_profit = self.__profit.min()
        g_profit = self.__profit.groupby(level=0)
        max_list, min_list, mean_list = [], [], []
        xs = []
        ys = []
        zs = []
        for i in range(window):
            max_list.append(g_profit.nth(i).max())
            min_list.append(g_profit.nth(i).min())
            mean_list.append(g_profit.nth(i).mean())
            hist, bins = np.histogram(g_profit.nth(i).values,
                                      bins=np.linspace(min_profit, max_profit,
                                                       bin_num),
                                      density=True)
            xs.append(bins[:-1])
            ys.append(i * np.ones(bin_num - 1))
            zs.append(hist * np.diff(bins))
            if not plot_surface:
                ax.plot(xs[-1], ys[-1], zs=zs[-1])

        ax_1[0].plot(max_list)
        ax_1[0].set_title(u"Evolution of the maximum over time")
        ax_1[1].plot(min_list)
        ax_1[1].set_title(u"Evolution of the minimum over time")
        ax_1[2].plot(mean_list)
        ax_1[2].set_title(u"Evolution of the mean over time")

        if plot_surface:
            surf = ax.plot_surface(xs,
                                   ys,
                                   zs,
                                   rstride=1,
                                   cstride=1,
                                   cmap=cm.coolwarm,
                                   linewidth=0,
                                   antialiased=False)
            ax.set_zlim(0, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter('% .02f'))
            fig.colorbar(surf, shrink=0.5, aspect=5)
        return fig, fig_1

    def save_group_data(self, file_path, file_patch):
        """保存分组数据"""
        for k in self._data_set:
            self._data_set[k].to_csv(
                os.path.join(file_path,
                             k.lower() + file_patch))

    @staticmethod
    def save_figure(fig_obj, save_path, fig_name):
        """保存图片"""
        for f, n in zip(fig_obj, fig_name):
            f.savefig(os.path.join(save_path, n))

    @staticmethod
    def check_fig_path(path, dir_name):
        """检查存储路径是否合法"""
        if path is None:
            path = os.path.join(os.getcwd(), "analyst_result", dir_name)
        else:
            path = os.path.join(path, "analyst_result", dir_name)
        if not os.path.exists(path):
            os.makedirs(path)
        return path
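A hedged usage sketch; Symbol, FOREX_TYPE and the other framework objects are assumptions here, as is the shape of the OHLC data (note the DataFrame branch of interval_analyst also reads a 'symbol' column for labelling):

    # bars: DataFrame with 'open'/'high'/'low'/'close' (plus 'symbol')
    # columns and a DatetimeIndex; my_symbol: a framework Symbol object
    analyst = IndicatorAnalyst(data_set=bars)
    analyst.indicator = bars['close'].rolling(20).mean()  # same length as bars
    analyst.interval_analyst(condition=lambda v: v > 1.10,
                             symbol=my_symbol,
                             window=200,
                             rolling=False,
                             profit_mode=True)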
Example No. 52
    def test_iloc_mask(self):

        # GH 3631, iloc with a mask (of a series) should raise
        df = DataFrame(range(5), list('ABCDE'), columns=['a'])
        mask = (df.a % 2 == 0)
        self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask]))
        mask.index = range(len(mask))
        self.assertRaises(NotImplementedError, df.iloc.__getitem__,
                          tuple([mask]))

        # ndarray ok
        result = df.iloc[np.array([True] * len(mask), dtype=bool)]
        assert_frame_equal(result, df)

        # the possibilities
        locs = np.arange(4)
        nums = 2**locs
        reps = map(bin, nums)
        df = DataFrame({'locs': locs, 'nums': nums}, reps)

        expected = {
            (None, ''):
            '0b1100',
            (None, '.loc'):
            '0b1100',
            (None, '.iloc'):
            '0b1100',
            ('index', ''):
            '0b11',
            ('index', '.loc'):
            '0b11',
            ('index', '.iloc'):
            'iLocation based boolean indexing cannot use an indexable as a mask',
            ('locs', ''):
            'Unalignable boolean Series key provided',
            ('locs', '.loc'):
            'Unalignable boolean Series key provided',
            ('locs', '.iloc'):
            'iLocation based boolean indexing on an integer type is not available',
        }

        import warnings
        warnings.filterwarnings(action='ignore', category=UserWarning)
        result = dict()
        for idx in [None, 'index', 'locs']:
            mask = (df.nums > 2).values
            if idx:
                mask = Series(mask, list(reversed(getattr(df, idx))))
            for method in ['', '.loc', '.iloc']:
                try:
                    if method:
                        accessor = getattr(df, method[1:])
                    else:
                        accessor = df
                    ans = str(bin(accessor[mask]['nums'].sum()))
                except Exception as e:
                    ans = str(e)

                key = tuple([idx, method])
                r = expected.get(key)
                if r != ans:
                    raise AssertionError(
                        "[%s] does not match [%s], received [%s]" %
                        (key, ans, r))
Example No. 53
    def test_partial_setting(self):

        # GH2578, allow ix and friends to partially set

        ### series ###
        s_orig = Series([1,2,3])

        s = s_orig.copy()
        s[5] = 5
        expected = Series([1,2,3,5],index=[0,1,2,5])
        assert_series_equal(s,expected)

        s = s_orig.copy()
        s.loc[5] = 5
        expected = Series([1,2,3,5],index=[0,1,2,5])
        assert_series_equal(s,expected)

        s = s_orig.copy()
        s[5] = 5.
        expected = Series([1,2,3,5.],index=[0,1,2,5])
        assert_series_equal(s,expected)

        s = s_orig.copy()
        s.loc[5] = 5.
        expected = Series([1,2,3,5.],index=[0,1,2,5])
        assert_series_equal(s,expected)

        # iloc/iat raise
        s = s_orig.copy()
        def f():
            s.iloc[3] = 5.
        self.assertRaises(IndexError, f)
        def f():
            s.iat[3] = 5.
        self.assertRaises(IndexError, f)

        ### frame ###

        df_orig = DataFrame(np.arange(6).reshape(3,2),columns=['A','B'])

        # iloc/iat raise
        df = df_orig.copy()
        def f():
            df.iloc[4,2] = 5.
        self.assertRaises(IndexError, f)
        def f():
            df.iat[4,2] = 5.
        self.assertRaises(IndexError, f)

        # row setting where it exists
        expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] }))
        df = df_orig.copy()
        df.iloc[1] = df.iloc[2]
        assert_frame_equal(df,expected)

        expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] }))
        df = df_orig.copy()
        df.loc[1] = df.loc[2]
        assert_frame_equal(df,expected)

        expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] }),dtype='float64')
        df = df_orig.copy()
        df.loc[3] = df.loc[2]
        assert_frame_equal(df,expected)

        # single dtype frame, overwrite
        expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : [0,2,4] }))
        df = df_orig.copy()
        df.ix[:,'B'] = df.ix[:,'A']
        assert_frame_equal(df,expected)

        # mixed dtype frame, overwrite
        expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0.,2.,4.]) }))
        df = df_orig.copy()
        df['B'] = df['B'].astype(np.float64)
        df.ix[:,'B'] = df.ix[:,'A']
        assert_frame_equal(df,expected)

        # single dtype frame, partial setting
        expected = df_orig.copy()
        expected['C'] = df['A'].astype(np.float64)
        df = df_orig.copy()
        df.ix[:,'C'] = df.ix[:,'A']
        assert_frame_equal(df,expected)

        # mixed frame, partial setting
        expected = df_orig.copy()
        expected['C'] = df['A'].astype(np.float64)
        df = df_orig.copy()
        df.ix[:,'C'] = df.ix[:,'A']
        assert_frame_equal(df,expected)

        ### panel ###

        # panel setting via item
        p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64')
        expected = p_orig.copy()
        expected['Item3'] = expected['Item1']
        p = p_orig.copy()
        p.loc['Item3'] = p['Item1']
        assert_panel_equal(p,expected)

        # panel with aligned series
        expected = p_orig.copy()
        expected = expected.transpose(2,1,0)
        expected['C'] = DataFrame({ 'Item1' : [30,30,30,30], 'Item2' : [32,32,32,32] },index=p_orig.major_axis)
        expected = expected.transpose(2,1,0)
        p = p_orig.copy()
        p.loc[:,:,'C'] = Series([30,32],index=p_orig.items)
        assert_panel_equal(p,expected)
Example No. 54
    def setUp(self):
        self.ts = common.makeTimeSeries()
        self.series = common.makeStringSeries()
        self.objSeries = common.makeObjectSeries()

        self.empty = Series([], index=[])
Example No. 55
 def rmse(self):
     """Returns the rmse values."""
     return Series(self._rmse_raw, index=self._result_index)
Example No. 56
 def _make_result(self, result):
     return Series(result, index=self._cols)
Example No. 57
 def y_fitted(self):
     """Returns the fitted y values."""
     return Series(self._y_fitted_raw[self._valid_obs_labels],
                   index=self._result_index)
Example No. 58
    def test_iloc_getitem_array(self):

        # array like
        s = Series(index=range(1,4))
        self.check_result('array like', 'iloc', s.index, 'ix', { 0 : [2,4,6], 1 : [3,6,9], 2: [4,8,12] }, typs = ['ints'])
Example No. 59
    def r2_adj(self):
        """Returns the r-squared adjusted values."""
        index = self.r2.index

        return Series(self._r2_adj_raw, index=index)
Example No. 60
def makeSeries():
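    # N and dateRange are presumably module-level fixtures (an int length
    # and a DatetimeIndex) defined elsewhere in this test module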
    return Series(np.random.randn(N), index=dateRange)