Exemplo n.º 1
0
    def create(cls) -> None:
        """Build the pivot and ``to_pandas`` fixtures and save them in ``cls._store``.

        Four entries are written:

        * ``pivot_sf``: a three-column frame (``group``, ``group_unique``,
          ``data``) where ``data`` holds random floats.
        * ``pivot_df``: the same frame converted with ``to_pandas()``.
        * ``to_pandas_a``: a 75 x 75000 frame filled with NaN.
        * ``to_pandas_b``: a 75-row ``FrameGO`` of 50000 NaN columns extended
          with 50000 columns of zeros.
        """
        # Maps a group size to the number of groups that have that size.
        groups_per_size = {
            5: 2_000,
            6: 3_500,
            7: 1_000,
            8: 2_000,
            9: 5_000,
            10: 7_625,
            15: 5_000,
            20: 5_000,
            25: 3_250,
            50: 2_625,
        }

        # One integer id per group.
        groups = np.arange(sum(groups_per_size.values()))

        # Size of each group, positioned by group id.
        group_sizes = [
            size
            for size, count in groups_per_size.items()
            for _ in range(count)
        ]

        # Each group id repeated once per member of that group.
        group_column = np.concatenate(
            [np.full(group_sizes[group], group) for group in groups])
        # A counter running from 0 to size - 1, restarting for every group.
        unique_column = np.concatenate(
            [np.arange(group_sizes[group]) for group in groups])

        group_values = sf.Series(group_column, name='group')
        group_unique_values = sf.Series(unique_column, name='group_unique')
        value_values = sf.Series(np.random.random(len(group_values)),
                                 name='data')

        pivot_frame = sf.Frame.from_concat(
            (group_values, group_unique_values, value_values),
            axis=1,
        )
        cls._store['pivot_sf'] = pivot_frame
        cls._store['pivot_df'] = pivot_frame.to_pandas()

        cls._store['to_pandas_a'] = sf.Frame.from_element(
            np.nan,
            index=range(75),
            columns=range(75000),
        )

        # Two blocks with different fill values, joined column-wise.
        nan_block = sf.FrameGO.from_element(
            np.nan,
            index=range(75),
            columns=range(50000),
        )
        zero_block = sf.FrameGO.from_element(
            0,
            index=range(75),
            columns=range(50000, 100000),
        )
        nan_block.extend(zero_block)  #type: ignore
        cls._store['to_pandas_b'] = nan_block
Exemplo n.º 2
0
    def init_new_session(self, event, prob):
        """Queue a fresh summoning session, or record the outcome if none starts.

        When the summoner declines a new session for ``event.targets_pulled``,
        the 'quit summoning event' callback fires and the outcome is pushed.
        Otherwise a new ``SessionState`` is pushed whose summon counts are 0
        and presences are True for every member of ``Colors``.
        """
        if self.summoner.should_start_new_session(event.targets_pulled):
            tier = event.dry_streak // SUMMONS_PER_SESSION
            no_summons = sf.Series(0, index=tuple(Colors))
            all_present = sf.Series(True, index=tuple(Colors))
            fresh_session = SessionState(
                prob_tier=tier,
                stone_summons=no_summons,
                stone_presences=all_present,
            )
            self.push_state(event, fresh_session, prob)
        else:
            self.callback('quit summoning event')
            self.push_outcome(event, prob)
Exemplo n.º 3
0
 def test_series_dropna_b(self):
     # An all-NaN Series on a hierarchical index: dropna() is expected to
     # leave nothing behind while returning the same class of object.
     hierarchy = sf.IndexHierarchy.from_product(['A', 'B'], [1, 2])
     all_nan = sf.Series(np.nan, index=hierarchy)

     dropped = all_nan.dropna()

     self.assertEqual(len(dropped), 0)
     self.assertEqual(all_nan.__class__, dropped.__class__)
Exemplo n.º 4
0
def get_sample_frame_mixed_string_index(
        size: int = 10000,
        columns: int = 100) -> tp.Tuple[pd.DataFrame, sf.FrameGO, np.ndarray]:
    '''Build matching Pandas, StaticFrame and NumPy samples with string labels.

    Row and column labels are four-letter strings. Column values come from
    ``_typed_array``, with the requested type cycling through float, int,
    object and bool by column position.

    Args:
        size: number of rows; must not exceed the number of available labels.
        columns: number of columns.

    Returns:
        A ``(pdf, sff, npf)`` tuple: the Pandas frame, the StaticFrame
        ``FrameGO``, and the ``values`` array of the ``FrameGO``.
    '''
    # 26 choose 4 yields 14950 distinct labels
    label_pool = [
        ''.join(chars)
        for chars in it.combinations(string.ascii_lowercase, 4)
    ]
    assert size <= len(label_pool)

    index = label_pool[:size]
    column_labels = label_pool[:columns]
    type_cycle = (float, int, object, bool)

    def column_values(position):
        # type is picked round-robin; the position doubles as the shift
        return _typed_array(type_cycle[position % 4],
                            size=size,
                            shift=position)

    # StaticFrame: grow the frame one column at a time
    sff = sf.FrameGO(index=index)
    for position, label in enumerate(column_labels):
        sff[label] = sf.Series(column_values(position), index=index)

    npf = sff.values

    # Pandas: the same construction, column by column
    pdf = pd.DataFrame(index=index)
    for position, label in enumerate(column_labels):
        pdf[label] = pd.Series(column_values(position), index=index)

    return pdf, sff, npf
Exemplo n.º 5
0
    def test_display_float_scientific_b(self) -> None:
        # Complex values large enough to be displayed in scientific notation.
        s1 = sf.Series([3.1j, 5.2j])**40

        def rows_for(**options):
            # render the series without type colouring, plus any overrides
            config = sf.DisplayConfig(type_color=False, **options)
            return s1.display(config).to_rows()

        # default complex scientific format
        expected_default = [
            '<Series>',
            '<Index>',
            '0        4.51e+19+0.00e+00j',
            '1        4.37e+28+0.00e+00j',
            '<int64>  <complex128>',
        ]
        self.assertEqual(rows_for(), expected_default)

        # a non-default, non-scientific format produces truncated values
        expected_fixed = [
            '<Series>',
            '<Index>',
            '0        45130251461102338...',
            '1        43665028242109283...',
            '<int64>  <complex128>',
        ]
        self.assertEqual(
            rows_for(value_format_complex_scientific='{:f}'),
            expected_fixed)

        # a custom scientific format with one decimal place
        expected_short = [
            '<Series>',
            '<Index>',
            '0        4.5e+19+0.0e+00j',
            '1        4.4e+28+0.0e+00j',
            '<int64>  <complex128>',
        ]
        self.assertEqual(
            rows_for(value_format_complex_scientific='{:.1e}'),
            expected_short)
Exemplo n.º 6
0
    def test_display_float_scientific_a(self) -> None:
        # Float values large enough to be displayed in scientific notation.
        s1 = sf.Series([3.1, 5.2])**40

        def rows_for(**options):
            # render the series without type colouring, plus any overrides
            config = sf.DisplayConfig(type_color=False, **options)
            return s1.display(config).to_rows()

        # default float scientific format
        expected_default = [
            '<Series>',
            '<Index>',
            '0        4.51302515e+19',
            '1        4.36650282e+28',
            '<int64>  <float64>',
        ]
        self.assertEqual(rows_for(), expected_default)

        # a non-default, non-scientific format produces truncated values
        expected_fixed = [
            '<Series>',
            '<Index>',
            '0        45130251461102272...',
            '1        43665028242109266...',
            '<int64>  <float64>',
        ]
        self.assertEqual(
            rows_for(value_format_float_scientific='{:f}'),
            expected_fixed)

        # a custom scientific format with two decimal places
        expected_short = [
            '<Series>',
            '<Index>',
            '0        4.51e+19',
            '1        4.37e+28',
            '<int64>  <float64>',
        ]
        self.assertEqual(
            rows_for(value_format_float_scientific='{:.2e}'),
            expected_short)
Exemplo n.º 7
0
def get_sample_series_float(
        size: int = 10000) -> tp.Tuple[pd.Series, sf.Series, np.ndarray]:
    """Return Pandas, StaticFrame and NumPy float samples built from one array.

    The array holds ``0, .001, .002, ...`` with every position from
    ``size // 2`` onward replaced by NaN.
    """
    values = np.arange(size) * .001
    midpoint = size // 2
    values[midpoint:] = np.nan
    return pd.Series(values), sf.Series(values), values
Exemplo n.º 8
0
def get_sample_series_objstr(size=10000):
    """Return Pandas, StaticFrame and NumPy samples of mixed objects and strings.

    A five-element pattern (None, an int, a float, NaN and a string) is
    repeated ``int(size / 5)`` times to form the source array.
    """
    pattern = [None, 3, 0.123, np.nan, 'str']
    repeats = int(size / len(pattern))
    a1 = np.array(pattern * repeats)
    return pd.Series(a1), sf.Series(a1), a1
Exemplo n.º 9
0
 def sf(cls):
     # Grow a FrameGO on a 26 x 26 letter hierarchy: each of 100 columns is a
     # constant Series covering a six-label window of the index.
     outer_letters = list(string.ascii_lowercase)
     inner_letters = list(string.ascii_lowercase)
     index = sf.IndexHierarchy.from_product(outer_letters, inner_letters)

     f1 = sf.FrameGO(index=index)
     for col in range(100):
         f1[col] = sf.Series(col * .1, index=index[col:col + 6])

     # sanity check on the grand total of the assembled frame
     grand_total = f1.sum().sum()
     assert grand_total == 2970.0
Exemplo n.º 10
0
def get_sample_series_obj(
        size: int = 10000) -> tp.Tuple[pd.Series, sf.Series, np.ndarray]:
    """Return Pandas, StaticFrame and NumPy samples of mixed objects.

    A four-element pattern (None, an int, a float and NaN) is repeated
    ``int(size / 4)`` times to form the source array.
    """
    pattern = [None, 3, 0.123, np.nan]
    repeats = int(size / len(pattern))
    a1 = np.array(pattern * repeats)
    return pd.Series(a1), sf.Series(a1), a1
Exemplo n.º 11
0
def get_sample_series_string_index_float_values(size=10000):
    """Return Pandas, StaticFrame and NumPy float samples with hash-string labels.

    Values are ``0, .001, .002, ...`` with every position from ``size // 2``
    onward replaced by NaN. Labels are SHA-224 hex digests of the positions.
    """
    a1 = np.arange(size) * .001
    a1[size // 2:] = np.nan

    def label_for(position):
        # hex digest of the position's decimal string
        return hashlib.sha224(str(position).encode('utf-8')).hexdigest()

    index = list(map(label_for, range(size)))

    pds = pd.Series(a1, index=index)
    sfs = sf.Series(a1, index=index)
    return pds, sfs, a1
Exemplo n.º 12
0
    def test_series_loc_extract_d(self):
        labels = (
            ('a', 'a'),
            ('a', 'b'),
            ('b', 'a'),
            ('b', 'b'),
            ('b', 'c'),
        )
        s = sf.Series(range(5), index=sf.IndexHierarchy.from_labels(labels))

        # Leaf loc selection must be terminal: a slice or a list among the
        # key components is expected to raise.
        rejected_keys = (
            ('a', slice(None)),
            (['a', 'b'], 'b'),
        )
        for key in rejected_keys:
            with self.assertRaises(RuntimeError):
                s.loc[key]
Exemplo n.º 13
0
    def test_series_min_max_b(self):

        def check_bounds(series):
            self.assertEqual(series.min(), 'a')
            self.assertEqual(series.max(), 'c')

        # strings stored with an explicit object dtype
        check_bounds(Series(list('abc'), dtype=object))

        # the same result is expected from the default (character) array
        check_bounds(sf.Series(list('abc')))
Exemplo n.º 14
0
    def test_series_min_max_a(self):
        # integers only
        s1 = Series([1, 3, 4, 0])
        for reducer, expected in ((s1.min, 0), (s1.max, 4)):
            self.assertEqual(reducer(), expected)

        # with None and NaN present: the default call yields the numeric
        # extreme, while skipna=False yields NaN
        s2 = sf.Series([-1, 4, None, np.nan])
        for reducer, expected in ((s2.min, -1), (s2.max, 4)):
            self.assertEqual(reducer(), expected)
            self.assertTrue(np.isnan(reducer(skipna=False)))
Exemplo n.º 15
0
    def test_series_loc_extract_c(self):
        labels = (
            ('a', 'a'),
            ('a', 'b'),
            ('b', 'a'),
            ('b', 'b'),
            ('b', 'c'),
        )
        s = sf.Series(range(5), index=sf.IndexHierarchy.from_labels(labels))

        # only one label has 'c' at the inner level; the result is still a
        # Series, holding that single pair
        inner_c = s.loc[sf.HLoc[:, 'c']]
        self.assertEqual(inner_c.__class__, s.__class__)
        self.assertEqual(inner_c.to_pairs(), ((('b', 'c'), 4), ))

        # two labels have 'a' at the inner level
        inner_a = s.loc[sf.HLoc[:, 'a']]
        self.assertEqual(inner_a.to_pairs(),
                         ((('a', 'a'), 0), (('b', 'a'), 2)))
Exemplo n.º 16
0
    def color_count_probs(self, prob_tier):
        """Return the probability of each stone-count combination for a tier.

        Every row of ``stone_combinations`` is scored with
        ``multinomial_prob`` against the colour-pool probabilities of
        ``prob_tier``. The result is the 'probability' column, indexed by a
        hierarchy built from the ``Colors`` columns.
        """
        color_probs = self.colorpool_probs(prob_tier)

        row_probs = []
        for stone_counts in stone_combinations.iter_series(axis=1):
            row_probs.append(multinomial_prob(stone_counts, color_probs))
        probs = sf.Series(row_probs, name='probability')
        assert probs.sum() == 1

        # attach the probabilities, then move the colour columns into the index
        combined = sf.Frame.from_concat([stone_combinations, probs], axis=1)
        indexed = combined.set_index_hierarchy(tuple(Colors), drop=True)
        return indexed['probability']
Exemplo n.º 17
0
def get_series_float_h2d_str_index(size=1000):
    '''
    Get Pandas and StaticFrame float Series on a two-level string index.

    The outer level has ``int(size / 10)`` labels and the inner level has
    ``size`` labels; the index is the product of the two levels.
    '''
    labels = [
        ''.join(chars)
        for chars in it.combinations(string.ascii_lowercase, 4)
    ]
    outer = labels[:int(size / 10)]
    inner = labels[:size]
    values = np.arange(len(outer) * len(inner)) * .001

    sfs = sf.Series(values, index=sf.IndexHierarchy.from_product(outer, inner))
    pds = pd.Series(values, index=pd.MultiIndex.from_product((outer, inner)))
    return pds, sfs
Exemplo n.º 18
0
    def test_series_iter_group_index_b(self):
        # Group a Series on a two-level (shape, color) index by each level in
        # turn and check the per-group sums.
        colors = ('red', 'green')
        shapes = ('square', 'circle', 'triangle')
        s1 = sf.Series(range(6),
                       index=sf.IndexHierarchy.from_product(shapes, colors))

        post = tuple(s1.iter_group_index(0))
        # assertEqual is required here: assertTrue(len(post), 3) would treat 3
        # as the failure message and pass for any non-empty result.
        self.assertEqual(len(post), 3)

        # grouped by the outer level (shape)
        self.assertEqual(
            s1.iter_group_index(0).apply(np.sum).to_pairs(),
            (('circle', 5), ('square', 1), ('triangle', 9)))

        # grouped by the inner level (color)
        self.assertEqual(
            s1.iter_group_index(1).apply(np.sum).to_pairs(),
            (('green', 9), ('red', 6)))
Exemplo n.º 19
0
    def test_series_iter_group_index_c(self):
        # Group a Series on a three-level (shape, color, texture) index by the
        # first and last levels together and check the per-group sums.
        colors = ('red', 'green')
        shapes = ('square', 'circle', 'triangle')
        textures = ('smooth', 'rough')

        s1 = sf.Series(range(12),
                       index=sf.IndexHierarchy.from_product(
                           shapes, colors, textures))

        post = tuple(s1.iter_group_index([0, 2]))
        # assertEqual is required here: assertTrue(len(post), 6) would treat 6
        # as the failure message and pass for any non-empty result.
        self.assertEqual(len(post), 6)

        self.assertEqual(
            s1.iter_group_index([0, 2]).apply(np.sum).to_pairs(),
            ((('circle', 'rough'), 12), (('circle', 'smooth'), 10),
             (('square', 'rough'), 4), (('square', 'smooth'), 2),
             (('triangle', 'rough'), 20), (('triangle', 'smooth'), 18)))
Exemplo n.º 20
0
    def to_sf(year: int = 2018) -> sf.Series:
        """Collect wave height and period for every buoy into one Series.

        Each buoy in ``BUOYS`` is loaded with ``BuoyLoader.buoy_to_sf`` for
        ``year``. Every row contributes two values, labelled
        ``(station id, datetime, attribute)``.

        Args:
            year: the year passed to ``BuoyLoader.buoy_to_sf``.

        Returns:
            A Series on a three-level hierarchical index whose middle level is
            built with ``sf.IndexMinute``.
        """
        # The two measurements taken from each row; fixed for the whole run.
        attrs = (BuoyLoader.FIELD_WAVE_HEIGHT, BuoyLoader.FIELD_WAVE_PERIOD)

        labels = []
        values = []

        for buoy in BUOYS:
            f = BuoyLoader.buoy_to_sf(buoy, year)
            for row in f.iter_series(1):
                # station and timestamp are shared by both labels of this row
                station_id = row[BuoyLoader.FIELD_STATION_ID]
                timestamp = row[BuoyLoader.FIELD_DATETIME]
                for attr in attrs:
                    labels.append((station_id, timestamp, attr))
                    values.append(row[attr])

        index = sf.IndexHierarchy.from_labels(
            labels, index_constructors=(sf.Index, sf.IndexMinute, sf.Index))

        return sf.Series(values, index=index)
Exemplo n.º 21
0
    def branch_event(self, event, session, prob, stone_choice):
        """Push every follow-up state reachable by summoning ``stone_choice``.

        The pool probabilities for the session's tier are restricted to
        ``stone_choice`` and renormalised to sum to one. For each star pool, a
        new ``EventState`` is pushed with the combined probability: one state
        when the (pool, colour) pair is not among the summoner's targets, or
        two states (target pulled / not pulled) when it is.
        """
        orbs_spent = (event.orbs_spent +
                      stone_cost(session.stone_summons.sum() - 1))

        # probabilities of each star pool, given the chosen stone colour
        tier_probs = self.event_details.pool_probs(session.prob_tier)
        choice_starpool_probs = (
            tier_probs[sf.HLoc[:, stone_choice]].reindex_drop_level(-1))
        choice_starpool_probs = (choice_starpool_probs /
                                 choice_starpool_probs.sum())

        for starpool, subprob in choice_starpool_probs.iter_element_items():
            total_prob = prob * subprob

            # a five-star pull resets the dry streak; anything else extends it
            dry_streak = (0 if starpool.star_rating == StarRatings.x5_STAR
                          else event.dry_streak + 1)

            if (starpool, stone_choice) in self.summoner.targets.index:
                # one more pull recorded against this (pool, colour) pair
                single_pull = sf.Series(
                    [1],
                    index=sf.IndexHierarchy.from_labels([
                        (starpool, stone_choice)
                    ]))
                targets_pulled_success = (
                    event.targets_pulled + single_pull.reindex(
                        event.targets_pulled.index, fill_value=0))

                # chance that the pulled unit is one of the targets
                prob_success = Fraction(
                    int(self.summoner.targets[starpool, stone_choice]),
                    int(self.event_details.pool_counts[starpool,
                                                       stone_choice]))

                pulls = ((targets_pulled_success, prob_success),
                         (event.targets_pulled, 1 - prob_success))
            else:
                pulls = ((event.targets_pulled, 1), )

            for targets_pulled, subsubprob in pulls:
                self.push_state(
                    EventState(orbs_spent, dry_streak, targets_pulled),
                    session,
                    total_prob * subsubprob)
Exemplo n.º 22
0
 def sf() -> None:
     # Construct a Series from the 'npa_obj_10k' sample, labelled with the
     # leading 'label_str' entries; the result itself is not used.
     values = SampleData.get('npa_obj_10k')
     labels = SampleData.get('label_str')[:len(values)]
     post = sf.Series(values, index=labels)
Exemplo n.º 23
0
 def sf(cls):
     # Grow a FrameGO on cls._index: each of 100 columns is a constant Series
     # covering a twenty-label window of that index.
     f1 = sf.FrameGO(index=cls._index)
     for col in range(100):
         f1[col] = sf.Series(col * .1, index=cls._index[col:col + 20])

     # sanity check on the grand total of the assembled frame
     grand_total = f1.sum().sum()
     assert grand_total == 9900.0
Exemplo n.º 24
0
 def test_series_dropna_c(self):
     # NaN at every second position of a hierarchical Series: dropna() is
     # expected to keep only the two labelled numeric values.
     hierarchy = sf.IndexHierarchy.from_product(['A', 'B'], [1, 2])
     s1 = sf.Series([1, np.nan, 2, np.nan], index=hierarchy)

     remaining = s1.dropna().to_pairs()

     self.assertEqual(remaining, ((('A', 1), 1.0), (('B', 1), 2.0)))
Exemplo n.º 25
0
 def test_series_sum_b(self):

     def check_sum(series):
         self.assertEqual(series.sum(), 'abc')

     # strings stored with an explicit object dtype
     check_sum(Series(list('abc'), dtype=object))

     # the same result is expected from the default (character) array
     check_sum(sf.Series(list('abc')))
Exemplo n.º 26
0
def get_sample_series_float(size=10000):
    """Return Pandas, StaticFrame and NumPy float samples built from one array.

    The array holds ``0, .001, .002, ...`` with every position from
    ``size // 2`` onward replaced by NaN.
    """
    values = np.arange(size) * .001
    midpoint = size // 2
    values[midpoint:] = np.nan
    return pd.Series(values), sf.Series(values), values