Пример #1
0
    def test_eng_float_formatter(self):
        """Render a float column under each engineering-format setting.

        Covers the default exponent form, the SI-prefix form
        (``use_eng_prefix=True``) and the zero-decimal form (``accuracy=0``).
        """
        df = DataFrame({'A' : [1.41, 141., 14100, 1410000.]})

        # The format is installed through ``fmt`` and undone with
        # ``fmt.reset_printoptions()``; resetting in ``finally`` makes sure a
        # failing assertion does not skip the reset.
        try:
            fmt.set_eng_float_format()
            result = df.to_string()
            expected = ('             A\n'
                        '0    1.410E+00\n'
                        '1  141.000E+00\n'
                        '2   14.100E+03\n'
                        '3    1.410E+06')
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(use_eng_prefix=True)
            result = df.to_string()
            expected = ('         A\n'
                        '0    1.410\n'
                        '1  141.000\n'
                        '2  14.100k\n'
                        '3   1.410M')
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(accuracy=0)
            result = df.to_string()
            expected = ('         A\n'
                        '0    1E+00\n'
                        '1  141E+00\n'
                        '2   14E+03\n'
                        '3    1E+06')
            self.assertEqual(result, expected)
        finally:
            fmt.reset_printoptions()
Пример #2
0
    def test_to_string_repr_unicode(self):
        """Exercise ``to_string``/``repr`` on data holding non-ASCII text.

        Also checks that every line of a Series repr (except one starting
        with ``Dtype:``) has the same length as the first line, and that
        ``repr`` still works while ``sys.stdin`` is ``None``.
        """
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode': unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        idx = Index(['abc', u'\u03c3a', 'aegdvg'])
        ser = Series(np.random.randn(len(idx)), idx)
        rs = repr(ser).split('\n')
        line_len = len(rs[0])
        for line in rs[1:]:
            try:
                line = line.decode(get_option("display.encoding"))
            except Exception:
                # Best effort: keep the line unchanged when it cannot be
                # decoded, but let KeyboardInterrupt/SystemExit propagate.
                pass
            if not line.startswith('Dtype:'):
                # assertEqual replaces the deprecated ``assert_`` alias and
                # reports both lengths on failure.
                self.assertEqual(len(line), line_len)

        # it works even if sys.stdin is None
        _stdin = sys.stdin
        try:
            sys.stdin = None
            repr(df)
        finally:
            sys.stdin = _stdin
Пример #3
0
    def test_to_string_repr_unicode(self):
        """Exercise ``to_string``/``repr`` on data holding non-ASCII text.

        Also checks that every line of a Series repr has the same length as
        the first line, and that ``repr`` still works while ``sys.stdin`` is
        ``None``.
        """
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode' : unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        idx = Index(['abc', u'\u03c3a', 'aegdvg'])
        ser = Series(np.random.randn(len(idx)), idx)
        rs = repr(ser).split('\n')
        line_len = len(rs[0])
        for line in rs[1:]:
            try:
                line = line.decode('utf-8')
            except Exception:
                # Best effort: keep the line unchanged when it cannot be
                # decoded, but let KeyboardInterrupt/SystemExit propagate.
                pass
            # assertEqual replaces the deprecated ``assert_`` alias.
            self.assertEqual(len(line), line_len)

        # it works even if sys.stdin is None
        # Save and restore whatever stdin was installed (it may not be
        # sys.__stdin__), and restore it even if repr() raises.
        _stdin = sys.stdin
        try:
            sys.stdin = None
            repr(df)
        finally:
            sys.stdin = _stdin
Пример #4
0
    def test_to_string_format_na(self):
        """Check how NaN is rendered next to fractional and whole floats."""
        fmt.reset_printoptions()

        # Column A holds a value with a fractional part.
        fractional = DataFrame({'A' : [np.nan, -1, -2.1234, 3, 4],
                                'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
        rendered = fractional.to_string()
        self.assertEqual(rendered,
                         '        A       B\n'
                         '0     NaN     NaN\n'
                         '1 -1.0000     foo\n'
                         '2 -2.1234   foooo\n'
                         '3  3.0000  fooooo\n'
                         '4  4.0000     bar')

        # Column A holds only whole-valued floats (plus NaN).
        whole = DataFrame({'A' : [np.nan, -1., -2., 3., 4.],
                           'B' : [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
        rendered = whole.to_string()
        self.assertEqual(rendered,
                         '    A       B\n'
                         '0 NaN     NaN\n'
                         '1  -1     foo\n'
                         '2  -2   foooo\n'
                         '3   3  fooooo\n'
                         '4   4     bar')
Пример #5
0
class ToString(object):
    """Timing case for ``DataFrame.to_string`` on a 100 x 10 float frame."""

    def setup(self):
        """Create the random frame used by the timing method."""
        values = np.random.randn(100, 10)
        self.df = DataFrame(values)

    def time_to_string_floats(self):
        """Render the frame to text; the result is discarded."""
        frame = self.df
        frame.to_string()
Пример #6
0
    def test_repr_embedded_ndarray(self):
        arr = np.empty(10, dtype=[('err', object)])
        for i in range(len(arr)):
            arr['err'][i] = np.random.randn(i)

        df = DataFrame(arr)
        repr(df['err'])
        repr(df)
        df.to_string()
Пример #7
0
 def update_with_predictions(self, context, x, actuals, predictions):
     """Print the confusion matrix of ``actuals`` versus ``predictions``.

     ``context`` is stored on ``self.config.target`` before its prep data is
     read. When that prep data is non-empty, the first item of each entry
     labels the rows and columns of the printed table; otherwise the raw
     matrix is printed. ``x`` is accepted but not used here.
     """
     cm = metrics.confusion_matrix(actuals, predictions)
     self.config.target.context = context
     factors = self.config.target.get_prep_data()
     if factors:
         names = [f[0] for f in factors]
         df = DataFrame(cm, columns=names, index=names)
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print(df.to_string())
     else:
         print(cm)
Пример #8
0
    def test_to_string_unicode_columns(self):
        """Render and describe a frame whose column label is non-ASCII.

        Also checks that ``self.frame.to_string()`` returns a ``unicode``
        object.
        """
        df = DataFrame({u'\u03c3' : np.arange(10.)})

        buf = StringIO()
        df.to_string(buf=buf)
        buf.getvalue()

        buf = StringIO()
        df.info(buf=buf)
        buf.getvalue()

        result = self.frame.to_string()
        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(isinstance(result, unicode))
def test_cho_rst(year = 2014, verbose = False):
    """Check that the brut/imposable inversion round-trips for cho and rst.

    For each (imposable, brut) variable pair, a simulation sweeps the brut
    variable from 0 to ``maxrev`` and computes both variables. The imposable
    values are then converted back to brut with the matching
    ``inversion_revenus`` helper, and both tables must agree to within 1.

    Parameters
    ----------
    year : int
        Year used to build the simulation period.
    verbose : bool
        When true, print the intermediate tables even if the check passes.
    """
    period = periods.period(year)
    remplacement = {'cho': 'chobrut', 'rst': 'rstbrut'}

    # items() iterates the same pairs on Python 2 and Python 3.
    for var, varbrut in remplacement.items():
        maxrev = 24000

        simulation = base.tax_benefit_system.new_scenario().init_single_entity(
            axes = [dict(name = varbrut, max = maxrev, min = 0, count = 11)],
            period = period,
            parent1 = dict(
                birth = datetime.date(year - 40, 1, 1),
                ),
            ).new_simulation(debug = True)

        df_b2i = DataFrame({
            var: simulation.calculate(var),
            varbrut: simulation.calculate(varbrut),
            })

        vari = df_b2i[var].get_values()
        # Constant array of 3 with the same shape as ``vari``.
        csg_rempl = vari * 0 + 3

        defaultP = simulation.get_reference_compact_legislation(period.start)
        if var == "cho":
            _vari_to_brut = inversion_revenus._chobrut_from_choi
        elif var == "rst":
            _vari_to_brut = inversion_revenus._rstbrut_from_rsti
        else:
            assert False, u'Unsupported value for var: {!r}'.format(var)

        df_i2b = DataFrame({var: vari, varbrut: _vari_to_brut(vari, csg_rempl, defaultP)})

        if verbose:
            print(df_i2b.to_string())
            print(df_b2i.to_string())

        for variable in [var, varbrut]:
            passed = ((df_b2i[variable] - df_i2b[variable]).abs() < 1).all()

            if (not passed) or verbose:
                # The whole expression is wrapped in print(...): on Python 3
                # ``print (x).to_string()`` would call to_string on None.
                print("Brut to imposable")
                print((df_b2i[[varbrut, var]] / 12).to_string())
                print("Imposable to brut")
                print((df_i2b / 12).to_string())

                # Report the column that actually differs.
                assert passed, "difference in %s " % (variable)
def test_case_study(year = 2013, verbose = False):
    '''
    Check that _salbrut, which computes "salaire brut" from "imposable",
    yields an amount compatible with the one obtained by running the
    simulation starting from a "salaire brut".

    year: year passed to the simulation configuration.
    verbose: when true, print the intermediate tables.
    '''

    for type_sal_category in ['prive_non_cadre', 'prive_cadre']:  # , 'public_titulaire_etat']:
        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 11, maxrev = maxrev, x_axis = 'salbrut')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev

        simulation.set_param()

        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)

        from openfisca_france.model.inversion_revenus import _salbrut
        df_b2i = df.transpose()
        if verbose:
            print(df_b2i.to_string())

        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # The former ``primes`` local was dropped: it re-read the 'hsup'
        # column and was never used.

        defaultP = simulation.P_default
        from pandas import DataFrame
        df_i2b = DataFrame({'sal': sali, 'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })

        if verbose:
            print(df_i2b.to_string())

        for var in ['sal', 'salbrut']:
            passed = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()

            if (not passed) or type_sal_category in ['public_titulaire_etat']:
                # The whole expression is wrapped in print(...): on Python 3
                # ``print (x).to_string()`` would call to_string on None.
                print((df_b2i / 12).to_string())
                print((df_i2b / 12).to_string())

            assert passed, "difference in %s for %s" % (var, type_sal_category)
Пример #11
0
    def test_to_string_repr_unicode(self):
        buf = StringIO()

        unicode_values = [u'\u03c3'] * 10
        unicode_values = np.array(unicode_values, dtype=object)
        df = DataFrame({'unicode' : unicode_values})
        df.to_string(col_space=10, buf=buf)

        # it works!
        repr(df)

        # it works even if sys.stdin in None
        sys.stdin = None
        repr(df)
        sys.stdin = sys.__stdin__
Пример #12
0
    def test_to_string_small_float_values(self):
        """Check exponent formatting of a column mixing 1.5 with tiny values."""
        frame = DataFrame({"a": [1.5, 1e-17, -5.5e-7]})

        rendered = frame.to_string()
        # sadness per above
        # Some platforms print three-digit exponents; pick the matching text.
        three_digit_exponent = "%.4g" % 1.7e8 == "1.7e+008"
        if three_digit_exponent:
            expected = "               a\n" "0  1.500000e+000\n" "1  1.000000e-017\n" "2 -5.500000e-007"
        else:
            expected = "              a\n" "0  1.500000e+00\n" "1  1.000000e-17\n" "2 -5.500000e-07"
        self.assertEqual(rendered, expected)

        # but not all exactly zero
        # NOTE(review): the values below are computed but never compared.
        frame = frame * 0
        rendered = frame.to_string()
        expected = "   0\n" "0  0\n" "1  0\n" "2 -0"
def test_case_study(year = 2013, verbose = False):
    """Check that _salbrut inverts 'sal' back to the simulated 'salbrut'.

    For each salary category a simulation is configured with a 'salbrut' of
    ``maxrev``; its results are compared (tolerance .01) with a table rebuilt
    by applying ``_salbrut`` to the simulated 'sal' values.

    year: year passed to the simulation configuration.
    verbose: when true, print the intermediate tables.
    """
    for type_sal_category in ['prive_non_cadre', 'prive_cadre', 'public_titulaire_etat']:

        simulation = ScenarioSimulation()
        maxrev = 24000
        simulation.set_config(year = year, reforme = False, nmen = 1, x_axis = 'salbrut')
        # Add husband/wife on the same tax sheet (foyer).
    #    simulation.scenario.addIndiv(1, datetime.date(1975, 1, 1), 'conj', 'part')
        simulation.scenario.indiv[0]['salbrut'] = maxrev
        simulation.scenario.indiv[0]['type_sal'] = CAT[type_sal_category]
        if type_sal_category == 'public_titulaire_etat':
            from openfisca_france.model.cotisations_sociales.travail import TAUX_DE_PRIME
            simulation.scenario.indiv[0]['primes'] = TAUX_DE_PRIME * maxrev

        simulation.set_param()

        # The aefa prestation can be disabled by uncommenting the following line:
        # simulation.disable_prestations( ['aefa'])
        df = simulation.get_results_dataframe(index_by_code = True)

        from openfisca_france.model.cotisations_sociales.travail import _salbrut
        df_b2i = df.transpose()
        if verbose:
            print(df_b2i.to_string())

        sali = df_b2i['sal'].get_values()
        hsup = simulation.input_table.table['hsup'].get_values()
        type_sal = simulation.input_table.table['type_sal'].get_values()
        # The former ``primes`` local was dropped: it re-read the 'hsup'
        # column and was never used.

        defaultP = simulation.P_default
        from pandas import DataFrame
        df_i2b = DataFrame({'sal': sali, 'salbrut' : _salbrut(sali, hsup, type_sal, defaultP) })

        if verbose:
            print(df_i2b.to_string())

        for var in ['sal', 'salbrut']:
            test = ((df_b2i[var] - df_i2b[var]).abs() < .01).all()

            if (not test) or type_sal_category in ['public_titulaire_etat']:
                # The whole expression is wrapped in print(...): on Python 3
                # ``print (x).to_string()`` would call to_string on None.
                print((df_b2i / 12).to_string())
                print((df_i2b / 12).to_string())

            assert test, "difference in %s for %s" % (var, type_sal_category)
Пример #14
0
    def test_to_string_float_formatting(self):
        """Check float rendering with precision 6, then with the defaults."""
        fmt.reset_printoptions()
        fmt.set_printoptions(precision=6, column_space=12,
                             notebook_repr_html=False)

        frame = DataFrame({'x' : [0, 0.25, 3456.000, 12e+45, 1.64e+6,
                                  1.7e+8, 1.253456, np.pi, -1e6]})

        rendered = frame.to_string()

        # Python 2.5 just wants me to be sad. And debian 32-bit
        #sys.version_info[0] == 2 and sys.version_info[1] < 6:
        if not _three_digit_exp():
            expected = ('             x\n0  0.00000e+00\n1  2.50000e-01\n'
                        '2  3.45600e+03\n3  1.20000e+46\n4  1.64000e+06\n'
                        '5  1.70000e+08\n6  1.25346e+00\n7  3.14159e+00\n'
                        '8 -1.00000e+06')
        else:
            expected = ('              x\n0  0.00000e+000\n1  2.50000e-001\n'
                        '2  3.45600e+003\n3  1.20000e+046\n4  1.64000e+006\n'
                        '5  1.70000e+008\n6  1.25346e+000\n7  3.14159e+000\n'
                        '8 -1.00000e+006')
        assert rendered == expected

        frame = DataFrame({'x' : [3234, 0.253]})
        rendered = frame.to_string()

        expected = ('          x\n'
                    '0  3234.000\n'
                    '1     0.253')
        assert rendered == expected

        # Back to the defaults; the display precision is then expected to be 7.
        fmt.reset_printoptions()
        self.assertEqual(get_option("display.precision"), 7)

        frame = DataFrame({'x': [1e9, 0.2512]})
        rendered = frame.to_string()
        # Python 2.5 just wants me to be sad. And debian 32-bit
        #sys.version_info[0] == 2 and sys.version_info[1] < 6:
        if not _three_digit_exp():
            expected = ('              x\n'
                        '0  1.000000e+09\n'
                        '1  2.512000e-01')
        else:
            expected = ('               x\n'
                        '0  1.000000e+009\n'
                        '1  2.512000e-001')
        assert rendered == expected
Пример #15
0
    def test_to_string_float_index(self):
        """Check how an index mixing 1.5 with whole numbers is rendered."""
        labels = Index([1.5, 2, 3, 4, 5])
        frame = DataFrame(range(5), index=labels)

        rendered = frame.to_string()
        self.assertEqual(
            rendered,
            "     0\n" "1.5  0\n" "2    1\n" "3    2\n" "4    3\n" "5    4",
        )
Пример #16
0
    def test_to_string_no_index(self):
        """Check the text produced when the index column is suppressed."""
        frame = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        rendered = frame.to_string(index=False)
        wanted = " x  y\n 1  4\n 2  5\n 3  6"

        assert rendered == wanted
Пример #17
0
    def test_to_string_with_formatters(self):
        """Check per-column formatters given as a dict and as a sequence."""
        frame = DataFrame({'int': [1, 2, 3],
                           'float': [1.0, 2.0, 3.0],
                           'object': [(1,2), True, False]},
                          columns=['int', 'float', 'object'])

        # (column name, formatter) pairs, in column order.
        formatters = [('int', lambda x: '0x%x' % x),
                      ('float', lambda x: '[% 4.1f]' % x),
                      ('object', lambda x: '-%s-' % str(x))]
        by_name = frame.to_string(formatters=dict(formatters))
        by_position = frame.to_string(formatters=lzip(*formatters)[1])
        wanted = ('  int  float    object\n'
                  '0 0x1 [ 1.0]  -(1, 2)-\n'
                  '1 0x2 [ 2.0]    -True-\n'
                  '2 0x3 [ 3.0]   -False-')
        self.assertEqual(by_name, wanted)
        self.assertEqual(by_name, by_position)
Пример #18
0
 def test_to_string_with_formatters_unicode(self):
     """Check a formatter keyed by a non-ASCII column label."""
     frame = DataFrame({u'c/\u03c3':[1,2,3]})
     rendered = frame.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x})
     wanted = (u'  c/\u03c3\n'
               '0   1\n'
               '1   2\n'
               '2   3')
     self.assertEqual(rendered, wanted)
Пример #19
0
    def test_to_string_no_header(self):
        """Check the text produced when the header row is suppressed."""
        frame = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        rendered = frame.to_string(header=False)
        wanted = "0  1  4\n1  2  5\n2  3  6"

        assert rendered == wanted
Пример #20
0
    def test_to_string_int_formatting(self):
        """Check that an integer column is rendered right-aligned with signs."""
        df = DataFrame({"x": [-15, 20, 25, -35]})
        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(issubclass(df["x"].dtype.type, np.integer))

        output = df.to_string()
        expected = "    x\n" "0 -15\n" "1  20\n" "2  25\n" "3 -35"
        self.assertEqual(output, expected)
Пример #21
0
    def test_to_string_small_float_values(self):
        """Check exponent formatting of a column mixing 1.5 with tiny values."""
        frame = DataFrame({'a': [1.5, 1e-17, -5.5e-7]})

        rendered = frame.to_string()
        wanted = ('              a\n'
                  '0  1.500000e+00\n'
                  '1  1.000000e-17\n'
                  '2 -5.500000e-07')
        self.assertEqual(rendered, wanted)

        # but not all exactly zero
        # NOTE(review): the values below are computed but never compared.
        frame = frame * 0
        rendered = frame.to_string()
        wanted = ('   0\n'
                  '0  0\n'
                  '1  0\n'
                  '2 -0')
Пример #22
0
 def test_to_string_left_justify_cols(self):
     """Check that ``justify='left'`` left-aligns the column header."""
     fmt.reset_printoptions()
     frame = DataFrame({'x' : [3234, 0.253]})
     rendered = frame.to_string(justify='left')
     wanted = ('   x       \n'
               '0  3234.000\n'
               '1     0.253')
     assert rendered == wanted
Пример #23
0
    def test_to_string_with_formatters(self):
        """Check per-column formatters given as a dict and as a sequence."""
        column_order = ["int", "float", "object"]
        frame = DataFrame(
            {"int": [1, 2, 3], "float": [1.0, 2.0, 3.0], "object": [(1, 2), True, False]},
            columns=column_order,
        )

        # (column name, formatter) pairs, in column order.
        formatters = [
            ("int", lambda x: "0x%x" % x),
            ("float", lambda x: "[% 4.1f]" % x),
            ("object", lambda x: "-%s-" % str(x)),
        ]
        by_name = frame.to_string(formatters=dict(formatters))
        by_position = frame.to_string(formatters=lzip(*formatters)[1])
        wanted = "  int  float    object\n" "0 0x1 [ 1.0]  -(1, 2)-\n" "1 0x2 [ 2.0]    -True-\n" "2 0x3 [ 3.0]   -False-"
        self.assertEqual(by_name, wanted)
        self.assertEqual(by_name, by_position)
Пример #24
0
    def test_to_string_float_formatting(self):
        """Check float rendering with precision 6, then with the defaults."""
        fmt.reset_printoptions()
        fmt.set_printoptions(precision=6, column_space=12, notebook_repr_html=False)

        frame = DataFrame({"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]})

        rendered = frame.to_string()

        # Python 2.5 just wants me to be sad. And debian 32-bit
        # sys.version_info[0] == 2 and sys.version_info[1] < 6:
        if not _three_digit_exp():
            wanted = (
                "             x\n0  0.00000e+00\n1  2.50000e-01\n"
                "2  3.45600e+03\n3  1.20000e+46\n4  1.64000e+06\n"
                "5  1.70000e+08\n6  1.25346e+00\n7  3.14159e+00\n"
                "8 -1.00000e+06"
            )
        else:
            wanted = (
                "              x\n0  0.00000e+000\n1  2.50000e-001\n"
                "2  3.45600e+003\n3  1.20000e+046\n4  1.64000e+006\n"
                "5  1.70000e+008\n6  1.25346e+000\n7  3.14159e+000\n"
                "8 -1.00000e+006"
            )
        assert rendered == wanted

        frame = DataFrame({"x": [3234, 0.253]})
        rendered = frame.to_string()

        wanted = "          x\n" "0  3234.000\n" "1     0.253"
        assert rendered == wanted

        # Back to the defaults; the configured precision is then expected to be 7.
        fmt.reset_printoptions()
        self.assertEqual(fmt.print_config.precision, 7)

        frame = DataFrame({"x": [1e9, 0.2512]})
        rendered = frame.to_string()
        # Python 2.5 just wants me to be sad. And debian 32-bit
        # sys.version_info[0] == 2 and sys.version_info[1] < 6:
        if not _three_digit_exp():
            wanted = "              x\n" "0  1.000000e+09\n" "1  2.512000e-01"
        else:
            wanted = "               x\n" "0  1.000000e+009\n" "1  2.512000e-001"
        assert rendered == wanted
Пример #25
0
def getAllChampionContestRates(minGames=1):
    """Return a text report: the total game count, then a per-champion table.

    ``minGames`` is forwarded to ``stats.getAllChampionContestRates``. The
    table is indexed by the "champion" column.
    """
    heading = "Total Games: %d\n\n" % stats.getTotalGames()

    rates = stats.getAllChampionContestRates(minGames=minGames)
    table = DataFrame(rates, columns=(["champion", "picked", "banned", "contested", "contestRate"]))
    table = table.set_index("champion")

    return heading + table.to_string()
Пример #26
0
    def run(self):
        """Render a random frame as text and store it under an S3 key.

        The frame has ``self.nrows`` rows and ``self.ncolumns`` columns named
        from ``ALPHABET``. The text comes from ``to_string()`` and is stored
        under the key 'random_numbers.csv'.
        """
        values = np.random.rand(self.nrows, self.ncolumns)
        frame = DataFrame(values, columns=ALPHABET[0:self.ncolumns])
        frame.index.name = 'index'
        rendered = frame.to_string()

        connection = S3Connection(ACCESS_KEY, ACCESS_SECRET)
        s3_key = Key(connection.get_bucket(BUCKET))
        s3_key.key = 'random_numbers.csv'
        s3_key.set_contents_from_string(rendered)
Пример #27
0
    def test_eng_float_formatter(self):
        """Render a float column under each engineering-format setting.

        Covers the default exponent form, the SI-prefix form
        (``use_eng_prefix=True``) and the zero-decimal form (``accuracy=0``).
        """
        df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]})

        # The format is installed through ``fmt`` and undone with
        # ``fmt.reset_printoptions()``; resetting in ``finally`` makes sure a
        # failing assertion does not skip the reset.
        try:
            fmt.set_eng_float_format()
            result = df.to_string()
            expected = "             A\n" "0    1.410E+00\n" "1  141.000E+00\n" "2   14.100E+03\n" "3    1.410E+06"
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(use_eng_prefix=True)
            result = df.to_string()
            expected = "         A\n" "0    1.410\n" "1  141.000\n" "2  14.100k\n" "3   1.410M"
            self.assertEqual(result, expected)

            fmt.set_eng_float_format(accuracy=0)
            result = df.to_string()
            expected = "         A\n" "0    1E+00\n" "1  141E+00\n" "2   14E+03\n" "3    1E+06"
            self.assertEqual(result, expected)
        finally:
            fmt.reset_printoptions()
Пример #28
0
 def __repr__(self):
     """Return the id value followed by a table of the 20 largest clusters.

     Rows are ordered by 'Non_red_size', descending. When there are more
     than 20 entries, a trailing line reports how many were left out.
     """
     shown = 20
     table = DataFrame(self.cluster_info,
                       columns=['Cluster_no', 'Cluster_size', 'Description', 'Non_red_size'])
     # Move 'Description' to the last column for display.
     table = table[['Cluster_no', 'Cluster_size', 'Non_red_size', 'Description']]
     table = table.sort_values('Non_red_size', ascending=False)[:shown]
     pieces = ["Id value for clustering: {}\n\n".format(self.id_val),
               table.to_string(index=False)]
     if len(self.cluster_info) > shown:
         pieces.append("\n... {} more entries...".format(len(self.cluster_info)-20))
     return "".join(pieces)
Пример #29
0
    def test_to_string_int_formatting(self):
        """Check that an integer column is rendered right-aligned with signs."""
        df = DataFrame({'x' : [-15, 20, 25, -35]})
        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(issubclass(df['x'].dtype.type, np.integer))

        output = df.to_string()
        expected = ('    x\n'
                    '0 -15\n'
                    '1  20\n'
                    '2  25\n'
                    '3 -35')
        self.assertEqual(output, expected)
Пример #30
0
    def test_to_string_index_formatter(self):
        """Check that the '__index__' formatter is applied to index labels."""
        frame = DataFrame([range(5), range(5, 10), range(10, 15)])

        # Map index labels 0, 1, 2 to 'a', 'b', 'c'.
        rendered = frame.to_string(formatters={'__index__': lambda x: 'abc'[x]})

        wanted = '\n'.join([
            '    0   1   2   3   4',
            'a   0   1   2   3   4',
            'b   5   6   7   8   9',
            'c  10  11  12  13  14',
        ])
        self.assertEqual(rendered, wanted)
Пример #31
0
def test_repr_tuples():
    """Smoke test: repr/to_string of a frame whose cells are tuples."""
    sink = StringIO()

    pairs = list(zip(range(10), range(10)))
    frame = DataFrame({"tups": pairs})
    repr(frame)
    frame.to_string(col_space=10, buf=sink)
Пример #32
0
def test_to_string_unicode_two():
    """Smoke test: to_string of an empty frame with a non-ASCII column label."""
    frame = DataFrame({"c/\u03c3": []})
    sink = StringIO()
    frame.to_string(sink)
Пример #33
0
def test_to_string_unicode_three():
    """Smoke test: to_string of a frame holding the single value "\\xc2"."""
    frame = DataFrame(["\xc2"])
    sink = StringIO()
    frame.to_string(sink)
Пример #34
0
 def test_unicode_problem_decoding_as_ascii(self):
     """Smoke test: ``unicode()`` of the text for a non-ASCII column label."""
     column = Series({'test': np.NaN})
     frame = DataFrame({u'c/\u03c3': column})
     rendered = frame.to_string()
     unicode(rendered)
Пример #35
0
def test_truncation_no_index(max_cols, max_rows, expected):
    df = DataFrame([[0] * 11] * 4)
    assert df.to_string(index=False, max_cols=max_cols,
                        max_rows=max_rows) == expected
Пример #36
0
 def test_to_string_unicode_two(self):
     dm = DataFrame({u'c/\u03c3': []})
     buf = StringIO()
     dm.to_string(buf)
Пример #37
0
class ToString:
    """Timing case for ``DataFrame.to_string`` on a 100 x 10 float frame."""

    def setup(self):
        """Create the random frame used by the timing method."""
        values = np.random.randn(100, 10)
        self.df = DataFrame(values)

    def time_to_string_floats(self):
        """Render the frame to text; the result is discarded."""
        frame = self.df
        frame.to_string()
#
# formatters['Done'] = "%s"
# formatters['settings_name'] = "%s"
# formatters['te_AreaUnderROC'] = "%s"
# formatters['te_F1'] = "%s"
# formatters['test_programs'] = "%s"
# formatters['tr_AreaUnderROC'] = "%s"
# # formatters['tr_F1'] = "%s"
# formatters['training_programs'] = "%s"
#
# formatters = {key: lambda x: val % x for key, val in formatters.items()}
#
# print(table.to_string(formatters=formatters))


# Print the whole table as plain text.
print(table.to_string())

# Then print one LaTeX table row per table row, ordered by sorting the
# (name, row) pairs.
print("\n\nLatex:\n")
scores = ["AreaUnderROC", "F1", "TPR"]
rows = sorted(list(table.iterrows()))
for name, row in rows:
    row_str = name

    # One "\perfsplit{<tr_ value>}{<te_ value>}" cell per score, both
    # values formatted to two decimals.
    for val in scores:
        row_str += " & \\perfsplit{{{:.2f}}}{{{:.2f}}}"\
            .format(row.get("tr_" + val), row.get("te_" + val))
    # Terminate the row with a literal "\\".
    print(row_str + "\\\\")


Пример #39
0
 def saveHistory_txt(self, path) -> None:
     """Save the ``history`` attribute to a .txt file.

     ``path`` may omit the '.txt' extension; it is appended only when
     missing. The history is written as a text table without the index.
     """
     # endswith() compares the full four-character suffix; a three-character
     # slice could never equal '.txt', so the suffix was always appended.
     if not path.endswith('.txt'):
         path += '.txt'
     df = DataFrame(self.history)
     with open(path, 'w') as txt:
         txt.write(df.to_string(index=False))
Пример #40
0
    def test_repr_tuples(self):
        buf = StringIO()

        df = DataFrame({'tups': zip(range(10), range(10))})
        repr(df)
        df.to_string(col_space=10, buf=buf)
        initial_value.append(i)
# Each parameter name is repeated four times (one entry per tested change).
for i in [
        "Infectious_initial(I0)", "number of students(N)", "beta", "sigma",
        "gamma"
]:
    for j in range(0, 4):
        parameters_name.append(i)
# Baseline count (456) repeated for the 20 rows.
for i in range(0, 20):
    amount_before.append(456)
# Relative difference of each result against the baseline of 456.
for i in range(0, 20):
    difference_rate.append(results_change[i] / 456)

# Show every row and column when printing.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
data = {
    "parameters": parameters_name,
    "initial value": initial_value,
    "change rate": rate_change,
    "value after change": value_afterchange,
    "the number of affected students after change": results,
    "the number of affected students before change": amount_before,
    "difference value": results_change,
    "difference rate": difference_rate
}
df = DataFrame(data)
print(df.to_string(justify='center', index=False))

# The context manager saves and closes the workbook on exit;
# ExcelWriter.save() is no longer available in pandas 2.0.
with pd.ExcelWriter("Sensitivity_Analysis(R0=3.2).xlsx") as writer:
    df.to_excel(writer, sheet_name="Sensitivity Analysis")
Пример #42
0
 def _save_embedding(file_name: str, emb: pd.DataFrame):
     """Write ``emb`` to ``file_name`` as text with a shape header.

     The first line holds the frame's shape values separated by a space. The
     rest is ``emb.to_string`` (index kept, header dropped) with every run of
     two or more whitespace characters replaced by one space.
     """
     header = " ".join(map(str, emb.shape)) + "\n"
     # Raw string: "\s" in a normal literal is an invalid escape sequence.
     emb_string = header + re.sub(r"\s\s+", " ",
                                  emb.to_string(header=False, index=True))
     with open(file_name, "w+") as file:
         file.write(emb_string)
def pretty2d(arr):
    """Print ``arr`` as a text grid without index labels or a header row."""
    grid = DataFrame(arr).to_string(index=False, header=False)
    print(grid)
Пример #44
0
def print_rich_table(
    df: pd.DataFrame,
    show_index: bool = False,
    title: str = "",
    index_name: str = "",
    headers: Union[List[str], pd.Index] = None,
    floatfmt: Union[str, List[str]] = ".2f",
):
    """Print a DataFrame as a rich table, or as plain text.

    When ``gtff.USE_TABULATE_DF`` is false, the frame is printed with
    ``df.to_string(col_space=0)`` and the other arguments are ignored.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe to turn into table
    show_index: bool
        Whether to include index
    title: str
        Title for table
    index_name : str
        Title for index column
    headers: List[str]
        Titles for columns; defaults to the DataFrame's column labels
    floatfmt: str
        Format spec applied to float cells: one spec for every column, or a
        list with one spec per column
    """
    if not gtff.USE_TABULATE_DF:
        console.print(df.to_string(col_space=0))
        return

    table = Table(title=title, show_lines=True)

    if show_index:
        table.add_column(index_name)

    if headers is None:
        column_titles = df.columns
    else:
        if isinstance(headers, pd.Index):
            headers = list(headers)
        if len(headers) != len(df.columns):
            log_and_raise(
                ValueError("Length of headers does not match length of DataFrame")
            )
        column_titles = headers
    for column_title in column_titles:
        table.add_column(str(column_title))

    if isinstance(floatfmt, list):
        if len(floatfmt) != len(df.columns):
            log_and_raise(
                ValueError(
                    "Length of floatfmt list does not match length of DataFrame columns."
                )
            )
    if isinstance(floatfmt, str):
        # Repeat a single spec once per column.
        floatfmt = [floatfmt] * len(df.columns)

    for label, values in zip(df.index.tolist(), df.values.tolist()):
        cells = [str(label)] if show_index else []
        for position, value in enumerate(values):
            if isinstance(value, float):
                cells.append(f"{value:{floatfmt[position]}}")
            else:
                cells.append(str(value))
        table.add_row(*cells)
    console.print(table)
Пример #45
0
    def test_to_string(self):
        """Exercise ``to_string`` on a 200-row mixed float/string frame.

        Covers writing to a buffer versus returning a string, column
        selection and ordering, ``col_space``, ``float_format``, per-column
        formatters, and a frame with an index but no columns.
        """
        from pandas import read_table
        import re

        # big mixed
        biggie = DataFrame({
            'A': randn(200),
            'B': tm.makeStringIndex(200)
        },
                           index=range(200))

        biggie['A'][:20] = nan
        biggie['B'][:20] = nan
        s = biggie.to_string()

        buf = StringIO()
        retval = biggie.to_string(buf=buf)
        # assertTrue replaces the deprecated ``assert_`` alias throughout.
        self.assertTrue(retval is None)
        self.assertEqual(buf.getvalue(), s)

        self.assertTrue(isinstance(s, basestring))

        # print in right order
        result = biggie.to_string(columns=['B', 'A'],
                                  col_space=17,
                                  float_format='%.5f'.__mod__)
        lines = result.split('\n')
        header = lines[0].strip().split()
        # Collapse whitespace runs so the text can be parsed back with a
        # single-space separator. Raw string: '\s' in a normal literal is an
        # invalid escape sequence.
        joined = '\n'.join([re.sub(r'\s+', ' ', x).strip() for x in lines[1:]])
        recons = read_table(StringIO(joined), names=header, sep=' ')
        tm.assert_series_equal(recons['B'], biggie['B'])
        self.assertEqual(recons['A'].count(), biggie['A'].count())
        self.assertTrue(
            (np.abs(recons['A'].dropna() - biggie['A'].dropna()) < 0.1).all())

        # expected = ['B', 'A']
        # self.assertEqual(header, expected)

        result = biggie.to_string(columns=['A'], col_space=17)
        header = result.split('\n')[0].strip().split()
        expected = ['A']
        self.assertEqual(header, expected)

        biggie.to_string(columns=['B', 'A'],
                         formatters={'A': lambda x: '%.1f' % x})

        biggie.to_string(columns=['B', 'A'], float_format=str)
        biggie.to_string(columns=['B', 'A'], col_space=12, float_format=str)

        frame = DataFrame(index=np.arange(200))
        frame.to_string()
Пример #46
0
 def test_to_string_left_justify_cols(self):
     """Check that ``justify='left'`` left-aligns the column header."""
     fmt.reset_printoptions()
     frame = DataFrame({'x': [3234, 0.253]})
     rendered = frame.to_string(justify='left')
     wanted = ('   x       \n' '0  3234.000\n' '1     0.253')
     assert rendered == wanted
Пример #47
0
 def test_to_string_unicode_three(self):
     dm = DataFrame(['\xc2'])
     buf = StringIO()
     dm.to_string(buf)
Пример #48
0
    def test_dict_entries(self):
        """Check that a dict stored in a cell shows up in the rendered text."""
        frame = DataFrame({'A': [{'a':1, 'b':2}]})

        rendered = frame.to_string()
        found = "{'a': 1, 'b': 2}" in rendered
        self.assertTrue(found)
Пример #49
0
def print_report_cpfc(aim: Cpfc, real: Cpfc):
    """Print the ``report_cpfc(aim, real)`` result as an index-less table.

    The report's first item supplies the column names; the remaining items
    are the rows.
    """
    report = report_cpfc(aim, real)
    column_names = report[0]
    body_rows = report[1:]
    table = DataFrame(columns=column_names, data=body_rows)
    print(table.to_string(index=False))
Пример #50
0
 def test_to_string_with_formatters_unicode(self):
     """Check a formatter keyed by a non-ASCII column label."""
     frame = DataFrame({u'c/\u03c3': [1, 2, 3]})
     rendered = frame.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x})
     wanted = (u'  c/\u03c3\n' '0   1\n' '1   2\n' '2   3')
     self.assertEqual(rendered, wanted)
Пример #51
0
def test_truncation_col_placement_no_index(max_cols, expected):
    df = DataFrame([[0] * 11] * 2)
    assert df.to_string(index=False, max_cols=max_cols).split("\n") == expected
Пример #52
0
    def test_frame_index_to_string(self):
        index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M")
        frame = DataFrame(np.random.randn(3, 4), index=index)

        # it works!
        frame.to_string()
Пример #53
0
 def writeDataFrameTextFile(self, filenameSuffix, df: pd.DataFrame):
     """Write ``df.to_string()`` to a text file and return the file's path.

     The path comes from ``self.path`` for ``filenameSuffix`` and is logged
     before writing.
     """
     target = self.path(filenameSuffix, extensionToAdd="df.txt", validOtherExtensions="txt")
     self._log.info(f"Saving data frame text file {target}")
     with open(target, "w") as out:
         rendered = df.to_string()
         out.write(rendered)
     return target
Пример #54
0
def test_to_string_with_formatters_unicode():
    """A formatter keyed by a non-ASCII column label is applied to that column."""
    label = "c/\u03c3"
    frame = DataFrame({label: [1, 2, 3]})
    rendered = frame.to_string(formatters={label: str})
    expected = "  c/\u03c3\n" "0   1\n1   2\n2   3"
    assert rendered == expected
Пример #55
0
def main():
    """Benchmark a target commit against a baseline and report timing ratios.

    Runs the benchmark suite for both commits, builds a table of
    per-benchmark timings and their ratio, writes the report to the log
    file and prints it. The log file is closed and the temporary directory
    removed on every exit path, including failures.
    """
    from pandas import DataFrame
    from vbench.api import BenchmarkRunner
    from vbench.db import BenchmarkDB
    from suite import REPO_PATH, BUILD, DB_PATH, PREPARE, dependencies, benchmarks

    if not args.base_commit:
        args.base_commit = BASELINE_COMMIT

    # GitRepo wants exactly 7 character hash?
    args.base_commit = args.base_commit[:7]
    if args.target_commit:
        args.target_commit = args.target_commit[:7]

    if not args.log_file:
        args.log_file = os.path.abspath(os.path.join(REPO_PATH,
                                                     'vb_suite.log'))

    TMP_DIR = tempfile.mkdtemp()
    # Bound before the try block so the cleanup never sees an unbound name
    # when open() itself fails.
    logfile = None

    try:
        prprint("TMP_DIR = %s" % TMP_DIR)
        prprint("LOG_FILE = %s\n" % args.log_file)

        logfile = open(args.log_file, 'w')

        prprint("Opening DB at '%s'...\n" % DB_PATH)
        db = BenchmarkDB(DB_PATH)

        prprint("Initializing Runner...")
        runner = BenchmarkRunner(
            benchmarks,
            REPO_PATH,
            REPO_PATH,
            BUILD,
            DB_PATH,
            TMP_DIR,
            PREPARE,
            always_clean=True,
            # run_option='eod', start_date=START_DATE,
            module_dependencies=dependencies)

        repo = runner.repo  # (steal the parsed git repo used by runner)

        # ARGH. reparse the repo, without discarding any commits,
        # then overwrite the previous parse results
        # prprint ("Slaughtering kittens..." )
        (repo.shas, repo.messages, repo.timestamps,
         repo.authors) = _parse_commit_log(REPO_PATH)

        h_head = args.target_commit or repo.shas[-1]
        h_baseline = args.base_commit

        prprint('Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, "")))
        prprint('Baseline [%s] : %s\n' %
                (h_baseline, repo.messages.get(h_baseline, "")))

        prprint("removing any previous measurements for the commits.")
        db.delete_rev_results(h_baseline)
        db.delete_rev_results(h_head)

        # TODO: we could skip this, but we need to make sure all
        # results are in the DB, which is a little tricky with
        # start dates and so on.
        prprint("Running benchmarks for baseline [%s]" % h_baseline)
        runner._run_and_write_results(h_baseline)

        prprint("Running benchmarks for target [%s]" % h_head)
        runner._run_and_write_results(h_head)

        prprint('Processing results...')

        head_res = get_results_df(db, h_head)
        baseline_res = get_results_df(db, h_baseline)
        ratio = head_res['timing'] / baseline_res['timing']
        totals = DataFrame(dict(t_head=head_res['timing'],
                                t_baseline=baseline_res['timing'],
                                ratio=ratio,
                                name=baseline_res.name),
                           columns=["t_head", "t_baseline", "ratio", "name"])
        totals = totals.ix[totals.t_head > args.min_duration]
        # ignore below threshold
        totals = totals.dropna().sort("ratio").set_index(
            'name')  # sort in ascending order

        s = "\n\nResults:\n"
        s += totals.to_string(
            float_format=lambda x: "{:4.4f}".format(x).rjust(10))
        s += "\n\n"
        s += "Columns: test_name | target_duration [ms] | baseline_duration [ms] | ratio\n\n"
        s += "- a Ratio of 1.30 means the target commit is 30% slower then the baseline.\n\n"

        s += 'Target [%s] : %s\n' % (h_head, repo.messages.get(h_head, ""))
        s += 'Baseline [%s] : %s\n\n' % (h_baseline,
                                         repo.messages.get(h_baseline, ""))

        logfile.write(s)
        # Close here so the report is on disk before it is announced below.
        logfile.close()

        prprint(s)
        prprint("Results were also written to the logfile at '%s'\n" %
                args.log_file)

    finally:
        # Nested so that a failure while closing the log cannot prevent the
        # temporary directory from being removed.
        try:
            if logfile is not None:
                logfile.close()
        finally:
            #        print("Disposing of TMP_DIR: %s" % TMP_DIR)
            shutil.rmtree(TMP_DIR)
Пример #56
0
    def test_frame_index_to_string(self):
        index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M')
        frame = DataFrame(np.random.randn(3, 4), index=index)

        # it works!
        frame.to_string()
Пример #57
0
# build the dataframe
# each column label maps to its column of occ_matrix, first row skipped
dict_cohort = {
    name: list(occ_matrix[1:, position])
    for position, name in enumerate(colnames)
}

cohort = DataFrame(dict_cohort, columns=colnames, index=rownames)
# we round the entries
cohort = cohort.round(2)
# and include the number of occurrences in each row as an integer
cohort[colnames[0]] = pd.to_numeric(cohort[colnames[0]], downcast='integer')

# we see the result
print(cohort.to_string())
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #

# export to latex
with open('myCohort.tex', 'w') as op:
    op.write(cohort.to_latex(bold_rows=True, index_names=True))

# check sum of numbers is at most as large as the number of ID's
check = cohort.loc[:, 'number']
check = check.sum()
print('\nThis should be <=', number_of_IDs, '\n')
print(check)

# check sum of percentages
# NOTE: `check` is the same object as `cohort`, so the in-place drop below
# also removes the 'number' column from `cohort`.
check = cohort
check.drop('number', axis=1, inplace=True)
Пример #58
0
 def test_nonunicode_nonascii_alignment(self):
     """Data rows holding non-ASCII byte strings are rendered at equal length."""
     frame = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]])
     rows = frame.to_string().split('\n')
     first_row, second_row = rows[1], rows[2]
     self.assert_(len(first_row) == len(second_row))
Пример #59
0
class TestMultiLevel(unittest.TestCase):
    def setUp(self):
        """Build the fixtures shared by the tests in this class.

        Sets ``self.frame`` (10x3 random frame on a two-level index),
        ``self.single_level`` (a one-level MultiIndex), ``self.series``
        (two-level series with one missing value) and ``self.ymd`` (a time
        frame summed by year, month and day).
        """
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        self.frame = DataFrame(np.random.randn(10, 3),
                               index=index,
                               columns=Index(['A', 'B', 'C'], name='exp'))

        self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
                                       labels=[[0, 1, 2, 3]],
                                       names=['first'])

        # create test series object
        arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        # materialise the pairs: zip() is a one-shot iterator on Python 3
        tuples = list(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        s = Series(randn(8), index=index)
        s[3] = np.nan
        self.series = s

        # must be set before makeTimeDataFrame() is called
        tm.N = 100
        self.tdf = tm.makeTimeDataFrame()
        self.ymd = self.tdf.groupby(
            [lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()

        # use Int64Index, to make sure things work
        self.ymd.index.levels = [
            lev.astype('i8') for lev in self.ymd.index.levels
        ]
        self.ymd.index.names = ['year', 'month', 'day']

    def test_append(self):
        """Appending the two halves of the frame (or of one column) restores it."""
        head, tail = self.frame[:5], self.frame[5:]

        whole_frame = head.append(tail)
        tm.assert_frame_equal(whole_frame, self.frame)

        whole_column = head['A'].append(tail['A'])
        tm.assert_series_equal(whole_column, self.frame['A'])

    def test_reindex_level(self):
        """Reindexing per-month sums on level 1 matches a groupby transform."""
        ymd = self.ymd

        # axis=0
        by_month = ymd.sum(level='month')
        spread = by_month.reindex(ymd.index, level=1)
        wanted = ymd.groupby(level='month').transform(np.sum)

        assert_frame_equal(spread, wanted)

        # Series
        spread = by_month['A'].reindex(ymd.index, level=1)
        wanted = ymd['A'].groupby(level='month').transform(np.sum)
        assert_series_equal(spread, wanted)

        # axis=1
        by_month = ymd.T.sum(axis=1, level='month')
        spread = by_month.reindex(columns=ymd.index, level=1)
        wanted = ymd.groupby(level='month').transform(np.sum).T
        assert_frame_equal(spread, wanted)

    def test_binops_level(self):
        """Binary ops with ``level=`` equal the op against broadcast group sums."""
        for opname in ('sub', 'add', 'mul', 'div'):
            # DataFrame
            frame_op = getattr(DataFrame, opname)
            month_sums = self.ymd.sum(level='month')
            by_level = frame_op(self.ymd, month_sums, level='month')
            spread = self.ymd.groupby(level='month').transform(np.sum)
            assert_frame_equal(by_level, frame_op(self.ymd, spread))

            # Series
            series_op = getattr(Series, opname)
            column = self.ymd['A']
            by_level = series_op(column, month_sums['A'], level='month')
            spread = self.ymd['A'].groupby(level='month').transform(np.sum)
            assert_series_equal(by_level, series_op(self.ymd['A'], spread))

    def test_pickle(self):
        """Frames with a MultiIndex on either axis survive a pickle round trip."""
        import cPickle

        candidates = (self.frame, self.frame.T, self.ymd, self.ymd.T)
        for original in candidates:
            restored = cPickle.loads(cPickle.dumps(original))
            assert_frame_equal(original, restored)

    def test_reindex(self):
        """Selecting by a list of index tuples matches selecting rows 0 and 3."""
        by_label = self.frame.ix[[('foo', 'one'), ('bar', 'one')]]
        by_position = self.frame.ix[[0, 3]]
        assert_frame_equal(by_label, by_position)

    def test_reindex_preserve_levels(self):
        """Reindexing with a MultiIndex keeps that very index object on the result."""
        every_tenth = self.ymd.index[::10]

        # rows
        picked = self.ymd.reindex(every_tenth)
        self.assert_(picked.index is every_tenth)

        picked = self.ymd.ix[every_tenth]
        self.assert_(picked.index is every_tenth)

        # columns
        transposed = self.ymd.T
        picked = transposed.reindex(columns=every_tenth)
        self.assert_(picked.columns is every_tenth)

        picked = transposed.ix[:, every_tenth]
        self.assert_(picked.columns is every_tenth)

    def test_sort_index_preserve_levels(self):
        """``sort_index`` keeps the index level names."""
        sorted_names = self.frame.sort_index().index.names
        self.assertEquals(sorted_names, self.frame.index.names)

    def test_repr_to_string(self):
        """Smoke test: repr and to_string work for both fixtures and their transposes."""
        def _subjects():
            # rebuilt on every call so each pass gets fresh transposes
            return (self.frame, self.ymd, self.frame.T, self.ymd.T)

        for obj in _subjects():
            repr(obj)

        buf = StringIO()
        for obj in _subjects():
            obj.to_string(buf=buf)

    def test_getitem_simple(self):
        """A full column tuple selects one column; unknown keys raise KeyError."""
        transposed = self.frame.T

        column = transposed['foo', 'one']
        assert_almost_equal(column.values, transposed.values[:, 0])

        for missing in (('foo', 'four'), 'foobar'):
            self.assertRaises(KeyError, transposed.__getitem__, missing)

    def test_series_getitem(self):
        """Partial, full and list-of-tuples keys select the expected values."""
        col = self.ymd['A']

        # partial key
        partial = col[2000, 3]
        col.ix[2000, 3]  # same lookup through .ix; result is not checked
        wanted = col.reindex(col.index[42:65])
        wanted.index = wanted.index.droplevel(0).droplevel(0)
        assert_series_equal(partial, wanted)

        # full key
        scalar = col[2000, 3, 10]
        self.assertEquals(scalar, col[49])

        # fancy
        picked = col.ix[[(2000, 3, 10), (2000, 3, 13)]]
        wanted = col.reindex(col.index[49:51])
        assert_series_equal(picked, wanted)

        # key error
        self.assertRaises(KeyError, col.__getitem__, (2000, 3, 4))

    def test_series_getitem_corner(self):
        """Out-of-bounds integers raise IndexError; a generator works as a mask."""
        col = self.ymd['A']

        # don't segfault, GH #495
        # out of bounds access
        past_the_end = len(self.ymd)
        self.assertRaises(IndexError, col.__getitem__, past_the_end)

        # generator
        positive = (value > 0 for value in col)
        picked = col[positive]
        assert_series_equal(picked, col[col > 0])

    def test_series_setitem(self):
        """Assigning through a partial or full key touches only the matching rows."""
        col = self.ymd['A']

        # partial key: only positions 42..64 become missing
        col[2000, 3] = np.nan
        self.assert_(isnull(col.values[42:65]).all())
        self.assert_(notnull(col.values[:42]).all())
        self.assert_(notnull(col.values[65:]).all())

        # full key
        col[2000, 3, 10] = np.nan
        self.assert_(isnull(col[49]))

    def test_series_slice_partial(self):
        pass

    def test_frame_getitem_setitem_slice(self):
        """``.ix[:4]`` reads and writes the first four rows only."""
        # getitem
        via_ix = self.frame.ix[:4]
        via_slice = self.frame[:4]
        assert_frame_equal(via_ix, via_slice)

        # setitem
        zeroed = self.frame.copy()
        zeroed.ix[:4] = 0

        self.assert_((zeroed.values[:4] == 0).all())
        self.assert_((zeroed.values[4:] != 0).all())

    def test_frame_getitem_setitem_multislice(self):
        """Row-slice plus column selection through ``.ix`` reads and writes correctly."""
        levels = [['t1', 't2'], ['a', 'b', 'c']]
        labels = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
        midx = MultiIndex(labels=labels, levels=levels, names=[None, 'id'])
        df = DataFrame({'value': [1, 2, 3, 7, 8]}, index=midx)

        result = df.ix[:, 'value']
        assert_series_equal(df['value'], result)

        result = df.ix[1:3, 'value']
        assert_series_equal(df['value'][1:3], result)

        result = df.ix[:, :]
        assert_frame_equal(df, result)

        # Work on an independent copy: with a plain alias both names refer
        # to the same frame and the comparisons below could never fail.
        result = df.copy()
        df.ix[:, 'value'] = 10
        result['value'] = 10
        assert_frame_equal(df, result)

        df.ix[:, :] = 10
        assert_frame_equal(df, result)

    def test_getitem_tuple_plus_slice(self):
        """A full index tuple with a column slice returns the same row as ``xs``."""
        # GH #671
        columns = {
            'a': range(10),
            'b': range(10),
            'c': np.random.randn(10),
            'd': np.random.randn(10)
        }
        indexed = DataFrame(columns).set_index(['a', 'b'])

        with_slice = indexed.ix[(0, 0), :]
        plain = indexed.ix[0, 0]
        cross_section = indexed.xs((0, 0))

        assert_series_equal(with_slice, plain)
        assert_series_equal(with_slice, cross_section)

    def test_xs(self):
        """``xs`` with a full key equals ``.ix`` and holds the fifth row's values."""
        key = ('bar', 'two')
        via_xs = self.frame.xs(key)
        via_ix = self.frame.ix[key]

        assert_series_equal(via_xs, via_ix)
        assert_almost_equal(via_xs.values, self.frame.values[4])

    def test_xs_partial(self):
        """``xs`` with a first-level key equals ``.ix`` and column selection on the transpose."""
        via_xs = self.frame.xs('foo')
        via_ix = self.frame.ix['foo']
        via_transpose = self.frame.T['foo'].T
        assert_frame_equal(via_xs, via_transpose)
        assert_frame_equal(via_xs, via_ix)

    def test_xs_level(self):
        """``xs(level=...)`` filters on that level and drops it from the index."""
        # level given by name
        section = self.frame.xs('two', level='second')
        is_two = self.frame.index.get_level_values(1) == 'two'
        wanted = self.frame[is_two]
        wanted.index = wanted.index.droplevel(1)

        assert_frame_equal(section, wanted)

        # level given by number on a three-level index
        triples = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'),
                                          ('p', 'q', 'r')])
        frame = DataFrame(np.random.randn(3, 5), index=triples)
        section = frame.xs('c', level=2)
        wanted = frame[1:2]
        wanted.index = wanted.index.droplevel(2)
        assert_frame_equal(section, wanted)

    def test_xs_level_multiple(self):
        from pandas import read_table
        from StringIO import StringIO
        text = """                      A       B       C       D        E
one two three   four
a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""

        df = read_table(StringIO(text), sep='\s+')

        result = df.xs(('a', 4), level=['one', 'four'])
        expected = df.xs('a').xs(4, level='four')
        assert_frame_equal(result, expected)

    def test_xs_level0(self):
        from pandas import read_table
        from StringIO import StringIO
        text = """                      A       B       C       D        E
one two three   four
a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""

        df = read_table(StringIO(text), sep='\s+')

        result = df.xs('a', level=0)
        expected = df.xs('a')
        self.assertEqual(len(result), 2)
        assert_frame_equal(result, expected)

    def test_xs_level_series(self):
        """Tuple keys on a series select like ``xs`` / ``.ix`` on the parent frame."""
        col = self.frame['A']
        picked = col[:, 'two']
        wanted = self.frame.xs('two', level=1)['A']
        assert_series_equal(picked, wanted)

        col = self.ymd['A']
        picked = col[2000, 5]
        wanted = self.ymd.ix[2000, 5]['A']
        assert_series_equal(picked, wanted)

        # not implementing this for now

        mixed_key = (2000, slice(3, 4))
        self.assertRaises(TypeError, col.__getitem__, mixed_key)

        # result = s[2000, 3:4]
        # lv =s.index.get_level_values(1)
        # expected = s[(lv == 3) | (lv == 4)]
        # expected.index = expected.index.droplevel(0)
        # assert_series_equal(result, expected)

        # can do this though

    def test_get_loc_single_level(self):
        """Smoke test: every key of a one-level MultiIndex can be looked up."""
        keys = self.single_level
        series = Series(np.random.randn(len(keys)), index=keys)
        for key in keys.values:
            series[key]

    def test_getitem_toplevel(self):
        """A top-level column key returns its sub-frame with that level dropped."""
        wide = self.frame.T

        # 'foo' covers the first three columns
        picked = wide['foo']
        wanted = wide.reindex(columns=wide.columns[:3])
        wanted.columns = wanted.columns.droplevel(0)
        assert_frame_equal(picked, wanted)

        # 'bar' covers columns 3 and 4; .ix must agree with plain getitem
        picked = wide['bar']
        picked_ix = wide.ix[:, 'bar']

        wanted = wide.reindex(columns=wide.columns[3:5])
        wanted.columns = wanted.columns.droplevel(0)
        assert_frame_equal(picked, wanted)
        assert_frame_equal(picked, picked_ix)

    def test_getitem_setitem_slice_integers(self):
        """Integer slices through ``.ix`` work on the first level of an integer MultiIndex."""
        midx = MultiIndex(levels=[[0, 1, 2], [0, 2]],
                          labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        # DataFrame
        frame = DataFrame(np.random.randn(len(midx), 4),
                          index=midx,
                          columns=['a', 'b', 'c', 'd'])
        sliced = frame.ix[1:2]
        wanted = frame.reindex(frame.index[2:])
        assert_frame_equal(sliced, wanted)

        frame.ix[1:2] = 7
        self.assert_((frame.ix[1:2] == 7).values.all())

        # Series
        series = Series(np.random.randn(len(midx)), index=midx)

        sliced = series.ix[1:2]
        wanted = series.reindex(series.index[2:])
        assert_series_equal(sliced, wanted)

        series.ix[1:2] = 7
        self.assert_((series.ix[1:2] == 7).values.all())

    def test_getitem_int(self):
        """An integer through ``.ix`` is a label on integer levels and a position on the string-level fixture."""
        midx = MultiIndex(levels=[[0, 1], [0, 1, 2]],
                          labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

        frame = DataFrame(np.random.randn(6, 2), index=midx)

        picked = frame.ix[1]
        wanted = frame[-3:]
        wanted.index = wanted.index.droplevel(0)
        assert_frame_equal(picked, wanted)

        # raises exception
        self.assertRaises(KeyError, frame.ix.__getitem__, 3)

        # however this will work
        row = self.frame.ix[2]
        wanted_row = self.frame.xs(self.frame.index[2])
        assert_series_equal(row, wanted_row)

    def test_getitem_partial(self):
        """A two-level column key selects the matching columns and drops both levels."""
        wide = self.ymd.T
        picked = wide[2000, 2]

        in_month = wide.columns.labels[1] == 1
        wanted = wide.reindex(columns=wide.columns[in_month])
        wanted.columns = wanted.columns.droplevel(0).droplevel(0)
        assert_frame_equal(picked, wanted)

    def test_getitem_slice_not_sorted(self):
        """A column slice bounded by a NumPy int32 selects the first three columns."""
        wide = self.frame.sortlevel(1).T

        # buglet with int typechecking
        stop = np.int32(3)
        picked = wide.ix[:, :stop]
        wanted = wide.reindex(columns=wide.columns[:3])
        assert_frame_equal(picked, wanted)

    def test_setitem_change_dtype(self):
        """Overwriting a column with booleans keeps the values and the MultiIndex block items."""
        wide = self.frame.T
        key = ('foo', 'two')
        col = wide[key]
        wide[key] = col > col.median()
        assert_series_equal(wide[key], col > col.median())
        self.assert_(isinstance(wide._data.blocks[1].items, MultiIndex))

        narrowed = wide.reindex(columns=[key])
        assert_series_equal(narrowed[key], col > col.median())

    def test_frame_setitem_ix(self):
        """A single cell can be set and read back through ``.ix``."""
        row = ('bar', 'two')

        self.frame.ix[row, 'B'] = 5
        self.assertEquals(self.frame.ix[row, 'B'], 5)

        # with integer labels
        renamed = self.frame.copy()
        renamed.columns = range(3)
        renamed.ix[row, 1] = 7
        self.assertEquals(renamed.ix[row, 1], 7)

    def test_fancy_slice_partial(self):
        """Label slices on partial keys select the expected rows."""
        # first-level labels only
        picked = self.frame.ix['bar':'baz']
        assert_frame_equal(picked, self.frame[3:7])

        # two-level tuples on the three-level index
        picked = self.ymd.ix[(2000, 2):(2000, 4)]
        month_codes = self.ymd.index.labels[1]
        wanted = self.ymd[(month_codes >= 1) & (month_codes <= 3)]
        assert_frame_equal(picked, wanted)

    def test_sortlevel(self):
        """``sortlevel`` raises on a flat index and keeps level names on a series."""
        flat = self.frame.copy()
        flat.index = np.arange(len(flat))
        self.assertRaises(Exception, flat.sortlevel, 0)

        # axis=1

        # series
        sorted_a = self.frame['A'].sortlevel(0)
        flat_a = self.frame.reset_index()['A']
        self.assertRaises(Exception, flat_a.sortlevel)

        # preserve names
        self.assertEquals(sorted_a.index.names, self.frame.index.names)

    def test_delevel_infer_dtype(self):
        """``reset_index`` gives integer and float levels matching column dtypes."""
        # list(...) replaces a comprehension whose loop variable shadowed
        # the builtin ``tuple``.
        tuples = list(cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]))
        index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
        df = DataFrame(np.random.randn(8, 3),
                       columns=['A', 'B', 'C'],
                       index=index)
        deleveled = df.reset_index()
        self.assert_(com.is_integer_dtype(deleveled['prm1']))
        self.assert_(com.is_float_dtype(deleveled['prm2']))

    def test_reset_index_with_drop(self):
        """``reset_index`` adds one column per level unless ``drop=True``."""
        # frame, drop=True: no columns are added
        flattened = self.ymd.reset_index(drop=True)
        self.assertEquals(len(flattened.columns), len(self.ymd.columns))

        # series: becomes a frame with one column per level plus the values
        flattened = self.series.reset_index()
        self.assert_(isinstance(flattened, DataFrame))
        level_count = len(self.series.index.levels)
        self.assert_(len(flattened.columns) == level_count + 1)

        # series, drop=True: stays a series
        flattened = self.series.reset_index(drop=True)
        self.assert_(isinstance(flattened, Series))

    def test_sortlevel_by_name(self):
        """Sorting by level name equals sorting by the level's number."""
        self.frame.index.names = ['first', 'second']
        by_name = self.frame.sortlevel(level='second')
        by_number = self.frame.sortlevel(level=1)
        assert_frame_equal(by_name, by_number)

    def test_sortlevel_mixed(self):
        """Adding a string column does not change how the other columns sort."""
        # rows
        baseline = self.frame.sortlevel(1)

        mixed = self.frame.copy()
        mixed['foo'] = 'bar'
        with_strings = mixed.sortlevel(1)
        assert_frame_equal(baseline, with_strings.drop(['foo'], axis=1))

        # columns
        wide = self.frame.T
        extra = ('foo', 'three')
        baseline = wide.sortlevel(1, axis=1)
        wide['foo', 'three'] = 'bar'

        with_strings = wide.sortlevel(1, axis=1)
        assert_frame_equal(baseline.drop([extra], axis=1),
                           with_strings.drop([extra], axis=1))

    def test_count_level(self):
        """``count(level=...)`` agrees with a per-level groupby count."""
        def _assert_matches_groupby(frame, axis=0):
            axis_index = frame._get_axis(axis)
            for level in range(axis_index.nlevels):
                counted = frame.count(axis=axis, level=level)
                grouped = frame.groupby(axis=axis, level=level).count(axis=axis)
                grouped = grouped.reindex_like(counted).astype('i8')
                assert_frame_equal(counted, grouped)

        # insert missing values into both fixtures
        for target in (self.frame, self.ymd):
            target.ix[1, [1, 2]] = np.nan
            target.ix[7, [0, 1]] = np.nan

        _assert_matches_groupby(self.frame)
        _assert_matches_groupby(self.ymd)
        _assert_matches_groupby(self.frame.T, axis=1)
        _assert_matches_groupby(self.ymd.T, axis=1)

        # can't call with level on regular DataFrame
        plain = tm.makeTimeDataFrame()
        self.assertRaises(Exception, plain.count, level=0)

        self.frame['D'] = 'foo'
        numeric_counts = self.frame.count(level=0, numeric_only=True)
        assert_almost_equal(numeric_counts.columns, ['A', 'B', 'C'])

    def test_count_level_series(self):
        """Series ``count(level=...)`` matches groupby counts, zero-filled for absent labels."""
        midx = MultiIndex(levels=[['foo', 'bar', 'baz'],
                                  ['one', 'two', 'three', 'four']],
                          labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]])

        series = Series(np.random.randn(len(midx)), index=midx)

        for level in (0, 1):
            counted = series.count(level=level)
            grouped = series.groupby(level=level).count()
            assert_series_equal(counted.astype('f8'),
                                grouped.reindex(counted.index).fillna(0))

    def test_count_level_corner(self):
        """Counting by level on empty data yields zeros for every level value."""
        # empty series
        empty_col = self.frame['A'][:0]
        counted = empty_col.count(level=0)
        top_level = empty_col.index.levels[0]
        assert_series_equal(counted, Series(0, index=top_level))

        # empty frame
        empty_frame = self.frame[:0]
        counted = empty_frame.count(level=0)
        zeros = DataFrame({}, index=empty_col.index.levels[0],
                          columns=empty_frame.columns).fillna(0).astype(int)
        assert_frame_equal(counted, zeros)

    def test_unstack(self):
        """Smoke test: single, repeated and integer-dtype unstacking do not raise."""
        # just check that it works for now
        once = self.ymd.unstack()
        once.unstack()

        # test that ints work
        self.ymd.astype(int).unstack()

    def test_stack(self):
        """``stack`` inverts ``unstack`` across sorted, unsorted and multi-level cases."""
        # regular roundtrip
        unstacked = self.ymd.unstack()
        restacked = unstacked.stack()
        assert_frame_equal(restacked, self.ymd)

        unlexsorted = self.ymd.sortlevel(2)

        unstacked = unlexsorted.unstack(2)
        restacked = unstacked.stack()
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        unlexsorted = unlexsorted[::-1]
        unstacked = unlexsorted.unstack(1)
        restacked = unstacked.stack().swaplevel(1, 2)
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        unlexsorted = unlexsorted.swaplevel(0, 1)
        unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1)
        restacked = unstacked.stack(0).swaplevel(1, 2)
        assert_frame_equal(restacked.sortlevel(0), self.ymd)

        # columns unsorted
        unstacked = self.ymd.unstack()
        unstacked = unstacked.sort(axis=1, ascending=False)
        restacked = unstacked.stack()
        assert_frame_equal(restacked, self.ymd)

        # more than 2 levels in the columns
        unstacked = self.ymd.unstack(1).unstack(1)

        result = unstacked.stack(1)
        expected = self.ymd.unstack()
        assert_frame_equal(result, expected)

        result = unstacked.stack(2)
        expected = self.ymd.unstack(1)
        assert_frame_equal(result, expected)

        result = unstacked.stack(0)
        expected = self.ymd.stack().unstack(1).unstack(1)
        assert_frame_equal(result, expected)

        # not all levels present in each echelon
        unstacked = self.ymd.unstack(2).ix[:, ::3]
        stacked = unstacked.stack().stack()
        ymd_stacked = self.ymd.stack()
        assert_series_equal(stacked, ymd_stacked.reindex(stacked.index))

        # stack with negative number
        # With two column levels, level -2 is level 0; the comparison was
        # previously computed but never asserted.
        result = self.ymd.unstack(0).stack(-2)
        expected = self.ymd.unstack(0).stack(0)
        assert_frame_equal(result, expected)

    def test_stack_mixed_dtype(self):
        """Stacking a frame with one string column leaves the other groups as floats."""
        wide = self.frame.T
        wide['foo', 'four'] = 'foo'
        wide = wide.sortlevel(1, axis=1)

        tall = wide.stack()
        assert_series_equal(tall['foo'], wide['foo'].stack())
        self.assert_(tall['bar'].dtype == np.float_)

    def test_unstack_bug(self):
        df = DataFrame({
            'state': ['naive', 'naive', 'naive', 'activ', 'activ', 'activ'],
            'exp': ['a', 'b', 'b', 'b', 'a', 'a'],
            'barcode': [1, 2, 3, 4, 1, 3],
            'v': ['hi', 'hi', 'bye', 'bye', 'bye', 'peace'],
            'extra':
            np.arange(6.)
        })

        result = df.groupby(['state', 'exp', 'barcode', 'v']).apply(len)

        unstacked = result.unstack()
        restacked = unstacked.stack()
        assert_series_equal(restacked,
                            result.reindex(restacked.index).astype(float))

    def test_stack_unstack_preserve_names(self):
        """Level names follow their levels through unstack and back through stack."""
        wide = self.frame.unstack()
        self.assertEquals(wide.index.name, 'first')
        self.assertEquals(wide.columns.names, ['exp', 'second'])

        tall = wide.stack()
        self.assertEquals(tall.index.names, self.frame.index.names)

    def test_unstack_level_name(self):
        """Unstacking by level name equals unstacking by level number."""
        by_name = self.frame.unstack('second')
        by_number = self.frame.unstack(level=1)
        assert_frame_equal(by_name, by_number)

    def test_stack_level_name(self):
        """Stacking by column-level name equals stacking by position or by default."""
        # two column levels: 'exp' is level 0
        wide = self.frame.unstack('second')
        by_name = wide.stack('exp')
        by_number = self.frame.unstack().stack(0)
        assert_frame_equal(by_name, by_number)

        # single column level: naming it equals the default stack
        by_name = self.frame.stack('exp')
        by_default = self.frame.stack()
        assert_series_equal(by_name, by_default)

    def test_stack_unstack_multiple(self):
        """A list of levels to (un)stack behaves like the one-at-a-time chain."""
        levels = ['year', 'month']

        wide = self.ymd.unstack(levels)
        chained = self.ymd.unstack('year').unstack('month')
        assert_frame_equal(wide, chained)
        self.assertEquals(wide.columns.names, chained.columns.names)

        # series
        col = self.ymd['A']
        col_wide = col.unstack(levels)
        assert_frame_equal(col_wide, chained['A'])

        # stack back and restore the original level order
        tall = wide.stack(levels)
        tall = tall.swaplevel(0, 1).swaplevel(1, 2)
        tall = tall.sortlevel(0)

        assert_frame_equal(tall, self.ymd)
        self.assertEquals(tall.index.names, self.ymd.index.names)

        # GH #451
        wide = self.ymd.unstack([1, 2])
        chained = self.ymd.unstack(1).unstack(1)
        assert_frame_equal(wide, chained)

        wide = self.ymd.unstack([2, 1])
        chained = self.ymd.unstack(2).unstack(1)
        assert_frame_equal(wide, chained)

    def test_groupby_transform(self):
        """``apply`` and ``transform`` with the same function agree once aligned."""
        col = self.frame['A']
        first_level = col.index.get_level_values(0)

        groups = col.groupby(first_level)

        via_apply = groups.apply(lambda x: x * 2)
        via_transform = groups.transform(lambda x: x * 2)
        assert_series_equal(via_apply.reindex(via_transform.index),
                            via_transform)

    def test_groupby_corner(self):
        """Smoke test: grouping a one-row frame by a named level does not raise."""
        single_row = MultiIndex(levels=[['foo'], ['bar'], ['baz']],
                                labels=[[0], [0], [0]],
                                names=['one', 'two', 'three'])
        frame = DataFrame([np.random.rand(4)],
                          columns=['a', 'b', 'c', 'd'],
                          index=single_row)
        # should work
        frame.groupby(level='three')

    def test_join(self):
        """An outer join of overlapping row ranges restores the frame, NaN where a side is absent."""
        left = self.frame.ix[:5, ['A']]
        right = self.frame.ix[2:, ['B', 'C']]

        joined = left.join(right, how='outer').reindex(self.frame.index)
        wanted = self.frame.copy()
        # blank out, in the expectation, every cell the join left missing
        wanted.values[np.isnan(joined.values)] = np.nan

        self.assert_(not np.isnan(joined.values).all())

        assert_frame_equal(joined, wanted)

    def test_swaplevel(self):
        """``swaplevel`` accepts numbers or names and is undone by a second swap."""
        col = self.frame['A']

        by_number = col.swaplevel(0, 1)
        by_name = col.swaplevel('first', 'second')
        self.assert_(not by_number.index.equals(self.frame.index))
        assert_series_equal(by_number, by_name)

        # swapping again restores the original index
        restored = by_number.swaplevel(0, 1)
        restored_by_name = by_number.swaplevel('second', 'first')
        self.assert_(restored.index.equals(self.frame.index))
        assert_series_equal(restored, restored_by_name)

        # column axis
        wide = self.frame.T
        swapped_cols = wide.swaplevel('first', 'second', axis=1)
        wanted = self.frame.swaplevel('first', 'second').T
        assert_frame_equal(swapped_cols, wanted)

    def test_swaplevel_panel(self):
        """Swapping levels on a panel's major axis equals swapping on the axis itself."""
        panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2})

        swapped = panel.swaplevel(0, 1, axis='major')
        wanted = panel.copy()
        wanted.major_axis = wanted.major_axis.swaplevel(0, 1)
        tm.assert_panel_equal(swapped, wanted)

    def test_reorder_levels(self):
        """``reorder_levels`` equals the matching chain of swaps; bad levels raise."""
        order = ['month', 'day', 'year']

        # frame rows
        reordered = self.ymd.reorder_levels(order)
        swapped = self.ymd.swaplevel(0, 1).swaplevel(1, 2)
        assert_frame_equal(reordered, swapped)

        # series
        reordered = self.ymd['A'].reorder_levels(order)
        swapped = self.ymd['A'].swaplevel(0, 1).swaplevel(1, 2)
        assert_series_equal(reordered, swapped)

        # frame columns
        reordered = self.ymd.T.reorder_levels(order, axis=1)
        swapped = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1)
        assert_frame_equal(reordered, swapped)

        self.assertRaises(Exception, self.ymd.index.reorder_levels, [1, 2, 3])

    def test_insert_index(self):
        # Assigning a new tuple-keyed column must keep the columns a
        # MultiIndex and store the assigned values.
        df = self.ymd[:5].T
        df[2000, 1, 10] = df[2000, 1, 7]
        self.assertTrue(isinstance(df.columns, MultiIndex))
        self.assertTrue((df[2000, 1, 10] == df[2000, 1, 7]).all())

    def test_alignment(self):
        # Arithmetic between two MultiIndexed series must align on the union
        # of their indexes, in the original and in the reversed order.
        left_index = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])
        left = Series(data=[1, 2, 3], index=left_index)

        right_index = MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])
        right = Series(data=[4, 5, 6], index=right_index)

        difference = left - right
        union = left.index.union(right.index)
        expected = left.reindex(union) - right.reindex(union)
        assert_series_equal(difference, expected)

        # hit non-monotonic code path
        difference = left[::-1] - right[::-1]
        union = left.index.union(right.index)
        expected = left.reindex(union) - right.reindex(union)
        assert_series_equal(difference, expected)

    def test_is_lexsorted(self):
        levels = [[0, 1], [0, 1, 2]]

        # Labels in lexicographic order.
        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
        self.assertTrue(index.is_lexsorted())

        # First level ordered, second level out of order in the last group.
        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]])
        self.assertFalse(index.is_lexsorted())

        # Even the first level is out of order, so the depth must be zero.
        index = MultiIndex(levels=levels,
                           labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]])
        self.assertFalse(index.is_lexsorted())
        self.assertEqual(index.lexsort_depth, 0)

    def test_frame_getitem_view(self):
        # Writing through df['foo'].values must be visible on the next
        # lookup of df['foo'].
        df = self.frame.T
        df['foo'].values[:] = 0
        self.assertTrue((df['foo'].values == 0).all())

        # but not if it's mixed-type: the chained assignment below must not
        # reach the parent frame, so the column still holds zeros.
        df['foo', 'four'] = 'foo'
        df = df.sortlevel(0, axis=1)
        df['foo']['one'] = 2
        self.assertTrue((df['foo', 'one'] == 0).all())

    def test_frame_getitem_not_sorted(self):
        # Select on the first level after appending a ('foo', 'four') column
        # (per the test name, the columns are then no longer sorted).
        df = self.frame.T
        df['foo', 'four'] = 'foo'

        # One array of values per level of the column index.
        level_values = [np.array(vals)
                        for vals in zip(*df.columns.get_tuple_index())]

        via_getitem = df['foo']
        via_ix = df.ix[:, 'foo']
        foo_columns = df.columns[level_values[0] == 'foo']
        expected = df.reindex(columns=foo_columns)
        expected.columns = expected.columns.droplevel(0)
        assert_frame_equal(via_getitem, expected)
        assert_frame_equal(via_ix, expected)

        # Same check along the rows of the transposed frame.
        df = df.T
        via_xs = df.xs('foo')
        via_ix = df.ix['foo']
        foo_rows = df.index[level_values[0] == 'foo']
        expected = df.reindex(foo_rows)
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(via_xs, expected)
        assert_frame_equal(via_ix, expected)

    def test_series_getitem_not_sorted(self):
        # Partial selection on the first level of a series whose index is
        # built from the tuples below ('foo' comes after 'qux').
        arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        # Materialise the tuples: on Python 3 zip() is a one-shot iterator.
        tuples = list(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        s = Series(randn(8), index=index)

        # One array of values per index level, used to build the mask.
        arrays = [np.array(x) for x in zip(*index.get_tuple_index())]

        result = s['qux']
        result2 = s.ix['qux']
        expected = s[arrays[0] == 'qux']
        expected.index = expected.index.droplevel(0)
        assert_series_equal(result, expected)
        assert_series_equal(result2, expected)

    # Names of the reduction methods exercised by test_series_group_min_max
    # and test_frame_group_ops; each name is looked up with getattr() and
    # called with a skipna keyword.
    AGG_FUNCTIONS = [
        'sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std',
        'var'
    ]

    def test_series_group_min_max(self):
        # For every reduction, level and skipna setting, aggregating the
        # level groups by hand must equal the level= shortcut on the series.
        combinations = cart_product(self.AGG_FUNCTIONS, range(2),
                                    [False, True])
        for op, level, skipna in combinations:
            by_level = self.series.groupby(level=level)
            # The lambda is consumed immediately, within the same iteration.
            reducer = lambda x: getattr(x, op)(skipna=skipna)
            via_groupby = by_level.agg(reducer)
            via_level_kw = getattr(self.series, op)(level=level,
                                                    skipna=skipna)
            assert_series_equal(via_groupby, via_level_kw)

    def test_frame_group_ops(self):
        # Inject some NaNs into the fixture (presumably so that the skipna
        # setting makes a difference).
        self.frame.ix[1, [1, 2]] = np.nan
        self.frame.ix[7, [0, 1]] = np.nan

        for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS,
                                                    range(2), range(2),
                                                    [False, True]):
            # Along axis 1 the MultiIndex must sit on the columns, so the
            # transposed fixture is used.
            frame = self.frame if axis == 0 else self.frame.T

            grouped = frame.groupby(level=level, axis=axis)

            aggf = lambda x: getattr(x, op)(skipna=skipna, axis=axis)
            leftside = grouped.agg(aggf)
            rightside = getattr(frame, op)(level=level,
                                           axis=axis,
                                           skipna=skipna)

            # for good measure, groupby detail: both results must be labelled
            # by the grouped level's values along the grouping axis.
            level_index = frame._get_axis(axis).levels[level]

            self.assertTrue(leftside._get_axis(axis).equals(level_index))
            self.assertTrue(rightside._get_axis(axis).equals(level_index))

            assert_frame_equal(leftside, rightside)

    def test_frame_series_agg_multiple_levels(self):
        # sum(level=[...]) must equal groupby(level=[...]).sum() for both the
        # frame and one of its columns.
        checks = [(self.ymd, assert_frame_equal),
                  (self.ymd['A'], assert_series_equal)]
        for obj, assert_equal in checks:
            via_level_kw = obj.sum(level=['year', 'month'])
            via_groupby = obj.groupby(level=['year', 'month']).sum()
            assert_equal(via_level_kw, via_groupby)

    def test_groupby_multilevel(self):
        # Grouping by level positions must match grouping by the explicit
        # level-value arrays and grouping by level names.
        result = self.ymd.groupby(level=[0, 1]).mean()

        k1 = self.ymd.index.get_level_values(0)
        k2 = self.ymd.index.get_level_values(1)

        expected = self.ymd.groupby([k1, k2]).mean()

        assert_frame_equal(result, expected)
        # The grouped levels' names must carry over to the result index.
        self.assertEqual(result.index.names, self.ymd.index.names[:2])

        result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean()
        assert_frame_equal(result, result2)

    def test_groupby_multilevel_with_transform(self):
        # Placeholder: no assertions are implemented, so this test always
        # passes.
        pass

    def test_multilevel_consolidate(self):
        # Smoke test: consolidating after adding a tuple-keyed column to a
        # frame with MultiIndex rows and columns must not raise.
        pairs = [('foo', 'one'), ('foo', 'two'),
                 ('bar', 'one'), ('bar', 'two')]
        index = MultiIndex.from_tuples(pairs)
        frame = DataFrame(np.random.randn(4, 4), index=index, columns=index)
        frame['Totals', ''] = frame.sum(1)
        frame.consolidate()

    def test_ix_preserve_names(self):
        # Selecting on the first level must leave the remaining level names
        # on the result, for both the frame and a single column.
        result = self.ymd.ix[2000]
        result2 = self.ymd['A'].ix[2000]
        self.assertEqual(result.index.names, self.ymd.index.names[1:])
        self.assertEqual(result2.index.names, self.ymd.index.names[1:])

        # Selecting on the first two levels leaves a single-level index
        # carrying the third level's name.
        result = self.ymd.ix[2000, 2]
        result2 = self.ymd['A'].ix[2000, 2]
        self.assertEqual(result.index.name, self.ymd.index.names[2])
        self.assertEqual(result2.index.name, self.ymd.index.names[2])

    def test_partial_set(self):
        # GH #397
        # Assigning through a partial .ix key must give the same frame as
        # writing into the values of the same selection on a second copy.
        df = self.ymd.copy()
        expected = self.ymd.copy()
        df.ix[2000, 4] = 0
        expected.ix[2000, 4].values[:] = 0
        assert_frame_equal(df, expected)

        # Same through a single column.
        df['A'].ix[2000, 4] = 1
        expected['A'].ix[2000, 4].values[:] = 1
        assert_frame_equal(df, expected)

        # Same with only the first level given.
        df.ix[2000] = 5
        expected.ix[2000].values[:] = 5
        assert_frame_equal(df, expected)

        # this works...for now
        df['A'].ix[14] = 5
        self.assertEqual(df['A'][14], 5)

    def test_unstack_preserve_types(self):
        # GH #403
        # Add a string column and an integer column to the fixture, then
        # check the dtype of each unstacked column.
        self.ymd['E'] = 'foo'
        self.ymd['F'] = 2

        unstacked = self.ymd.unstack('month')
        self.assertEqual(unstacked['A', 1].dtype, np.float64)
        self.assertEqual(unstacked['E', 1].dtype, np.object_)
        # The integer column is expected to come back as float64.
        self.assertEqual(unstacked['F', 1].dtype, np.float64)

    def test_getitem_lowerdim_corner(self):
        # Both reading and writing through .ix with this row key must raise
        # KeyError.
        missing_key = (('bar', 'three'), 'B')

        self.assertRaises(KeyError, self.frame.ix.__getitem__, missing_key)

        self.assertRaises(KeyError, self.frame.ix.__setitem__, missing_key, 0)

    #----------------------------------------------------------------------
    # AMBIGUOUS CASES!

    def test_partial_ix_missing(self):
        # Skipped unconditionally: everything below the raise is unreachable
        # and only records the behaviour this ambiguous case might have.
        raise nose.SkipTest

        result = self.ymd.ix[2000, 0]
        expected = self.ymd.ix[2000]['A']
        assert_series_equal(result, expected)

        # need to put in some work here

        # self.ymd.ix[2000, 0] = 0
        # self.assert_((self.ymd.ix[2000]['A'] == 0).all())

        self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6))
        # NOTE(review): __getitem__ is called with two arguments here, which
        # would raise regardless of indexing semantics -- possibly
        # __setitem__ was intended; confirm before enabling this test.
        self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0)

    def test_fancy_2d(self):
        # Skipped unconditionally; the statements after the raise never run.
        raise nose.SkipTest

        by_row_then_column = self.frame.ix['foo', 'B']
        reference = self.frame.xs('foo')['B']
        assert_series_equal(by_row_then_column, reference)

        transposed = self.frame.T
        by_column_then_row = transposed.ix['B', 'foo']
        reference = transposed.xs('B')['foo']
        assert_series_equal(by_column_then_row, reference)

    #----------------------------------------------------------------------

    def test_to_html(self):
        # Smoke test: HTML rendering must not raise for the fixture or its
        # transpose once the columns have a name.
        ymd = self.ymd
        ymd.columns.name = 'foo'
        ymd.to_html()
        ymd.T.to_html()

    def test_level_with_tuples(self):
        # Level values that are themselves tuples must be usable as keys,
        # first with 3-tuples, then with 2-tuples.
        key3 = ('foo', 'bar', 0)
        index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0),
                                    ('foo', 'qux', 0)], [0, 1]],
                           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        ser = Series(np.random.randn(6), index=index)
        df = DataFrame(np.random.randn(6, 4), index=index)

        via_getitem = ser[key3]
        via_ix = ser.ix[key3]
        expected = ser[:2]
        expected.index = expected.index.droplevel(0)
        assert_series_equal(via_getitem, expected)
        assert_series_equal(via_ix, expected)

        # A full key whose second element is 2 must raise KeyError.
        self.assertRaises(KeyError, ser.__getitem__, (key3, 2))

        via_ix = df.ix[key3]
        via_xs = df.xs(key3)
        expected = df[:2]
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(via_ix, expected)
        assert_frame_equal(via_xs, expected)

        key2 = ('foo', 'bar')
        index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'),
                                    ('foo', 'qux')], [0, 1]],
                           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])

        ser = Series(np.random.randn(6), index=index)
        df = DataFrame(np.random.randn(6, 4), index=index)

        via_getitem = ser[key2]
        via_ix = ser.ix[key2]
        expected = ser[:2]
        expected.index = expected.index.droplevel(0)
        assert_series_equal(via_getitem, expected)
        assert_series_equal(via_ix, expected)

        via_ix = df.ix[key2]
        via_xs = df.xs(key2)
        expected = df[:2]
        expected.index = expected.index.droplevel(0)
        assert_frame_equal(via_ix, expected)
        assert_frame_equal(via_xs, expected)

    def test_int_series_slicing(self):
        # An integer slice on a MultiIndexed series or frame must select by
        # position, matching a reindex on the sliced index.
        s = self.ymd['A']
        result = s[5:]
        expected = s.reindex(s.index[5:])
        assert_series_equal(result, expected)

        # Assignment through the same slice must write the same positions.
        exp = self.ymd['A'].copy()
        s[5:] = 0
        exp.values[5:] = 0
        self.assertTrue(np.array_equal(s.values, exp.values))

        result = self.ymd[5:]
        expected = self.ymd.reindex(s.index[5:])
        assert_frame_equal(result, expected)

    def test_mixed_depth_get(self):
        # Columns whose trailing levels are empty strings must be reachable
        # through the shortened key, and the result is named after that key.
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        # sorted() instead of zip(...).sort(): zip() has no sort() method on
        # Python 3, where it returns an iterator.
        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df['a']
        expected = df['a', '', '']
        assert_series_equal(result, expected)
        self.assertEqual(result.name, 'a')

        result = df['routine1', 'result1']
        expected = df['routine1', 'result1', '']
        assert_series_equal(result, expected)
        self.assertEqual(result.name, ('routine1', 'result1'))

    def test_mixed_depth_insert(self):
        # Inserting under a single-string key must equal inserting under the
        # full key padded with empty strings.
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        # sorted() instead of zip(...).sort(): zip() has no sort() method on
        # Python 3, where it returns an iterator.
        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df.copy()
        expected = df.copy()
        result['b'] = [1, 2, 3, 4]
        expected['b', '', ''] = [1, 2, 3, 4]
        assert_frame_equal(result, expected)

    def test_mixed_depth_drop(self):
        # Dropping by a first-level label must equal dropping every full
        # column key under that label.
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        # sorted() instead of zip(...).sort(): zip() has no sort() method on
        # Python 3, where it returns an iterator.
        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        result = df.drop('a', axis=1)
        expected = df.drop([('a', '', '')], axis=1)
        assert_frame_equal(expected, result)

        # 'top' covers two columns, dropped one at a time for the reference.
        result = df.drop(['top'], axis=1)
        expected = df.drop([('top', 'OD', 'wx')], axis=1)
        expected = expected.drop([('top', 'OD', 'wy')], axis=1)
        assert_frame_equal(expected, result)

    def test_mixed_depth_pop(self):
        # Popping by a shortened key must return the same data, and leave the
        # same frame behind, as popping by the full padded key.
        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        # sorted() instead of zip(...).sort(): zip() has no sort() method on
        # Python 3, where it returns an iterator.
        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(randn(4, 6), columns=index)

        df1 = df.copy()
        df2 = df.copy()
        result = df1.pop('a')
        expected = df2.pop(('a', '', ''))
        assert_series_equal(expected, result)
        assert_frame_equal(df1, df2)
        self.assertEqual(result.name, 'a')

        # Popping a label that covers several columns returns a frame; the
        # remainder must match an explicit drop of that label.
        expected = df1['top']
        df1 = df1.drop(['top'], axis=1)
        result = df2.pop('top')
        assert_frame_equal(expected, result)
        assert_frame_equal(df1, df2)
Пример #60
0
            Query_Classificiation = pd.concat(
                [Query_Classificiation, Query_temp_frame])

        else:
            type = 'PuUbiq'
            hit_data_1 = list(chr_dict.values())[0][0]
            Query_class_list = [[
                query, type, unique_hit_locations, 'Not assessed'
            ]]
            Query_temp_frame = DataFrame(Query_class_list,
                                         columns=classification_cols)
            Query_Classificiation = pd.concat(
                [Query_Classificiation, Query_temp_frame])
        current_pct = ((tracker_count / total) * 100)
        update = '%dpct done, on record %d of %d.' % (current_pct,
                                                      tracker_count, total)
        print(update)

print('Writing output files')

# The two tables were previously rendered with to_string() and the result
# discarded; those calls produced no output, so they are dropped.

# Both tables are written as tab-separated files without the index column.
# The output names are a fixed prefix followed by the input argument.
# NOTE(review): if args.input can contain a directory component, the prefix
# ends up in front of the directory name -- confirm how the script is invoked.
Hit_location_out = 'Hit_Locations_' + args.input
Location_by_hit.to_csv(Hit_location_out, sep='\t', index=False)

Query_Classificiation_out = 'Hit_Classifications_' + args.input
Query_Classificiation.to_csv(Query_Classificiation_out, sep='\t', index=False)

print('Done!')