Python DataFrame.to_json示例，pandas.DataFrame.to_json Python示例

示例#1

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]

        df = DataFrame(vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

        for orient in ["split", "index", "columns"]:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient="records")
        left = read_json(inp, orient="records", convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient="values")
        left = read_json(inp, orient="values", convert_axes=False)
        assert_frame_equal(left, right)

示例#2

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_frame_non_unique_columns(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

        self.assertRaises(ValueError, df.to_json, orient="index")
        self.assertRaises(ValueError, df.to_json, orient="columns")
        self.assertRaises(ValueError, df.to_json, orient="records")

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split", dtype=False))
        unser = read_json(df.to_json(orient="values"), orient="values")
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"])
        result = read_json(df.to_json(orient="split"), orient="split")
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient="split"), orient="split", convert_dates=["x"])
            assert_frame_equal(result, df)

        for o in [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ]:
            _check(DataFrame(o, index=[1, 2], columns=["x", "x"]))

示例#3

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit="ms")

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, "timedelta64[ns]")
        # index will be float dtype
        assert_series_equal(s, pd.read_json(s.to_json(), typ="series").apply(converter), check_index_type=False)

        s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1], dtype=float))
        self.assertEqual(s.dtype, "timedelta64[ns]")
        assert_series_equal(s, pd.read_json(s.to_json(), typ="series").apply(converter))

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, "timedelta64[ns]")
        assert_frame_equal(
            frame, pd.read_json(frame.to_json()).apply(converter), check_index_type=False, check_column_type=False
        )

        frame = DataFrame(
            {
                "a": [timedelta(days=23), timedelta(seconds=5)],
                "b": [1, 2],
                "c": pd.date_range(start="20130101", periods=2),
            }
        )

        result = pd.read_json(frame.to_json(date_unit="ns"))
        result["a"] = pd.to_timedelta(result.a, unit="ns")
        result["c"] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result, check_index_type=False)

示例#4

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [[10, 1, 'foo', .1, .01],
                [20, 2, 'bar', .2, .02],
                [30, 3, 'baz', .3, .03],
                [40, 4, 'qux', .4, .04]]

        df = DataFrame(vals, index=list('abcd'),
                       columns=['1st', '2nd', '3rd', '4th', '5th'])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

        for orient in ['split', 'index', 'columns']:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient='records')
        left = read_json(inp, orient='records', convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient='values')
        left = read_json(inp, orient='values', convert_axes=False)
        assert_frame_equal(left, right)

示例#5

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

    def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, 'timedelta64[ns]')

        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1]))
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        result = pd.read_json(s.to_json(), typ='series').apply(converter)
        assert_series_equal(result, s)

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter))

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result)

示例#6

0

显示文件

文件： main.py 项目： gfyoung/Tamr-Hackathon-2015

def convertToPutJson(csv_file):
    df = cleanColumns(read_csv(csv_file))
    putColumns = ["method", "recordId", "body"]
    putDf = DataFrame(columns = putColumns)

    for recordId in df.index:
        print "Converting data for recordId {recordId}...".format(recordId = recordId)
        body = {}
        
        for col in df.columns:
            body[str(col).strip()] = [str(df[col][recordId]).strip()]
        
        putDfRow = DataFrame([["PUT", str(recordId), body]], columns = putColumns)
        putDf = putDf.append(putDfRow)
    
    json_file = sub("csv|txt", "json", csv_file)
    putDf.to_json(json_file, orient="records")

    with open(json_file, 'r') as target:
        putData = target.read()

    target = open(json_file, 'w')
    putData = putData.replace("},{", "}\n\n{")[1:-1]
    target.write(putData)
    target.close()

    print "Successfully created put data!"
    return json_file

示例#7

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_frame_non_unique_columns(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                       columns=['x', 'x'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')
        self.assertRaises(ValueError, df.to_json, orient='records')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split', dtype=False))
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([['a','b'],['c','d']], index=[1,2], columns=['x','y'])
        result = read_json(df.to_json(orient='split'), orient='split')
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient='split'), orient='split',
                               convert_dates=['x'])
            assert_frame_equal(result, df)

        for o in [[['a','b'],['c','d']],
                  [[1.5,2.5],[3.5,4.5]],
                  [[1,2.5],[3,4.5]],
                  [[Timestamp('20130101'),3.5],[Timestamp('20130102'),4.5]]]:
            _check(DataFrame(o, index=[1,2], columns=['x','x']))

示例#8

0

显示文件

文件： json.py 项目： BobMcFry/pandas

 def setup(self, index):
     N = 100000
     indexes = {'int': np.arange(N),
                'datetime': date_range('20000101', periods=N, freq='H')}
     df = DataFrame(np.random.randn(N, 5),
                    columns=['float_{}'.format(i) for i in range(5)],
                    index=indexes[index])
     df.to_json(self.fname, orient='records', lines=True)

示例#9

0

显示文件

文件： test_pandas.py 项目： ARF1/pandas

    def test_frame_empty(self):
        df = DataFrame(columns=['jim', 'joe'])
        self.assertFalse(df._is_mixed_type)
        assert_frame_equal(read_json(df.to_json()), df)

        # mixed type
        df['joe'] = df['joe'].astype('i8')
        self.assertTrue(df._is_mixed_type)
        assert_frame_equal(read_json(df.to_json()), df)

示例#10

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

    def test_data_frame_size_after_to_json(self):
        # GH15344
        df = DataFrame({'a': [str(1)]})

        size_before = df.memory_usage(index=True, deep=True).sum()
        df.to_json()
        size_after = df.memory_usage(index=True, deep=True).sum()

        self.assertEqual(size_before, size_after)

示例#11

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_frame_double_encoded_labels(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=['index " 1', "index / 2"], columns=["a \\ b", "y / z"])

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
        assert_frame_equal(df, read_json(df.to_json(orient="columns"), orient="columns"))
        assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index"))
        df_unser = read_json(df.to_json(orient="records"), orient="records")
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)

示例#12

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C'])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

示例#13

0

显示文件

文件： test_pandas.py 项目： ChristopherShort/pandas

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        assert_frame_equal(result, df)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

示例#14

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_frame_non_unique_index(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])

        self.assertRaises(ValueError, df.to_json, orient="index")
        self.assertRaises(ValueError, df.to_json, orient="columns")

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split"))
        unser = read_json(df.to_json(orient="records"), orient="records")
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient="values"), orient="values")
        np.testing.assert_equal(df.values, unser.values)

示例#15

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        self.assertEqual(result.index.dtype, np.float64)
        self.assertEqual(result.columns.dtype, np.float64)
        assert_frame_equal(result, df, check_index_type=False, check_column_type=False)

        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
        result = read_json(df.to_json())
        assert_frame_equal(result, df)

示例#16

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

    def test_categorical(self):
        # GH4377 df.to_json segfaults with non-ndarray blocks
        df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]})
        df["B"] = df["A"]
        expected = df.to_json()

        df["B"] = df["A"].astype('category')
        self.assertEqual(expected, df.to_json())

        s = df["A"]
        sc = df["B"]
        self.assertEqual(s.to_json(), sc.to_json())

示例#17

0

显示文件

文件： test_pandas.py 项目： Winterflower/pandas

    def test_to_jsonl(self):
        # GH9180
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
        self.assertEqual(result, expected)

        df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
        result = df.to_json(orient="records", lines=True)
        expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}'
        self.assertEqual(result, expected)
        assert_frame_equal(pd.read_json(result, lines=True), df)

示例#18

0

显示文件

文件： read_surveypages.py 项目： jonrobinson2/economist_poll

def main():

    # Get links to survey pages
    home_url = "http://www.igmchicago.org/igm-economic-experts-panel"
    home_contents = get_page_contents(home_url)
    urls = re.findall(
        r"<h2><a href=\"(\S+?results\?SurveyID=\S+?)\"", home_contents)
    urls = ["http://www.igmchicago.org" + url for url in urls]

    # Loop through survey pages
    df = DataFrame()
    question_count = 0
    for url in reversed(urls):

        contents = get_page_contents(url)

        questions = re.findall(r"surveyQuestion\">([\s\S]+?)</h3>", contents)
        responder_list = re.findall(
            r"\?id=([\d]+)?\">([\s\w.]+?)</a>", contents)

        responses = re.findall(
            r"<span class=\"option-[\d]+?\">([\s\w.]+?)</span>", contents)
        num_responders = len(responses) / len(questions)

        # Loop through sub-questions (A, B, etc) within each page
        for i, question in enumerate(questions):
            question = clean_string(question)
            question_count += 1
            print(question)

            # Restrict range to responses for this sub-question
            rng = (i * num_responders, (i + 1) * num_responders)

            # Collect sub-question, its url suffix, and the responses
            prefix = "(%03d" % question_count + ") "
            q_responses = Series(
                responses[rng[0]:rng[1]], index=responder_list[rng[0]:rng[1]])
            q_url_suffix = re.findall("=(.+)", url)[0]
            q_responses = q_responses.append(
                Series([q_url_suffix], index=['q_url_suffix']))
            q_responses.name = prefix + question.strip()

            # Add question data to dataframe
            df = df.join(q_responses, how='outer')

    # Move responder id from index to column, only after all joins are complete
    df['responder_id'] = [pair[0] for pair in df.index]
    df.index = [pair[1] if type(pair) == tuple else pair for pair in df.index]

    # Write to file
    df.to_json("survey_results.json")

示例#19

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        self.assertRaises(ValueError, df.to_json, orient='index')
        self.assertRaises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        self.assertTrue(df.columns.equals(unser.columns))
        np.testing.assert_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        np.testing.assert_equal(df.values, unser.values)

示例#20

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_frame_double_encoded_labels(self):
        df = DataFrame([['a', 'b'], ['c', 'd']],
                       index=['index " 1', 'index / 2'],
                       columns=['a \\ b', 'y / z'])

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                         orient='columns'))
        assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                         orient='index'))
        df_unser = read_json(df.to_json(orient='records'), orient='records')
        assert_index_equal(df.columns, df_unser.columns)
        np.testing.assert_equal(df.values, df_unser.values)

示例#21

0

显示文件

文件： test_pandas.py 项目： Jengel1/SunriseSunsetTimeFinder

    def test_frame_non_unique_index(self):
        df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1],
                       columns=['x', 'y'])

        pytest.raises(ValueError, df.to_json, orient='index')
        pytest.raises(ValueError, df.to_json, orient='columns')

        assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                         orient='split'))
        unser = read_json(df.to_json(orient='records'), orient='records')
        tm.assert_index_equal(df.columns, unser.columns)
        tm.assert_almost_equal(df.values, unser.values)
        unser = read_json(df.to_json(orient='values'), orient='values')
        tm.assert_numpy_array_equal(df.values, unser.values)

示例#22

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        # the index is serialized as strings....correct?
        assert_frame_equal(result, df)

示例#23

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_blocks_compat_GH9037(self):
        index = pd.date_range('20000101', periods=10, freq='H')
        df_mixed = DataFrame(OrderedDict(
            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                     0.95748401, -1.02970536],
            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
                   40314334, 21290235,  4991321, 41903419, 16008365],
            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
            float_2=[-0.0428278, -1.80872357,  3.36042349, -0.7573685,
                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                     -0.03030452, 1.43366348],
            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
                   34193846, 10561746, 24867120, 76131025]
        ), index=index)

        # JSON deserialisation always creates unicode strings
        df_mixed.columns = df_mixed.columns.astype('unicode')

        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                    orient='split')
        assert_frame_equal(df_mixed, df_roundtrip,
                           check_index_type=True,
                           check_column_type=True,
                           check_frame_type=True,
                           by_blocks=True,
                           check_exact=True)

示例#24

0

显示文件

文件： views.py 项目： uddhavpgautam/mining

    def post(self):
        post = json.loads(self.request.body)

        MyClient = riak.RiakClient(protocol=RIAK_PROTOCOL,
                                   http_port=RIAK_HTTP_PORT,
                                   host=RIAK_HOST)

        MyAdminBucket = MyClient.bucket(ADMIN_BUCKET_NAME)

        connection = None
        for c in MyAdminBucket.get('connection').data:
            if c['slug'] == post.get('connection', None):
                connection = c['connection']

        sql = """SELECT * FROM ({}) AS CUBE LIMIT 10;""".format(
            post.get('sql', None))

        e = create_engine(connection)
        connection = e.connect()
        try:
            resoverall = connection.execute(text(sql))
        except:
            self.write({'sql': '', 'msg': 'Error!'})
            self.finish()

        df = DataFrame(resoverall.fetchall())
        if df.empty:
            self.finish()
        df.columns = resoverall.keys()
        df.head()

        self.write({'sql': df.to_json(orient='records'), 'msg': 'Success!'})
        self.finish()

示例#25

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

 def test_frame_empty_mixedtype(self):
     # mixed type
     df = DataFrame(columns=['jim', 'joe'])
     df['joe'] = df['joe'].astype('i8')
     self.assertTrue(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                        check_index_type=False)

示例#26

0

显示文件

文件： test_pandas.py 项目： llllllllll/pandas

 def test_frame_empty(self):
     df = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df,
                        check_index_type=False)
     # GH 7445
     result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
     expected = '{"test":{}}'
     tm.assert_equal(result, expected)

示例#27

0

显示文件

文件： test_pandas.py 项目： Jengel1/SunriseSunsetTimeFinder

 def test_default_handler_numpy_unsupported_dtype(self):
     # GH12554 to_json raises 'Unhandled numpy dtype 15'
     df = DataFrame({'a': [1, 2.3, complex(4, -5)],
                     'b': [float('nan'), None, complex(1.2, 0)]},
                    columns=['a', 'b'])
     expected = ('[["(1+0j)","(nan+0j)"],'
                 '["(2.3+0j)","(nan+0j)"],'
                 '["(4-5j)","(1.2+0j)"]]')
     assert df.to_json(default_handler=str, orient="values") == expected

示例#28

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_mixed_timedelta_datetime(self):
        frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                          dtype=object)

        expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                    pd.Timestamp(frame.a[1]).value]})
        result = pd.read_json(frame.to_json(date_unit='ns'),
                              dtype={'a': 'int64'})
        assert_frame_equal(result, expected)

示例#29

0

显示文件

文件： test_pandas.py 项目： FedericoCeratto/pandas

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
        dfj2['date'] = Timestamp('20130101')
        dfj2['ints'] = lrange(5)
        dfj2['bools'] = True
        dfj2.index = pd.date_range('20130101',periods=5)

        json = dfj2.to_json()
        result = read_json(json,dtype={'ints' : np.int64, 'bools' : np.bool_})
        assert_frame_equal(result,result)

示例#30

0

显示文件

文件： test_pandas.py 项目： israelzuniga/pandas

    def test_doc_example(self):
        dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        dfj2["date"] = Timestamp("20130101")
        dfj2["ints"] = lrange(5)
        dfj2["bools"] = True
        dfj2.index = pd.date_range("20130101", periods=5)

        json = dfj2.to_json()
        result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
        assert_frame_equal(result, result)

示例#31

0

显示文件

class TestRun:
    """
    represents the collected data of a particular (set of) log file(s)
    """
    FILE_EXTENSION = ".trn"
    """ the file extension for saving and loading test runs from """
    def __init__(self, filenames=[]):
        self.inputfromstdin = False
        self.filenames = []
        for filename in filenames:
            self.appendFilename(filename)
        self.data = DataFrame(dtype=object)

        self.datadict = {}
        self.currentproblemdata = {}
        self.currentproblemid = 0
        """ meta data represent problem-independent data """
        self.metadatadict = {}
        self.parametervalues = {}
        self.defaultparametervalues = {}
        self.keyset = set()

        self.currentfileiterator = None
        self.currentfile = None
        self.consumedStdinput = []

    def __iter__(self):
        if (self.currentfile != ""):
            with open(self.currentfile, "r") as f:
                for line in enumerate(f):
                    yield line
        else:
            for line in enumerate(self.consumedStdinput):
                yield line
            for line in enumerate(sys.stdin, len(self.consumedStdinput)):
                yield line

    def iterationPrepare(self):
        filenames = sorted(
            self.filenames,
            key=lambda x: misc.sortingKeyContext(misc.filenameGetContext(x)))
        self.currentfileiterator = iter(filenames)

    def iterationNextFile(self):
        try:
            self.currentfile = next(self.currentfileiterator)
            return True
        except StopIteration:
            return False

    def iterationAddConsumedStdinput(self, consumedlines):
        if self.currentfile == "":
            for line in consumedlines:
                self.consumedStdinput.append(line)

    def iterationCleanUp(self):
        self.currentfileiterator = None

    def iterationGetCurrentFile(self):
        return self.currentfile

    def setInputFromStdin(self):
        self.filenames.append("")

    def appendFilename(self, filename):
        # TODO test this
        """Append a file name to the list of filenames of this test run
        """
        filename = os.path.abspath(filename)
        if filename not in self.filenames:
            self.filenames.append(filename)
        else:
            return

        extension = misc.filenameGetContext(filename)
        if extension in [Key.CONTEXT_ERRFILE, Key.CONTEXT_LOGFILE]:
            metafile = os.path.splitext(filename)[0] + ".meta"

            if os.path.isfile(metafile) and (metafile not in self.filenames):
                self.filenames.append(metafile)

    def addDataByName(self, datakeys, data, problem):
        """Add the current data under the specified dataname

        Readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used for access
        """
        for problemid, name in self.datadict.setdefault(Key.ProblemName,
                                                        {}).items():
            if name == problem:
                self.addDataById(datakeys, data, problemid)

    def addData(self, datakey, data):
        """Add data to current problem

        readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length
        """
        logging.debug("TestRun %s receives data Datakey %s, %s" %
                      (self.getName(), repr(datakey), repr(data)))

        if type(datakey) is list and type(data) is list:
            for key, datum in zip(datakey, data):
                self.currentproblemdata[key] = datum
        else:
            self.currentproblemdata[datakey] = data

    def getCurrentProblemData(self, datakey: str = None):
        """Return current problem data, either entirely or for specified data key
        """
        if datakey is None:
            return self.currentproblemdata
        else:
            return self.currentproblemdata.get(datakey)

    def addDataById(self, datakeys, data, problemid):
        """Add the data or to the specified problem

        readers can use this method to add data, either as a single datakey, or as list,
        where in the latter case it is required that datakeys and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used for access if a problemid was given
        """
        # check for the right dictionary to store the data
        logging.debug("TestRun %s receives data Datakey %s, %s to problem %s" %
                      (self.getName(), repr(datakeys), repr(data), problemid))

        if type(datakeys) is list and type(data) is list:
            for key, datum in zip(datakeys, data):
                self.datadict.setdefault(key, {})[problemid] = datum
        else:
            self.datadict.setdefault(datakeys, {})[problemid] = data

    def addParameterValue(self, paramname, paramval):
        """Store the value for a parameter of a given name for this test run
        """
        self.parametervalues[paramname] = paramval

    def addDefaultParameterValue(self, paramname, defaultval):
        """Store the value for a parameter of a given name for this test run
        """
        self.defaultparametervalues[paramname] = defaultval

    def getParameterData(self):
        """Return two dictionaries that map parameter names to  their value and default value
        """
        return (self.parametervalues, self.defaultparametervalues)

    def getLogFile(self, fileextension=".out"):
        """Returns the name of the logfile
        """
        for filename in self.filenames:
            if filename.endswith(fileextension):
                return filename
        return None

    def getKeySet(self):
        """Return a list or set of keys (which are the columns headers of the data)
        """
        if self.datadict != {}:
            return list(self.datadict.keys())
        else:
            return set(self.data.columns)

    def emptyData(self):
        """Empty all data of current testrun
        """
        self.data = DataFrame(dtype=object)

    def getMetaData(self):
        """Return a data frame containing meta data
        """
        return DataFrame(self.metadatadict)

    def finalizeCurrentCollection(self, solver):
        """ Any data of the current problem is saved as a new row in datadict
        """
        if self.currentproblemdata != {}:
            # Add data collected by solver into currentproblemdata, such as primal and dual bound,
            self.addData(*solver.getData())
            for key in self.metadatadict.keys():
                self.addData(key, self.metadatadict[key])

            for key in self.currentproblemdata.keys():
                self.datadict.setdefault(
                    key,
                    {})[self.currentproblemid] = self.currentproblemdata[key]
            self.currentproblemdata = {}
            self.currentproblemid = self.currentproblemid + 1

    def finishedReadingFile(self, solver):
        """ Save data of current problem
        """
        self.finalizeCurrentCollection(solver)

    def setupForDataCollection(self):
        """ Save data in a python dictionary for easier data collection
        """
        self.datadict = self.data.to_dict()
        self.data = DataFrame(dtype=object)

    def setupAfterDataCollection(self):
        """ Save data in a pandas dataframe for futher use (i.e. reading and finding data)
        """
        self.data = DataFrame(self.datadict)
        self.datadict = {}

    def hasProblemName(self, problemname):
        """ Return if already collected data for a problem with given name
        """
        if self.datadict != {}:
            return problemname in self.datadict.get(Key.ProblemName,
                                                    {}).values()
        else:
            if Key.ProblemName in self.data.keys():
                for name in self.data[Key.ProblemName]:
                    if problemname == name:
                        return True
            return False

    def hasProblemId(self, problemid):
        """ Returns if there is already data collected for a problem with given id
        """
        return problemid in range(self.currentproblemid)

    def getProblemIds(self):
        """ Return a list of problemids
        """
        return list(range(self.currentproblemid))

    def getProblemNames(self):
        """ Return an (unsorted) list of problemnames
        """
        if self.datadict != {}:
            return list(self.datadict.get(Key.ProblemName, []))
        else:
            if Key.ProblemName in self.data.columns:
                return list(self.data[Key.ProblemName])
            else:
                return []

    def getProblemDataByName(self, problemname, datakey):
        """Return the data collected for problems with given name
        """
        collecteddata = []
        if self.datadict != {}:
            for key, dat in self.datadict.get("ProblemName", None):
                if dat == problemname:
                    collecteddata.append(self.getProblemDataById(key, datakey))
        else:
            collecteddata = list(self.data[self.data[Key.ProblemName] ==
                                           problemname].loc[:, datakey])
        try:
            return collecteddata[0]
        except IndexError:
            return None

    def getProblemDataById(self, problemid, datakey=None):
        """Return data for a specific datakey, or None, if no such data exists for this (probname, datakey) key pair
        """
        if datakey is None:
            try:
                return ",".join("%s: %s" %
                                (key, self.getProblemDataById(problemid, key))
                                for key in self.getKeySet())
            except KeyError:
                return "<%s> not contained in keys, have only\n%s" % \
                    (problemid, ",".join((ind for ind in self.getProblemIds())))
        else:
            if self.datadict != {}:
                return self.datadict.get(datakey, {}).get(problemid, None)
            else:
                try:
                    data = self.data.loc[problemid, datakey]
                except KeyError:
                    data = None
                if type(data) is list or notnull(data):
                    return data
                else:
                    return None

    def getProblemsDataById(self, problemids, datakey):
        """ Return data for a list of problems
        """
        if self.datadict != {}:
            return [
                self.datadict.get(datakey, {}).get(id, None)
                for id in problemids
            ]
        else:
            return self.data.loc[problemids, datakey]

    def deleteProblemDataById(self, problemid):
        """ Delete all data acquired so far for problemid
        """
        if self.datadict != {}:
            for key in list(self.datadict.keys()):
                try:
                    del self.datadict[key][problemid]
                except KeyError:
                    pass
        else:
            try:
                self.data.drop(problemid, inplace=True)
            except TypeError:
                # needs to be caught for pandas version < 0.13
                self.data = self.data.drop(problemid)

    def saveToFile(self, filename):
        """ Dump the pickled instance of itself into a .trn-file
        """
        try:
            f = open(filename, 'wb')
            pickle.dump(self, f, protocol=2)
            f.close()
        except IOError:
            print("Could not open %s for saving test run" % filename)

    def emptyCurrentProblemData(self):
        """ Empty data of currently read problem
        """
        return self.currentproblemdata == {}

    def printToConsole(self, formatstr="{idx}: {d}"):
        """ Print data to console
        """
        for idx, d in self.data.iterrows():
            #            pd.set_option('display.max_rows', len(d))
            print(formatstr.format(d=d, idx=idx))
#            pd.reset_option('display.max_rows')

    def toJson(self):
        """ Return the data-object in json
        """
        return self.data.to_json()

    @staticmethod
    def loadFromFile(filename):
        """ Loads a .trn-File containing a particular instance of TestRun
        """
        try:
            if filename.endswith(".gz"):
                import gzip
                f = gzip.open(filename, 'rb')
            else:
                f = open(filename, 'rb')
        except IOError:
            print("Could not open %s for loading test run" % filename)
            return None
        testrun = pickle.load(f)
        f.close()
        return testrun

    def getData(self, datakey=None):
        """Return a data frame object of the acquired data
        """
        return self.data

    def getCurrentLogfilename(self):
        """ Return the name of the current logfile 
        """
        return os.path.basename(self.filenames[0])

    def getSettings(self):
        """ Return the settings associated with this test run
        """
        try:
            return self.data['Settings'][0]
        except KeyError:
            return os.path.basename(self.filenames[0]).split('.')[-2]
#

    def getName(self):
        """ Convenience method to make test run a manageable object
        """
        return self.getIdentification()

    def getIdentification(self):
        """ Return identification string of this test run
        """
        # TODO Is this still the way to do this? What if we are reading from stdin?
        return os.path.splitext(os.path.basename(self.filenames[0]))[0]

    def problemGetOptimalSolution(self, problemid):
        """ Return objective of an optimal or a best known solution

        ... from solu file, or None, if no such data has been acquired
        """
        try:
            return self.getProblemDataById(problemid, 'OptVal')
        except KeyError:
            #            print(self.getIdentification() + " has no solu file value for ", problemid)
            return None

    def problemGetSoluFileStatus(self, problemid):
        """ Return 'unkn', 'inf', 'best', 'opt'

        ... as solu file status, or None, if no solu file status
        exists for this problem
        """
        try:
            return self.getProblemDataById(problemid, 'SoluFileStatus')
        except KeyError:
            #            print(self.getIdentification() + " has no solu file status for ", problemid)
            return None

示例#32

0

显示文件

 def test_to_jsonl(self):
     # GH9180
     df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
     result = df.to_json(orient="records", lines=True)
     expected = '{"a":1,"b":2}\n{"a":1,"b":2}'
     self.assertEqual(result, expected)

示例#33

0

显示文件

 def test_frame_empty(self):
     df = DataFrame(columns=['jim', 'joe'])
     self.assertFalse(df._is_mixed_type)
     assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)),
                        df,
                        check_index_type=False)

示例#34

0

显示文件

def val(epoch, dataset, config, log_dir):
    """Validate model."""
    model_config = config['model']
    sess_config = config['session']

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    example_id = 0

    with tf.Graph().as_default():
        model = GRA(model_config)
        model.build_inference()
        result = DataFrame(columns=['id', 'answer'])
        with tf.Session(config=sess_config) as sess:
            sum_dir = os.path.join(log_dir, 'summary')
            summary_writer = tf.summary.FileWriter(sum_dir)

            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            stats_dir = os.path.join(log_dir, 'stats')
            stats_path = os.path.join(stats_dir, 'val.json')
            if os.path.exists(stats_path):
                print('load stats file {}.'.format(stats_path))
                stats = pd.read_json(stats_path, 'records')
            else:
                print('no stats file.')
                if not os.path.exists(stats_dir):
                    os.makedirs(stats_dir)
                stats = pd.DataFrame(columns=['epoch', 'acc'])

            # val iterate over examples
            correct = 0

            while dataset.has_val_example:
                vgg, c3d, question, answer = dataset.get_val_example()
                c3d = np.zeros((len(c3d), len(c3d[0])))
                feed_dict = {
                    model.appear: [vgg],
                    model.motion: [c3d],
                    model.question_encode: [question],
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[1]
                for i, row in enumerate(prediction):
                    for index in row:
                        if answer[index] == 1:
                            correct += 1
                            break
                result = result.append({
                    'id': example_id,
                    'answer': prediction
                },
                                       ignore_index=True)
                example_id += 1
            acc = correct / dataset.val_example_total
            result.to_json(
                os.path.join(
                    log_dir, 'validation_' + str(int(acc * 100)) + '_' +
                    str(epoch + lajidaima) + '.json'), 'records')
            print('\n[VAL] epoch {}, acc {:.5f}.\n'.format(
                epoch + lajidaima, acc))

            summary = tf.Summary()
            summary.value.add(tag='val/acc', simple_value=float(acc))
            summary_writer.add_summary(summary, epoch + lajidaima)

            record = Series([epoch + lajidaima, acc], ['epoch', 'acc'])
            stats = stats.append(record, ignore_index=True)
            stats.to_json(stats_path, 'records')

            dataset.reset_val()
            return acc

示例#35

0

显示文件

文件： csv2json3.py 项目： potlus/PythonScripts

# -*- coding: utf-8 -*-
"""
Created on Wed Jul  3 09:31:39 2019

@author: potlus
"""

import pandas as pd
from pandas import DataFrame

#Read File Path
path = 'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/SN_CMDB_Apps.csv'
cmdb = pd.read_csv(path, header=0,encoding = 'unicode_escape')
cmdb.shape
df = DataFrame(cmdb, columns= ['Sys_Id', 'SW_Name', 'Technical_Lead', 'Support_Group', 'Operational_Status'])

Export = df.to_json (r'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/sampleCMDB.json', orient='records', lines=True)

示例#36

0

显示文件

文件： format_data.py 项目： fuyuan-li/data

def dataframe_to_json(data: DataFrame, path: Path, **kwargs):
    ''' Saves a pandas DataFrame into a UTF-8 encoded JSON file '''
    with open(path, 'w', encoding='UTF-8') as file:
        data.to_json(file, force_ascii=False, **kwargs)

示例#37

0

显示文件

文件： test_pandas.py 项目： vanusy/pandas

 def test_default_handler(self):
     value = object()
     frame = DataFrame({'a': [7, value]})
     expected = DataFrame({'a': [7, str(value)]})
     result = pd.read_json(frame.to_json(default_handler=str))
     assert_frame_equal(expected, result, check_index_type=False)

示例#38

0

显示文件

 def test_default_handler(self):
     value = object()
     frame = DataFrame({'a': ['a', value]})
     expected = frame.applymap(str)
     result = pd.read_json(frame.to_json(default_handler=str))
     assert_frame_equal(expected, result)

示例#39

0

显示文件

                    f["properties"]["G"],
                    f["properties"]["B"],
                ])
            closest_index = cdist(XA=np.array([d]),
                                  XB=np.array(fl),
                                  metric=func).argmin()
            rgb = fl[closest_index]
            final_data.append([d[0], d[1], d[2], rgb[2], rgb[3], rgb[4]])
            print(len(final_data))
            time.sleep(1)
    except Exception as e:
        print(e)


features_size = 1000

groups = grouper(features_size, xa)

part_func = partial(gen_feature_color, space_color=space, func=func)

with concurrent.futures.ProcessPoolExecutor(max_workers=60) as executor:
    executor.map(part_func, groups, chunksize=3)

from pandas import DataFrame

df = DataFrame(list(final_data))

df.to_json("FINAL_DATA.json", orient="values")

####################################

示例#40

0

显示文件

def test(dataset, config, log_dir, question_type_dict):
    """Test model, output prediction as json file."""
    model_config = config['model']
    sess_config = config['session']

    question_type_correct_count = copy.deepcopy(question_type_dict)
    question_type_all_count = copy.deepcopy(question_type_dict)
    for k in question_type_dict:
        question_type_correct_count[k] = 0
        question_type_all_count[k] = 0

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    with tf.Graph().as_default():
        model = Multimodal_DMN_VM(model_config)
        model.build_inference()

        with tf.Session(config=sess_config) as sess:
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            # test iterate over examples
            result = DataFrame(columns=['id', 'answer'])
            correct = 0

            groundtruth_answer_list = []
            predict_answer_list = []
            while dataset.has_test_example:
                vgg, c3d, vgg_conv5, vgg_conv5_3, question, answer, example_id, question_len = dataset.get_test_example(
                )
                input_len = 20
                feed_dict = {
                    model.c3d_video_feature: [c3d],
                    model.vgg_video_feature: [vgg],
                    model.question_encode: [question],
                    model.question_len_placeholder: [question_len],
                    model.video_len_placeholder: [input_len],
                    model.keep_placeholder: 1.0
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[0]

                result = result.append(
                    {
                        'id': example_id,
                        'answer': answerset[prediction]
                    },
                    ignore_index=True)
                if answerset[prediction] == answer:
                    correct += 1
                    question_type_correct_count[question[0]] += 1
                question_type_all_count[question[0]] += 1

                groundtruth_answer_list.append(answer)
                predict_answer_list.append(answerset[prediction])

            result.to_json(os.path.join(log_dir, 'prediction.json'), 'records')
            acc = correct * 1.0 / dataset.test_example_total
            WUPS_0_0 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.0)
            WUPS_0_9 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.9)
            WUPS_acc = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, -1)
            print('[TEST] acc {:.5f}.\n'.format(acc))
            print('[TEST], WUPS@acc {:.5f}.\n'.format(WUPS_acc))
            print('[TEST], [email protected] {:.5f}.\n'.format(WUPS_0_0))
            print('[TEST], [email protected] {:.5f}.\n'.format(WUPS_0_9))

            print('######## question type acc list ######### ')
            for k in question_type_dict:
                print(question_type_dict[k] +
                      ' acc {:.5f}.'.format(question_type_correct_count[k] *
                                            1.0 / question_type_all_count[k]))
                print('correct = {:d}, all = {:d}'.format(
                    question_type_correct_count[k],
                    question_type_all_count[k]))

            dataset.reset_test()
            return acc

示例#41

0

显示文件

文件： test_json_table_schema.py 项目： znicholls/pandas

class TestTableOrient:
    def setup_method(self, method):
        self.df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                "D": pd.timedelta_range("1H", periods=4, freq="T"),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.0, 2.0, 3, 4.0],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
            },
            index=pd.Index(range(4), name="idx"),
        )

    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]

        schema = {"fields": fields, "primaryKey": ["id"]}

        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383
        df = pd.DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形（松尾吉将まつお よしまさ）"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )
        result1 = pd.read_json(df.to_json())
        result2 = pd.DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]

        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000Z"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000Z"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000Z"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000Z"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "Q-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]

        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000Z"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000Z"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient="table", date_format="epoch")

        # others work
        self.df.to_json(orient="table", date_format="iso")
        self.df.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="A-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, "object"),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]

        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )

        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130
        df = pd.DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")

示例#42

0

显示文件

def to_session(df: pd.DataFrame):
    # Salva o conteúdo em memória, de forma comprimida
    # return compressStringToBytes(df.to_json())
    return df.to_json()

示例#43

0

显示文件

 def test_read_json_table_orient(self, index_nm, vals):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     result = pd.read_json(out, orient="table")
     tm.assert_frame_equal(df, result)

示例#44

0

显示文件

def get_and_store_usage_data(selected_month):
    rows = session.execute(admin_queries['data_usage_by_month'],
                           [selected_month])
    df = DataFrame(rows)
    return df.to_json(date_format='iso')

示例#45

0

显示文件

def process(request, exec_instance):
    dataset_list = []
    service_exec = ServiceInstance.objects.get(pk=int(exec_instance))
    try:
        service_exec.arguments = {
            "filter-arguments": [],
            "algorithm-arguments": [{}, {}]
        }

        spill_infos, wave_model, ocean_model, natura_layer, ais_layer, time_interval, sim_length, oil_density, valid_points, valid_points_count, scenario, start_date, latitude, longitude = parse_request_params(
            request)
        depth = 0
        if (scenario == '1') or (scenario == '3'):
            service_exec.arguments["algorithm-arguments"][0][
                "latitude"] = spill_infos[0]['latitude']
            service_exec.arguments["algorithm-arguments"][0][
                "longitude"] = spill_infos[0]['longitude']
            if scenario == '3':
                cursor_presto = get_presto_cursor()
                resolution = 1
                if wave_model == '202':
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']),
                                  resolution)) + " AND round(longitude," + str(
                                      resolution) + ")=" + str(
                                          round(
                                              float(
                                                  spill_infos[0]['longitude']),
                                              resolution)) + ")"
                    cursor_presto.execute(query)
                    try:
                        dataset_list.append((Dataset.objects.get(
                            table_name='hcmr_poseidon_aeg_bathymetry')).id)
                    except:
                        print 'Dataset does not exist in database'
                else:
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']),
                                  resolution)) + " AND round(longitude," + str(
                                      resolution) + ")=" + str(
                                          round(
                                              float(
                                                  spill_infos[0]['longitude']),
                                              resolution)) + ")"
                cursor_presto.execute(query)
                try:
                    dataset_list.append((Dataset.objects.get(
                        table_name='hcmr_poseidon_med_bathymetry')).id)
                except:
                    print 'Dataset does not exist in database'
                result = cursor_presto.fetchall()
                try:
                    depth = float(result[0][0])
                except:
                    resolution = 0
                    print 'exception: trying with less precise resolution'
                    if wave_model == '202':
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                                round(float(spill_infos[0]['latitude']),
                                      resolution)
                            ) + " AND round(longitude," + str(
                                resolution) + ")=" + str(
                                    round(float(spill_infos[0]['longitude']),
                                          resolution)) + ")"
                        cursor_presto.execute(query)
                    else:
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                                round(float(spill_infos[0]['latitude']),
                                      resolution)
                            ) + " AND round(longitude," + str(
                                resolution) + ")=" + str(
                                    round(float(spill_infos[0]['longitude']),
                                          resolution)) + ")"
                    cursor_presto.execute(query)
                    result = cursor_presto.fetchall()
                    try:
                        depth = float(result[0][0])
                    except:
                        depth = 0
                service_exec.arguments["algorithm-arguments"][0][
                    "depth"] = depth
                print query
                print 'Oilspill depth:' + str(depth)
                # service_exec.arguments["algorithm-arguments"][0]["depth"] = spill_infos[0]['depth']

        elif scenario == '2':
            count = 1
            for el in spill_infos:
                service_exec.arguments["algorithm-arguments"][0][
                    "latitude" + str(count)] = spill_infos[count -
                                                           1]['latitude']
                service_exec.arguments["algorithm-arguments"][0][
                    "longitude" + str(count)] = spill_infos[count -
                                                            1]['longitude']
                count = count + 1
            service_exec.arguments["algorithm-arguments"][0][
                "number_of_points"] = count - 1

        service_exec.arguments["algorithm-arguments"][0][
            "start_date"] = spill_infos[0]['start_date']
        service_exec.arguments["algorithm-arguments"][0][
            "oil_volume"] = spill_infos[0]['oil_volume']
        service_exec.arguments["algorithm-arguments"][0]["sim_length"] = str(
            sim_length)
        if wave_model == '202':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Aegean'
        elif wave_model == '201':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Mediterranean'
        elif wave_model == '203':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Copernicus Wave Model for the Mediterranean'
        else:
            service_exec.arguments["algorithm-arguments"][0]["wave_model"] = ''

        if ocean_model == '001':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon High Resolution Aegean Model'
        elif ocean_model == '002':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon Mediterranean Model'
        elif ocean_model == '003':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Copernicus Mediterranean Model'
        else:
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = ''

        service_exec.arguments["algorithm-arguments"][0][
            "natura_layer"] = natura_layer
        service_exec.arguments["algorithm-arguments"][0][
            "ais_layer"] = ais_layer

        # 1)Create input file
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Creating simulation request"
        service_exec.save()
        filename, url_params = create_inp_file_from_request_and_upload(
            request, depth)
        # 2)Calculate oil spill
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Simulation running"
        service_exec.save()
        found = wait_until_output_ready(url_params, request)
        if found:
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Simulation results received"
            service_exec.save()
            filename_output = str(filename).replace("_F.inp", "_F.out")
            hcmr_data_filename = str(filename).replace("_F.inp", ".json")
            red_points_filename = str(filename).replace("_F.inp", ".txt")

            # 3)Transforming data to be shown on map
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Transforming data to be shown on map"
            service_exec.save()
            output_path = 'service_builder/static/services_files/hcmr_service_1/' + filename_output
            spill_data, parcel_data = create_json_from_out_file(output_path)
            # spill_data = [spill_infos[0]['start_date']+':00', spill_infos[0]['latitude'], spill_infos[0]['longitude'], spill_data[0][3], spill_data[0][4], spill_data[0][3], spill_infos[0]['oil_volume'],spill_data[0][5], spill_data[0][6]]
            # print str(spill_infos[0]['latitude']) + ' ' + spill_infos[0]['longitude']
            # print str(valid_points[0][0]) + ' ' + str(valid_points[0][1])
            # for el in valid_points:
            #     parcel_data.insert(0,[spill_infos[0]['start_date'].encode('ascii') + ':00', float(el[0]),float(el[1]),
            #                   parcel_data[0][3], parcel_data[0][4], float(spill_infos[0]['oil_volume']),
            #                   parcel_data[0][6], parcel_data[0][7]])
            # spill_data.insert(0,
            #                    [spill_infos[0]['start_date'].encode('ascii') + ':00', spill_data[0][1], spill_data[0][2], spill_data[0][3], spill_data[0][4], spill_data[0][5], spill_data[0][6], spill_data[0][7], spill_data[0][8], spill_data[0][9], spill_data[0][10]])

            print 'create_json_from_out_file done'
            headers_parcel = [
                "time", "Lat", "Lon", "Dpth", "Status", "Volume(m3)", "Dens",
                "Visc"
            ]
            parcel_df = DataFrame(parcel_data, columns=headers_parcel)
            print 'parcel_df = DataFrame done'
            print(parcel_df.head(2))
            parcel_df.to_json('visualizer/static/visualizer/files/' +
                              hcmr_data_filename,
                              orient='records')
            print 'parcel_df.to_json done'

            headers_spill = [
                'time', 'N', '%ev', '%srf', '%em', '%disp', '%cst', '%btm',
                'max_visc', 'min_visc', 'dens'
            ]
            service_exec.arguments["algorithm-arguments"][1][
                "headers_spill"] = headers_spill
            service_exec.arguments["algorithm-arguments"][1][
                "spill_data"] = spill_data
            service_exec.save()

            print 'spill_data done'

            # 4)Calculate red points
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Calculating oil spill intersections with protected areas"
            service_exec.save()
            if natura_layer == "true":
                # red_points_calc.calculate(hcmr_data_filename, red_points_filename)
                pass
            if ais_layer == "true":
                try:
                    dataset_list.append(
                        (Dataset.objects.get(table_name='xmile_ais',
                                             stored_at='UBITECH_PRESTO')).id)
                except:
                    print 'Dataset does not exist in database'
            print 'red points calculated'
            # 5)Create Visualization

            print valid_points
            oil_spill_start = ''
            v_count = 1
            for el in valid_points:
                oil_spill_start = oil_spill_start + 'start_lat' + str(
                    v_count) + '=' + str(
                        el[0]) + '&start_lon' + str(v_count) + '=' + str(
                            el[1]) + '&'
                v_count = v_count + 1
            visualization_url = "http://" + request.META[
                'HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?" + oil_spill_start + "markerType=circle&lat_col=Lat&lon_col=Lon" + "&data_file=" + hcmr_data_filename + "&red_points_file=" + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer + "&time_interval=" + time_interval + "&valid_points=" + str(
                    len(valid_points))
            visualization_url = "http://" + request.META['HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?"+oil_spill_start \
                                + "&markerType=circle&lat_col=Lat&lon_col=Lon" \
                                + "&data_file=" + hcmr_data_filename + "&red_points_file=" \
                                + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer \
                                + "&time_interval=" + time_interval + "&start_date=" + start_date + \
                                '&latitude=' + latitude + "&longitude=" + longitude + "&length="+ sim_length + "&valid_points="+ str(len(valid_points))

            service_exec.dataframe_visualizations = {"v1": visualization_url}
            service_exec.arguments["algorithm-arguments"][0][
                "out_filepath"] = filename_output
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "done"
            service_exec.save()
            service_obj = service_exec.service
            for dataset_list_el_id in dataset_list:
                try:
                    dataset_obj = Dataset.objects.get(id=dataset_list_el_id)
                    dataset_service_execution(dataset_obj, service_obj)
                except:
                    pass
            service_use(service_obj)
            unique_service_use(service_obj, request.user)
            hcmr_statistics(scenario, sim_length, time_interval, ocean_model,
                            wave_model, str_to_bool(natura_layer),
                            str_to_bool(ais_layer))
            # context = {
            #     'url': visualization_url,
            #     'out_filepath': filename_output,
            # }
            # return render(request, 'hcmr_pilot/scenario1-results.html', context)
        else:
            # html = "<html><body>Something went wrong. Please, try again.</body></html>"
            # return HttpResponse(html)
            service_exec.status = "failed"
            service_exec.save()
    except:
        service_exec.status = "failed"
        service_exec.save()

示例#46

0

显示文件

 def test_frame_from_json_precise_float(self):
     df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
     result = read_json(df.to_json(), precise_float=True)
     assert_frame_equal(result, df)

示例#47

0

显示文件

文件： test_pandas.py 项目： vanusy/pandas

 def test_frame_from_json_precise_float(self):
     df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]])
     result = read_json(df.to_json(), precise_float=True)
     assert_frame_equal(result, df, check_index_type=False,
                        check_column_type=False)

示例#48

0

显示文件

    def test_frame_from_json_nones(self):
        df = DataFrame([[1, 2], [4, 5, 6]])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))

        df = DataFrame([['1', '2'], ['4', '5', '6']])
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(), convert_axes=False, dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        unser = read_json(df.to_json(), numpy=False)
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), numpy=False, dtype=False)
        self.assertTrue(unser[2][0] is None)
        unser = read_json(df.to_json(),
                          numpy=False,
                          convert_axes=False,
                          dtype=False)
        self.assertTrue(unser['2']['0'] is None)

        # infinities get mapped to nulls which get mapped to NaNs during
        # deserialisation
        df = DataFrame([[1, 2], [4, 5, 6]])
        df.loc[0, 2] = np.inf
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))

        df.loc[0, 2] = np.NINF
        unser = read_json(df.to_json())
        self.assertTrue(np.isnan(unser[2][0]))
        unser = read_json(df.to_json(), dtype=False)
        self.assertTrue(np.isnan(unser[2][0]))

示例#49

0

显示文件

文件： test_json_table_schema.py 项目： zarinn3pal/pandas

class TestTableOrient(object):
    def setup_method(self, method):
        self.df = DataFrame(
            {
                'A': [1, 2, 3, 4],
                'B': ['a', 'b', 'c', 'c'],
                'C':
                pd.date_range('2016-01-01', freq='d', periods=4),
                'D':
                pd.timedelta_range('1H', periods=4, freq='T'),
                'E':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
                'F':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True)),
                'G': [1., 2., 3, 4.],
                'H':
                pd.date_range(
                    '2016-01-01', freq='d', periods=4, tz='US/Central'),
            },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'id',
            'type': 'integer'
        }, {
            'name': 'a',
            'type': 'integer'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['id'],
        }

        expected = OrderedDict([('schema', schema),
                                ('data', [
                                    OrderedDict([('id', 0), ('a', 1)]),
                                    OrderedDict([('id', 1), ('a', 2)])
                                ])])
        assert result == expected

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'name': 'A',
            'type': 'integer'
        }, {
            'name': 'B',
            'type': 'string'
        }, {
            'name': 'C',
            'type': 'datetime'
        }, {
            'name': 'D',
            'type': 'duration'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'E',
            'ordered': False,
            'type': 'any'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'F',
            'ordered': True,
            'type': 'any'
        }, {
            'name': 'G',
            'type': 'number'
        }, {
            'name': 'H',
            'type': 'datetime',
            'tz': 'US/Central'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['idx'],
        }
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'), ('E', 'a'), ('F', 'a'),
                         ('G', 1.), ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'), ('E', 'b'), ('F', 'b'),
                         ('G', 2.), ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 3.), ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 4.), ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'number'
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 1.0),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 2.0),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'freq': 'Q-JAN',
            'name': 'index',
            'type': 'datetime'
        }, {
            'name': 'values',
            'type': 'integer'
        }]

        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [
            OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                         ('values', 1)]),
            OrderedDict([('index', '2016-02-01T00:00:00.000Z'), ('values', 1)])
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'any',
                'constraints': {
                    'enum': ['a', 'b']
                },
                'ordered': False
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 'a'),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 'b'),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    def test_make_field_int(self):
        data = [1, 2, 3]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'integer'}
            assert result == expected

    def test_make_field_float(self):
        data = [1., 2., 3.]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'number'}
            assert result == expected

    def test_make_field_datetime(self):
        data = [1., 2., 3.]
        kinds = [
            pd.Series(pd.to_datetime(data), name='values'),
            pd.to_datetime(data)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime'}
            assert result == expected

        kinds = [
            pd.Series(pd.to_datetime(data, utc=True), name='values'),
            pd.to_datetime(data, utc=True)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime', "tz": "UTC"}
            assert result == expected

        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = make_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    def test_make_field_categorical(self):
        data = ['a', 'b', 'c']
        ordereds = [True, False]

        for ordered in ordereds:
            arr = pd.Series(pd.Categorical(data, ordered=ordered), name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

            arr = pd.CategoricalIndex(data, ordered=ordered, name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

    def test_categorical(self):
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'constraints': {
                'enum': ['a', 'b']
            },
            'name': 'values',
            'ordered': False,
            'type': 'any'
        }]

        expected = OrderedDict([('schema', {
            'fields': fields,
            'primaryKey': ['idx']
        }),
                                ('data', [
                                    OrderedDict([('idx', 0), ('values', 'a')]),
                                    OrderedDict([('idx', 1), ('values', 'b')]),
                                    OrderedDict([('idx', 2), ('values', 'a')])
                                ])])
        assert result == expected

    def test_set_default_names_unset(self):
        data = pd.Series(1, pd.Index([1]))
        result = set_default_names(data)
        assert result.index.name == 'index'

    def test_set_default_names_set(self):
        data = pd.Series(1, pd.Index([1], name='myname'))
        result = set_default_names(data)
        assert result.index.name == 'myname'

    def test_set_default_names_mi_unset(self):
        data = pd.Series(1, pd.MultiIndex.from_product([('a', 'b'),
                                                        ('c', 'd')]))
        result = set_default_names(data)
        assert result.index.names == ['level_0', 'level_1']

    def test_set_default_names_mi_set(self):
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', 'n2']))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'n2']

    def test_set_default_names_mi_partion(self):
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', None]))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'level_1']

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]],
            columns=[pd.Timestamp('2016'),
                     pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    def test_overlapping_names(self):
        cases = [
            pd.Series([1], index=pd.Index([1], name='a'), name='a'),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame({"A": [1]},
                         index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                         names=["A", "a"])),
        ]

        for data in cases:
            with pytest.raises(ValueError) as excinfo:
                data.to_json(orient='table')

            assert 'Overlapping' in str(excinfo.value)

    def test_mi_falsey_name(self):
        # GH 16203
        df = pd.DataFrame(np.random.randn(4, 4),
                          index=pd.MultiIndex.from_product([('A', 'B'),
                                                            ('a', 'b')]))
        result = [x['name'] for x in build_table_schema(df)['fields']]
        assert result == ['level_0', 'level_1', 0, 1, 2, 3]

示例#50

0

显示文件

文件： json.py 项目： reds118/pandas-dev

class ToJSONLines(BaseIO):

    fname = "__test__.json"

    def setup(self):
        N = 10**5
        ncols = 5
        index = date_range("20000101", periods=N, freq="H")
        timedeltas = timedelta_range(start=1, periods=N, freq="s")
        datetimes = date_range(start=1, periods=N, freq="s")
        ints = np.random.randint(100000000, size=N)
        floats = np.random.randn(N)
        strings = tm.makeStringIndex(N)
        self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
        self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index)
        self.df_td_int_ts = DataFrame(
            {
                "td_1": timedeltas,
                "td_2": timedeltas,
                "int_1": ints,
                "int_2": ints,
                "ts_1": datetimes,
                "ts_2": datetimes,
            },
            index=index,
        )
        self.df_int_floats = DataFrame(
            {
                "int_1": ints,
                "int_2": ints,
                "int_3": ints,
                "float_1": floats,
                "float_2": floats,
                "float_3": floats,
            },
            index=index,
        )
        self.df_int_float_str = DataFrame(
            {
                "int_1": ints,
                "int_2": ints,
                "float_1": floats,
                "float_2": floats,
                "str_1": strings,
                "str_2": strings,
            },
            index=index,
        )

    def time_floats_with_int_idex_lines(self):
        self.df.to_json(self.fname, orient="records", lines=True)

    def time_floats_with_dt_index_lines(self):
        self.df_date_idx.to_json(self.fname, orient="records", lines=True)

    def time_delta_int_tstamp_lines(self):
        self.df_td_int_ts.to_json(self.fname, orient="records", lines=True)

    def time_float_int_lines(self):
        self.df_int_floats.to_json(self.fname, orient="records", lines=True)

    def time_float_int_str_lines(self):
        self.df_int_float_str.to_json(self.fname, orient="records", lines=True)

示例#51

0

显示文件

def test(dataset, config, log_dir):
    """Test model, output prediction as json file."""
    model_config = config['model']
    sess_config = config['session']

    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'),
                            header=None)[0]

    with tf.Graph().as_default():
        model = GRA(model_config)
        model.build_inference()

        with tf.Session(config=sess_config) as sess:
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            # test iterate over examples
            result = DataFrame(columns=['id', 'answer'])
            correct = 0

            while dataset.has_test_example:
                vgg, c3d, question, answer, example_id = dataset.get_test_example(
                )
                feed_dict = {
                    model.appear: [vgg],
                    model.motion: [c3d],
                    model.question_encode: [question],
                }
                prediction, channel_weight, appear_weight, motion_weight = sess.run(
                    [
                        model.prediction, model.channel_weight,
                        model.appear_weight, model.motion_weight
                    ],
                    feed_dict=feed_dict)
                #prediction = prediction[0]
                channel_weight = channel_weight[0]
                appear_weight = appear_weight[0]
                motion_weight = motion_weight[0]

                result = result.append(
                    {
                        'id': example_id,
                        'answer': prediction[1]
                    },
                    ignore_index=True)
                # modified-why
                # if answerset[prediction] in answer:
                #     correct += 1
                #     print(answer, example_id, channel_weight)
                # print(appear_weight)
                # print(motion_weight)

            result.to_json(os.path.join(log_dir, 'prediction.json'), 'records')

            # acc = correct / dataset.test_example_total
            # print('\n[TEST] acc {:.5f}.\n'.format(acc))

            dataset.reset_test()
            return None

示例#52

0

显示文件

文件： main.py 项目： MikeHoangg/text-categorization

def export(df: pd.DataFrame, file_path: str):
    print(f'dataframe has {len(df)} rows')
    print(f'started exporting {file_path}: {datetime.now()}')
    df.to_json(file_path)
    print(f'created {file_path}: {datetime.now()}')

示例#53

0

显示文件

 def render_dataframe(self, df: pd.DataFrame, response: Response) -> str:
     return df.to_json(orient="records")

示例#54

0

显示文件

文件： test_json_table_schema.py 项目： frreiss/pandas-fred

 def test_read_json_table_timezones_orient(self, idx, vals, recwarn):
     # GH 35973
     df = DataFrame(vals, index=idx)
     out = df.to_json(orient="table")
     result = pd.read_json(out, orient="table")
     tm.assert_frame_equal(df, result)

示例#55

0

显示文件

 def test_read_json_table_orient_raises(self, index_nm, vals):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     with tm.assert_raises_regex(NotImplementedError, 'can not yet read '):
         pd.read_json(out, orient="table")

示例#56

0

显示文件

 def test_read_json_table_orient_raises(self, index_nm, vals, recwarn):
     df = DataFrame(vals, index=pd.Index(range(4), name=index_nm))
     out = df.to_json(orient="table")
     with pytest.raises(NotImplementedError, match='can not yet read '):
         pd.read_json(out, orient="table")

示例#57

0

显示文件

文件： datasets.py 项目： yarivt/incubator-amaterasu

 def persist_dataset(self, dataset: pd.DataFrame, overwrite: bool):
     orient = self.dataset_conf.get('orient')
     dataset.to_json(self.dataset_conf['uri'], orient=orient)

示例#58

0

显示文件

def lines_json_df():
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    return df.to_json(lines=True, orient="records")

示例#59

0

显示文件

class TestTableOrient(object):
    def setup_method(self, method):
        self.df = DataFrame(
            {
                'A': [1, 2, 3, 4],
                'B': ['a', 'b', 'c', 'c'],
                'C':
                pd.date_range('2016-01-01', freq='d', periods=4),
                'D':
                pd.timedelta_range('1H', periods=4, freq='T'),
                'E':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
                'F':
                pd.Series(pd.Categorical(['a', 'b', 'c', 'c'], ordered=True)),
                'G': [1., 2., 3, 4.],
                'H':
                pd.date_range(
                    '2016-01-01', freq='d', periods=4, tz='US/Central'),
            },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'id',
            'type': 'integer'
        }, {
            'name': 'a',
            'type': 'integer'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['id'],
        }

        expected = OrderedDict([('schema', schema),
                                ('data', [
                                    OrderedDict([('id', 0), ('a', 1)]),
                                    OrderedDict([('id', 1), ('a', 2)])
                                ])])
        assert result == expected

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'name': 'A',
            'type': 'integer'
        }, {
            'name': 'B',
            'type': 'string'
        }, {
            'name': 'C',
            'type': 'datetime'
        }, {
            'name': 'D',
            'type': 'duration'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'E',
            'ordered': False,
            'type': 'any'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'F',
            'ordered': True,
            'type': 'any'
        }, {
            'name': 'G',
            'type': 'number'
        }, {
            'name': 'H',
            'type': 'datetime',
            'tz': 'US/Central'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['idx'],
        }
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'), ('E', 'a'), ('F', 'a'),
                         ('G', 1.), ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'), ('E', 'b'), ('F', 'b'),
                         ('G', 2.), ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 3.), ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 4.), ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'number'
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 1.0),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 2.0),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_to_json_period_index(self):
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'freq': 'Q-JAN',
            'name': 'index',
            'type': 'datetime'
        }, {
            'name': 'values',
            'type': 'integer'
        }]

        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [
            OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                         ('values', 1)]),
            OrderedDict([('index', '2016-02-01T00:00:00.000Z'), ('values', 1)])
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'any',
                'constraints': {
                    'enum': ['a', 'b']
                },
                'ordered': False
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }),
                                 ('data', [
                                     OrderedDict([('index', 'a'),
                                                  ('values', 1)]),
                                     OrderedDict([('index', 'b'),
                                                  ('values', 1)])
                                 ])]))
        assert result == expected

    def test_date_format_raises(self):
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_int(self, kind):
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_float(self, kind):
        data = [1., 2., 3.]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize('dt_args,extra_exp', [({}, {}),
                                                   ({
                                                       'utc': True
                                                   }, {
                                                       'tz': 'UTC'
                                                   })])
    @pytest.mark.parametrize('wrapper', [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
            self, dt_args, extra_exp, wrapper):
        data = [1., 2., 3.]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name='values')
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": 'datetime'}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_convert_pandas_type_to_json_field_categorical(
            self, kind, ordered):
        data = ['a', 'b', 'c']
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name='cats')
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name='cats')

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {
                "enum": data
            },
            "ordered": ordered
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [({
            'type': 'integer'
        }, 'int64'), ({
            'type': 'number'
        }, 'float64'), ({
            'type': 'boolean'
        }, 'bool'), ({
            'type': 'duration'
        }, 'timedelta64'), ({
            'type': 'datetime'
        }, 'datetime64[ns]'),
         ({
             'type': 'datetime',
             'tz': 'US/Hawaii'
         }, 'datetime64[ns, US/Hawaii]'), ({
             'type': 'any'
         }, 'object'),
         ({
             'type': 'any',
             'constraints': {
                 'enum': ['a', 'b', 'c']
             },
             'ordered': False
         }, CategoricalDtype(categories=['a', 'b', 'c'], ordered=False)),
         ({
             'type': 'any',
             'constraints': {
                 'enum': ['a', 'b', 'c']
             },
             'ordered': True
         }, CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)),
         ({
             'type': 'string'
         }, 'object')])
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {'name': 'foo'}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {'type': inp}
        with tm.assert_raises_regex(
                ValueError, "Unsupported or invalid field "
                "type: {}".format(inp)):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'constraints': {
                'enum': ['a', 'b']
            },
            'name': 'values',
            'ordered': False,
            'type': 'any'
        }]

        expected = OrderedDict([('schema', {
            'fields': fields,
            'primaryKey': ['idx']
        }),
                                ('data', [
                                    OrderedDict([('idx', 0), ('values', 'a')]),
                                    OrderedDict([('idx', 1), ('values', 'b')]),
                                    OrderedDict([('idx', 2), ('values', 'a')])
                                ])])
        assert result == expected

    @pytest.mark.parametrize(
        'idx,nm,prop',
        [(pd.Index([1]), 'index', 'name'),
         (pd.Index([1], name='myname'), 'myname', 'name'),
         (pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')
                                      ]), ['level_0', 'level_1'], 'names'),
         (pd.MultiIndex.from_product(
             [('a', 'b'),
              ('c', 'd')], names=['n1', 'n2']), ['n1', 'n2'], 'names'),
         (pd.MultiIndex.from_product(
             [('a', 'b'),
              ('c', 'd')], names=['n1', None]), ['n1', 'level_1'], 'names')])
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]],
            columns=[pd.Timestamp('2016'),
                     pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    @pytest.mark.parametrize('case', [
        pd.Series([1], index=pd.Index([1], name='a'), name='a'),
        pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
        pd.DataFrame({"A": [1]},
                     index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                     names=["A", "a"]))
    ])
    def test_overlapping_names(self, case):
        with tm.assert_raises_regex(ValueError, 'Overlapping'):
            case.to_json(orient='table')

示例#60

0

显示文件

def dive(data: pandas.DataFrame) -> HTML:
    # Element ID MUST be unique
    elem_id = _generate_element_id()
    json_str = data.to_json(orient='records')
    return HTML(FACETS_DIVE_TEMPLATE.format(elem_id=elem_id, json_str=json_str))