def test_frame_mixedtype_orient(self):  # GH10289
    """Round-trip a mixed-dtype frame through every to_json orient."""
    rows = [
        [10, 1, "foo", 0.1, 0.01],
        [20, 2, "bar", 0.2, 0.02],
        [30, 3, "baz", 0.3, 0.03],
        [40, 4, "qux", 0.4, 0.04],
    ]
    df = DataFrame(rows, index=list("abcd"),
                   columns=["1st", "2nd", "3rd", "4th", "5th"])
    self.assertTrue(df._is_mixed_type)
    expected = df.copy()

    # orients that keep both axes intact
    for orient in ["split", "index", "columns"]:
        roundtripped = read_json(df.to_json(orient=orient),
                                 orient=orient, convert_axes=False)
        assert_frame_equal(roundtripped, expected)

    # 'records' discards the index
    expected.index = np.arange(len(df))
    roundtripped = read_json(df.to_json(orient="records"),
                             orient="records", convert_axes=False)
    assert_frame_equal(roundtripped, expected)

    # 'values' discards the column labels as well
    expected.columns = np.arange(df.shape[1])
    roundtripped = read_json(df.to_json(orient="values"),
                             orient="values", convert_axes=False)
    assert_frame_equal(roundtripped, expected)
def test_frame_non_unique_columns(self):
    """Duplicate column labels: some orients raise, others round-trip."""
    df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

    # these orients cannot represent duplicate column names
    for bad_orient in ["index", "columns", "records"]:
        self.assertRaises(ValueError, df.to_json, orient=bad_orient)

    assert_frame_equal(
        df, read_json(df.to_json(orient="split"), orient="split", dtype=False))
    unser = read_json(df.to_json(orient="values"), orient="values")
    np.testing.assert_equal(df.values, unser.values)

    # GH4377; duplicate columns not processing correctly
    df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"])
    result = read_json(df.to_json(orient="split"), orient="split")
    assert_frame_equal(result, df)

    def _check(frame):
        roundtripped = read_json(frame.to_json(orient="split"),
                                 orient="split", convert_dates=["x"])
        assert_frame_equal(roundtripped, frame)

    cases = [
        [["a", "b"], ["c", "d"]],
        [[1.5, 2.5], [3.5, 4.5]],
        [[1, 2.5], [3, 4.5]],
        [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
    ]
    for values in cases:
        _check(DataFrame(values, index=[1, 2], columns=["x", "x"]))
def test_timedelta(self):
    """Timedeltas serialise in milliseconds; converting back recovers the data."""
    to_td = lambda x: pd.to_timedelta(x, unit="ms")

    ser = Series([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(ser.dtype, "timedelta64[ns]")
    # index will be float dtype
    assert_series_equal(ser,
                        pd.read_json(ser.to_json(), typ="series").apply(to_td),
                        check_index_type=False)

    ser = Series([timedelta(23), timedelta(seconds=5)],
                 index=pd.Index([0, 1], dtype=float))
    self.assertEqual(ser.dtype, "timedelta64[ns]")
    assert_series_equal(ser,
                        pd.read_json(ser.to_json(), typ="series").apply(to_td))

    frame = DataFrame([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(frame[0].dtype, "timedelta64[ns]")
    assert_frame_equal(frame,
                       pd.read_json(frame.to_json()).apply(to_td),
                       check_index_type=False, check_column_type=False)

    # mixed frame: restore each column with its own converter
    frame = DataFrame({
        "a": [timedelta(days=23), timedelta(seconds=5)],
        "b": [1, 2],
        "c": pd.date_range(start="20130101", periods=2),
    })
    restored = pd.read_json(frame.to_json(date_unit="ns"))
    restored["a"] = pd.to_timedelta(restored.a, unit="ns")
    restored["c"] = pd.to_datetime(restored.c)
    assert_frame_equal(frame, restored, check_index_type=False)
def test_frame_mixedtype_orient(self):  # GH10289
    """A mixed-dtype frame must survive a to_json/read_json cycle for all orients."""
    data = [[10, 1, 'foo', .1, .01],
            [20, 2, 'bar', .2, .02],
            [30, 3, 'baz', .3, .03],
            [40, 4, 'qux', .4, .04]]
    df = DataFrame(data, index=list('abcd'),
                   columns=['1st', '2nd', '3rd', '4th', '5th'])
    self.assertTrue(df._is_mixed_type)

    def roundtrip(orient):
        return read_json(df.to_json(orient=orient), orient=orient,
                         convert_axes=False)

    expected = df.copy()
    for orient in ['split', 'index', 'columns']:
        assert_frame_equal(roundtrip(orient), expected)

    expected.index = np.arange(len(df))        # 'records' drops the index
    assert_frame_equal(roundtrip('records'), expected)

    expected.columns = np.arange(df.shape[1])  # 'values' drops labels too
    assert_frame_equal(roundtrip('values'), expected)
def test_timedelta(self):
    """Round-trip timedelta data through JSON (serialised as milliseconds)."""
    from_ms = lambda x: pd.to_timedelta(x, unit='ms')

    ser = Series([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(ser.dtype, 'timedelta64[ns]')
    restored = pd.read_json(ser.to_json(), typ='series').apply(from_ms)
    assert_series_equal(restored, ser)

    ser = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1]))
    self.assertEqual(ser.dtype, 'timedelta64[ns]')
    restored = pd.read_json(ser.to_json(), typ='series').apply(from_ms)
    assert_series_equal(restored, ser)

    frame = DataFrame([timedelta(23), timedelta(seconds=5)])
    self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
    assert_frame_equal(frame,
                       pd.read_json(frame.to_json()).apply(from_ms))

    # mixed frame: convert each column back with its own converter
    frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                       'b': [1, 2],
                       'c': pd.date_range(start='20130101', periods=2)})
    restored = pd.read_json(frame.to_json(date_unit='ns'))
    restored['a'] = pd.to_timedelta(restored.a, unit='ns')
    restored['c'] = pd.to_datetime(restored.c)
    assert_frame_equal(frame, restored)
def convertToPutJson(csv_file):
    # Convert a CSV of records into a JSON file of PUT requests, one
    # {"method","recordId","body"} object per source row.
    # NOTE(review): this is Python 2 code (print statements); 'cleanColumns',
    # 'read_csv' and 'sub' come from imports outside this view —
    # 'sub' is presumably re.sub, so only the FIRST "csv"/"txt" occurrence
    # anywhere in the path is replaced; verify the input paths.
    df = cleanColumns(read_csv(csv_file))
    putColumns = ["method", "recordId", "body"]
    putDf = DataFrame(columns = putColumns)
    for recordId in df.index:
        print "Converting data for recordId {recordId}...".format(recordId = recordId)
        # body maps each (stripped) column name to a single-element list
        # holding the stringified cell value
        body = {}
        for col in df.columns:
            body[str(col).strip()] = [str(df[col][recordId]).strip()]
        putDfRow = DataFrame([["PUT", str(recordId), body]], columns = putColumns)
        putDf = putDf.append(putDfRow)
    json_file = sub("csv|txt", "json", csv_file)
    putDf.to_json(json_file, orient="records")
    # Rewrite the file: strip the surrounding brackets and put a blank line
    # between records so each PUT body stands alone.
    with open(json_file, 'r') as target:
        putData = target.read()
    target = open(json_file, 'w')
    putData = putData.replace("},{", "}\n\n{")[1:-1]
    target.write(putData)
    target.close()
    print "Successfully created put data!"
    return json_file
def test_frame_non_unique_columns(self):
    """Duplicate column labels: unsupported orients raise; others round-trip."""
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                   columns=['x', 'x'])

    self.assertRaises(ValueError, df.to_json, orient='index')
    self.assertRaises(ValueError, df.to_json, orient='columns')
    self.assertRaises(ValueError, df.to_json, orient='records')

    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split', dtype=False))
    restored = read_json(df.to_json(orient='values'), orient='values')
    np.testing.assert_equal(df.values, restored.values)

    # GH4377; duplicate columns not processing correctly
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 2],
                   columns=['x', 'y'])
    result = read_json(df.to_json(orient='split'), orient='split')
    assert_frame_equal(result, df)

    def _check(frame):
        assert_frame_equal(
            read_json(frame.to_json(orient='split'), orient='split',
                      convert_dates=['x']),
            frame)

    cases = [[['a', 'b'], ['c', 'd']],
             [[1.5, 2.5], [3.5, 4.5]],
             [[1, 2.5], [3, 4.5]],
             [[Timestamp('20130101'), 3.5], [Timestamp('20130102'), 4.5]]]
    for o in cases:
        _check(DataFrame(o, index=[1, 2], columns=['x', 'x']))
def setup(self, index): N = 100000 indexes = {'int': np.arange(N), 'datetime': date_range('20000101', periods=N, freq='H')} df = DataFrame(np.random.randn(N, 5), columns=['float_{}'.format(i) for i in range(5)], index=indexes[index]) df.to_json(self.fname, orient='records', lines=True)
def test_frame_empty(self):
    """Empty frames, homogeneous or mixed-type, round-trip through JSON."""
    df = DataFrame(columns=['jim', 'joe'])
    self.assertFalse(df._is_mixed_type)
    assert_frame_equal(read_json(df.to_json()), df)

    # coercing one column to int64 makes the (still empty) frame mixed-type
    df['joe'] = df['joe'].astype('i8')
    self.assertTrue(df._is_mixed_type)
    assert_frame_equal(read_json(df.to_json()), df)
def test_data_frame_size_after_to_json(self): # GH15344 df = DataFrame({'a': [str(1)]}) size_before = df.memory_usage(index=True, deep=True).sum() df.to_json() size_after = df.memory_usage(index=True, deep=True).sum() self.assertEqual(size_before, size_after)
def test_frame_double_encoded_labels(self):
    """Labels containing quotes, backslashes and slashes survive JSON escaping."""
    df = DataFrame([["a", "b"], ["c", "d"]],
                   index=['index " 1', "index / 2"],
                   columns=["a \\ b", "y / z"])

    for orient in ["split", "columns", "index"]:
        assert_frame_equal(df, read_json(df.to_json(orient=orient),
                                         orient=orient))

    # 'records' drops the index, so compare columns and raw values only
    recs = read_json(df.to_json(orient="records"), orient="records")
    assert_index_equal(df.columns, recs.columns)
    np.testing.assert_equal(df.values, recs.values)
def test_reconstruction_index(self):
    """Default and explicit string indexes come back intact from JSON."""
    frames = [
        DataFrame([[1, 2, 3], [4, 5, 6]]),
        DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C']),
    ]
    for df in frames:
        assert_frame_equal(read_json(df.to_json()), df)
def test_reconstruction_index(self):
    """Round-trip preserves both a default index and explicit string labels."""
    default_df = DataFrame([[1, 2, 3], [4, 5, 6]])
    assert_frame_equal(read_json(default_df.to_json()), default_df)

    labelled_df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
                            index=["A", "B", "C"])
    assert_frame_equal(read_json(labelled_df.to_json()), labelled_df)
def test_frame_non_unique_index(self):
    """Duplicate index values: 'index'/'columns' orients raise, others survive."""
    df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 1], columns=["x", "y"])

    self.assertRaises(ValueError, df.to_json, orient="index")
    self.assertRaises(ValueError, df.to_json, orient="columns")

    assert_frame_equal(df, read_json(df.to_json(orient="split"),
                                     orient="split"))

    from_records = read_json(df.to_json(orient="records"), orient="records")
    self.assertTrue(df.columns.equals(from_records.columns))
    np.testing.assert_equal(df.values, from_records.values)

    from_values = read_json(df.to_json(orient="values"), orient="values")
    np.testing.assert_equal(df.values, from_values.values)
def test_reconstruction_index(self):
    """A default RangeIndex deserialises with float64 axis labels."""
    df = DataFrame([[1, 2, 3], [4, 5, 6]])
    restored = read_json(df.to_json())
    self.assertEqual(restored.index.dtype, np.float64)
    self.assertEqual(restored.columns.dtype, np.float64)
    assert_frame_equal(restored, df,
                       check_index_type=False, check_column_type=False)

    # explicit string labels round-trip exactly
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"])
    assert_frame_equal(read_json(df.to_json()), df)
def test_categorical(self): # GH4377 df.to_json segfaults with non-ndarray blocks df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]}) df["B"] = df["A"] expected = df.to_json() df["B"] = df["A"].astype('category') self.assertEqual(expected, df.to_json()) s = df["A"] sc = df["B"] self.assertEqual(s.to_json(), sc.to_json())
def test_to_jsonl(self):
    # GH9180: orient='records', lines=True emits one JSON object per line
    df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
    self.assertEqual(df.to_json(orient="records", lines=True),
                     '{"a":1,"b":2}\n{"a":1,"b":2}')

    # embedded braces and quotes must be escaped, not treated as delimiters
    df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b'])
    out = df.to_json(orient="records", lines=True)
    self.assertEqual(out, '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}')
    assert_frame_equal(pd.read_json(out, lines=True), df)
def main():
    """Scrape every IGM economic-experts survey page and write all responses
    (one column per question, one row per responder) to survey_results.json."""
    # Get links to survey pages
    home_url = "http://www.igmchicago.org/igm-economic-experts-panel"
    home_contents = get_page_contents(home_url)
    urls = re.findall(
        r"<h2><a href=\"(\S+?results\?SurveyID=\S+?)\"", home_contents)
    urls = ["http://www.igmchicago.org" + url for url in urls]

    # Loop through survey pages
    df = DataFrame()
    question_count = 0
    for url in reversed(urls):
        contents = get_page_contents(url)
        questions = re.findall(r"surveyQuestion\">([\s\S]+?)</h3>", contents)
        responder_list = re.findall(
            r"\?id=([\d]+)?\">([\s\w.]+?)</a>", contents)
        responses = re.findall(
            r"<span class=\"option-[\d]+?\">([\s\w.]+?)</span>", contents)
        # BUG FIX: use floor division — plain '/' returns a float under
        # Python 3 (this file uses print()), and the slice bounds computed
        # from it below would raise TypeError.
        num_responders = len(responses) // len(questions)

        # Loop through sub-questions (A, B, etc) within each page
        for i, question in enumerate(questions):
            question = clean_string(question)
            question_count += 1
            print(question)

            # Restrict range to responses for this sub-question
            rng = (i * num_responders, (i + 1) * num_responders)

            # Collect sub-question, its url suffix, and the responses
            prefix = "(%03d" % question_count + ") "
            q_responses = Series(
                responses[rng[0]:rng[1]],
                index=responder_list[rng[0]:rng[1]])
            q_url_suffix = re.findall("=(.+)", url)[0]
            # NOTE(review): Series.append was removed in pandas 2.0;
            # pd.concat([...]) is the modern equivalent if this is upgraded.
            q_responses = q_responses.append(
                Series([q_url_suffix], index=['q_url_suffix']))
            q_responses.name = prefix + question.strip()

            # Add question data to dataframe
            df = df.join(q_responses, how='outer')

    # Move responder id from index to column, only after all joins are complete
    df['responder_id'] = [pair[0] for pair in df.index]
    df.index = [pair[1] if type(pair) == tuple else pair
                for pair in df.index]

    # Write to file
    df.to_json("survey_results.json")
def test_frame_non_unique_index(self):
    """JSON round-trips for a frame whose index holds duplicate labels."""
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1], columns=['x', 'y'])

    # orients keyed on the index cannot express duplicates
    for bad_orient in ['index', 'columns']:
        self.assertRaises(ValueError, df.to_json, orient=bad_orient)

    assert_frame_equal(df,
                       read_json(df.to_json(orient='split'), orient='split'))

    restored = read_json(df.to_json(orient='records'), orient='records')
    self.assertTrue(df.columns.equals(restored.columns))
    np.testing.assert_equal(df.values, restored.values)

    restored = read_json(df.to_json(orient='values'), orient='values')
    np.testing.assert_equal(df.values, restored.values)
def test_frame_double_encoded_labels(self):
    """Quotes, backslashes and slashes in axis labels must escape correctly."""
    df = DataFrame([['a', 'b'], ['c', 'd']],
                   index=['index " 1', 'index / 2'],
                   columns=['a \\ b', 'y / z'])

    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split'))
    assert_frame_equal(df, read_json(df.to_json(orient='columns'),
                                     orient='columns'))
    assert_frame_equal(df, read_json(df.to_json(orient='index'),
                                     orient='index'))

    # 'records' loses the index, so compare columns and raw values only
    df_unser = read_json(df.to_json(orient='records'), orient='records')
    assert_index_equal(df.columns, df_unser.columns)
    np.testing.assert_equal(df.values, df_unser.values)
def test_frame_non_unique_index(self):
    """Duplicate index labels: unsupported orients raise, the rest round-trip."""
    df = DataFrame([['a', 'b'], ['c', 'd']], index=[1, 1], columns=['x', 'y'])

    pytest.raises(ValueError, df.to_json, orient='index')
    pytest.raises(ValueError, df.to_json, orient='columns')

    assert_frame_equal(df, read_json(df.to_json(orient='split'),
                                     orient='split'))

    recs = read_json(df.to_json(orient='records'), orient='records')
    tm.assert_index_equal(df.columns, recs.columns)
    tm.assert_almost_equal(df.values, recs.values)

    vals = read_json(df.to_json(orient='values'), orient='values')
    tm.assert_numpy_array_equal(df.values, vals.values)
def test_reconstruction_index(self):
    """Round-trip a frame with the default RangeIndex."""
    df = DataFrame([[1, 2, 3], [4, 5, 6]])
    # the index is serialized as strings....correct?
    assert_frame_equal(read_json(df.to_json()), df)
def test_blocks_compat_GH9037(self):
    # GH9037: a frame with several blocks (two float, two int, two string
    # columns over a datetime index) must round-trip through orient='split'
    # and compare equal block-by-block, with exact values.
    index = pd.date_range('20000101', periods=10, freq='H')
    df_mixed = DataFrame(OrderedDict(
        float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
                 -0.60316077, 0.24653374, 0.28668979, -2.51969012,
                 0.95748401, -1.02970536],
        int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
               40314334, 21290235, 4991321, 41903419, 16008365],
        str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
               'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
        float_2=[-0.0428278, -1.80872357, 3.36042349, -0.7573685,
                 -0.48217572, 0.86229683, 1.08935819, 0.93898739,
                 -0.03030452, 1.43366348],
        str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
               '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
        int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
               34193846, 10561746, 24867120, 76131025]
    ), index=index)

    # JSON deserialisation always creates unicode strings
    df_mixed.columns = df_mixed.columns.astype('unicode')

    df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
                                orient='split')
    assert_frame_equal(df_mixed, df_roundtrip,
                       check_index_type=True,
                       check_column_type=True,
                       check_frame_type=True,
                       by_blocks=True,
                       check_exact=True)
def post(self):
    """Execute the posted SQL (wrapped in a 10-row limit) against the
    connection chosen by slug, and respond with the rows as JSON records."""
    post = json.loads(self.request.body)

    MyClient = riak.RiakClient(protocol=RIAK_PROTOCOL,
                               http_port=RIAK_HTTP_PORT,
                               host=RIAK_HOST)
    MyAdminBucket = MyClient.bucket(ADMIN_BUCKET_NAME)

    # look up the connection string whose slug matches the request
    connection = None
    for c in MyAdminBucket.get('connection').data:
        if c['slug'] == post.get('connection', None):
            connection = c['connection']

    # NOTE(review): the posted SQL is interpolated directly into the query
    # string — injection-prone; the subquery wrapper does not sanitise it.
    sql = """SELECT * FROM ({}) AS CUBE LIMIT 10;""".format(
        post.get('sql', None))

    e = create_engine(connection)
    connection = e.connect()
    try:
        resoverall = connection.execute(text(sql))
    except Exception:
        # BUG FIX: was a bare 'except:' that then fell through and used the
        # unbound 'resoverall' below; report the error and stop here.
        self.write({'sql': '', 'msg': 'Error!'})
        self.finish()
        return

    df = DataFrame(resoverall.fetchall())
    if df.empty:
        # BUG FIX: previously fell through after finish() and finished the
        # request a second time.
        self.finish()
        return

    df.columns = resoverall.keys()
    # (dropped a no-op df.head() whose result was discarded)
    self.write({'sql': df.to_json(orient='records'), 'msg': 'Success!'})
    self.finish()
def test_frame_empty_mixedtype(self):
    """An empty frame that is mixed-type still survives a JSON round-trip."""
    # mixed type
    frame = DataFrame(columns=['jim', 'joe'])
    frame['joe'] = frame['joe'].astype('i8')
    self.assertTrue(frame._is_mixed_type)
    restored = read_json(frame.to_json(), dtype=dict(frame.dtypes))
    assert_frame_equal(restored, frame, check_index_type=False)
def test_frame_empty(self):
    """Empty frames round-trip; an all-empty column serialises to '{}'."""
    df = DataFrame(columns=['jim', 'joe'])
    self.assertFalse(df._is_mixed_type)
    assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)),
                       df, check_index_type=False)

    # GH 7445: an empty column must serialise as an empty mapping
    result = pd.DataFrame({'test': []}, index=[]).to_json(orient='columns')
    expected = '{"test":{}}'
    tm.assert_equal(result, expected)
def test_default_handler_numpy_unsupported_dtype(self): # GH12554 to_json raises 'Unhandled numpy dtype 15' df = DataFrame({'a': [1, 2.3, complex(4, -5)], 'b': [float('nan'), None, complex(1.2, 0)]}, columns=['a', 'b']) expected = ('[["(1+0j)","(nan+0j)"],' '["(2.3+0j)","(nan+0j)"],' '["(4-5j)","(1.2+0j)"]]') assert df.to_json(default_handler=str, orient="values") == expected
def test_mixed_timedelta_datetime(self):
    """A mixed timedelta/datetime object column deserialises to ns integers."""
    frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]},
                      dtype=object)

    expected = DataFrame({'a': [pd.Timedelta(frame.a[0]).value,
                                pd.Timestamp(frame.a[1]).value]})
    restored = pd.read_json(frame.to_json(date_unit='ns'),
                            dtype={'a': 'int64'})
    assert_frame_equal(restored, expected)
def test_doc_example(self):
    """Round-trip the documentation example frame (floats, dates, ints, bools)."""
    dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB'))
    dfj2['date'] = Timestamp('20130101')
    dfj2['ints'] = lrange(5)
    dfj2['bools'] = True
    dfj2.index = pd.date_range('20130101', periods=5)

    json = dfj2.to_json()
    result = read_json(json, dtype={'ints': np.int64, 'bools': np.bool_})
    # BUG FIX: the assertion previously compared 'result' with itself,
    # which can never fail; compare against the source frame instead.
    assert_frame_equal(result, dfj2)
def test_doc_example(self):
    """Round-trip the documentation example frame through to_json/read_json."""
    dfj2 = DataFrame(np.random.randn(5, 2), columns=list("AB"))
    dfj2["date"] = Timestamp("20130101")
    dfj2["ints"] = lrange(5)
    dfj2["bools"] = True
    dfj2.index = pd.date_range("20130101", periods=5)

    json = dfj2.to_json()
    result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_})
    # BUG FIX: comparing the result against itself was vacuous; check the
    # deserialised frame against the original data.
    assert_frame_equal(result, dfj2)
class TestRun:
    """
    represents the collected data of a particular (set of) log file(s)
    """
    FILE_EXTENSION = ".trn"
    """ the file extension for saving and loading test runs from """

    def __init__(self, filenames=[]):
        # NOTE(review): mutable default argument; harmless here because the
        # list is only iterated, but a tuple default would be safer.
        self.inputfromstdin = False
        self.filenames = []
        for filename in filenames:
            self.appendFilename(filename)
        # per-run data: a DataFrame once finalised, a dict while collecting
        self.data = DataFrame(dtype=object)
        self.datadict = {}
        self.currentproblemdata = {}
        self.currentproblemid = 0
        """ meta data represent problem-independent data """
        self.metadatadict = {}
        self.parametervalues = {}
        self.defaultparametervalues = {}
        self.keyset = set()
        self.currentfileiterator = None
        self.currentfile = None
        self.consumedStdinput = []

    def __iter__(self):
        # Yield (linenumber, line) pairs from the current file, or — when the
        # current file is the "" stdin sentinel — replay already-consumed
        # stdin lines before continuing with fresh ones.
        if (self.currentfile != ""):
            with open(self.currentfile, "r") as f:
                for line in enumerate(f):
                    yield line
        else:
            for line in enumerate(self.consumedStdinput):
                yield line
            for line in enumerate(sys.stdin, len(self.consumedStdinput)):
                yield line

    def iterationPrepare(self):
        # Sort files by their context so they are parsed in a defined order.
        filenames = sorted(
            self.filenames,
            key=lambda x: misc.sortingKeyContext(misc.filenameGetContext(x)))
        self.currentfileiterator = iter(filenames)

    def iterationNextFile(self):
        # Advance to the next file; False signals that all files are done.
        try:
            self.currentfile = next(self.currentfileiterator)
            return True
        except StopIteration:
            return False

    def iterationAddConsumedStdinput(self, consumedlines):
        # Remember stdin lines already read so __iter__ can replay them.
        if self.currentfile == "":
            for line in consumedlines:
                self.consumedStdinput.append(line)

    def iterationCleanUp(self):
        self.currentfileiterator = None

    def iterationGetCurrentFile(self):
        return self.currentfile

    def setInputFromStdin(self):
        # An empty filename is the sentinel for "read from stdin".
        self.filenames.append("")

    def appendFilename(self, filename):
        # TODO test this
        """Append a file name to the list of filenames of this test run
        """
        filename = os.path.abspath(filename)
        if filename not in self.filenames:
            self.filenames.append(filename)
        else:
            return
        extension = misc.filenameGetContext(filename)
        if extension in [Key.CONTEXT_ERRFILE, Key.CONTEXT_LOGFILE]:
            # pick up the accompanying .meta file automatically, if present
            metafile = os.path.splitext(filename)[0] + ".meta"
            if os.path.isfile(metafile) and (metafile not in self.filenames):
                self.filenames.append(metafile)

    def addDataByName(self, datakeys, data, problem):
        """Add the current data under the specified dataname

        Readers can use this method to add data, either as a single datakey,
        or as list, where in the latter case it is required that datakeys
        and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used
        for access
        """
        for problemid, name in self.datadict.setdefault(Key.ProblemName, {}).items():
            if name == problem:
                self.addDataById(datakeys, data, problemid)

    def addData(self, datakey, data):
        """Add data to current problem

        readers can use this method to add data, either as a single datakey,
        or as list, where in the latter case it is required that datakeys
        and data are both lists of the same length
        """
        logging.debug("TestRun %s receives data Datakey %s, %s" % (self.getName(), repr(datakey), repr(data)))
        if type(datakey) is list and type(data) is list:
            for key, datum in zip(datakey, data):
                self.currentproblemdata[key] = datum
        else:
            self.currentproblemdata[datakey] = data

    def getCurrentProblemData(self, datakey: str = None):
        """Return current problem data, either entirely or for specified data key
        """
        if datakey is None:
            return self.currentproblemdata
        else:
            return self.currentproblemdata.get(datakey)

    def addDataById(self, datakeys, data, problemid):
        """Add the data or to the specified problem

        readers can use this method to add data, either as a single datakey,
        or as list, where in the latter case it is required that datakeys
        and data are both lists of the same length

        after data was added, the method getProblemDataById() can be used
        for access if a problemid was given
        """
        # check for the right dictionary to store the data
        logging.debug("TestRun %s receives data Datakey %s, %s to problem %s" % (self.getName(), repr(datakeys), repr(data), problemid))
        if type(datakeys) is list and type(data) is list:
            for key, datum in zip(datakeys, data):
                self.datadict.setdefault(key, {})[problemid] = datum
        else:
            self.datadict.setdefault(datakeys, {})[problemid] = data

    def addParameterValue(self, paramname, paramval):
        """Store the value for a parameter of a given name for this test run
        """
        self.parametervalues[paramname] = paramval

    def addDefaultParameterValue(self, paramname, defaultval):
        """Store the value for a parameter of a given name for this test run
        """
        self.defaultparametervalues[paramname] = defaultval

    def getParameterData(self):
        """Return two dictionaries that map parameter names to their value and default value
        """
        return (self.parametervalues, self.defaultparametervalues)

    def getLogFile(self, fileextension=".out"):
        """Returns the name of the logfile
        """
        for filename in self.filenames:
            if filename.endswith(fileextension):
                return filename
        return None

    def getKeySet(self):
        """Return a list or set of keys (which are the columns headers of the data)
        """
        if self.datadict != {}:
            return list(self.datadict.keys())
        else:
            return set(self.data.columns)

    def emptyData(self):
        """Empty all data of current testrun
        """
        self.data = DataFrame(dtype=object)

    def getMetaData(self):
        """Return a data frame containing meta data
        """
        return DataFrame(self.metadatadict)

    def finalizeCurrentCollection(self, solver):
        """ Any data of the current problem is saved as a new row in datadict """
        if self.currentproblemdata != {}:
            # Add data collected by solver into currentproblemdata, such as primal and dual bound,
            self.addData(*solver.getData())
            for key in self.metadatadict.keys():
                self.addData(key, self.metadatadict[key])
            for key in self.currentproblemdata.keys():
                self.datadict.setdefault(
                    key, {})[self.currentproblemid] = self.currentproblemdata[key]
            self.currentproblemdata = {}
            self.currentproblemid = self.currentproblemid + 1

    def finishedReadingFile(self, solver):
        """ Save data of current problem """
        self.finalizeCurrentCollection(solver)

    def setupForDataCollection(self):
        """ Save data in a python dictionary for easier data collection """
        self.datadict = self.data.to_dict()
        self.data = DataFrame(dtype=object)

    def setupAfterDataCollection(self):
        """ Save data in a pandas dataframe for futher use (i.e. reading and finding data) """
        self.data = DataFrame(self.datadict)
        self.datadict = {}

    def hasProblemName(self, problemname):
        """ Return if already collected data for a problem with given name """
        if self.datadict != {}:
            return problemname in self.datadict.get(Key.ProblemName, {}).values()
        else:
            if Key.ProblemName in self.data.keys():
                for name in self.data[Key.ProblemName]:
                    if problemname == name:
                        return True
            return False

    def hasProblemId(self, problemid):
        """ Returns if there is already data collected for a problem with given id """
        return problemid in range(self.currentproblemid)

    def getProblemIds(self):
        """ Return a list of problemids """
        return list(range(self.currentproblemid))

    def getProblemNames(self):
        """ Return an (unsorted) list of problemnames """
        if self.datadict != {}:
            return list(self.datadict.get(Key.ProblemName, []))
        else:
            if Key.ProblemName in self.data.columns:
                return list(self.data[Key.ProblemName])
            else:
                return []

    def getProblemDataByName(self, problemname, datakey):
        """Return the data collected for problems with given name """
        collecteddata = []
        if self.datadict != {}:
            # NOTE(review): iterating .get("ProblemName", None) directly looks
            # suspicious — a dict yields keys, not (key, value) pairs, and the
            # literal "ProblemName" differs from the Key.ProblemName constant
            # used elsewhere in this class; confirm against callers.
            for key, dat in self.datadict.get("ProblemName", None):
                if dat == problemname:
                    collecteddata.append(self.getProblemDataById(key, datakey))
        else:
            collecteddata = list(self.data[self.data[Key.ProblemName] == problemname].loc[:, datakey])
        try:
            return collecteddata[0]
        except IndexError:
            return None

    def getProblemDataById(self, problemid, datakey=None):
        """Return data for a specific datakey, or None, if no such data exists for this (probname, datakey) key pair """
        if datakey is None:
            # no key given: render every known key/value pair for the problem
            try:
                return ",".join("%s: %s" % (key, self.getProblemDataById(problemid, key)) for key in self.getKeySet())
            except KeyError:
                return "<%s> not contained in keys, have only\n%s" % \
                    (problemid, ",".join((ind for ind in self.getProblemIds())))
        else:
            if self.datadict != {}:
                return self.datadict.get(datakey, {}).get(problemid, None)
            else:
                try:
                    data = self.data.loc[problemid, datakey]
                except KeyError:
                    data = None
                if type(data) is list or notnull(data):
                    return data
                else:
                    return None

    def getProblemsDataById(self, problemids, datakey):
        """ Return data for a list of problems """
        if self.datadict != {}:
            return [
                self.datadict.get(datakey, {}).get(id, None)
                for id in problemids
            ]
        else:
            return self.data.loc[problemids, datakey]

    def deleteProblemDataById(self, problemid):
        """ Delete all data acquired so far for problemid """
        if self.datadict != {}:
            for key in list(self.datadict.keys()):
                try:
                    del self.datadict[key][problemid]
                except KeyError:
                    pass
        else:
            try:
                self.data.drop(problemid, inplace=True)
            except TypeError:
                # needs to be caught for pandas version < 0.13
                self.data = self.data.drop(problemid)

    def saveToFile(self, filename):
        """ Dump the pickled instance of itself into a .trn-file """
        try:
            f = open(filename, 'wb')
            pickle.dump(self, f, protocol=2)
            f.close()
        except IOError:
            print("Could not open %s for saving test run" % filename)

    def emptyCurrentProblemData(self):
        """ Empty data of currently read problem """
        # NOTE(review): despite the name this only *reports* whether the
        # buffer is empty; it does not clear anything — confirm intent.
        return self.currentproblemdata == {}

    def printToConsole(self, formatstr="{idx}: {d}"):
        """ Print data to console """
        for idx, d in self.data.iterrows():
            # pd.set_option('display.max_rows', len(d))
            print(formatstr.format(d=d, idx=idx))
            # pd.reset_option('display.max_rows')

    def toJson(self):
        """ Return the data-object in json """
        return self.data.to_json()

    @staticmethod
    def loadFromFile(filename):
        """ Loads a .trn-File containing a particular instance of TestRun """
        try:
            if filename.endswith(".gz"):
                import gzip
                f = gzip.open(filename, 'rb')
            else:
                f = open(filename, 'rb')
        except IOError:
            print("Could not open %s for loading test run" % filename)
            return None
        testrun = pickle.load(f)
        f.close()
        return testrun

    def getData(self, datakey=None):
        """Return a data frame object of the acquired data
        """
        return self.data

    def getCurrentLogfilename(self):
        """
        Return the name of the current logfile
        """
        return os.path.basename(self.filenames[0])

    def getSettings(self):
        """ Return the settings associated with this test run """
        try:
            return self.data['Settings'][0]
        except KeyError:
            # fall back to the settings name embedded in the file name
            return os.path.basename(self.filenames[0]).split('.')[-2]
    #

    def getName(self):
        """ Convenience method to make test run a manageable object """
        return self.getIdentification()

    def getIdentification(self):
        """ Return identification string of this test run """
        # TODO Is this still the way to do this? What if we are reading from stdin?
        return os.path.splitext(os.path.basename(self.filenames[0]))[0]

    def problemGetOptimalSolution(self, problemid):
        """ Return objective of an optimal or a best known solution

        ... from solu file, or None, if no such data has been acquired
        """
        try:
            return self.getProblemDataById(problemid, 'OptVal')
        except KeyError:
            # print(self.getIdentification() + " has no solu file value for ", problemid)
            return None

    def problemGetSoluFileStatus(self, problemid):
        """ Return 'unkn', 'inf', 'best', 'opt'

        ... as solu file status, or None, if no solu file status exists for this problem
        """
        try:
            return self.getProblemDataById(problemid, 'SoluFileStatus')
        except KeyError:
            # print(self.getIdentification() + " has no solu file status for ", problemid)
            return None
def test_to_jsonl(self): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) result = df.to_json(orient="records", lines=True) expected = '{"a":1,"b":2}\n{"a":1,"b":2}' self.assertEqual(result, expected)
def test_frame_empty(self):
    """An empty, non-mixed frame round-trips when dtypes are passed through."""
    frame = DataFrame(columns=['jim', 'joe'])
    self.assertFalse(frame._is_mixed_type)
    restored = read_json(frame.to_json(), dtype=dict(frame.dtypes))
    assert_frame_equal(restored, frame, check_index_type=False)
def val(epoch, dataset, config, log_dir):
    """Validate model."""
    # Restores the latest checkpoint, runs every validation example once,
    # writes per-example predictions and running accuracy stats as JSON,
    # and returns the epoch's accuracy.
    # NOTE(review): 'lajidaima' is not defined in this function — presumably
    # a module-level epoch offset defined elsewhere in the file; confirm.
    model_config = config['model']
    sess_config = config['session']
    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'), header=None)[0]
    example_id = 0
    with tf.Graph().as_default():
        model = GRA(model_config)
        model.build_inference()
        result = DataFrame(columns=['id', 'answer'])
        with tf.Session(config=sess_config) as sess:
            sum_dir = os.path.join(log_dir, 'summary')
            summary_writer = tf.summary.FileWriter(sum_dir)

            # restore the most recent checkpoint, or bail out if none exists
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()

            # load (or initialise) the accumulated per-epoch accuracy stats
            stats_dir = os.path.join(log_dir, 'stats')
            stats_path = os.path.join(stats_dir, 'val.json')
            if os.path.exists(stats_path):
                print('load stats file {}.'.format(stats_path))
                stats = pd.read_json(stats_path, 'records')
            else:
                print('no stats file.')
                if not os.path.exists(stats_dir):
                    os.makedirs(stats_dir)
                stats = pd.DataFrame(columns=['epoch', 'acc'])

            # val iterate over examples
            correct = 0
            while dataset.has_val_example:
                vgg, c3d, question, answer = dataset.get_val_example()
                # motion features are zeroed out, keeping only their shape —
                # NOTE(review): looks deliberate (appearance-only eval); confirm
                c3d = np.zeros((len(c3d), len(c3d[0])))
                feed_dict = {
                    model.appear: [vgg],
                    model.motion: [c3d],
                    model.question_encode: [question],
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[1]
                # count the example as correct if any predicted index is a
                # ground-truth answer (answer is a multi-hot vector)
                for i, row in enumerate(prediction):
                    for index in row:
                        if answer[index] == 1:
                            correct += 1
                            break
                result = result.append({
                    'id': example_id,
                    'answer': prediction
                }, ignore_index=True)
                example_id += 1

            acc = correct / dataset.val_example_total
            # file name encodes the accuracy (percent) and the epoch
            result.to_json(
                os.path.join(
                    log_dir,
                    'validation_' + str(int(acc * 100)) + '_' +
                    str(epoch + lajidaima) + '.json'),
                'records')
            print('\n[VAL] epoch {}, acc {:.5f}.\n'.format(
                epoch + lajidaima, acc))

            # log to TensorBoard and append to the persistent stats file
            summary = tf.Summary()
            summary.value.add(tag='val/acc', simple_value=float(acc))
            summary_writer.add_summary(summary, epoch + lajidaima)
            record = Series([epoch + lajidaima, acc], ['epoch', 'acc'])
            stats = stats.append(record, ignore_index=True)
            stats.to_json(stats_path, 'records')

            dataset.reset_val()
    return acc
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 3 09:31:39 2019

@author: potlus
"""
import pandas as pd
from pandas import DataFrame

# Read File Path
path = 'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/SN_CMDB_Apps.csv'
cmdb = pd.read_csv(path, header=0, encoding='unicode_escape')
cmdb.shape

# Keep only the CMDB columns of interest, then export them as JSON lines.
df = DataFrame(cmdb, columns=['Sys_Id', 'SW_Name', 'Technical_Lead',
                              'Support_Group', 'Operational_Status'])
Export = df.to_json(r'S:/DATA CENTER/Autosys/Working On/Sreenivas/SCRIPTS/Python/sampleCMDB.json',
                    orient='records', lines=True)
def dataframe_to_json(data: DataFrame, path: Path, **kwargs):
    """Write *data* to *path* as UTF-8 JSON.

    Non-ASCII characters are kept verbatim (``force_ascii=False``); any extra
    keyword arguments are forwarded to :meth:`DataFrame.to_json`.
    """
    with path.open('w', encoding='UTF-8') as fp:
        data.to_json(fp, force_ascii=False, **kwargs)
def test_default_handler(self): value = object() frame = DataFrame({'a': [7, value]}) expected = DataFrame({'a': [7, str(value)]}) result = pd.read_json(frame.to_json(default_handler=str)) assert_frame_equal(expected, result, check_index_type=False)
def test_default_handler(self): value = object() frame = DataFrame({'a': ['a', value]}) expected = frame.applymap(str) result = pd.read_json(frame.to_json(default_handler=str)) assert_frame_equal(expected, result)
# NOTE(review): this chunk begins mid-function — the enclosing definition
# (apparently the tail of a nearest-colour lookup over GeoJSON features)
# is cut off above; the fragment is preserved as-is.
            f["properties"]["G"],
            f["properties"]["B"],
        ])
        # Pick the feature colour closest to ``d`` under metric ``func``.
        closest_index = cdist(XA=np.array([d]), XB=np.array(fl),
                              metric=func).argmin()
        rgb = fl[closest_index]
        final_data.append([d[0], d[1], d[2], rgb[2], rgb[3], rgb[4]])
        print(len(final_data))
        time.sleep(1)
    except Exception as e:
        # NOTE(review): broad catch that only prints — errors are swallowed.
        print(e)


# Fan the feature groups out over worker processes.
features_size = 1000
groups = grouper(features_size, xa)
part_func = partial(gen_feature_color, space_color=space, func=func)
with concurrent.futures.ProcessPoolExecutor(max_workers=60) as executor:
    executor.map(part_func, groups, chunksize=3)

from pandas import DataFrame

# NOTE(review): ``final_data`` appended inside worker processes is not shared
# back to the parent process — this likely serialises an empty/stale list;
# verify intent (a Manager list or the map() return value may be needed).
df = DataFrame(list(final_data))
df.to_json("FINAL_DATA.json", orient="values")
####################################
def test(dataset, config, log_dir, question_type_dict):
    """Test model, output prediction as json file.

    Restores the latest checkpoint from ``log_dir/checkpoint``, runs every
    test example through the model, writes ``prediction.json`` and prints
    overall / per-question-type accuracy plus WUPS scores.

    Returns:
        float accuracy over the test set.
    """
    model_config = config['model']
    sess_config = config['session']
    # Per-question-type hit/total counters, keyed like question_type_dict.
    question_type_correct_count = copy.deepcopy(question_type_dict)
    question_type_all_count = copy.deepcopy(question_type_dict)
    for k in question_type_dict:
        question_type_correct_count[k] = 0
        question_type_all_count[k] = 0
    # Index -> answer-string lookup table produced by preprocessing.
    answerset = pd.read_csv(os.path.join(config['preprocess_dir'],
                                         'answer_set.txt'), header=None)[0]
    with tf.Graph().as_default():
        model = Multimodal_DMN_VM(model_config)
        model.build_inference()
        with tf.Session(config=sess_config) as sess:
            ckpt_dir = os.path.join(log_dir, 'checkpoint')
            save_path = tf.train.latest_checkpoint(ckpt_dir)
            saver = tf.train.Saver()
            if save_path:
                print('load checkpoint {}.'.format(save_path))
                saver.restore(sess, save_path)
            else:
                print('no checkpoint.')
                exit()
            # test iterate over examples
            result = DataFrame(columns=['id', 'answer'])
            correct = 0
            groundtruth_answer_list = []
            predict_answer_list = []
            while dataset.has_test_example:
                vgg, c3d, vgg_conv5, vgg_conv5_3, question, answer, example_id, question_len = dataset.get_test_example(
                )
                # Fixed video length fed to the model for every example.
                input_len = 20
                feed_dict = {
                    model.c3d_video_feature: [c3d],
                    model.vgg_video_feature: [vgg],
                    model.question_encode: [question],
                    model.question_len_placeholder: [question_len],
                    model.video_len_placeholder: [input_len],
                    model.keep_placeholder: 1.0
                }
                prediction = sess.run(model.prediction, feed_dict=feed_dict)
                prediction = prediction[0]
                # NOTE(review): DataFrame.append is deprecated/removed in
                # pandas >= 2.0 — migrate to pd.concat when upgrading.
                result = result.append(
                    {
                        'id': example_id,
                        'answer': answerset[prediction]
                    }, ignore_index=True)
                if answerset[prediction] == answer:
                    correct += 1
                    # question[0] encodes the question type — TODO confirm.
                    question_type_correct_count[question[0]] += 1
                question_type_all_count[question[0]] += 1
                groundtruth_answer_list.append(answer)
                predict_answer_list.append(answerset[prediction])
            result.to_json(os.path.join(log_dir, 'prediction.json'), 'records')
            acc = correct * 1.0 / dataset.test_example_total
            # WUPS with thresholds 0.0 / 0.9; -1 is the plain-accuracy mode.
            WUPS_0_0 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.0)
            WUPS_0_9 = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, 0.9)
            WUPS_acc = metrics.compute_wups(groundtruth_answer_list,
                                            predict_answer_list, -1)
            print('[TEST] acc {:.5f}.\n'.format(acc))
            print('[TEST], WUPS@acc {:.5f}.\n'.format(WUPS_acc))
            # NOTE(review): the '[email protected]' literals below look like an
            # email-obfuscation artifact of '[email protected]' / '[email protected]' — verify
            # against the original source before relying on this output.
            print('[TEST], [email protected] {:.5f}.\n'.format(WUPS_0_0))
            print('[TEST], [email protected] {:.5f}.\n'.format(WUPS_0_9))
            print('######## question type acc list ######### ')
            for k in question_type_dict:
                print(question_type_dict[k] +
                      ' acc {:.5f}.'.format(question_type_correct_count[k] *
                                            1.0 / question_type_all_count[k]))
                print('correct = {:d}, all = {:d}'.format(
                    question_type_correct_count[k],
                    question_type_all_count[k]))
            dataset.reset_test()
            return acc
class TestTableOrient:
    """Tests for ``orient='table'`` (Table Schema) JSON serialisation.

    Covers schema generation for every supported dtype, round-tripping,
    field-conversion helpers and index-name edge cases.
    """

    def setup_method(self, method):
        # One column per dtype family the table schema must describe.
        self.df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["a", "b", "c", "c"],
                "C": pd.date_range("2016-01-01", freq="d", periods=4),
                "D": pd.timedelta_range("1H", periods=4, freq="T"),
                "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])),
                "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)),
                "G": [1.0, 2.0, 3, 4.0],
                "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"),
            },
            index=pd.Index(range(4), name="idx"),
        )

    def test_build_series(self):
        s = pd.Series([1, 2], name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [{"name": "id", "type": "integer"}, {"name": "a", "type": "integer"}]

        schema = {"fields": fields, "primaryKey": ["id"]}

        expected = OrderedDict(
            [
                ("schema", schema),
                (
                    "data",
                    [
                        OrderedDict([("id", 0), ("a", 1)]),
                        OrderedDict([("id", 1), ("a", 2)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_read_json_from_to_json_results(self):
        # GH32383
        df = pd.DataFrame(
            {
                "_id": {"row_0": 0},
                "category": {"row_0": "Goods"},
                "recommender_id": {"row_0": 3},
                "recommender_name_jp": {"row_0": "浦田"},
                "recommender_name_en": {"row_0": "Urata"},
                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                "name_en": {"row_0": "Hakata Dolls Matsuo"},
            }
        )
        result1 = pd.read_json(df.to_json())
        result2 = pd.DataFrame.from_dict(json.loads(df.to_json()))
        tm.assert_frame_equal(result1, df)
        tm.assert_frame_equal(result2, df)

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {"name": "A", "type": "integer"},
            {"name": "B", "type": "string"},
            {"name": "C", "type": "datetime"},
            {"name": "D", "type": "duration"},
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "E",
                "ordered": False,
                "type": "any",
            },
            {
                "constraints": {"enum": ["a", "b", "c"]},
                "name": "F",
                "ordered": True,
                "type": "any",
            },
            {"name": "G", "type": "number"},
            {"name": "H", "type": "datetime", "tz": "US/Central"},
        ]

        schema = {"fields": fields, "primaryKey": ["idx"]}
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", 1),
                    ("B", "a"),
                    ("C", "2016-01-01T00:00:00.000Z"),
                    ("D", "P0DT1H0M0S"),
                    ("E", "a"),
                    ("F", "a"),
                    ("G", 1.0),
                    ("H", "2016-01-01T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 1),
                    ("A", 2),
                    ("B", "b"),
                    ("C", "2016-01-02T00:00:00.000Z"),
                    ("D", "P0DT1H1M0S"),
                    ("E", "b"),
                    ("F", "b"),
                    ("G", 2.0),
                    ("H", "2016-01-02T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 2),
                    ("A", 3),
                    ("B", "c"),
                    ("C", "2016-01-03T00:00:00.000Z"),
                    ("D", "P0DT1H2M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 3.0),
                    ("H", "2016-01-03T06:00:00.000Z"),
                ]
            ),
            OrderedDict(
                [
                    ("idx", 3),
                    ("A", 4),
                    ("B", "c"),
                    ("C", "2016-01-04T00:00:00.000Z"),
                    ("D", "P0DT1H3M0S"),
                    ("E", "c"),
                    ("F", "c"),
                    ("G", 4.0),
                    ("H", "2016-01-04T06:00:00.000Z"),
                ]
            ),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1.0, 2.0])
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {"name": "index", "type": "number"},
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", 1.0), ("values", 1)]),
                        OrderedDict([("index", 2.0), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_to_json_period_index(self):
        # Period indexes are serialised as the period's start timestamp.
        idx = pd.period_range("2016", freq="Q-JAN", periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"freq": "Q-JAN", "name": "index", "type": "datetime"},
            {"name": "values", "type": "integer"},
        ]

        schema = {"fields": fields, "primaryKey": ["index"]}
        data = [
            OrderedDict([("index", "2015-11-01T00:00:00.000Z"), ("values", 1)]),
            OrderedDict([("index", "2016-02-01T00:00:00.000Z"), ("values", 1)]),
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])

        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(["a", "b"]))
        result = data.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        expected = OrderedDict(
            [
                (
                    "schema",
                    {
                        "fields": [
                            {
                                "name": "index",
                                "type": "any",
                                "constraints": {"enum": ["a", "b"]},
                                "ordered": False,
                            },
                            {"name": "values", "type": "integer"},
                        ],
                        "primaryKey": ["index"],
                    },
                ),
                (
                    "data",
                    [
                        OrderedDict([("index", "a"), ("values", 1)]),
                        OrderedDict([("index", "b"), ("values", 1)]),
                    ],
                ),
            ]
        )

        assert result == expected

    def test_date_format_raises(self):
        # Table schema mandates ISO dates; epoch must be rejected.
        with pytest.raises(ValueError):
            self.df.to_json(orient="table", date_format="epoch")

        # others work
        self.df.to_json(orient="table", date_format="iso")
        self.df.to_json(orient="table")

    def test_convert_pandas_type_to_json_field_int(self, index_or_series):
        kind = index_or_series
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    def test_convert_pandas_type_to_json_field_float(self, index_or_series):
        kind = index_or_series
        data = [1.0, 2.0, 3.0]
        result = convert_pandas_type_to_json_field(kind(data, name="name"))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize(
        "dt_args,extra_exp", [({}, {}), ({"utc": True}, {"tz": "UTC"})]
    )
    @pytest.mark.parametrize("wrapper", [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
        self, dt_args, extra_exp, wrapper
    ):
        data = [1.0, 2.0, 3.0]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name="values")
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": "datetime"}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range("2016", freq="A-DEC", periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": "datetime", "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize("kind", [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize("ordered", [True, False])
    def test_convert_pandas_type_to_json_field_categorical(self, kind, ordered):
        data = ["a", "b", "c"]
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name="cats")
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name="cats")

        result = convert_pandas_type_to_json_field(arr)
        expected = {
            "name": "cats",
            "type": "any",
            "constraints": {"enum": data},
            "ordered": ordered,
        }
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [
            ({"type": "integer"}, "int64"),
            ({"type": "number"}, "float64"),
            ({"type": "boolean"}, "bool"),
            ({"type": "duration"}, "timedelta64"),
            ({"type": "datetime"}, "datetime64[ns]"),
            ({"type": "datetime", "tz": "US/Hawaii"}, "datetime64[ns, US/Hawaii]"),
            ({"type": "any"}, "object"),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": False,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=False),
            ),
            (
                {
                    "type": "any",
                    "constraints": {"enum": ["a", "b", "c"]},
                    "ordered": True,
                },
                CategoricalDtype(categories=["a", "b", "c"], ordered=True),
            ),
            ({"type": "string"}, "object"),
        ],
    )
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        field = {"name": "foo"}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        field = {"type": inp}
        with pytest.raises(
            ValueError, match=f"Unsupported or invalid field type: {inp}"
        ):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        s = pd.Series(pd.Categorical(["a", "b", "a"]))
        s.index.name = "idx"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "idx", "type": "integer"},
            {
                "constraints": {"enum": ["a", "b"]},
                "name": "values",
                "ordered": False,
                "type": "any",
            },
        ]

        expected = OrderedDict(
            [
                ("schema", {"fields": fields, "primaryKey": ["idx"]}),
                (
                    "data",
                    [
                        OrderedDict([("idx", 0), ("values", "a")]),
                        OrderedDict([("idx", 1), ("values", "b")]),
                        OrderedDict([("idx", 2), ("values", "a")]),
                    ],
                ),
            ]
        )

        assert result == expected

    @pytest.mark.parametrize(
        "idx,nm,prop",
        [
            (pd.Index([1]), "index", "name"),
            (pd.Index([1], name="myname"), "myname", "name"),
            (
                pd.MultiIndex.from_product([("a", "b"), ("c", "d")]),
                ["level_0", "level_1"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", "n2"]
                ),
                ["n1", "n2"],
                "names",
            ),
            (
                pd.MultiIndex.from_product(
                    [("a", "b"), ("c", "d")], names=["n1", None]
                ),
                ["n1", "level_1"],
                "names",
            ),
        ],
    )
    def test_set_names_unset(self, idx, nm, prop):
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    @pytest.mark.parametrize(
        "idx",
        [
            pd.Index([], name="index"),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("level_0", "level_1")),
            pd.MultiIndex.from_arrays([["foo"], ["bar"]], names=("foo", "level_1")),
        ],
    )
    def test_warns_non_roundtrippable_names(self, idx):
        # GH 19130
        df = pd.DataFrame(index=idx)
        df.index.name = "index"
        with tm.assert_produces_warning():
            set_default_names(df)

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")]
        )
        result = df.to_json(orient="table")
        js = json.loads(result)
        # Timestamp/Timedelta column labels are serialised as ISO strings.
        assert js["schema"]["fields"][1]["name"] == "2016-01-01T00:00:00.000Z"
        assert js["schema"]["fields"][2]["name"] == "P0DT0H0M10S"

    @pytest.mark.parametrize(
        "case",
        [
            pd.Series([1], index=pd.Index([1], name="a"), name="a"),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame(
                {"A": [1]},
                index=pd.MultiIndex.from_arrays([["a"], [1]], names=["A", "a"]),
            ),
        ],
    )
    def test_overlapping_names(self, case):
        # Column/index name collisions cannot round-trip through the schema.
        with pytest.raises(ValueError, match="Overlapping"):
            case.to_json(orient="table")
def to_session(df: pd.DataFrame):
    """Serialise *df* to a JSON string for storage in the session."""
    # Compressed storage was considered but is currently disabled:
    # return compressStringToBytes(df.to_json())
    return df.to_json()
def test_read_json_table_orient(self, index_nm, vals): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result)
def get_and_store_usage_data(selected_month):
    """Fetch the month's usage rows and return them as JSON with ISO dates."""
    query = admin_queries['data_usage_by_month']
    usage = DataFrame(session.execute(query, [selected_month]))
    return usage.to_json(date_format='iso')
def process(request, exec_instance):
    # Drives one oil-spill simulation run end to end: parses the request,
    # optionally looks up bathymetry depth, submits the simulation, converts
    # its output for the map visualiser and records usage statistics.
    # NOTE(review): Python 2 code (print statements); the bare ``except``
    # clauses silently swallow all errors, and the bathymetry queries are
    # built by string concatenation — values come from the request, so this
    # should be parameterised to avoid SQL injection.
    dataset_list = []
    service_exec = ServiceInstance.objects.get(pk=int(exec_instance))
    try:
        service_exec.arguments = {
            "filter-arguments": [],
            "algorithm-arguments": [{}, {}]
        }
        spill_infos, wave_model, ocean_model, natura_layer, ais_layer, time_interval, sim_length, oil_density, valid_points, valid_points_count, scenario, start_date, latitude, longitude = parse_request_params(
            request)
        depth = 0
        if (scenario == '1') or (scenario == '3'):
            service_exec.arguments["algorithm-arguments"][0][
                "latitude"] = spill_infos[0]['latitude']
            service_exec.arguments["algorithm-arguments"][0][
                "longitude"] = spill_infos[0]['longitude']
            if scenario == '3':
                # Scenario 3 needs the sea depth at the spill point; query the
                # bathymetry table matching the selected wave model.
                cursor_presto = get_presto_cursor()
                resolution = 1
                if wave_model == '202':
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                        round(float(spill_infos[0]['latitude']), resolution)) + " AND round(longitude," + str(
                        resolution) + ")=" + str(
                        round(
                            float(
                                spill_infos[0]['longitude']), resolution)) + ")"
                    cursor_presto.execute(query)
                    try:
                        dataset_list.append((Dataset.objects.get(
                            table_name='hcmr_poseidon_aeg_bathymetry')).id)
                    except:
                        print 'Dataset does not exist in database'
                else:
                    query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                        resolution) + " )=" + str(
                        round(float(spill_infos[0]['latitude']), resolution)) + " AND round(longitude," + str(
                        resolution) + ")=" + str(
                        round(
                            float(
                                spill_infos[0]['longitude']), resolution)) + ")"
                    cursor_presto.execute(query)
                    try:
                        dataset_list.append((Dataset.objects.get(
                            table_name='hcmr_poseidon_med_bathymetry')).id)
                    except:
                        print 'Dataset does not exist in database'
                result = cursor_presto.fetchall()
                try:
                    depth = float(result[0][0])
                except:
                    # No row at 1-decimal resolution — retry coarser.
                    resolution = 0
                    print 'exception: trying with less precise resolution'
                    if wave_model == '202':
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_aeg_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']), resolution)
                        ) + " AND round(longitude," + str(
                            resolution) + ")=" + str(
                            round(float(spill_infos[0]['longitude']),
                                  resolution)) + ")"
                        cursor_presto.execute(query)
                    else:
                        query = "SELECT * FROM (SELECT min(depth) FROM hcmr_poseidon_med_bathymetry WHERE round(latitude," + str(
                            resolution) + " )=" + str(
                            round(float(spill_infos[0]['latitude']), resolution)
                        ) + " AND round(longitude," + str(
                            resolution) + ")=" + str(
                            round(float(spill_infos[0]['longitude']),
                                  resolution)) + ")"
                        cursor_presto.execute(query)
                    result = cursor_presto.fetchall()
                    try:
                        depth = float(result[0][0])
                    except:
                        depth = 0
                service_exec.arguments["algorithm-arguments"][0][
                    "depth"] = depth
                print query
                print 'Oilspill depth:' + str(depth)
                # service_exec.arguments["algorithm-arguments"][0]["depth"] = spill_infos[0]['depth']
        elif scenario == '2':
            # Scenario 2: multiple spill points, numbered latitude1..N.
            count = 1
            for el in spill_infos:
                service_exec.arguments["algorithm-arguments"][0][
                    "latitude" + str(count)] = spill_infos[count - 1]['latitude']
                service_exec.arguments["algorithm-arguments"][0][
                    "longitude" + str(count)] = spill_infos[count - 1]['longitude']
                count = count + 1
            service_exec.arguments["algorithm-arguments"][0][
                "number_of_points"] = count - 1
        service_exec.arguments["algorithm-arguments"][0][
            "start_date"] = spill_infos[0]['start_date']
        service_exec.arguments["algorithm-arguments"][0][
            "oil_volume"] = spill_infos[0]['oil_volume']
        service_exec.arguments["algorithm-arguments"][0]["sim_length"] = str(
            sim_length)
        # Map model codes to their display names.
        if wave_model == '202':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Aegean'
        elif wave_model == '201':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Poseidon WAM Cycle 4 for the Mediterranean'
        elif wave_model == '203':
            service_exec.arguments["algorithm-arguments"][0][
                "wave_model"] = 'Copernicus Wave Model for the Mediterranean'
        else:
            service_exec.arguments["algorithm-arguments"][0]["wave_model"] = ''
        if ocean_model == '001':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon High Resolution Aegean Model'
        elif ocean_model == '002':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Poseidon Mediterranean Model'
        elif ocean_model == '003':
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = 'Copernicus Mediterranean Model'
        else:
            service_exec.arguments["algorithm-arguments"][0][
                "ocean_model"] = ''
        service_exec.arguments["algorithm-arguments"][0][
            "natura_layer"] = natura_layer
        service_exec.arguments["algorithm-arguments"][0][
            "ais_layer"] = ais_layer
        # 1)Create input file
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Creating simulation request"
        service_exec.save()
        filename, url_params = create_inp_file_from_request_and_upload(
            request, depth)
        # 2)Calculate oil spill
        if service_exec.status == 'failed':
            raise Exception
        service_exec.status = "Simulation running"
        service_exec.save()
        found = wait_until_output_ready(url_params, request)
        if found:
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Simulation results received"
            service_exec.save()
            filename_output = str(filename).replace("_F.inp", "_F.out")
            hcmr_data_filename = str(filename).replace("_F.inp", ".json")
            red_points_filename = str(filename).replace("_F.inp", ".txt")
            # 3)Transforming data to be shown on map
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Transforming data to be shown on map"
            service_exec.save()
            output_path = 'service_builder/static/services_files/hcmr_service_1/' + filename_output
            spill_data, parcel_data = create_json_from_out_file(output_path)
            # spill_data = [spill_infos[0]['start_date']+':00', spill_infos[0]['latitude'], spill_infos[0]['longitude'], spill_data[0][3], spill_data[0][4], spill_data[0][3], spill_infos[0]['oil_volume'],spill_data[0][5], spill_data[0][6]]
            # print str(spill_infos[0]['latitude']) + ' ' + spill_infos[0]['longitude']
            # print str(valid_points[0][0]) + ' ' + str(valid_points[0][1])
            # for el in valid_points:
            #     parcel_data.insert(0,[spill_infos[0]['start_date'].encode('ascii') + ':00', float(el[0]),float(el[1]),
            #                           parcel_data[0][3], parcel_data[0][4], float(spill_infos[0]['oil_volume']),
            #                           parcel_data[0][6], parcel_data[0][7]])
            # spill_data.insert(0,
            #                   [spill_infos[0]['start_date'].encode('ascii') + ':00', spill_data[0][1], spill_data[0][2], spill_data[0][3], spill_data[0][4], spill_data[0][5], spill_data[0][6], spill_data[0][7], spill_data[0][8], spill_data[0][9], spill_data[0][10]])
            print 'create_json_from_out_file done'
            headers_parcel = [
                "time", "Lat", "Lon", "Dpth", "Status", "Volume(m3)", "Dens",
                "Visc"
            ]
            parcel_df = DataFrame(parcel_data, columns=headers_parcel)
            print 'parcel_df = DataFrame done'
            print(parcel_df.head(2))
            # Visualiser reads this per-parcel JSON from its static dir.
            parcel_df.to_json('visualizer/static/visualizer/files/' +
                              hcmr_data_filename, orient='records')
            print 'parcel_df.to_json done'
            headers_spill = [
                'time', 'N', '%ev', '%srf', '%em', '%disp', '%cst', '%btm',
                'max_visc', 'min_visc', 'dens'
            ]
            service_exec.arguments["algorithm-arguments"][1][
                "headers_spill"] = headers_spill
            service_exec.arguments["algorithm-arguments"][1][
                "spill_data"] = spill_data
            service_exec.save()
            print 'spill_data done'
            # 4)Calculate red points
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "Calculating oil spill intersections with protected areas"
            service_exec.save()
            if natura_layer == "true":
                # red_points_calc.calculate(hcmr_data_filename, red_points_filename)
                pass
            if ais_layer == "true":
                try:
                    dataset_list.append(
                        (Dataset.objects.get(table_name='xmile_ais',
                                             stored_at='UBITECH_PRESTO')).id)
                except:
                    print 'Dataset does not exist in database'
            print 'red points calculated'
            # 5)Create Visualization
            print valid_points
            # Build the start_latN/start_lonN query-string prefix.
            oil_spill_start = ''
            v_count = 1
            for el in valid_points:
                oil_spill_start = oil_spill_start + 'start_lat' + str(
                    v_count) + '=' + str(
                    el[0]) + '&start_lon' + str(v_count) + '=' + str(
                    el[1]) + '&'
                v_count = v_count + 1
            # NOTE(review): the first assignment below is dead — it is
            # immediately overwritten by the second one.
            visualization_url = "http://" + request.META[
                'HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?" + oil_spill_start + "markerType=circle&lat_col=Lat&lon_col=Lon" + "&data_file=" + hcmr_data_filename + "&red_points_file=" + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer + "&time_interval=" + time_interval + "&valid_points=" + str(
                len(valid_points))
            visualization_url = "http://" + request.META['HTTP_HOST'] + "/visualizations/map_markers_in_time_hcmr/" + "?"+oil_spill_start \
                                + "&markerType=circle&lat_col=Lat&lon_col=Lon" \
                                + "&data_file=" + hcmr_data_filename + "&red_points_file=" \
                                + red_points_filename + "&natura_layer=" + natura_layer + "&ais_layer=" + ais_layer \
                                + "&time_interval=" + time_interval + "&start_date=" + start_date + \
                                '&latitude=' + latitude + "&longitude=" + longitude + "&length="+ sim_length + "&valid_points="+ str(len(valid_points))
            service_exec.dataframe_visualizations = {"v1": visualization_url}
            service_exec.arguments["algorithm-arguments"][0][
                "out_filepath"] = filename_output
            if service_exec.status == 'failed':
                raise Exception
            service_exec.status = "done"
            service_exec.save()
            # Record dataset/service usage statistics.
            service_obj = service_exec.service
            for dataset_list_el_id in dataset_list:
                try:
                    dataset_obj = Dataset.objects.get(id=dataset_list_el_id)
                    dataset_service_execution(dataset_obj, service_obj)
                except:
                    pass
            service_use(service_obj)
            unique_service_use(service_obj, request.user)
            hcmr_statistics(scenario, sim_length, time_interval, ocean_model,
                            wave_model, str_to_bool(natura_layer),
                            str_to_bool(ais_layer))
            # context = {
            #     'url': visualization_url,
            #     'out_filepath': filename_output,
            # }
            # return render(request, 'hcmr_pilot/scenario1-results.html', context)
        else:
            # html = "<html><body>Something went wrong. Please, try again.</body></html>"
            # return HttpResponse(html)
            service_exec.status = "failed"
            service_exec.save()
    except:
        service_exec.status = "failed"
        service_exec.save()
def test_frame_from_json_precise_float(self): df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) result = read_json(df.to_json(), precise_float=True) assert_frame_equal(result, df)
def test_frame_from_json_precise_float(self): df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) result = read_json(df.to_json(), precise_float=True) assert_frame_equal(result, df, check_index_type=False, check_column_type=False)
def test_frame_from_json_nones(self):
    # Missing cells (from ragged rows) must come back as NaN by default,
    # or as None when dtype coercion is disabled.
    # NOTE(review): this test targets a legacy pandas/numpy API surface —
    # ``read_json(numpy=...)`` was removed in pandas 2.0 and ``np.NINF`` in
    # numpy 2.0 — keep as-is only on the matching pinned versions.
    df = DataFrame([[1, 2], [4, 5, 6]])
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))

    df = DataFrame([['1', '2'], ['4', '5', '6']])
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))

    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(unser[2][0] is None)

    unser = read_json(df.to_json(), convert_axes=False, dtype=False)
    self.assertTrue(unser['2']['0'] is None)

    unser = read_json(df.to_json(), numpy=False)
    self.assertTrue(np.isnan(unser[2][0]))

    unser = read_json(df.to_json(), numpy=False, dtype=False)
    self.assertTrue(unser[2][0] is None)

    unser = read_json(df.to_json(), numpy=False, convert_axes=False,
                      dtype=False)
    self.assertTrue(unser['2']['0'] is None)

    # infinities get mapped to nulls which get mapped to NaNs during
    # deserialisation
    df = DataFrame([[1, 2], [4, 5, 6]])
    df.loc[0, 2] = np.inf
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))

    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(np.isnan(unser[2][0]))

    df.loc[0, 2] = np.NINF
    unser = read_json(df.to_json())
    self.assertTrue(np.isnan(unser[2][0]))

    unser = read_json(df.to_json(), dtype=False)
    self.assertTrue(np.isnan(unser[2][0]))
class TestTableOrient(object):
    """Tests for ``orient='table'`` JSON output (older pandas test-suite era:
    uses ``make_field`` / ``set_default_names`` helpers and yapf formatting).
    """

    def setup_method(self, method):
        # One column per dtype family the table schema must describe.
        self.df = DataFrame(
            {
                'A': [1, 2, 3, 4],
                'B': ['a', 'b', 'c', 'c'],
                'C': pd.date_range('2016-01-01', freq='d', periods=4),
                'D': pd.timedelta_range('1H', periods=4, freq='T'),
                'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
                'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
                                              ordered=True)),
                'G': [1., 2., 3, 4.],
                'H': pd.date_range(
                    '2016-01-01', freq='d', periods=4, tz='US/Central'),
            },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'id',
            'type': 'integer'
        }, {
            'name': 'a',
            'type': 'integer'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['id'],
        }

        expected = OrderedDict([('schema', schema), ('data', [
            OrderedDict([('id', 0), ('a', 1)]),
            OrderedDict([('id', 1), ('a', 2)])
        ])])

        assert result == expected

    def test_to_json(self):
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'name': 'A',
            'type': 'integer'
        }, {
            'name': 'B',
            'type': 'string'
        }, {
            'name': 'C',
            'type': 'datetime'
        }, {
            'name': 'D',
            'type': 'duration'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'E',
            'ordered': False,
            'type': 'any'
        }, {
            'constraints': {
                'enum': ['a', 'b', 'c']
            },
            'name': 'F',
            'ordered': True,
            'type': 'any'
        }, {
            'name': 'G',
            'type': 'number'
        }, {
            'name': 'H',
            'type': 'datetime',
            'tz': 'US/Central'
        }]

        schema = {
            'fields': fields,
            'primaryKey': ['idx'],
        }
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'), ('E', 'a'), ('F', 'a'),
                         ('G', 1.), ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'), ('E', 'b'), ('F', 'b'),
                         ('G', 2.), ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 3.), ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'), ('E', 'c'), ('F', 'c'),
                         ('G', 4.), ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])

        assert result == expected

    def test_to_json_float_index(self):
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'number'
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }), ('data', [
            OrderedDict([('index', 1.0), ('values', 1)]),
            OrderedDict([('index', 2.0), ('values', 1)])
        ])]))

        assert result == expected

    def test_to_json_period_index(self):
        # Period indexes serialise as the period's start timestamp.
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'freq': 'Q-JAN',
            'name': 'index',
            'type': 'datetime'
        }, {
            'name': 'values',
            'type': 'integer'
        }]

        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [
            OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                         ('values', 1)]),
            OrderedDict([('index', '2016-02-01T00:00:00.000Z'),
                         ('values', 1)])
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])

        assert result == expected

    def test_to_json_categorical_index(self):
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        expected = (OrderedDict([('schema', {
            'fields': [{
                'name': 'index',
                'type': 'any',
                'constraints': {
                    'enum': ['a', 'b']
                },
                'ordered': False
            }, {
                'name': 'values',
                'type': 'integer'
            }],
            'primaryKey': ['index']
        }), ('data', [
            OrderedDict([('index', 'a'), ('values', 1)]),
            OrderedDict([('index', 'b'), ('values', 1)])
        ])]))

        assert result == expected

    def test_date_format_raises(self):
        # Table schema mandates ISO dates; epoch must be rejected.
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    def test_make_field_int(self):
        data = [1, 2, 3]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'integer'}
            assert result == expected

    def test_make_field_float(self):
        data = [1., 2., 3.]
        kinds = [pd.Series(data, name='name'), pd.Index(data, name='name')]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "name", "type": 'number'}
            assert result == expected

    def test_make_field_datetime(self):
        data = [1., 2., 3.]
        kinds = [
            pd.Series(pd.to_datetime(data), name='values'),
            pd.to_datetime(data)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime'}
            assert result == expected

        # tz-aware datetimes additionally carry a "tz" field.
        kinds = [
            pd.Series(pd.to_datetime(data, utc=True), name='values'),
            pd.to_datetime(data, utc=True)
        ]
        for kind in kinds:
            result = make_field(kind)
            expected = {"name": "values", "type": 'datetime', "tz": "UTC"}
            assert result == expected

        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = make_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    def test_make_field_categorical(self):
        data = ['a', 'b', 'c']
        ordereds = [True, False]

        for ordered in ordereds:
            arr = pd.Series(pd.Categorical(data, ordered=ordered),
                            name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

            arr = pd.CategoricalIndex(data, ordered=ordered, name='cats')
            result = make_field(arr)
            expected = {
                "name": "cats",
                "type": "any",
                "constraints": {
                    "enum": data
                },
                "ordered": ordered
            }
            assert result == expected

    def test_categorical(self):
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')

        fields = [{
            'name': 'idx',
            'type': 'integer'
        }, {
            'constraints': {
                'enum': ['a', 'b']
            },
            'name': 'values',
            'ordered': False,
            'type': 'any'
        }]

        expected = OrderedDict([('schema', {
            'fields': fields,
            'primaryKey': ['idx']
        }), ('data', [
            OrderedDict([('idx', 0), ('values', 'a')]),
            OrderedDict([('idx', 1), ('values', 'b')]),
            OrderedDict([('idx', 2), ('values', 'a')])
        ])])

        assert result == expected

    def test_set_default_names_unset(self):
        data = pd.Series(1, pd.Index([1]))
        result = set_default_names(data)
        assert result.index.name == 'index'

    def test_set_default_names_set(self):
        data = pd.Series(1, pd.Index([1], name='myname'))
        result = set_default_names(data)
        assert result.index.name == 'myname'

    def test_set_default_names_mi_unset(self):
        data = pd.Series(1,
                         pd.MultiIndex.from_product([('a', 'b'),
                                                     ('c', 'd')]))
        result = set_default_names(data)
        assert result.index.names == ['level_0', 'level_1']

    def test_set_default_names_mi_set(self):
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', 'n2']))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'n2']

    def test_set_default_names_mi_partion(self):
        # Only the unnamed level receives a default name.
        data = pd.Series(
            1,
            pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                       names=['n1', None]))
        result = set_default_names(data)
        assert result.index.names == ['n1', 'level_1']

    def test_timestamp_in_columns(self):
        df = pd.DataFrame(
            [[1, 2]],
            columns=[pd.Timestamp('2016'), pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        # In this pandas era, timestamp/timedelta column labels serialised
        # as epoch milliseconds / milliseconds (later versions use ISO).
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    def test_overlapping_names(self):
        cases = [
            pd.Series([1], index=pd.Index([1], name='a'), name='a'),
            pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
            pd.DataFrame({"A": [1]},
                         index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                         names=["A", "a"])),
        ]

        for data in cases:
            with pytest.raises(ValueError) as excinfo:
                data.to_json(orient='table')

            assert 'Overlapping' in str(excinfo.value)

    def test_mi_falsey_name(self):
        # GH 16203
        df = pd.DataFrame(np.random.randn(4, 4),
                          index=pd.MultiIndex.from_product([('A', 'B'),
                                                            ('a', 'b')]))
        result = [x['name'] for x in build_table_schema(df)['fields']]
        assert result == ['level_0', 'level_1', 0, 1, 2, 3]
class ToJSONLines(BaseIO): fname = "__test__.json" def setup(self): N = 10**5 ncols = 5 index = date_range("20000101", periods=N, freq="H") timedeltas = timedelta_range(start=1, periods=N, freq="s") datetimes = date_range(start=1, periods=N, freq="s") ints = np.random.randint(100000000, size=N) floats = np.random.randn(N) strings = tm.makeStringIndex(N) self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) self.df_td_int_ts = DataFrame( { "td_1": timedeltas, "td_2": timedeltas, "int_1": ints, "int_2": ints, "ts_1": datetimes, "ts_2": datetimes, }, index=index, ) self.df_int_floats = DataFrame( { "int_1": ints, "int_2": ints, "int_3": ints, "float_1": floats, "float_2": floats, "float_3": floats, }, index=index, ) self.df_int_float_str = DataFrame( { "int_1": ints, "int_2": ints, "float_1": floats, "float_2": floats, "str_1": strings, "str_2": strings, }, index=index, ) def time_floats_with_int_idex_lines(self): self.df.to_json(self.fname, orient="records", lines=True) def time_floats_with_dt_index_lines(self): self.df_date_idx.to_json(self.fname, orient="records", lines=True) def time_delta_int_tstamp_lines(self): self.df_td_int_ts.to_json(self.fname, orient="records", lines=True) def time_float_int_lines(self): self.df_int_floats.to_json(self.fname, orient="records", lines=True) def time_float_int_str_lines(self): self.df_int_float_str.to_json(self.fname, orient="records", lines=True)
def test(dataset, config, log_dir): """Test model, output prediction as json file.""" model_config = config['model'] sess_config = config['session'] answerset = pd.read_csv(os.path.join(config['preprocess_dir'], 'answer_set.txt'), header=None)[0] with tf.Graph().as_default(): model = GRA(model_config) model.build_inference() with tf.Session(config=sess_config) as sess: ckpt_dir = os.path.join(log_dir, 'checkpoint') save_path = tf.train.latest_checkpoint(ckpt_dir) saver = tf.train.Saver() if save_path: print('load checkpoint {}.'.format(save_path)) saver.restore(sess, save_path) else: print('no checkpoint.') exit() # test iterate over examples result = DataFrame(columns=['id', 'answer']) correct = 0 while dataset.has_test_example: vgg, c3d, question, answer, example_id = dataset.get_test_example( ) feed_dict = { model.appear: [vgg], model.motion: [c3d], model.question_encode: [question], } prediction, channel_weight, appear_weight, motion_weight = sess.run( [ model.prediction, model.channel_weight, model.appear_weight, model.motion_weight ], feed_dict=feed_dict) #prediction = prediction[0] channel_weight = channel_weight[0] appear_weight = appear_weight[0] motion_weight = motion_weight[0] result = result.append( { 'id': example_id, 'answer': prediction[1] }, ignore_index=True) # modified-why # if answerset[prediction] in answer: # correct += 1 # print(answer, example_id, channel_weight) # print(appear_weight) # print(motion_weight) result.to_json(os.path.join(log_dir, 'prediction.json'), 'records') # acc = correct / dataset.test_example_total # print('\n[TEST] acc {:.5f}.\n'.format(acc)) dataset.reset_test() return None
def export(df: pd.DataFrame, file_path: str): print(f'dataframe has {len(df)} rows') print(f'started exporting {file_path}: {datetime.now()}') df.to_json(file_path) print(f'created {file_path}: {datetime.now()}')
def render_dataframe(self, df: pd.DataFrame, response: Response) -> str: return df.to_json(orient="records")
def test_read_json_table_timezones_orient(self, idx, vals, recwarn): # GH 35973 df = DataFrame(vals, index=idx) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result)
def test_read_json_table_orient_raises(self, index_nm, vals): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") with tm.assert_raises_regex(NotImplementedError, 'can not yet read '): pd.read_json(out, orient="table")
def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") with pytest.raises(NotImplementedError, match='can not yet read '): pd.read_json(out, orient="table")
def persist_dataset(self, dataset: pd.DataFrame, overwrite: bool): orient = self.dataset_conf.get('orient') dataset.to_json(self.dataset_conf['uri'], orient=orient)
def lines_json_df(): df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) return df.to_json(lines=True, orient="records")
class TestTableOrient(object):
    """Tests for the Table Schema ('table' orient) JSON serialization."""

    def setup_method(self, method):
        # One frame exercising every schema-mapped dtype: int, string,
        # datetime, timedelta, (un)ordered categorical, float, tz-aware.
        self.df = DataFrame(
            {'A': [1, 2, 3, 4],
             'B': ['a', 'b', 'c', 'c'],
             'C': pd.date_range('2016-01-01', freq='d', periods=4),
             'D': pd.timedelta_range('1H', periods=4, freq='T'),
             'E': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'])),
             'F': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
                                           ordered=True)),
             'G': [1., 2., 3, 4.],
             'H': pd.date_range('2016-01-01', freq='d', periods=4,
                                tz='US/Central'),
             },
            index=pd.Index(range(4), name='idx'))

    def test_build_series(self):
        # Series serialization: index + values become two schema fields.
        s = pd.Series([1, 2], name='a')
        s.index.name = 'id'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        assert "pandas_version" in result['schema']
        # pandas_version differs per release; drop before comparing.
        result['schema'].pop('pandas_version')
        fields = [{'name': 'id', 'type': 'integer'},
                  {'name': 'a', 'type': 'integer'}]
        schema = {'fields': fields,
                  'primaryKey': ['id'], }
        expected = OrderedDict([
            ('schema', schema),
            ('data', [OrderedDict([('id', 0), ('a', 1)]),
                      OrderedDict([('id', 1), ('a', 2)])])])
        assert result == expected

    def test_to_json(self):
        # End-to-end check of every column type in self.df, schema + data.
        df = self.df.copy()
        df.index.name = 'idx'
        result = df.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        assert "pandas_version" in result['schema']
        result['schema'].pop('pandas_version')
        fields = [{'name': 'idx', 'type': 'integer'},
                  {'name': 'A', 'type': 'integer'},
                  {'name': 'B', 'type': 'string'},
                  {'name': 'C', 'type': 'datetime'},
                  {'name': 'D', 'type': 'duration'},
                  {'constraints': {'enum': ['a', 'b', 'c']},
                   'name': 'E',
                   'ordered': False,
                   'type': 'any'},
                  {'constraints': {'enum': ['a', 'b', 'c']},
                   'name': 'F',
                   'ordered': True,
                   'type': 'any'},
                  {'name': 'G', 'type': 'number'},
                  {'name': 'H', 'type': 'datetime', 'tz': 'US/Central'}]
        schema = {'fields': fields,
                  'primaryKey': ['idx'], }
        # Timedeltas render as ISO-8601 durations, datetimes as ISO with
        # millisecond precision and Z suffix (tz-aware shifted to UTC).
        data = [
            OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
                         ('C', '2016-01-01T00:00:00.000Z'),
                         ('D', 'P0DT1H0M0S'),
                         ('E', 'a'), ('F', 'a'), ('G', 1.),
                         ('H', '2016-01-01T06:00:00.000Z')]),
            OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
                         ('C', '2016-01-02T00:00:00.000Z'),
                         ('D', 'P0DT1H1M0S'),
                         ('E', 'b'), ('F', 'b'), ('G', 2.),
                         ('H', '2016-01-02T06:00:00.000Z')]),
            OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
                         ('C', '2016-01-03T00:00:00.000Z'),
                         ('D', 'P0DT1H2M0S'),
                         ('E', 'c'), ('F', 'c'), ('G', 3.),
                         ('H', '2016-01-03T06:00:00.000Z')]),
            OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
                         ('C', '2016-01-04T00:00:00.000Z'),
                         ('D', 'P0DT1H3M0S'),
                         ('E', 'c'), ('F', 'c'), ('G', 4.),
                         ('H', '2016-01-04T06:00:00.000Z')]),
        ]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_float_index(self):
        # Float index maps to type 'number' and stays the primary key.
        data = pd.Series(1, index=[1., 2.])
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')
        expected = (
            OrderedDict([('schema', {
                'fields': [{'name': 'index', 'type': 'number'},
                           {'name': 'values', 'type': 'integer'}],
                'primaryKey': ['index']
            }),
                ('data', [OrderedDict([('index', 1.0), ('values', 1)]),
                          OrderedDict([('index', 2.0), ('values', 1)])])])
        )
        assert result == expected

    def test_to_json_period_index(self):
        # PeriodIndex keeps its freq in the schema; values serialize as
        # the period start timestamps.
        idx = pd.period_range('2016', freq='Q-JAN', periods=2)
        data = pd.Series(1, idx)
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')
        fields = [{'freq': 'Q-JAN', 'name': 'index', 'type': 'datetime'},
                  {'name': 'values', 'type': 'integer'}]
        schema = {'fields': fields, 'primaryKey': ['index']}
        data = [OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
                             ('values', 1)]),
                OrderedDict([('index', '2016-02-01T00:00:00.000Z'),
                             ('values', 1)])]
        expected = OrderedDict([('schema', schema), ('data', data)])
        assert result == expected

    def test_to_json_categorical_index(self):
        # CategoricalIndex becomes type 'any' with an enum constraint.
        data = pd.Series(1, pd.CategoricalIndex(['a', 'b']))
        result = data.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')
        expected = (
            OrderedDict([('schema', {
                'fields': [{'name': 'index', 'type': 'any',
                            'constraints': {'enum': ['a', 'b']},
                            'ordered': False},
                           {'name': 'values', 'type': 'integer'}],
                'primaryKey': ['index']
            }),
                ('data', [OrderedDict([('index', 'a'), ('values', 1)]),
                          OrderedDict([('index', 'b'), ('values', 1)])])])
        )
        assert result == expected

    def test_date_format_raises(self):
        # orient='table' mandates ISO dates; epoch must raise.
        with pytest.raises(ValueError):
            self.df.to_json(orient='table', date_format='epoch')

        # others work
        self.df.to_json(orient='table', date_format='iso')
        self.df.to_json(orient='table')

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_int(self, kind):
        data = [1, 2, 3]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "integer"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Series, pd.Index])
    def test_convert_pandas_type_to_json_field_float(self, kind):
        data = [1., 2., 3.]
        result = convert_pandas_type_to_json_field(kind(data, name='name'))
        expected = {"name": "name", "type": "number"}
        assert result == expected

    @pytest.mark.parametrize('dt_args,extra_exp', [({}, {}),
                                                   ({'utc': True},
                                                    {'tz': 'UTC'})])
    @pytest.mark.parametrize('wrapper', [None, pd.Series])
    def test_convert_pandas_type_to_json_field_datetime(
            self, dt_args, extra_exp, wrapper):
        # Naive datetimes map to 'datetime'; utc=True adds a "tz" entry.
        data = [1., 2., 3.]
        data = pd.to_datetime(data, **dt_args)
        if wrapper is pd.Series:
            data = pd.Series(data, name='values')
        result = convert_pandas_type_to_json_field(data)
        expected = {"name": "values", "type": 'datetime'}
        expected.update(extra_exp)
        assert result == expected

    def test_convert_pandas_type_to_json_period_range(self):
        arr = pd.period_range('2016', freq='A-DEC', periods=4)
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "values", "type": 'datetime', "freq": "A-DEC"}
        assert result == expected

    @pytest.mark.parametrize('kind', [pd.Categorical, pd.CategoricalIndex])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_convert_pandas_type_to_json_field_categorical(
            self, kind, ordered):
        data = ['a', 'b', 'c']
        if kind is pd.Categorical:
            arr = pd.Series(kind(data, ordered=ordered), name='cats')
        elif kind is pd.CategoricalIndex:
            arr = kind(data, ordered=ordered, name='cats')
        result = convert_pandas_type_to_json_field(arr)
        expected = {"name": "cats",
                    "type": "any",
                    "constraints": {"enum": data},
                    "ordered": ordered}
        assert result == expected

    @pytest.mark.parametrize(
        "inp,exp",
        [({'type': 'integer'}, 'int64'),
         ({'type': 'number'}, 'float64'),
         ({'type': 'boolean'}, 'bool'),
         ({'type': 'duration'}, 'timedelta64'),
         ({'type': 'datetime'}, 'datetime64[ns]'),
         ({'type': 'datetime', 'tz': 'US/Hawaii'},
          'datetime64[ns, US/Hawaii]'),
         ({'type': 'any'}, 'object'),
         ({'type': 'any',
           'constraints': {'enum': ['a', 'b', 'c']},
           'ordered': False},
          CategoricalDtype(categories=['a', 'b', 'c'], ordered=False)),
         ({'type': 'any',
           'constraints': {'enum': ['a', 'b', 'c']},
           'ordered': True},
          CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)),
         ({'type': 'string'}, 'object')])
    def test_convert_json_field_to_pandas_type(self, inp, exp):
        # Inverse mapping: JSON field spec back to a pandas dtype.
        field = {'name': 'foo'}
        field.update(inp)
        assert convert_json_field_to_pandas_type(field) == exp

    @pytest.mark.parametrize("inp", ["geopoint", "geojson", "fake_type"])
    def test_convert_json_field_to_pandas_type_raises(self, inp):
        # Unknown field types must raise with a descriptive message.
        field = {'type': inp}
        with tm.assert_raises_regex(ValueError,
                                    "Unsupported or invalid field "
                                    "type: {}".format(inp)):
            convert_json_field_to_pandas_type(field)

    def test_categorical(self):
        # Full orient='table' round of a categorical Series.
        s = pd.Series(pd.Categorical(['a', 'b', 'a']))
        s.index.name = 'idx'
        result = s.to_json(orient='table', date_format='iso')
        result = json.loads(result, object_pairs_hook=OrderedDict)
        result['schema'].pop('pandas_version')
        fields = [{'name': 'idx', 'type': 'integer'},
                  {'constraints': {'enum': ['a', 'b']},
                   'name': 'values',
                   'ordered': False,
                   'type': 'any'}]
        expected = OrderedDict([
            ('schema', {'fields': fields,
                        'primaryKey': ['idx']}),
            ('data', [OrderedDict([('idx', 0), ('values', 'a')]),
                      OrderedDict([('idx', 1), ('values', 'b')]),
                      OrderedDict([('idx', 2), ('values', 'a')])])])
        assert result == expected

    @pytest.mark.parametrize(
        'idx,nm,prop',
        [(pd.Index([1]), 'index', 'name'),
         (pd.Index([1], name='myname'), 'myname', 'name'),
         (pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')]),
          ['level_0', 'level_1'], 'names'),
         (pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                     names=['n1', 'n2']),
          ['n1', 'n2'], 'names'),
         (pd.MultiIndex.from_product([('a', 'b'), ('c', 'd')],
                                     names=['n1', None]),
          ['n1', 'level_1'], 'names')])
    def test_set_names_unset(self, idx, nm, prop):
        # Default naming: unnamed (levels of) indexes get 'index' /
        # 'level_N'; explicit names survive untouched.
        data = pd.Series(1, idx)
        result = set_default_names(data)
        assert getattr(result.index, prop) == nm

    def test_timestamp_in_columns(self):
        # Timestamp / Timedelta column labels serialize to epoch-ms / ms.
        df = pd.DataFrame([[1, 2]],
                          columns=[pd.Timestamp('2016'),
                                   pd.Timedelta(10, unit='s')])
        result = df.to_json(orient="table")
        js = json.loads(result)
        assert js['schema']['fields'][1]['name'] == 1451606400000
        assert js['schema']['fields'][2]['name'] == 10000

    @pytest.mark.parametrize('case', [
        pd.Series([1], index=pd.Index([1], name='a'), name='a'),
        pd.DataFrame({"A": [1]}, index=pd.Index([1], name="A")),
        pd.DataFrame({"A": [1]},
                     index=pd.MultiIndex.from_arrays([['a'], [1]],
                                                     names=["A", "a"]))
    ])
    def test_overlapping_names(self, case):
        # Column names overlapping index-level names are ambiguous.
        with tm.assert_raises_regex(ValueError, 'Overlapping'):
            case.to_json(orient='table')
def dive(data: pandas.DataFrame) -> HTML: # Element ID MUST be unique elem_id = _generate_element_id() json_str = data.to_json(orient='records') return HTML(FACETS_DIVE_TEMPLATE.format(elem_id=elem_id, json_str=json_str))