def test_write_data(specimen, path):
    """Read the SDMX message at `path` and convert it to pandas objects.

    When the `specimen` fixture supplies expected data for `path` (i.e.
    ``specimen.expected_data(path)`` is not None), the converted result is
    compared against it. In every case the result must be a pandas Series,
    a pandas DataFrame, or a list.
    """
    converted = sdmx.to_pandas(sdmx.read_sdmx(path))

    reference = specimen.expected_data(path)
    if reference is not None:
        # Show both objects so a mismatch is easy to inspect in the pytest log
        print(reference, converted, sep="\n")
        assert_pd_equal(reference, converted)

    # TODO incomplete
    allowed_types = (pd.Series, pd.DataFrame, list)
    assert isinstance(converted, allowed_types), type(converted)
def test_flat(specimen):
    """Build a DataMessage by hand and compare it with the flat.json specimen.

    The hand-built message and the message read from the "flat.json"
    specimen are both converted with ``sdmx.to_pandas``; the two results
    must be equal.
    """
    # Start from a bare Message and recreate the content from exr-flat.json
    msg = DataMessage()
    msg.header = Header(
        id="62b5f19d-f1c9-495d-8446-a3661ed24753",
        prepared="2012-11-29T08:40:26Z",
        sender=model.Agency(id="ECB"),
    )

    ds = DataSet()

    # Key of the first observation; later keys are derived from it
    key = Key(
        FREQ="D",
        CURRENCY="NZD",
        CURRENCY_DENOM="EUR",
        EXR_TYPE="SP00",
        EXR_SUFFIX="A",
        TIME_PERIOD="2013-01-18",
    )

    # The same attribute mapping is attached to every observation
    status = DataAttribute(id="OBS_STATUS")
    attrs = {"OBS_STATUS": AttributeValue(value_for=status, value="A")}

    ds.obs.append(
        Observation(dimension=key, value=1.5931, attached_attribute=attrs)
    )

    # Each entry: (dimension values changed relative to the previous key,
    # observation value)
    later_observations = (
        ({"TIME_PERIOD": "2013-01-21"}, 1.5925),
        ({"CURRENCY": "RUB", "TIME_PERIOD": "2013-01-18"}, 40.3426),
        ({"TIME_PERIOD": "2013-01-21"}, 40.3000),
    )
    for changes, value in later_observations:
        key = key.copy(**changes)
        ds.obs.append(
            Observation(dimension=key, value=value, attached_attribute=attrs)
        )

    msg.data.append(ds)

    # Convert the hand-built message to pandas
    built = sdmx.to_pandas(msg)

    # Convert the reference specimen the same way
    with specimen("flat.json") as f:
        ref = sdmx.read_sdmx(f)
    loaded = sdmx.to_pandas(ref)

    assert_pd_equal(built, loaded)
def test_doc_usage_data():
    """Exercise the data-query code examples from usage.rst."""
    ecb = Client("ECB")

    period = {"startPeriod": "2016", "endPeriod": "2016-12-31"}
    data_response = ecb.data(
        resource_id="EXR",
        key={"CURRENCY": "USD+JPY"},
        params=period,
    )

    # Alternative, left disabled: the same query with a string key, which
    # does not trigger requests for validation:
    #
    #   ecb.data(resource_id='EXR', key='.JPY+USD...',
    #            params={'startPeriod': '2016', 'endPeriod': '2016-12-31'})

    data = data_response.data[0]
    assert type(data) is GenericDataSet

    # This message doesn't explicitly specify the remaining dimensions; unless
    # they are inferred from the SeriesKeys, the DimensionDescriptor is not
    # complete. Hence these checks stay disabled:
    #
    #   assert data.structured_by.dimensions[-1] == 'TIME_PERIOD'
    #   data.dim_at_obs

    series_keys = list(data.series)
    assert len(series_keys) == 16

    # Bare indexing expression: only checks that a sixth series key exists
    series_keys[5]

    frequencies = {sk.FREQ.value for sk in data.series}
    assert sorted(frequencies) == "A D H M Q".split()

    # Keep only the daily-frequency observations
    converted = sdmx.to_pandas(data)
    daily = converted.xs("D", level="FREQ")
    assert len(daily) == 514

    expected_tail = np.array([1.0446, 1.0445, 1.0401, 1.0453, 1.0541])
    assert_pd_equal(daily.tail().values, expected_tail)
def test_write_dataflow(specimen):
    """Convert the INSEE dataflow specimen to pandas and check its content.

    Checks the number of dataflows and the IDs and names of the first five.
    """
    # Read the INSEE dataflow definition
    with specimen("INSEE/dataflow") as f:
        msg = sdmx.read_sdmx(f)

    # Convert to pandas, keeping only the dataflows
    result = sdmx.to_pandas(msg, include="dataflow")
    dataflows = result["dataflow"]

    # Number of Dataflows described in the file
    assert len(dataflows) == 663

    # IDs and names of the first Dataflows; four of them share this prefix
    mbop = "Monthly Balance of Payments - "
    first_names = {
        "ACT-TRIM-ANC": "Activity by sex and age - Quarterly series",
        "BPM6-CCAPITAL": "{}Capital account".format(mbop),
        "BPM6-CFINANCIER": "{}Financial account".format(mbop),
        "BPM6-CTRANSACTION": "{}Current transactions account".format(mbop),
        "BPM6-TOTAL": "{}Overall total and main headings".format(mbop),
    }
    expected = pd.Series(first_names)

    assert_pd_equal(dataflows.head(), expected)
def test_doc_index1():
    """Exercise the first code example from index.rst."""
    estat = Client("ESTAT")
    flow_msg = estat.dataflow("une_rt_a")
    flow = flow_msg.dataflow.une_rt_a

    with pytest.raises(TypeError):
        # Calling the structure presumes the DataStructureDefinition instance
        # can conduct a network request for its own content; this test
        # expects that call to raise TypeError instead.
        structure_msg = flow.structure(request=True, target_only=False)

    # Same intended effect, requested through the client by structure ID
    structure_msg = estat.get("datastructure", flow.structure.id)

    # Even better: Client.get(…) should examine the class and ID of the object
    # structure = estat.get(flow_response.dataflow.une_rt_a.structure)

    # Show some codelists
    converted = sdmx.to_pandas(structure_msg)

    # Codelists are converted to a DictLike
    assert isinstance(converted.codelist, DictLike)

    first_geo_codes = {
        "AT": "Austria",
        "BE": "Belgium",
        "BG": "Bulgaria",
        "CH": "Switzerland",
        "CY": "Cyprus",
    }
    expected = pd.Series(first_geo_codes, name="GEO").rename_axis("CL_GEO")

    # Compare the first entries of the sorted CL_GEO codelist
    geo = converted.codelist["CL_GEO"].sort_index()
    assert_pd_equal(geo.head(), expected)
def test_doc_usage_structure():
    """Exercise the structure-query code examples from walkthrough.rst."""
    ecb = Client("ECB")

    # A second client configured with a proxy; check its session attributes
    ecb_via_proxy = Client("ECB", proxies={"http": "http://1.2.3.4:5678"})
    expected_session = {
        "proxies": {"http": "http://1.2.3.4:5678"},
        "stream": False,
        "timeout": 30.1,
    }
    for attr_name, attr_value in expected_session.items():
        assert getattr(ecb_via_proxy.session, attr_name) == attr_value

    msg1 = ecb.categoryscheme(provider="all")

    expected_url = (
        "https://sdw-wsrest.ecb.europa.eu/service/categoryscheme/all/latest"
        "?references=parentsandsiblings"
    )
    assert msg1.response.url == expected_url

    # Check specific headers
    headers = msg1.response.headers
    expected_content_type = "application/vnd.sdmx.structure+xml; " "version=2.1"
    assert headers["Content-Type"] == expected_content_type
    assert all(k in headers for k in ["Connection", "Date", "Server"])

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    #
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    # Names of the first dataflows in the message
    dfs = sdmx.to_pandas(msg1.dataflow).head()
    first_flow_names = {
        "AME": "AMECO",
        "BKN": "Banknotes statistics",
        "BLS": "Bank Lending Survey Statistics",
        "BOP": ("Euro Area Balance of Payments and International Investment "
                "Position Statistics"),
        "BSI": "Balance Sheet Items",
    }
    assert_pd_equal(dfs, pd.Series(first_flow_names))

    # The result is not used; only the request itself is exercised
    flows = ecb.dataflow()  # noqa: F841

    dsd_id = msg1.dataflow.EXR.structure.id
    assert dsd_id == "ECB_EXR1"

    # Fetch the data structure definition together with everything it
    # references
    msg2 = ecb.datastructure(resource_id=dsd_id, params={"references": "all"})
    dsd = msg2.structure[dsd_id]

    expected_dimensions = [
        "FREQ",
        "CURRENCY",
        "CURRENCY_DENOM",
        "EXR_TYPE",
        "EXR_SUFFIX",
        "TIME_PERIOD",
    ]
    assert sdmx.to_pandas(dsd.dimensions) == expected_dimensions

    # First entries of the currency codelist, sorted by code
    cl = sdmx.to_pandas(msg2.codelist["CL_CURRENCY"]).sort_index()
    first_currencies = {
        "ADF": "Andorran Franc (1-1 peg to the French franc)",
        "ADP": "Andorran Peseta (1-1 peg to the Spanish peseta)",
        "AED": "United Arab Emirates dirham",
        "AFA": "Afghanistan afghani (old)",
        "AFN": "Afghanistan, Afghanis",
    }
    expected = pd.Series(
        first_currencies, name="Currency code list"
    ).rename_axis("CL_CURRENCY")
    assert_pd_equal(cl.head(), expected)