Example #1
def get_indexes():
    """
    Returns list of supported indexes
    https://eodhistoricaldata.com/knowledgebase/list-supported-indexes/
    """
    data = """ID	Exchange Code	Code	Index Name
1	INDX	GSPC	S&P 500
2	INDX	GDAXI	DAX Index
3	INDX	SSEC	Shanghai Composite Index (China)
4	INDX	MERV	MERVAL Index (Argentina)
5	INDX	FTSE	FTSE 100 Index (UK)
6	INDX	AORD	All Ordinaries Index (Australia)
7	INDX	BSESN	BSE 30 Sensitivity Index (SENSEX)
8	INDX	VIX	VIX S&P 500 Volatility Index
9	INDX	HSI	Hang Seng Index (Hong Kong)
10	INDX	GSPTSE	S&P TSX Composite Index (Canada)
11	INDX	FCHI	CAC 40 Index
12	INDX	TA100	Tel Aviv 100 Index (Israel
13	INDX	CYC	Morgan Stanley Cyclical Index
14	INDX	IIX	Interactive Week Internet Index
15	INDX	CMR	Morgan Stanley Consumer Index
16	INDX	GOX	CBOE Gold Inde
17	INDX	RTS_RS	RTSI Index
18	INDX	GD_AT	Athens Composite Inde
19	INDX	FTSEMIB_MI	Untitled Dataset 2015-07-13 20:00:12
20	INDX	WILREIT	Wilshire US REIT Inde
21	INDX	W5KMCG	Wilshire US Mid Cap Growt
22	INDX	IBEX	IBEX 35 Index
23	INDX	W5KLCV	Wilshire US Large Cap Valu
24	INDX	SSMI	Swiss Market Index
25	INDX	OEX	S&P 100 Inde
26	INDX	RUI	Russell 1000 Inde
27	INDX	XAX	NYSE AMEX Composite Inde
28	INDX	WILRESI	Wilshire US Real Estate Securities Inde
29	INDX	NZ50	NZSE 50 (New Zealand)
30	INDX	UTY	PHLX Utility Sector Inde
31	INDX	CSE	Colombo All Shares Index (Sri Lanka
32	INDX	XOI	NYSE AMEX Oil Inde
33	INDX	OSX	PHLX Oil Service Sector Inde
34	INDX	XAL	NYSE AMEX Airline Inde
35	INDX	W5KSCG	Wilshire US Small Cap Growt
36	INDX	TWII	Taiwan Weighted Inde
37	INDX	ATX	ATX Index (Austria
38	INDX	NWX	NYSE ARCA Networking Inde
39	INDX	W5KSCV	Wilshire US Small Cap Valu
40	INDX	XAU	PHLX Gold/Silver Sector Inde
41	INDX	W5KMCV	Wilshire US Mid Cap Valu
42	INDX	WGREIT	Wilshire Global REIT Inde
43	INDX	SML	S&P Small-Cap 600 Inde
44	INDX	RUT	Russell 2000 Inde
45	INDX	JKSE	Jakarta Composite Index (Indonesia
46	INDX	BFX	Euronext BEL-20 Index (Belgium)
47	INDX	XBD	NYSE AMEX Securities Broker/Dealer Inde
48	INDX	RUA	Russell 3000 Inde
49	INDX	XII	NYSE ARCA Institutional Inde
50	INDX	IETP	ISEQ 20 Price Index (Ireland
51	INDX	DRG	NYSE AMEX Pharmaceutical Inde
52	INDX	W5000	Wilshire 5000 Total Market Inde
53	INDX	HGX	PHLX Housing Sector Inde
54	INDX	MXX	IPC Index (Mexico)
55	INDX	W5KLCG	Wilshire US Large Cap Growt
56	INDX	STI	Straits Times Index
57	INDX	KS11	KOSPI Composite Index
58	INDX	AEX	AEX Amsterdam Index
59	INDX	NYA	NYSE Composite Index
60	INDX	XMI	NYSE ARCA Major Market Inde
61	INDX	BTK	NYSE AMEX Biotechnology Inde
62	INDX	EPX	NASDAQ SIG Oil Exploration and Production Inde
63	INDX	MID	S&P Mid-Cap 400 Inde
64	INDX	HUI	NYSE Arca Gold Bugs Inde
65	INDX	SOX	PHLX Semiconductor Inde
66	INDX	HCX	CBOE S&P Healthcare Index
67	INDX	XCI	NYSE AMEX Computer Technology Inde
68	INDX	XNG	NYSE AMEX Natural Gas Inde
69	INDX	RMZ	MSCI US REIT Inde
70	INDX	WGRESI	Wilshire Global Real Estate Securities Inde
71	INDX	N225	Nikkei 225 Index (Japan
72	INDX	VDAX	Deutsche Boerse VDAX Volatility Inde
73	INDX	MXY	NYSE ARCA Mexico Inde
74	INDX	OSEAX	Oslo Exchange All Share Index (Norway)
75	INDX	TYX	Treasury Yield 30 Years Inde
76	INDX	DJI	Dow Jones Industrial Average
77	INDX	AXPJ	S&P/ASX 200 Australia REIT Inde
78	INDX	PSI20	PSI 20 Stock Index (Portugal
79	INDX	IRX	13-week Treasury Bill Inde
80	INDX	FVX	Treasury Yield 5 Years Inde
81	INDX	NYI	NYSE International 100 Index
82	INDX	AXJO	S&P/ASX 200 Index (Australia
83	INDX	512NTR	S&P 500 GBP Hdg (Net TR) (^512NTR)
84	INDX	CTES_VI	Czech Trading Inde
85	INDX	NSEI	S&P/CNX Nifty Index (India
86	INDX	NYY	NYSE TMT Inde
87	INDX	CCSI	EGX 70 Price Index (Egypt
88	INDX	SPSUPX	S&P Composite 1500 Inde
89	INDX	BVSP	Bovespa Index (Brazil)
90	INDX	ISEQ	ISEQ Overall Price Index (Ireland
91	INDX	JPN	NYSE AMEX Japan Inde
92	INDX	NYL	NYSE World Leaders Inde
93	INDX	TNX	CBOE Interest Rate 10-Year T-Note Inde
94	INDX	NY	NYSE US 100 Inde
95	INDX	SPLV	PowerShares S&P 500 Low Volatil
96	INDX	OMXSPI	Stockholm General Index (Sweden)
97	INDX	GVZ	CBOE Gold Volatility Inde
98	INDX	SPY	SPDR S&P 500 (SPY
99	INDX	IEQR_IR	ISEQ General Total Return Index (Ireland
100	INDX	OMXC20_CO	OMX Copenhagen 20 Index
101	INDX	DJUSFN	^DJUSFN: Dow Jones U.S. Financials Inde
102	INDX	DJASD	^DJASD: Dow Jones Asia Select Dividen
103	INDX	IMUS	^IMUS: Dow Jones Islamic Market U.S.
104	INDX	W1SGI	^W1SGI: Dow Jones Sustainability Worl
105	INDX	DJT	^DJT: Dow Jones Transportation Averag
106	INDX	DJUSM	^DJUSM: Dow Jones U.S. Mid-Cap Inde
107	INDX	W1XGA	^W1XGA: Dow Jones Sustainability Worl
108	INDX	DWC	^DWC: DJUS Market Index (full-cap
109	INDX	DJC	^DJC: Dow Jones-UBS Commodity Inde
110	INDX	IMXL	^IMXL: Dow Jones Islamic Market Titan
111	INDX	XLHK	^XLHK: Dow Jones Hong Kong Titans 30
112	INDX	DJTMDI	^DJTMDI: Dow Jones Media Titans 30 Inde
113	INDX	DJU	^DJU: Dow Jones Utility Averag
114	INDX	DWCOGS	^DWCOGS: Dow Jones U.S. Oil & Gas Tota
115	INDX	DJUSST	^DJUSST: Dow Jones U.S. Iron & Steel In
116	INDX	PSE	^PSE: NYSE Arca Tech 100 Index - New York Stock Exchange
117	INDX	DWCF	^DWCF: Dow Jones U.S. Total Stock Mar
118	INDX	W1SUS	^W1SUS: Dow Jones Sustainability Worl
119	INDX	DJASDT	^DJASDT: Dow Jones Asia Select Dividen
120	INDX	RCI	^RCI: Dow Jones Composite All REIT I
121	INDX	DJUSL	^DJUSL: Dow Jones U.S. Large-Cap Inde
122	INDX	P1DOW	^P1DOW: Dow Jones Asia/Pacific Inde
123	INDX	DJAT	^DJAT: Dow Jones Asian Titans 50 Inde
124	INDX	DJUS	^DJUS: Dow Jones U.S. Inde
125	INDX	DWMI	^DWMI: Dow Jones U.S. Micro-Cap Tota
126	INDX	DJUSS	^DJUSS: Dow Jones U.S. Small-Cap Inde
127	INDX	OMX	OMXS 30 Index (Sweden
128	INDX	STOXX50E	EuroStoxx 50 Inde
129	INDX	FTAS	FTSE All-Share Index (UK)
130	INDX	WIHUN_L	FTSE HUngary Index
131	INDX	WITUR_L	FTSE Turkey Index
132	INDX	WITHA_L	FTSE Thailand Index
133	INDX	WIPOL_L	FTSE Poland Index
134	INDX	WICZH_L	FTSE Czech Republic Index
135	INDX	OMXC20	OMX Copenhagen 20 Inde
136	INDX	IXE	^IXE: Select Sector Spdr-energy Inde
137	INDX	IXIC	NASDAQ Composite
138	INDX	SPEUP	S&P EUROPE 350"""
    df = pd.read_csv(StringIO(data), sep="\t")
    df = df.set_index("ID")
    return (df)
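
A minimal usage sketch of the function above, assuming it lives in a module where pandas is imported as pd and StringIO (io.StringIO works) is in scope; it looks up a single index by its ticker code:

indexes = get_indexes()
# The frame is indexed by the ID column, so filter on the Code column.
ftse = indexes[indexes["Code"] == "FTSE"]
print(ftse[["Exchange Code", "Index Name"]])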
Example #2
    def test_header_multi_index(self):
        expected = tm.makeCustomDataframe(5,
                                          3,
                                          r_idx_nlevels=2,
                                          c_idx_nlevels=4)

        data = """\
C0,,C_l0_g0,C_l0_g1,C_l0_g2

C1,,C_l1_g0,C_l1_g1,C_l1_g2
C2,,C_l2_g0,C_l2_g1,C_l2_g2
C3,,C_l3_g0,C_l3_g1,C_l3_g2
R0,R1,,,
R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2
R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2
R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2
R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2
R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2
"""

        df = self.read_csv(StringIO(data),
                           header=[0, 1, 2, 3],
                           index_col=[0, 1])
        tm.assert_frame_equal(df, expected)

        # skipping lines in the header
        df = self.read_csv(StringIO(data),
                           header=[0, 1, 2, 3],
                           index_col=[0, 1])
        tm.assert_frame_equal(df, expected)

        # INVALID OPTIONS

        # no as_recarray
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            pytest.raises(ValueError,
                          self.read_csv,
                          StringIO(data),
                          header=[0, 1, 2, 3],
                          index_col=[0, 1],
                          as_recarray=True)

        # names
        pytest.raises(ValueError,
                      self.read_csv,
                      StringIO(data),
                      header=[0, 1, 2, 3],
                      index_col=[0, 1],
                      names=['foo', 'bar'])

        # usecols
        pytest.raises(ValueError,
                      self.read_csv,
                      StringIO(data),
                      header=[0, 1, 2, 3],
                      index_col=[0, 1],
                      usecols=['foo', 'bar'])

        # non-numeric index_col
        pytest.raises(ValueError,
                      self.read_csv,
                      StringIO(data),
                      header=[0, 1, 2, 3],
                      index_col=['foo', 'bar'])
Example #3
 def _test(text, **kwargs):
     nice_text = text.replace('\r', '\r\n')
     result = TextReader(StringIO(text), **kwargs).read()
     expected = TextReader(StringIO(nice_text), **kwargs).read()
     assert_array_dicts_equal(result, expected)
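
A sketch of how this helper is typically driven, assuming TextReader is pandas' low-level C parser (pandas._libs.parsers.TextReader); it asserts that bare carriage-return line endings parse the same as CRLF:

_test('a,b,c\r1,2,3\r4,5,6\r7,8,9', delimiter=',')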
Example #4
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `filepath_or_buffer` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like objects that were opened in this function.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            mode = "wb" if mode == "w" else mode
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
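
A hedged usage sketch of the helper above; _get_handle is a pandas-internal function and the file name below is hypothetical. The contract is to read from the returned file-like object and then close every handle the call opened:

f, handles = _get_handle('data.csv.gz', 'r', encoding='utf-8',
                         compression='gzip', is_text=True)
try:
    contents = f.read()   # decompressed, decoded text
finally:
    for h in handles:     # close everything _get_handle opened
        h.close()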
Example #5
    def test_read_with_bad_header(self):
        errmsg = r"but only \d+ lines in file"

        with tm.assert_raises_regex(ValueError, errmsg):
            s = StringIO(',,')
            self.read_csv(s, header=[10])
Example #6
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)
    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like objects that were opened in this function.
    """

    handles = list()
    f = path_or_buf
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path_or_buf)
            zip_names = zip_file.namelist()
            if len(zip_names) == 1:
                f = zip_file.open(zip_names.pop())
            elif len(zip_names) == 0:
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(path_or_buf))
            else:
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP: {}'
                                 .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and is_text and\
            (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Example #7
def bdi(itype='D', retry_count=3, pause=0.001):
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(ct.BDI_URL %
                              (ct.P_TYPE['http'], ct.DOMAINS['v500']))
            lines = urlopen(request, timeout=10).read()
            if len(lines) < 100:  #no data
                return None
        except Exception as e:
            print(e)
        else:
            linestr = lines.decode('utf-8') if ct.PY3 else lines
            if itype == 'D':  # Daily
                reg = re.compile(r'\"chart_data\",\"(.*?)\"\);')
                lines = reg.findall(linestr)
                lines = lines[0]
                lines = lines.replace('chart', 'table').\
                        replace('</series><graphs>', '').\
                        replace('</graphs>', '').\
                        replace('series', 'tr').\
                        replace('value', 'td').\
                        replace('graph', 'tr').\
                        replace('graphs', 'td')
                df = pd.read_html(lines, encoding='utf8')[0]
                df = df.T
                df.columns = ['date', 'index']
                df['date'] = df['date'].map(lambda x: x.replace(u'年', '-')).\
                    map(lambda x: x.replace(u'月', '-')).\
                    map(lambda x: x.replace(u'日', ''))
                df['date'] = pd.to_datetime(df['date'])
                df['index'] = df['index'].astype(float)
                df = df.sort_values('date',
                                    ascending=False).reset_index(drop=True)
                df['change'] = df['index'].pct_change(-1)
                df['change'] = df['change'] * 100
                df['change'] = df['change'].map(lambda x: '%.2f' % x)
                df['change'] = df['change'].astype(float)
                return df
            else:  #Weekly
                html = lxml.html.parse(StringIO(linestr))
                res = html.xpath(
                    "//table[@class=\"style33\"]/tr/td/table[last()]")
                if ct.PY3:
                    sarr = [
                        etree.tostring(node).decode('utf-8') for node in res
                    ]
                else:
                    sarr = [etree.tostring(node) for node in res]
                sarr = ''.join(sarr)
                sarr = '<table>%s</table>' % sarr
                df = pd.read_html(sarr)[0][1:]
                df.columns = ['month', 'index']
                df['month'] = df['month'].map(lambda x: x.replace(u'年', '-')).\
                    map(lambda x: x.replace(u'月', ''))
                df['month'] = pd.to_datetime(df['month'])
                df['month'] = df['month'].map(lambda x: str(x).replace('-', '')).\
                              map(lambda x: x[:6])
                df['index'] = df['index'].astype(float)
                df['change'] = df['index'].pct_change(-1)
                df['change'] = df['change'].map(lambda x: '%.2f' % x)
                df['change'] = df['change'].astype(float)
                return df
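
A minimal usage sketch, assuming the module-level imports this snippet relies on (time, re, pandas as pd, lxml, and the project's ct constants plus Request/urlopen) are available:

daily = bdi('D')     # daily Baltic Dry Index history
weekly = bdi('W')    # any other itype falls through to the weekly table
if daily is not None:
    print(daily.head())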
Example #8
    def test_pass_offset_warn(self):
        buf = StringIO()

        sys.stderr = buf
        DatetimeIndex(start='1/1/2000', periods=10, offset='H')
        sys.stderr = sys.__stderr__
Example #9
    def test_value_counts_datetime64(self):
        klasses = [Index, Series]
        for klass in klasses:
            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00X'
            ])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np_array_datetime64_compat([
                '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                '2008-09-09 00:00:00Z'
            ],
                                                  dtype='datetime64[ns]')
            if isinstance(s, Index):
                tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
            else:
                tm.assert_numpy_array_equal(s.unique(), expected)

            assert s.nunique() == 3

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT])

            result = s.value_counts()
            assert result.index.dtype == 'datetime64[ns]'
            tm.assert_series_equal(result, expected_s)

            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            assert unique.dtype == 'datetime64[ns]'

            # numpy_array_equal cannot compare pd.NaT
            if isinstance(s, Index):
                exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
                tm.assert_index_equal(unique, exp_idx)
            else:
                tm.assert_numpy_array_equal(unique[:3], expected)
                assert pd.isna(unique[3])

            assert s.nunique() == 3
            assert s.nunique(dropna=False) == 4

            # timedelta64[ns]
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'], name='dt')
            if isinstance(td, Index):
                tm.assert_index_equal(td.unique(), expected)
            else:
                tm.assert_numpy_array_equal(td.unique(), expected.values)

            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
Example #10
 def test_get_filepath_or_buffer_with_buffer(self):
     input_buffer = StringIO()
     filepath_or_buffer, _, _ = common.get_filepath_or_buffer(input_buffer)
     assert filepath_or_buffer == input_buffer
Example #11
import dash
import dash_html_components as html
import pandas as pd
from pandas.compat import StringIO
from webviz_components import (Layout, Map, Page)
from os import path

path_to_data = path.join(path.dirname(__file__), "reek.csv")

cells = pd.read_csv(
    StringIO("""
i,j,k,x0,y0,x1,y1,x2,y2,x3,y3,value,FLOWI+,FLOWJ+
0,0,0,0,0,1,0,1,1,0,1,1,0.005,0.0025
1,0,0,1,0,2,0,2,1,1,1,0,0.002,0.0045
0,1,0,0,1,1,1,1,2,0,2,4,0.001,0.0025
1,1,0,1,1,2,1,2,2,1,2,2,0.004,0.0035
"""))

app = dash.Dash(__name__, external_stylesheets=['assets/theme.css'])
server = app.server

app.css.config.serve_locally = True
app.scripts.config.serve_locally = True

app.layout = Layout(children=[
    Page(id='reek',
         title='Map with Reek data',
         children=[
             html.H1(children='Reek'),
             html.P(children='''
                    This is an example of Map using Reek data
Example #12
 def _lexer_split_from_str(dt_str):
     # The StringIO(str(_)) is for dateutil 2.2 compatibility
     return _timelex.split(StringIO(str(dt_str)))
Example #13
def read_bucket_csv(gcs_path):
    file_stream = file_io.FileIO(gcs_path, mode='r')
    data = pd.read_csv(StringIO(file_stream.read()),
                       delimiter=';',
                       header=None)
    return data
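
A hypothetical call to the helper above, assuming tensorflow's file_io module is imported and the bucket path (made up here) exists:

df = read_bucket_csv('gs://my-bucket/exports/data.csv')  # hypothetical path
print(df.shape)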
Example #14
 def test_get_filepath_or_buffer_with_buffer(self):
     input_buffer = StringIO()
     filepath_or_buffer, _, _, should_close = common.get_filepath_or_buffer(
         input_buffer)
     assert filepath_or_buffer == input_buffer
     assert not should_close
Example #15
def test_readjson_chunksize_requires_lines(lines_json_df):
    msg = "chunksize can only be passed if lines=True"
    with pytest.raises(ValueError, match=msg):
        pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2)
Example #16
    def test_converters_type_must_be_dict(self):
        data = """index,A,B,C,D
foo,2,3,4,5
"""
        with tm.assert_raises_regex(TypeError, 'Type converters.+'):
            self.read_csv(StringIO(data), converters=0)
Example #17
    ###
    blast = subprocess.call([yyyyy,'-db',zzzzz,'-query', identifier,'-evalue',uuuuu,'-outfmt',
                        "6 qacc sacc qlen slen length score bitscore evalue pident nident mismatch positive gaps gapopen stitle",
                        '-max_target_seqs','10','-max_hsps','1','-out', identifier+'.txt'])
    ###
    out = open(identifier+'.txt','r')
    out = out.read()
    n += 1
    if len(out) == 0:
        out_bytes = len(out)
        sin_resultados.append('Secuencia '+str(n)+' | '+str(identifier))
        os.remove(identifier)
        os.remove(identifier+'.txt')
        continue
    else:
        out_text.append(pd.read_csv(StringIO(out),sep='\t',header=None))
        out_bytes = len(out)
        ###
        dif = max([i for i in range(1, out_bytes+100,100)]) - out_bytes
        total_length = int(out_bytes)
        dld += out_bytes
        dl = 0
        for dat in [i for i in range(1, out_bytes+100,100)]:
            tim = datetime.now() - xx
            dl = dat - dif
            done = int(30 * dl / total_length)
            sys.stdout.write('\rSecuencia '+str(n)+' | %s | %s' % ('{}'.format(tim).split('.')[0], identifier)) 
            sys.stdout.flush()
    os.remove(identifier)
    os.remove(identifier+'.txt')
###
Example #18
    def test_converters_corner_with_nas(self):
        # skip aberration observed on Win64 Python 3.2.2
        if hash(np.int64(-1)) != -2:
            pytest.skip("skipping because of windows hash on Python" " 3.2.2")

        data = """id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2"""

        def convert_days(x):
            x = x.strip()
            if not x:
                return np.nan

            is_plus = x.endswith('+')
            if is_plus:
                x = int(x[:-1]) + 1
            else:
                x = int(x)
            return x

        def convert_days_sentinel(x):
            x = x.strip()
            if not x:
                return np.nan

            is_plus = x.endswith('+')
            if is_plus:
                x = int(x[:-1]) + 1
            else:
                x = int(x)
            return x

        def convert_score(x):
            x = x.strip()
            if not x:
                return np.nan
            if x.find('-') > 0:
                valmin, valmax = lmap(int, x.split('-'))
                val = 0.5 * (valmin + valmax)
            else:
                val = float(x)

            return val

        fh = StringIO(data)
        result = self.read_csv(fh,
                               converters={
                                   'score': convert_score,
                                   'days': convert_days
                               },
                               na_values=['', None])
        assert pd.isna(result['days'][1])

        fh = StringIO(data)
        result2 = self.read_csv(fh,
                                converters={
                                    'score': convert_score,
                                    'days': convert_days_sentinel
                                },
                                na_values=['', None])
        tm.assert_frame_equal(result, result2)
Example #19
    def test_value_counts_inferred(self):
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)
            expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
            self.assertEqual(s.nunique(), 4)
            # don't sort; have to sort after the fact since not sorting is platform-dependent
            hist = s.value_counts(sort=False).sort_values()
            expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
            tm.assert_series_equal(hist, expected)

            # sort ascending
            hist = s.value_counts(ascending=True)
            expected = Series([1, 2, 3, 4], index=list('cdab'))
            tm.assert_series_equal(hist, expected)

            # relative histogram.
            hist = s.value_counts(normalize=True)
            expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
            tm.assert_series_equal(hist, expected)

            # bins
            self.assertRaises(TypeError,
                              lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({0.998: 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({0.998: 1.0})
            tm.assert_series_equal(res1n, exp1n)

            self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
            self.assertEqual(s1.nunique(), 3)

            res4 = s1.value_counts(bins=4)
            exp4 = Series({
                0.998: 2,
                1.5: 1,
                2.0: 0,
                2.5: 1
            },
                          index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4, exp4)
            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series({
                0.998: 0.5,
                1.5: 0.25,
                2.0: 0.0,
                2.5: 0.25
            },
                           index=[0.998, 2.5, 1.5, 2.0])
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            self.assert_numpy_array_equal(
                s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
            self.assertEqual(s.nunique(), 3)

            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(), expected)
            self.assert_numpy_array_equal(s.unique(), np.array([]))
            self.assertEqual(s.nunique(), 0)

            # GH 3002, datetime64[ns]
            # don't test names though
            txt = "\n".join([
                'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
                'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
            ])
            f = StringIO(txt)
            df = pd.read_fwf(f,
                             widths=[6, 8, 3],
                             names=["person_id", "dt", "food"],
                             parse_dates=["dt"])

            s = klass(df['dt'].copy())
            s.name = None

            idx = pd.to_datetime([
                '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
                '2009-01-01 00:00:00X'
            ])
            expected_s = Series([3, 2, 1], index=idx)
            tm.assert_series_equal(s.value_counts(), expected_s)

            expected = np.array([
                '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
                '2008-09-09 00:00:00Z'
            ],
                                dtype='datetime64[ns]')
            if isinstance(s, DatetimeIndex):
                expected = DatetimeIndex(expected)
                self.assertTrue(s.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(s.unique(), expected)

            self.assertEqual(s.nunique(), 3)

            # with NaT
            s = df['dt'].copy()
            s = klass([v for v in s.values] + [pd.NaT])

            result = s.value_counts()
            self.assertEqual(result.index.dtype, 'datetime64[ns]')
            tm.assert_series_equal(result, expected_s)

            result = s.value_counts(dropna=False)
            expected_s[pd.NaT] = 1
            tm.assert_series_equal(result, expected_s)

            unique = s.unique()
            self.assertEqual(unique.dtype, 'datetime64[ns]')

            # numpy_array_equal cannot compare pd.NaT
            self.assert_numpy_array_equal(unique[:3], expected)
            self.assertTrue(unique[3] is pd.NaT
                            or unique[3].astype('int64') == pd.tslib.iNaT)

            self.assertEqual(s.nunique(), 3)
            self.assertEqual(s.nunique(dropna=False), 4)

            # timedelta64[ns]
            td = df.dt - df.dt + timedelta(1)
            td = klass(td, name='dt')

            result = td.value_counts()
            expected_s = Series([6], index=[Timedelta('1day')], name='dt')
            tm.assert_series_equal(result, expected_s)

            expected = TimedeltaIndex(['1 days'])
            if isinstance(td, TimedeltaIndex):
                self.assertTrue(td.unique().equals(expected))
            else:
                self.assert_numpy_array_equal(td.unique(), expected.values)

            td2 = timedelta(1) + (df.dt - df.dt)
            td2 = klass(td2, name='dt')
            result2 = td2.value_counts()
            tm.assert_series_equal(result2, expected_s)
Example #20
 def test_get_filepath_or_buffer_with_buffer(self):
     input_buffer = StringIO()
     filepath_or_buffer, _, _ = common.get_filepath_or_buffer(input_buffer)
     self.assertEqual(filepath_or_buffer, input_buffer)
Example #21
"""
Alice Lepissier
[email protected]
July 2018
Risk-based IFF
Scrape the BIS website for data
"""

import requests, os
import pandas as pd
from pandas.compat import StringIO

os.chdir(
    'C:/cloudstorage/googledrive/Projects/Tax Justice Network/Consultancy 2 - summer 18/Risk-based IFF'
)

gets = pd.read_csv('Data/LBS/LBS_GET.csv', header=None)
urls = gets[0].tolist()
df = pd.DataFrame()

for url in urls:
    req = requests.get(url)
    req_string = req.text
    data = pd.read_csv(StringIO(req_string), sep=',', skiprows=6)
    df = df.append(data, sort=False)
df.to_csv('Data/LBS/LBS.csv')
Example #22
def lhb_detail(code=None, date=None, retry_count=3, pause=0.001):
    """
    获取个股龙虎榜明细数据
    Parameters
    --------
        code:str
                股票代码
        date:str
                日期
        retry_count : int, 默认 3
                     如遇网络等问题重复执行的次数
        pause : int, 默认 0
                    重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
    Return
    ------
    tuple(DataFrame1,DateFrame2)
    DataFrame
        code:股票代码
        date:日期
        broker:营业部名称
        count:上榜次数
        probability:买入后上涨概率
        buy:买入金额(万)
        buy_prop:买入额占总成交比
        sell:卖出金额(万)
        sell_prop:卖出额占总成交比
        net:净额(万)
        buysellflag:买入卖出标记
    """
    df1 = None
    df2 = None
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(rv.LHB_DETAIL % (date, code))
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//tbody")
            if ct.PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr.pop(0)  # discard the useless first element
            sarr = ''.join(sarr)
            sarr = sarr.replace('tbody', 'table')  # normalize the markup
            list_sarr = pd.read_html(sarr)  # two <table> elements, so this yields two dataframes

            for i in range(len(list_sarr)):
                df = list_sarr[i]

                # clean up the dataframe
                # drop the trailing summary row of the sell table
                if df.iloc[:, 0].size == 6:  # keep only 5 data rows
                    df.drop([5], inplace=True, axis=0)

                # drop the useless sequence-number column
                df.drop([0], inplace=True, axis=1)

                # split the institution column into three columns
                df[1] = df[1].map(lambda x: str(x).split("  "))  # two spaces inside the quotes
                try:  # split the original column into three new Series
                    ser1 = df[1].map(lambda x: x[0])
                    ser2 = df[1].map(lambda x: x[1])
                    ser3 = df[1].map(lambda x: x[2])
                except Exception as e:
                    pass
                df.drop(1, inplace=True, axis=1)  # drop the original column
                df.insert(0, 'broker', ser1)
                df.insert(1, 'count', ser2)
                df.insert(2, 'per', ser3)

                # prepend the code and date columns
                df.insert(0, 'code', str(code))
                df.insert(1, 'date', str(date))

                # append the buy/sell flag at the end
                if i == 0:
                    df.insert(len(df.columns), 'buysellflag', 'buy')
                elif i == 1:
                    df.insert(len(df.columns), 'buysellflag', 'sell')

                # if the number of columns matches, apply the standard header
                if len(df.columns) == 11:
                    df.columns = rv.LHB_DETAIL_COLS
                elif len(df.columns) == 10:  # only 10 columns means there were no buying/selling institutions:
                    # handle that case
                    df.insert(5, 'None', None)
                    df.columns = rv.LHB_DETAIL_COLS
                    df.loc[0] = [
                        code, date, '没有买入或卖出机构', 'None', 'None', 'None',
                        'None', 'None', 'None', 'None', 'None'
                    ]

                # assign the result
                if i == 0:
                    df1 = df
                elif i == 1:
                    df2 = df
            list1 = [df1, df2]
            return pd.concat(list1)
        except Exception as e:
            print(e)
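
A hedged usage sketch; the stock code and date below are hypothetical, and the rv/ct constants plus the Request/urlopen, lxml and pandas imports must be available at module level as in the original project:

detail = lhb_detail(code='600519', date='2017-06-30')  # hypothetical inputs
if detail is not None:
    print(detail[['broker', 'buy', 'sell', 'buysellflag']])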
Example #23
    def test_header_multiindex_common_format(self):

        df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
                       index=['one', 'two'],
                       columns=MultiIndex.from_tuples([('a', 'q'), ('a', 'r'),
                                                       ('a', 's'), ('b', 't'),
                                                       ('c', 'u'),
                                                       ('c', 'v')]))

        # to_csv
        data = """,a,a,a,b,c,c
,q,r,s,t,u,v
,,,,,,
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=0)
        tm.assert_frame_equal(df, result)

        # common
        data = """,a,a,a,b,c,c
,q,r,s,t,u,v
one,1,2,3,4,5,6
two,7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=0)
        tm.assert_frame_equal(df, result)

        # common, no index_col
        data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=None)
        tm.assert_frame_equal(df.reset_index(drop=True), result)

        # malformed case 1
        expected = DataFrame(np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]],
                                      dtype='int64'),
                             index=Index([1, 7]),
                             columns=MultiIndex(levels=[[
                                 u('a'), u('b'), u('c')
                             ], [u('r'),
                                 u('s'),
                                 u('t'),
                                 u('u'),
                                 u('v')]],
                                                labels=[[0, 0, 1, 2, 2],
                                                        [0, 1, 2, 3, 4]],
                                                names=[u('a'), u('q')]))

        data = """a,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=0)
        tm.assert_frame_equal(expected, result)

        # malformed case 2
        expected = DataFrame(np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]],
                                      dtype='int64'),
                             index=Index([1, 7]),
                             columns=MultiIndex(levels=[[
                                 u('a'), u('b'), u('c')
                             ], [u('r'),
                                 u('s'),
                                 u('t'),
                                 u('u'),
                                 u('v')]],
                                                labels=[[0, 0, 1, 2, 2],
                                                        [0, 1, 2, 3, 4]],
                                                names=[None, u('q')]))

        data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=0)
        tm.assert_frame_equal(expected, result)

        # mi on columns and index (malformed)
        expected = DataFrame(np.array([[3, 4, 5, 6], [9, 10, 11, 12]],
                                      dtype='int64'),
                             index=MultiIndex(levels=[[1, 7], [2, 8]],
                                              labels=[[0, 1], [0, 1]]),
                             columns=MultiIndex(
                                 levels=[[u('a'), u('b'),
                                          u('c')],
                                         [u('s'),
                                          u('t'),
                                          u('u'),
                                          u('v')]],
                                 labels=[[0, 1, 2, 2], [0, 1, 2, 3]],
                                 names=[None, u('q')]))

        data = """,a,a,b,c,c
q,r,s,t,u,v
1,2,3,4,5,6
7,8,9,10,11,12"""

        result = self.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
        tm.assert_frame_equal(expected, result)
Example #24
    def test_info_memory_usage(self):
        # Ensure memory usage is displayed, when asserted, on the last line
        dtypes = [
            'int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
            'complex128', 'object', 'bool'
        ]
        data = {}
        n = 10
        for i, dtype in enumerate(dtypes):
            data[i] = np.random.randint(2, size=n).astype(dtype)
        df = DataFrame(data)
        buf = StringIO()

        # display memory usage case
        df.info(buf=buf, memory_usage=True)
        res = buf.getvalue().splitlines()
        assert "memory usage: " in res[-1]

        # do not display memory usage case
        df.info(buf=buf, memory_usage=False)
        res = buf.getvalue().splitlines()
        assert "memory usage: " not in res[-1]

        df.info(buf=buf, memory_usage=True)
        res = buf.getvalue().splitlines()

        # memory usage is a lower bound, so print it as XYZ+ MB
        assert re.match(r"memory usage: [^+]+\+", res[-1])

        df.iloc[:, :5].info(buf=buf, memory_usage=True)
        res = buf.getvalue().splitlines()

        # excluded column with object dtype, so estimate is accurate
        assert not re.match(r"memory usage: [^+]+\+", res[-1])

        # Test a DataFrame with duplicate columns
        dtypes = ['int64', 'int64', 'int64', 'float64']
        data = {}
        n = 100
        for i, dtype in enumerate(dtypes):
            data[i] = np.random.randint(2, size=n).astype(dtype)
        df = DataFrame(data)
        df.columns = dtypes

        df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
        df_with_object_index.info(buf=buf, memory_usage=True)
        res = buf.getvalue().splitlines()
        assert re.match(r"memory usage: [^+]+\+", res[-1])

        df_with_object_index.info(buf=buf, memory_usage='deep')
        res = buf.getvalue().splitlines()
        assert re.match(r"memory usage: [^+]+$", res[-1])

        # Ensure df size is as expected
        # (cols * rows * bytes) + index size
        df_size = df.memory_usage().sum()
        exp_size = len(dtypes) * n * 8 + df.index.nbytes
        assert df_size == exp_size

        # Ensure number of cols in memory_usage is the same as df
        size_df = np.size(df.columns.values) + 1  # index=True; default
        assert size_df == np.size(df.memory_usage())

        # assert deep works only on object
        assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()

        # test for validity
        DataFrame(1, index=['a'], columns=['A']).memory_usage(index=True)
        DataFrame(1, index=['a'], columns=['A']).index.nbytes
        df = DataFrame(data=1,
                       index=pd.MultiIndex.from_product([['a'],
                                                         range(1000)]),
                       columns=['A'])
        df.index.nbytes
        df.memory_usage(index=True)
        df.index.values.nbytes

        mem = df.memory_usage(deep=True).sum()
        assert mem > 0
Example #25
 def test_singleton_header(self):
     # See GH #7757
     data = """a,b,c\n0,1,2\n1,2,3"""
     df = self.read_csv(StringIO(data), header=[0])
     expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]})
     tm.assert_frame_equal(df, expected)
Example #26
    def test_repr_mixed(self):
        buf = StringIO()

        # mixed
        foo = repr(self.mixed_frame)  # noqa
        self.mixed_frame.info(verbose=False, buf=buf)
Example #27
 def _make_reader(**kwds):
     return TextReader(StringIO(data), delimiter=',', **kwds)
Example #28
def test_readjson_invalid_chunksize(lines_json_df, chunksize):
    msg = r"'chunksize' must be an integer >=1"

    with pytest.raises(ValueError, match=msg):
        pd.read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize)
Example #29
 def test_string_factorize(self):
     # should this be optional?
     data = 'a\nb\na\nb\na'
     reader = TextReader(StringIO(data), header=None)
     result = reader.read()
     assert len(set(map(id, result[0]))) == 2
Example #30
    'max': [6],
    'min': [1]
}

obs_mock_data = {
    'index': ['02-03-2006'],
    'name': ['line-1'],
    'value': [4],
    'error': [2]
}

obs_csv = pd.DataFrame(
    pd.read_csv(
        StringIO("""
index,name,value,error
2012-01-04,line-3,17,2
2012-01-08,line-2,8,3
""")))

obs_without_index = pd.read_csv(
    StringIO("""
name,value,error
line-3,17,2
line-2,8,2
"""))


class TestFanChart(unittest.TestCase):
    def test_using_csv(self):
        self.assertTrue(validate_observation_data(obs_csv))
        try: