示例#1
0
    def _data_retrieval(self, product, head, params):
        """
        Post a historical-data request to Investing.com and parse the returned table into a :obj:`pandas.DataFrame`.

        Args:
            product: product type; the volume column is only read for `equities`, `indice` and `fxfuture`.
            head: HTTP headers forwarded to :obj:`requests.post`.
            params: form data forwarded to :obj:`requests.post`.

        Returns:
            :obj:`pandas.DataFrame`:
                one record per parsed table row, in the reverse of the order in which the rows appear in the
                response, indexed by `Date`. If the response contains no matching table rows, `None` is returned.

        Raises:
            ConnectionError: raised if the response status code is not 200.
            IndexError: raised if a table cell reports `No results found`.

        """

        with_volume = product in ['equities', 'indice', 'fxfuture']

        response = requests.post("https://www.investing.com/instruments/HistoricalDataAjax",
                                 headers=head, data=params)

        if response.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(response.status_code) + ", try again later.")

        rows = fromstring(response.text).xpath(".//table[@id='curr_table']/tbody/tr")

        # Nothing to parse: fall out without a frame, exactly as callers currently get.
        if not rows:
            return None

        records = []

        for row in rows:
            values = []

            for cell in row.xpath(".//td"):
                raw = cell.get('data-real-value')
                # A cell without a raw value whose text is the placeholder means the table is empty.
                if raw is None and cell.text_content() == 'No results found':
                    raise IndexError("ERR#0033: information unavailable or not found.")
                values.append(raw)

            # The first cell holds an epoch timestamp; only its calendar date (at midnight) is kept.
            day = datetime.fromtimestamp(int(values[0])).date()
            date_ = datetime.strptime(str(day), '%Y-%m-%d')

            # Cells 1-4 are read as close, open, high, low; thousands separators are stripped first.
            close_, open_, high_, low_ = (float(values[pos].replace(',', '')) for pos in (1, 2, 3, 4))

            volume_ = int(values[5]) if with_volume else None

            records.append(Data(date_, open_, high_, low_, close_, volume_, None, None))

        frame = pd.DataFrame.from_records([record.unknown_to_dict() for record in reversed(records)])
        frame.set_index('Date', inplace=True)

        return frame
示例#2
0
def get_certificate_historical_data(certificate, country, from_date, to_date, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves historical data of the introduced certificate from Investing.com, for the specified
    country and date range. The date range is split into request windows spanning at most 19 calendar years each;
    the windows do not overlap, and the data of every window is merged into a single result which is returned
    either as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        certificate (:obj:`str`): name of the certificate to retrieve historical data from.
        country (:obj:`str`): name of the country from where the certificate is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending`/`asc` or `descending`/`desc`; it applies to the whole
            merged result, not just to every request window on its own.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            If `as_json=False`, a :obj:`pandas.DataFrame` indexed by `Date` built from the `certificate_to_dict()`
            output of every parsed row; as documented so far it looks like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            If `as_json=True`, a JSON string shaped as::

                {
                    name: name,
                    historical: [
                        <certificate_as_json() output of every parsed row>,
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if certificates object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced certificate/country was not found or if no data table could be scraped.
        ConnectionError: raised if the request to Investing.com did not return a 200 status code.
        IndexError: raised if certificate historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_certificate_historical_data(certificate='COMMERZBANK Call ALIBABA GROUP', country='france', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close
            Date
            2018-03-14  39.77  39.77  39.77  39.77
            2018-03-15  48.18  48.18  48.18  46.48
            2018-03-16  46.48  46.48  46.48  46.48
            2018-03-19  40.73  40.73  40.73  40.73
            2018-03-20  44.61  44.61  44.61  44.61

    """

    # Local import: only needed to step from one request window to the next.
    from datetime import timedelta

    if not certificate or not isinstance(certificate, str):
        raise ValueError("ERR#0100: certificate param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    # TypeError covers non-str dates (e.g. None), which must surface as the documented ValueError too.
    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except (ValueError, TypeError):
        raise ValueError("ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except (ValueError, TypeError):
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the requested range into windows of at most `max_years` calendar years.
    max_years = 19
    intervals = []

    while end_date.year - start_date.year > max_years:
        target_year = start_date.year + max_years

        try:
            window_end = start_date.replace(year=target_year)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year, so fall back to 28 February.
            window_end = start_date.replace(year=target_year, day=28)

        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': window_end.strftime('%m/%d/%Y'),
        })

        # The next window starts the day after, so the boundary day is not requested twice.
        start_date = window_end + timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'certificates', 'certificates.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        certificates = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0096: certificates file not found or errored.")

    if certificates is None:
        raise IOError("ERR#0097: certificates not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_certificate_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    certificates = certificates[certificates['country'] == country_key]

    certificate = certificate.strip().lower()

    names = certificates['name']
    folded_matches = names.apply(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(certificate)

    if not folded_matches.any():
        raise RuntimeError("ERR#0101: certificate " + certificate + " not found, check if it is correct.")

    # Prefer an exact (accent-sensitive) match; otherwise use the accent-folded match that passed the check
    # above, since `idxmax()` on an all-False mask would silently select the first row.
    exact_matches = names.str.lower() == certificate
    row_label = (exact_matches if exact_matches.any() else folded_matches).idxmax()

    symbol = certificates.loc[row_label, 'symbol']
    id_ = certificates.loc[row_label, 'id']
    name = certificates.loc[row_label, 'name']

    header = symbol + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    interval_limit = len(intervals)

    # One entry per window that returned data: a list of dicts (JSON mode) or a DataFrame.
    final = []

    for interval_counter, window in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": window['start'],
            "end_date": window['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

        if not rows:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = []
        has_data = False

        for row in rows:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty window is only fatal when it is the last one requested.
                if interval_counter < interval_limit:
                    has_data = False
                    continue
                raise IndexError("ERR#0102: certificate information unavailable or not found.")

            has_data = True

            info = [cell.get('data-real-value') for cell in cells]

            # The first cell holds an epoch timestamp; only its calendar date (at midnight) is kept.
            certificate_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            certificate_close = float(info[1].replace(',', ''))
            certificate_open = float(info[2].replace(',', ''))
            certificate_high = float(info[3].replace(',', ''))
            certificate_low = float(info[4].replace(',', ''))

            result.append(Data(certificate_date, certificate_open, certificate_high,
                               certificate_low, certificate_close, None, None))

        if not has_data:
            continue

        # Rows are requested newest-first (`sort_ord` is DESC), so ascending output needs a flip.
        if order in ['ascending', 'asc']:
            result = result[::-1]

        if as_json is True:
            final.append([value.certificate_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records([value.certificate_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    if not final:
        raise IndexError("ERR#0102: certificate information unavailable or not found.")

    # Windows were requested oldest-first; descending output needs the newest window first.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for chunk in final for entry in chunk]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#3
0
def get_certificate_recent_data(certificate, country, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data of the introduced certificate from Investing.com. A single
    request without date bounds is sent, and the rows of the returned table are parsed and returned either as a
    :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        certificate (:obj:`str`): name of the certificate to retrieve recent data from.
        country (:obj:`str`): name of the country from where the certificate is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): order of the retrieved data, either `ascending`/`asc` or `descending`/`desc`.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            If `as_json=False`, a :obj:`pandas.DataFrame` indexed by `Date` built from the `certificate_to_dict()`
            output of every parsed row; as documented so far it looks like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            If `as_json=True`, a JSON string shaped as::

                {
                    name: name,
                    recent: [
                        <certificate_as_json() output of every parsed row>,
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        IOError: raised if certificates object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced certificate/country was not found or if no data table could be scraped.
        ConnectionError: raised if the request to Investing.com did not return a 200 status code.
        IndexError: raised if certificate information was unavailable or not found.

    Examples:
        >>> investpy.get_certificate_recent_data(certificate='COMMERZBANK Call ALIBABA GROUP', country='france')
                        Open  High   Low  Close
            Date
            2019-11-27  5.47  5.47  5.47   5.47
            2019-12-05  5.52  5.52  5.52   5.52
            2019-12-10  5.37  5.37  5.37   5.37
            2019-12-12  6.27  6.27  6.27   6.27
            2019-12-16  6.80  6.80  6.80   6.80
            2019-12-20  7.50  7.50  7.50   7.50

    """

    if not certificate or not isinstance(certificate, str):
        raise ValueError("ERR#0100: certificate param is mandatory and should be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'certificates', 'certificates.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        certificates = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0096: certificates file not found or errored.")

    if certificates is None:
        raise IOError("ERR#0097: certificates not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_certificate_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    certificates = certificates[certificates['country'] == country_key]

    certificate = certificate.strip().lower()

    names = certificates['name']
    folded_matches = names.apply(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(certificate)

    if not folded_matches.any():
        raise RuntimeError("ERR#0101: certificate " + certificate + " not found, check if it is correct.")

    # Prefer an exact (accent-sensitive) match; otherwise use the accent-folded match that passed the check
    # above, since `idxmax()` on an all-False mask would silently select the first row.
    exact_matches = names.str.lower() == certificate
    row_label = (exact_matches if exact_matches.any() else folded_matches).idxmax()

    symbol = certificates.loc[row_label, 'symbol']
    id_ = certificates.loc[row_label, 'id']
    name = certificates.loc[row_label, 'name']

    header = symbol + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

    if not rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []

    for row in rows:
        cells = row.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0102: certificate information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # The first cell holds an epoch timestamp; only its calendar date (at midnight) is kept.
        certificate_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

        certificate_close = float(info[1].replace(',', ''))
        certificate_open = float(info[2].replace(',', ''))
        certificate_high = float(info[3].replace(',', ''))
        certificate_low = float(info[4].replace(',', ''))

        result.append(Data(certificate_date, certificate_open, certificate_high,
                           certificate_low, certificate_close, None, None))

    # Rows are requested newest-first (`sort_ord` is DESC), so ascending output needs a flip.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.certificate_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.certificate_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
def get_currency_cross_historical_data(currency_cross, from_date, to_date, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves historical data of the introduced `currency_cross` from Investing.com via Web
    Scraping, for the specified date range. The date range is split into request windows spanning at most 20
    calendar years each; the windows do not overlap, and the data of every window is merged into a single result
    which is returned either as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Args:
        currency_cross (:obj:`str`): name of the currency cross to retrieve historical data from.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`);
            it applies to the whole merged result, not just to every request window on its own.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            If `as_json=False`, a :obj:`pandas.DataFrame` indexed by `Date` built from the
            `currency_cross_to_dict()` output of every parsed row; as documented so far it looks like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            If `as_json=True`, a JSON string shaped as::

                {
                    name: name,
                    historical: [
                        <currency_cross_as_json() output of every parsed row>,
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        IOError: currency_crosses object/file not found or unable to retrieve.
        RuntimeError: introduced currency_cross does not match any of the indexed ones, or no data table could be scraped.
        ConnectionError: if the request to Investing.com does not return a 200 status code.
        IndexError: if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_historical_data(currency_cross='EUR/USD', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close Currency
            Date
            2018-01-01  1.2003  1.2014  1.1995  1.2010      USD
            2018-01-02  1.2013  1.2084  1.2003  1.2059      USD
            2018-01-03  1.2058  1.2070  1.2001  1.2014      USD
            2018-01-04  1.2015  1.2090  1.2004  1.2068      USD
            2018-01-05  1.2068  1.2085  1.2021  1.2030      USD

    """

    # Local import: only needed to step from one request window to the next.
    from datetime import timedelta

    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    # TypeError covers non-str dates (e.g. None), which must surface as the documented ValueError too.
    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except (ValueError, TypeError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except (ValueError, TypeError):
        raise ValueError("ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    # Split the requested range into windows of at most `max_years` calendar years.
    max_years = 20
    intervals = []

    while end_date.year - start_date.year > max_years:
        target_year = start_date.year + max_years

        try:
            window_end = start_date.replace(year=target_year)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year, so fall back to 28 February.
            window_end = start_date.replace(year=target_year, day=28)

        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': window_end.strftime('%m/%d/%Y'),
        })

        # The next window starts the day after, so the boundary day is not requested twice.
        start_date = window_end + timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        currency_crosses = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    names = currency_crosses['name']
    folded_matches = names.apply(lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(currency_cross)

    if not folded_matches.any():
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    # Prefer an exact (accent-sensitive) match; otherwise use the accent-folded match that passed the check
    # above, since `idxmax()` on an all-False mask would silently select the first row.
    exact_matches = names.str.lower() == currency_cross
    row_label = (exact_matches if exact_matches.any() else folded_matches).idxmax()

    id_ = currency_crosses.loc[row_label, 'id']
    name = currency_crosses.loc[row_label, 'name']
    currency = currency_crosses.loc[row_label, 'second']

    header = name + ' Historical Data'
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    interval_limit = len(intervals)

    # One entry per window that returned data: a list of dicts (JSON mode) or a DataFrame.
    final = []

    for interval_counter, window in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": window['start'],
            "end_date": window['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        rows = fromstring(req.text).xpath(".//table[@id='curr_table']/tbody/tr")

        if not rows:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = []
        has_data = False

        for row in rows:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty window is only fatal when it is the last one requested.
                if interval_counter < interval_limit:
                    has_data = False
                    continue
                raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

            has_data = True

            info = [cell.get('data-real-value') for cell in cells]

            # The first cell holds an epoch timestamp; only its calendar date (at midnight) is kept.
            currency_cross_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            currency_cross_close = float(info[1].replace(',', ''))
            currency_cross_open = float(info[2].replace(',', ''))
            currency_cross_high = float(info[3].replace(',', ''))
            currency_cross_low = float(info[4].replace(',', ''))

            result.append(Data(currency_cross_date, currency_cross_open, currency_cross_high, currency_cross_low,
                               currency_cross_close, None, currency))

        if not has_data:
            continue

        # Rows are requested newest-first (`sort_ord` is DESC), so ascending output needs a flip.
        if order in ['ascending', 'asc']:
            result = result[::-1]

        if as_json is True:
            final.append([value.currency_cross_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records([value.currency_cross_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    if not final:
        raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

    # Windows were requested oldest-first; descending output needs the newest window first.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for chunk in final for entry in chunk]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
def get_currency_cross_recent_data(currency_cross, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves recent historical data from the introduced `currency_cross` as indexed in Investing.com
    via Web Scraping. The resulting data can either be returned as a :obj:`pandas.DataFrame` or as a
    :obj:`json` string, with `ascending` or `descending` order.

    Args:
        currency_cross (:obj:`str`): name of the currency_cross to retrieve recent historical data from.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            recent data from the specified currency_cross via argument. The dataset contains the open, high, low and
            close values, plus the currency, for the selected currency_cross.

            The returned data, in case we use default arguments, will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            'open': x,
                            'high': x,
                            'low': x,
                            'close': x,
                            'currency' : x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if any of the introduced arguments was not valid or errored.
        FileNotFoundError: raised if the currency_crosses resource file is missing from the package.
        IOError: raised if currency_crosses object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced currency_cross does not match any of the indexed ones, or if the
            response contained no rows of the historical data table.
        ConnectionError: raised if the request did not return 200 status code.
        IndexError: raised if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_recent_data(currency_cross='EUR/USD')
                          Open    High     Low   Close Currency
            Date
            2019-08-27  1.1101  1.1116  1.1084  1.1091      USD
            2019-08-28  1.1090  1.1099  1.1072  1.1078      USD
            2019-08-29  1.1078  1.1093  1.1042  1.1057      USD
            2019-08-30  1.1058  1.1062  1.0963  1.0991      USD
            2019-09-02  1.0990  1.1000  1.0958  1.0968      USD

    """

    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError("ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'currency_crosses', 'currency_crosses.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0060: currency_crosses file not found or errored.")

    currency_crosses = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    if currency_crosses is None:
        raise IOError("ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    # The membership test is accent-insensitive, so the row lookup has to be too:
    # comparing only the lowercase names would produce an all-False mask whose
    # idxmax() silently points at the first row of the file.
    wanted = unidecode.unidecode(currency_cross)
    lowered_names = [value.lower() for value in currency_crosses['name'].tolist()]
    normalised_names = [unidecode.unidecode(value) for value in lowered_names]

    if wanted not in normalised_names:
        raise RuntimeError("ERR#0054: the introduced currency_cross " + str(currency_cross) + " does not exists.")

    # Prefer the exact lowercase match; fall back to the accent-insensitive one.
    if currency_cross in lowered_names:
        position = lowered_names.index(currency_cross)
    else:
        position = normalised_names.index(wanted)

    id_ = currency_crosses['id'].iloc[position]
    name = currency_crosses['name'].iloc[position]
    currency = currency_crosses['second'].iloc[position]

    header = name + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        # A row without cells carries no data either, so it is reported the same
        # way as the explicit 'No results found' placeholder row.
        if not cells or cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0055: currency_cross information unavailable or not found.")

        info = [nested_.get('data-real-value') for nested_ in cells]

        # NOTE(review): fromtimestamp() without tz converts to the local timezone of
        # the machine, so the calendar date may differ between hosts - confirm
        # whether the scraped timestamps are meant to be read as UTC.
        timestamp_ = datetime.fromtimestamp(int(info[0]))
        currency_cross_date = datetime.combine(timestamp_.date(), datetime.min.time())

        # Cell order in the scraped table is: date, close, open, high, low.
        currency_cross_close = float(info[1].replace(',', ''))
        currency_cross_open = float(info[2].replace(',', ''))
        currency_cross_high = float(info[3].replace(',', ''))
        currency_cross_low = float(info[4].replace(',', ''))

        result.append(Data(currency_cross_date, currency_cross_open, currency_cross_high, currency_cross_low,
                           currency_cross_close, None, currency))

    # Rows are requested with sort_ord=DESC, hence reversed for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.currency_cross_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.currency_cross_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#6
0
def get_stock_recent_data(stock,
                          country,
                          as_json=False,
                          order='ascending',
                          interval='Daily'):
    """
    This function retrieves recent historical data from the introduced stock from Investing.com. So on, the recent data
    of the introduced stock from the specified country will be retrieved and returned as a :obj:`pandas.DataFrame` if
    the parameters are valid and the request to Investing.com succeeds. Note that additionally some optional parameters
    can be specified: as_json and order, which let the user decide if the data is going to be returned as a
    :obj:`json` or not, and if the historical data is going to be ordered ascending or descending (where the index is the
    date), respectively.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` or a :obj:`json` object, containing the retrieved
            recent data of the specified stock from the specified country. So on, the resulting dataframe contains the
            open, high, low, close and volume values for the selected stock on market days and the currency in which those
            values are presented.

            The resulting recent data, in case that the default parameters were applied, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the stocks resource file is missing from the package.
        IOError: raised if stocks object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced stock/country was not found or did not match any of the existing ones,
            or if the response contained no rows of the historical data table.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if stock recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_stock_recent_data(stock='bbva', country='spain')
                         Open   High    Low  Close    Volume Currency
            Date
            2019-08-13  4.263  4.395  4.230  4.353  27250000      EUR
            2019-08-14  4.322  4.325  4.215  4.244  36890000      EUR
            2019-08-15  4.281  4.298  4.187  4.234  21340000      EUR
            2019-08-16  4.234  4.375  4.208  4.365  46080000      EUR
            2019-08-19  4.396  4.425  4.269  4.269  18950000      EUR

    """

    if not stock:
        raise ValueError(
            "ERR#0013: stock parameter is mandatory and must be a valid stock name."
        )

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    stocks = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    if stocks is None:
        raise IOError(
            "ERR#0001: stocks object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == country_key]

    stock = stock.strip().lower()

    # The membership test is accent-insensitive, so the row lookup has to be too:
    # comparing only the lowercase symbols would produce an all-False mask whose
    # idxmax() silently points at the first row for that country.
    wanted = unidecode.unidecode(stock)
    lowered_symbols = [value.lower() for value in stocks['symbol'].tolist()]
    normalised_symbols = [unidecode.unidecode(value) for value in lowered_symbols]

    if wanted not in normalised_symbols:
        raise RuntimeError("ERR#0018: stock " + stock +
                           " not found, check if it is correct.")

    # Prefer the exact lowercase match; fall back to the accent-insensitive one.
    if stock in lowered_symbols:
        position = lowered_symbols.index(stock)
    else:
        position = normalised_symbols.index(wanted)

    # Positional access is used because the country filter keeps the original
    # (non-contiguous) index labels.
    symbol = stocks['symbol'].iloc[position]
    id_ = stocks['id'].iloc[position]
    name = stocks['name'].iloc[position]
    stock_currency = stocks['currency'].iloc[position]

    header = symbol + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        # A row without cells carries no data either, so it is reported the same
        # way as the explicit 'No results found' placeholder row.
        if not cells or cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0007: stock information unavailable or not found.")

        info = [nested_.get('data-real-value') for nested_ in cells]

        # NOTE(review): fromtimestamp() without tz converts to the local timezone of
        # the machine, so the calendar date may differ between hosts - confirm
        # whether the scraped timestamps are meant to be read as UTC.
        stock_date = datetime.fromtimestamp(int(info[0])).date()

        # Cell order in the scraped table is: date, close, open, high, low, volume.
        stock_close = float(info[1].replace(',', ''))
        stock_open = float(info[2].replace(',', ''))
        stock_high = float(info[3].replace(',', ''))
        stock_low = float(info[4].replace(',', ''))
        stock_volume = _parse_stock_volume(info[5])

        result.append(
            Data(stock_date, stock_open, stock_high, stock_low,
                 stock_close, stock_volume, stock_currency))

    # Rows are requested with sort_ord=DESC, hence reversed for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.stock_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.stock_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df


def _parse_stock_volume(raw):
    """
    Convert a scraped volume cell value into an :obj:`int`.

    Values abbreviated with a `K`, `M` or `B` suffix are scaled by 1e3, 1e6 or 1e9
    respectively. Plain numeric values (with optional thousands separators) are
    converted directly instead of being reported as 0. Missing or non-numeric values
    (e.g. a placeholder such as '-') yield 0.

    Args:
        raw (:obj:`str` or None): value of the volume cell as scraped.

    Returns:
        :obj:`int`: the parsed volume, or 0 if it could not be determined.
    """

    if raw is None:
        return 0

    for suffix, factor in (('K', 1e3), ('M', 1e6), ('B', 1e9)):
        if suffix in raw:
            return int(float(raw.replace(suffix, '').replace(',', '')) * factor)

    try:
        return int(float(raw.replace(',', '')))
    except (ValueError, OverflowError):
        return 0
示例#7
0
def get_bond_recent_data(bond,
                         country,
                         as_json=False,
                         order='ascending',
                         debug=False):
    """
    This function retrieves recent historical data from the introduced bond from Investing.com. So on, the recent data
    of the introduced bond from the specified country will be retrieved and returned as a :obj:`pandas.DataFrame` if
    the parameters are valid and the request to Investing.com succeeds. Note that additionally some optional parameters
    can be specified: as_json, order and debug, which let the user decide if the data is going to be returned as a
    :obj:`json` or not, if the historical data is going to be ordered ascending or descending (where the index is the date)
    and whether debug messages are going to be printed or not, respectively.

    Args:
        bond (:obj:`str`): name of the bond to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the bond is.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or descending.
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, either True or False, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` or a :obj:`json` object, containing the retrieved
            recent data of the specified bond from the specified country. So on, the resulting dataframe contains the
            open, high, low and close values for the selected bond on market days.

            The resulting recent data, in case that the default parameters were applied, will look like::

                date || open | high | low | close
                -----||---------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        FileNotFoundError: raised if the bonds resource file is missing from the package.
        IOError: raised if bonds object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced bond/country was not found or did not match any of the existing ones,
            or if the response contained no rows of the historical data table.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if bond historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_recent_data(bond='Argentina 3Y', country='argentina')
                        Open    High     Low   Close
            Date
            2019-09-23  52.214  52.214  52.214  52.214
            2019-09-24  52.323  52.323  52.323  52.323
            2019-09-25  52.432  52.432  52.432  52.432
            2019-09-26  52.765  52.765  52.765  52.765
            2019-09-27  52.876  52.876  52.876  52.876

    """

    if not bond:
        raise ValueError(
            "ERR#0066: bond parameter is mandatory and must be a valid bond name."
        )

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")

    bonds = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    if bonds is None:
        raise IOError(
            "ERR#0065: bonds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_bond_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    bonds = bonds[bonds['country'] == country_key]

    bond = bond.strip().lower()

    # The membership test is accent-insensitive, so the row lookup has to be too:
    # comparing only the lowercase names would produce an all-False mask whose
    # idxmax() silently points at the first row for that country.
    wanted = unidecode.unidecode(bond)
    lowered_names = [value.lower() for value in bonds['name'].tolist()]
    normalised_names = [unidecode.unidecode(value) for value in lowered_names]

    if wanted not in normalised_names:
        raise RuntimeError("ERR#0068: bond " + bond +
                           " not found, check if it is correct.")

    # The 'investpy' logger is switched on or off as a whole depending on `debug`.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced bond on Investing.com')

    # Prefer the exact lowercase match; fall back to the accent-insensitive one.
    if bond in lowered_names:
        position = lowered_names.index(bond)
    else:
        position = normalised_names.index(wanted)

    # Positional access is used because the country filter keeps the original
    # (non-contiguous) index labels.
    id_ = bonds['id'].iloc[position]
    name = bonds['name'].iloc[position]
    full_name = bonds['full_name'].iloc[position]

    logger.info('%s found on Investing.com', bond)

    header = full_name + " Bond Yield Historical Data"

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    logger.info('Request to Investing.com data succeeded with code %s!',
                req.status_code)

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        # Without this check a 'No results found' placeholder row (or a row with
        # no cells) would fail later with an unrelated TypeError/IndexError
        # instead of the documented IndexError.
        if not cells or cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0069: bond information unavailable or not found.")

        info = [nested_.get('data-real-value') for nested_ in cells]

        # NOTE(review): fromtimestamp() without tz converts to the local timezone of
        # the machine, so the calendar date may differ between hosts - confirm
        # whether the scraped timestamps are meant to be read as UTC.
        bond_date = datetime.fromtimestamp(int(info[0])).date()

        # Cell order in the scraped table is: date, close, open, high, low.
        # Thousands separators are stripped so that float() accepts such values.
        bond_close = float(info[1].replace(',', ''))
        bond_open = float(info[2].replace(',', ''))
        bond_high = float(info[3].replace(',', ''))
        bond_low = float(info[4].replace(',', ''))

        result.append(
            Data(bond_date, bond_open, bond_high, bond_low, bond_close,
                 None, None))

    # Rows are requested with sort_ord=DESC, hence reversed for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.bond_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.bond_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#8
0
def get_crypto_recent_data(crypto,
                           as_json=False,
                           order='ascending',
                           interval='Daily'):
    """
    This function retrieves recent historical data from the introduced crypto from Investing.com. So on, the recent data
    of the introduced crypto will be retrieved and returned as a :obj:`pandas.DataFrame` if the parameters are valid
    and the request to Investing.com succeeds. Note that additionally some optional parameters can be specified: as_json
    and order, which let the user decide if the data is going to be returned as a :obj:`json` or not, and if the historical
    data is going to be ordered ascending or descending (where the index is the date), respectively.

    Args:
        crypto (:obj:`str`): name of the crypto currency to retrieve data from.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` or a :obj:`json` object, containing the retrieved
            recent data of the specified crypto currency. So on, the resulting dataframe contains the open, high, low,
            close and volume values for the selected crypto on market days and the currency in which those values are presented.

            The resulting recent data, in case that the default parameters were applied, will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored, or if the selected
            crypto currency is flagged as unavailable in the cryptos file.
        FileNotFoundError: raised if the cryptos resource file is missing from the package.
        IOError: raised if cryptos object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced crypto name was not found or did not match any of the existing ones,
            or if the response contained no rows of the historical data table.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if crypto recent data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_crypto_recent_data(crypto='bitcoin')
                          Open     High     Low   Close   Volume Currency
            Date
            2019-10-25  7422.8   8697.7  7404.9  8658.3  1177632      USD
            2019-10-26  8658.4  10540.0  8061.8  9230.6  1784005      USD
            2019-10-27  9230.6   9773.2  9081.0  9529.6  1155038      USD
            2019-10-28  9530.1   9866.9  9202.5  9207.2  1039295      USD
            2019-10-29  9206.5   9531.3  9125.3  9411.3   918477      USD

    """

    if not crypto:
        raise ValueError(
            "ERR#0083: crypto parameter is mandatory and must be a valid crypto name."
        )

    if not isinstance(crypto, str):
        raise ValueError("ERR#0084: crypto argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'crypto', 'cryptos.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0081: cryptos file not found or errored.")

    cryptos = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    if cryptos is None:
        raise IOError("ERR#0082: cryptos not found or unable to retrieve.")

    crypto = crypto.strip().lower()

    # The membership test is accent-insensitive, so the row lookup has to be too:
    # comparing only the lowercase names would produce an all-False mask whose
    # idxmax() silently points at the first row of the file.
    wanted = unidecode.unidecode(crypto)
    lowered_names = [value.lower() for value in cryptos['name'].tolist()]
    normalised_names = [unidecode.unidecode(value) for value in lowered_names]

    if wanted not in normalised_names:
        raise RuntimeError("ERR#0085: crypto currency: " + crypto +
                           ", not found, check if it is correct.")

    # Prefer the exact lowercase match; fall back to the accent-insensitive one.
    if crypto in lowered_names:
        position = lowered_names.index(crypto)
    else:
        position = normalised_names.index(wanted)

    if cryptos['status'].iloc[position] == 'unavailable':
        raise ValueError(
            "ERR#0086: the selected crypto currency is not available for retrieval in Investing.com."
        )

    crypto_name = cryptos['name'].iloc[position]
    crypto_id = cryptos['id'].iloc[position]
    crypto_currency = cryptos['currency'].iloc[position]

    header = crypto_name + ' Historical Data'

    params = {
        "curr_id": crypto_id,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        # A row without cells carries no data either, so it is reported the same
        # way as the explicit 'No results found' placeholder row.
        if not cells or cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0087: crypto information unavailable or not found.")

        info = [nested_.get('data-real-value') for nested_ in cells]

        # NOTE(review): fromtimestamp() without tz converts to the local timezone of
        # the machine, so the calendar date may differ between hosts - confirm
        # whether the scraped timestamps are meant to be read as UTC.
        timestamp_ = datetime.fromtimestamp(int(info[0]))
        crypto_date = datetime.combine(timestamp_.date(), datetime.min.time())

        # Cell order in the scraped table is: date, close, open, high, low, volume.
        crypto_close = float(info[1].replace(',', ''))
        crypto_open = float(info[2].replace(',', ''))
        crypto_high = float(info[3].replace(',', ''))
        crypto_low = float(info[4].replace(',', ''))

        crypto_volume = int(info[5])

        result.append(
            Data(crypto_date, crypto_open, crypto_high, crypto_low,
                 crypto_close, crypto_volume, crypto_currency))

    # Rows are requested with sort_ord=DESC, hence reversed for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': crypto_name,
            'recent': [value.crypto_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.crypto_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#9
0
def get_fund_recent_data(fund, country, as_json=False, order='ascending', debug=False):
    """
    This function retrieves recent historical data from the introduced `fund` from Investing
    via Web Scraping. The resulting data can either be returned as a :obj:`pandas.DataFrame` or as a
    :obj:`json` string, with `ascending` or `descending` order.

    Args:
        fund (:obj:`str`): name of the fund to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the introduced fund is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            recent data from the specified fund via argument. The dataset contains the open, high, low and close
            values for the selected fund on market days.

            The returned data in case we use default arguments will look like::

                date || open | high | low | close | currency
                -----||--------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        IOError: funds object/file not found or unable to retrieve.
        RuntimeError: introduced fund or country does not match any of the indexed ones, or the
            response did not contain any table row.
        ConnectionError: if the POST request does not return 200 status code.
        IndexError: if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_recent_data(fund='bbva multiactivo conservador pp', country='spain', as_json=False, order='ascending', debug=False)
                         Open   High    Low  Close Currency
            Date
            2019-08-13  1.110  1.110  1.110  1.110      EUR
            2019-08-16  1.109  1.109  1.109  1.109      EUR
            2019-08-19  1.114  1.114  1.114  1.114      EUR
            2019-08-20  1.112  1.112  1.112  1.112      EUR
            2019-08-21  1.115  1.115  1.115  1.115      EUR

    """

    if not fund:
        raise ValueError("ERR#0029: fund parameter is mandatory and must be a valid fund name.")

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError("ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not isinstance(debug, bool):
        raise ValueError("ERR#0033: debug argument can just be a boolean value, either True or False.")

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        funds = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")

    if funds is None:
        raise IOError("ERR#0005: funds object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

    funds = funds[funds['country'] == country_key]

    fund = fund.strip().lower()
    fund_key = unidecode.unidecode(fund)

    # Accent-insensitive form of every indexed name, used both to validate the
    # input and (below) to locate the row, so both steps agree on what "found" means.
    normalized_names = [unidecode.unidecode(value.lower()) for value in funds['name'].tolist()]

    if fund_key not in normalized_names:
        raise RuntimeError("ERR#0019: fund " + fund + " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')

    logger.disabled = debug is False

    logger.info('Searching introduced fund on Investing.com')

    # Prefer an exact (accent-sensitive) match; otherwise fall back to the
    # accent-insensitive one that passed validation. Without the fallback an
    # all-False mask would make idxmax() silently return the first row.
    matches = funds['name'].str.lower() == fund
    if not matches.any():
        matches = pd.Series(normalized_names, index=funds.index, dtype=object) == fund_key

    row_label = matches.idxmax()

    symbol = funds.loc[row_label, 'symbol']
    id_ = funds.loc[row_label, 'id']
    name = funds.loc[row_label, 'name']
    fund_currency = funds.loc[row_label, 'currency']

    logger.info(str(fund) + ' found on Investing.com')

    header = "Datos históricos " + symbol

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' + str(req.status_code) + '!')

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    logger.info('Data parsing process starting...')

    result = []

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError("ERR#0008: fund information unavailable or not found.")

        # The Spanish site is queried, so dates look like dd.mm.yyyy and numbers
        # use '.' as thousands separator and ',' as decimal separator.
        fund_date = datetime.datetime.strptime(info[0].replace('.', '-'), '%d-%m-%Y')
        fund_close = float(info[1].replace('.', '').replace(',', '.'))
        fund_open = float(info[2].replace('.', '').replace(',', '.'))
        fund_high = float(info[3].replace('.', '').replace(',', '.'))
        fund_low = float(info[4].replace('.', '').replace(',', '.'))

        result.append(Data(fund_date, fund_open, fund_high, fund_low,
                           fund_close, None, fund_currency))

    # Rows are requested newest-first ("sort_ord": "DESC"); flip them for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {'name': name,
                 'recent':
                     [value.fund_as_json() for value in result]
                 }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.fund_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#10
0
def get_etf_recent_data(etf,
                        country,
                        as_json=False,
                        order='ascending',
                        interval='Daily'):
    """
    This function retrieves recent historical data from the introduced `etf` from Investing
    via Web Scraping. The resulting data can either be returned as a :obj:`pandas.DataFrame` or as a
    :obj:`json` string, with `ascending` or `descending` order.

    Args:
        etf (:obj:`str`): name of the etf to retrieve recent historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            recent data from the specified etf via argument. The dataset contains the open, high, low and close
            values for the selected etf on market days.

            The returned data in case we use default arguments will look like::

                date || open | high | low | close | currency | exchange
                -----||--------------------------------------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x,
                            exchange: x,
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        IOError: raised if etfs object/file not found or unable to retrieve.
        RuntimeError: raised if the introduced etf or country does not match any of the indexed ones,
            or if the response did not contain any table row.
        ConnectionError: raised if the POST request does not return 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_recent_data(etf='bbva accion dj eurostoxx 50', country='spain')
                          Open    High     Low   Close Currency Exchange
            Date
            2019-08-13  33.115  33.780  32.985  33.585      EUR   Madrid
            2019-08-14  33.335  33.335  32.880  32.905      EUR   Madrid
            2019-08-15  32.790  32.925  32.455  32.845      EUR   Madrid
            2019-08-16  33.115  33.200  33.115  33.305      EUR   Madrid
            2019-08-19  33.605  33.735  33.490  33.685      EUR   Madrid

    """

    if not etf:
        raise ValueError(
            "ERR#0031: etf parameter is mandatory and must be a valid etf name."
        )

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    # Empty, non-str and unknown values all yield the same error, so one check suffices.
    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == country_key]

    etf = etf.strip().lower()
    etf_key = unidecode.unidecode(etf)

    # Accent-insensitive form of every indexed name, used both to validate the
    # input and (below) to locate the row, so both steps agree on what "found" means.
    normalized_names = [
        unidecode.unidecode(value.lower()) for value in etfs['name'].tolist()
    ]

    if etf_key not in normalized_names:
        raise RuntimeError("ERR#0019: etf " + etf +
                           " not found, check if it is correct.")

    # Prefer an exact (accent-sensitive) match; otherwise fall back to the
    # accent-insensitive one that passed validation. Without the fallback an
    # all-False mask would make idxmax() silently return the first row.
    matches = etfs['name'].str.lower() == etf
    if not matches.any():
        matches = pd.Series(normalized_names, index=etfs.index,
                            dtype=object) == etf_key

    if matches.sum() > 1:
        warnings.warn(
            'Note that the displayed information can differ depending on the stock exchange.',
            Warning)

    row_label = matches.idxmax()

    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    header = symbol + ' Historical Data'

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0010: etf information unavailable or not found.")

        info = [cell.get('data-real-value') for cell in cells]

        # fromtimestamp() converts using the host's local timezone.
        # NOTE(review): if the scraped timestamps are UTC-based the date may
        # shift by one day on some hosts - confirm against live data.
        etf_date = datetime.fromtimestamp(int(info[0])).date()

        etf_close = float(info[1].replace(',', ''))
        etf_open = float(info[2].replace(',', ''))
        etf_high = float(info[3].replace(',', ''))
        etf_low = float(info[4].replace(',', ''))

        result.append(
            Data(etf_date, etf_open, etf_high, etf_low, etf_close, None,
                 etf_currency))

    # Rows are requested newest-first ("sort_ord": "DESC"); flip them for ascending output.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.etf_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.etf_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#11
0
def get_commodity_historical_data(commodity, from_date, to_date, country=None, as_json=False, order='ascending', interval='Daily'):
    """
    This function retrieves historical data from the introduced commodity from Investing.com. So on, the historical data
    of the introduced commodity in the specified date range will be retrieved and returned as a :obj:`pandas.DataFrame`
    if the parameters are valid and the request to Investing.com succeeds. Note that additionally some optional parameters
    can be specified: as_json and order, which let the user decide if the data is going to be returned as a :obj:`json` or not,
    and if the historical data is going to be ordered ascending or descending (where the index is the date), respectively.

    Date ranges spanning more than 19 calendar years are split into consecutive chunks, each one requested
    separately, and the chunks are merged into a single result.

    Args:
        commodity (:obj:`str`): name of the commodity to retrieve historical data from.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        country (:obj:`str`, optional):
            name of the country to retrieve the commodity data from (if there is more than one country that
            provides data from the same commodity).
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            historical data of the specified commodity. So on, the resulting dataframe contains the open, high, low and close
            values for the selected commodity on market days and the currency in which those values are presented.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but in case that as_json parameter was defined as True, then the output will be
            (note that the list of rows is stored under the `recent` key)::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if commodities object/file was not found or unable to retrieve.
        RuntimeError: raised if the introduced commodity or country was not found or did not match any of the
            existing ones, or if a response did not contain any table row.
        ConnectionError: raised if connection to Investing.com could not be established.
        IndexError: raised if commodity historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_commodity_historical_data(commodity='gold', from_date='01/01/2018', to_date='01/01/2019')
                          Open    High     Low   Close  Volume Currency
            Date
            2018-01-01  1305.8  1309.7  1304.6  1308.7       0      USD
            2018-01-02  1370.5  1370.5  1370.5  1370.5      97      USD
            2018-01-03  1372.0  1372.0  1369.0  1374.2      22      USD
            2018-01-04  1363.4  1375.6  1362.7  1377.4      13      USD
            2018-01-05  1377.8  1377.8  1377.8  1378.4      10      USD

    """

    # Imported locally so the block does not depend on how the module imports datetime names.
    from datetime import timedelta

    if not commodity:
        raise ValueError("ERR#0078: commodity parameter is mandatory and must be a valid commodity name.")

    if not isinstance(commodity, str):
        raise ValueError("ERR#0078: commodity parameter is mandatory and must be a valid commodity name.")

    if country is not None and not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    # Empty, non-str and unknown values all yield the same error, so one check suffices.
    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError("ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError("ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'.")

    # Split the requested range into chunks of at most 19 years each.
    intervals = []

    while end_date.year - start_date.year > 19:
        try:
            chunk_end = start_date.replace(year=start_date.year + 19)
        except ValueError:
            # Only Feb 29 can fail here (the target year is never a leap year); clamp to Feb 28.
            chunk_end = start_date.replace(year=start_date.year + 19, day=28)

        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        # timedelta rolls over month/year boundaries, unlike replace(day=day + 1).
        start_date = chunk_end + timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    interval_limit = len(intervals)

    data_flag = False

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'commodities', 'commodities.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        commodities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0075: commodities file not found or errored.")

    if commodities is None:
        raise IOError("ERR#0076: commodities not found or unable to retrieve.")

    commodity = commodity.strip().lower()
    commodity_key = unidecode.unidecode(commodity)

    if commodity_key not in [unidecode.unidecode(value.lower()) for value in commodities['name'].tolist()]:
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    if country is None:
        found_commodities = commodities[commodities['name'].str.lower() == commodity]

        if len(found_commodities) > 1:
            msg = "Note that the displayed commodity data can differ depending on the country. " \
                "If you want to retrieve " + commodity + " data from either " + \
                " or ".join(found_commodities['country'].tolist()) + ", specify the country parameter."
            warnings.warn(msg, Warning)

        del found_commodities
    else:
        country_key = unidecode.unidecode(country.lower())

        if country_key not in commodities['country'].unique().tolist():
            raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

        commodities = commodities[commodities['country'] == country_key]

    # Prefer an exact (accent-sensitive) match; otherwise fall back to the
    # accent-insensitive comparison used by the validation above.
    matches = commodities['name'].str.lower() == commodity
    if not matches.any():
        normalized_names = [unidecode.unidecode(value.lower()) for value in commodities['name'].tolist()]
        matches = pd.Series(normalized_names, index=commodities.index, dtype=object) == commodity_key

    # The commodity may exist overall but not for the requested country; an
    # all-False mask would make idxmax() silently return an unrelated row.
    if not matches.any():
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    row_label = matches.idxmax()

    full_name = commodities.loc[row_label, 'full_name']
    id_ = commodities.loc[row_label, 'id']
    name = commodities.loc[row_label, 'name']
    currency = commodities.loc[row_label, 'currency']

    header = full_name + ' Historical Data'

    # One entry per chunk that returned data: a list of row dicts when
    # as_json is True, a DataFrame otherwise.
    final = []

    for interval_counter, span in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": span['start'],
            "end_date": span['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        url = "https://www.investing.com/instruments/HistoricalDataAjax"

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError("ERR#0004: data retrieval error while scraping.")

        result = []

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                else:
                    raise IndexError("ERR#0080: commodity information unavailable or not found.")
            else:
                data_flag = True

            if data_flag is not True:
                continue

            info = [cell.get('data-real-value') for cell in cells]

            commodity_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            commodity_close = float(info[1].replace(',', ''))
            commodity_open = float(info[2].replace(',', ''))
            commodity_high = float(info[3].replace(',', ''))
            commodity_low = float(info[4].replace(',', ''))

            commodity_volume = int(info[5])

            result.append(Data(commodity_date, commodity_open, commodity_high, commodity_low,
                               commodity_close, commodity_volume, currency, None))

        if data_flag is True:
            # Rows are requested newest-first ("sort_ord": "DESC"); flip them for ascending output.
            if order in ['ascending', 'asc']:
                result = result[::-1]

            if as_json is True:
                final.append([value.commodity_as_json() for value in result])
            else:
                df = pd.DataFrame.from_records([value.commodity_to_dict() for value in result])
                df.set_index('Date', inplace=True)

                final.append(df)

    if not final:
        # Every request came back empty: report it the documented way instead of
        # failing on final[0] / pd.concat([]).
        raise IndexError("ERR#0080: commodity information unavailable or not found.")

    # Chunks were collected oldest-first; for descending output the newest chunk goes first.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [row for chunk in final for row in chunk]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#12
0
def get_commodity_recent_data(commodity, country=None, as_json=False, order='ascending', interval='Daily'):
    """
    Retrieve the recent historical data of a commodity from Investing.com.

    The commodity is looked up in the packaged `commodities.csv` listing (optionally restricted to one country),
    its recent data is requested from Investing.com's `HistoricalDataAjax` endpoint and every row of the returned
    HTML table is parsed. The result is returned either as a :obj:`pandas.DataFrame` indexed by date or as a
    JSON string, ordered ascending or descending.

    Args:
        commodity (:obj:`str`): name of the commodity to retrieve recent data from.
        country (:obj:`str`, optional):
            name of the country to retrieve the commodity data from (if there is more than one country that
            provides data from the same commodity). When omitted and several rows match, a warning is issued
            and the first matching row is used.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a JSON :obj:`str` if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending`/`asc` or `descending`/`desc`.
        interval (:obj:`str`, optional):
            historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            With the default arguments the returned data will look like::

                Date || Open | High | Low | Close | Volume | Currency 
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx 

            but in case that as_json parameter was defined as True, then the output will be::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid.
        FileNotFoundError: raised if the packaged commodities file does not exist.
        IOError: raised if the commodities listing could not be loaded.
        RuntimeError:
            raised if the commodity or the country is not listed, if the commodity is not listed for the
            requested country, or if the response contains no data table rows.
        ConnectionError: raised if the POST request to Investing.com does not return a 200 status code.
        IndexError: raised if Investing.com reports that no results were found.

    Examples:
        >>> investpy.get_commodity_recent_data(commodity='gold')
                          Open    High     Low   Close  Volume Currency
            Date                                                       
            2019-10-25  1506.4  1520.9  1503.1  1505.3  368743      USD
            2019-10-28  1507.4  1510.8  1492.3  1495.8  318126      USD
            2019-10-29  1494.3  1497.1  1485.6  1490.7  291980      USD
            2019-10-30  1490.5  1499.3  1483.1  1496.7  353638      USD
            2019-10-31  1498.8  1516.7  1496.0  1514.8  390013      USD

    """

    # ---- argument validation -------------------------------------------------------------------------------
    if not commodity or not isinstance(commodity, str):
        raise ValueError("ERR#0078: commodity parameter is mandatory and must be a valid commodity name.")

    if country is not None and not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError("ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError("ERR#0003: order argument can just be ascending (asc) or descending (desc), str type.")

    if not interval or not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError("ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'.")

    # ---- load the packaged commodities listing -------------------------------------------------------------
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'commodities', 'commodities.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0075: commodities file not found or errored.")

    commodities = pd.read_csv(pkg_resources.resource_filename(resource_package, resource_path))

    # Defensive check kept from the original implementation.
    if commodities is None:
        raise IOError("ERR#0076: commodities not found or unable to retrieve.")

    commodity = commodity.strip().lower()
    target = unidecode.unidecode(commodity)

    if target not in [unidecode.unidecode(value.lower()) for value in commodities['name'].tolist()]:
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    if country is not None:
        normalized_country = unidecode.unidecode(country.lower())

        if normalized_country not in commodities['country'].unique().tolist():
            raise RuntimeError("ERR#0034: country " + country.lower() + " not found, check if it is correct.")

        commodities = commodities[commodities['country'] == normalized_country]

    # Match with the same accent-insensitive normalization used by the validation above, and only AFTER the
    # country filter. An all-False mask would make idxmax() return the first row of the listing, i.e. the
    # data of an unrelated commodity would be fetched silently, so the no-match case is rejected explicitly.
    matches = commodities['name'].apply(lambda value: unidecode.unidecode(value.lower())) == target

    if not matches.any():
        raise RuntimeError("ERR#0079: commodity " + commodity + " not found, check if it is correct.")

    if country is None and int(matches.sum()) > 1:
        msg = "Note that the displayed commodity data can differ depending on the country. " \
            "If you want to retrieve " + commodity + " data from either " + \
            " or ".join(commodities.loc[matches, 'country'].tolist()) + ", specify the country parameter."
        warnings.warn(msg, Warning)

    # idxmax() on a boolean mask yields the label of the first matching row.
    row_label = matches.idxmax()

    full_name = commodities.loc[row_label, 'full_name']
    id_ = commodities.loc[row_label, 'id']
    name = commodities.loc[row_label, 'name']
    currency = commodities.loc[row_label, 'currency']

    # ---- request the data ----------------------------------------------------------------------------------
    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": full_name + ' Historical Data',
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) + ", try again later.")

    # ---- parse the returned HTML table ---------------------------------------------------------------------
    root_ = fromstring(req.text)
    table_rows = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not table_rows:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = []

    for table_row in table_rows:
        cells = table_row.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError("ERR#0080: commodity information unavailable or not found.")

        # Cell order is: date (epoch seconds), close, open, high, low, volume.
        info = [cell.get('data-real-value') for cell in cells]

        # Round-trip through the date string to drop the time-of-day part.
        # NOTE(review): fromtimestamp() interprets the epoch value in the local timezone - confirm this is
        # the intended reading of Investing.com's `data-real-value`.
        commodity_date = datetime.strptime(str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

        commodity_close = float(info[1].replace(',', ''))
        commodity_open = float(info[2].replace(',', ''))
        commodity_high = float(info[3].replace(',', ''))
        commodity_low = float(info[4].replace(',', ''))

        commodity_volume = int(info[5])

        result.append(Data(commodity_date, commodity_open, commodity_high, commodity_low,
                           commodity_close, commodity_volume, currency))

    # Rows are requested in descending date order ("sort_ord": "DESC"), so ascending output needs a reversal.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json is True:
        json_ = {
            'name': name,
            'recent':
                [value.commodity_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records([value.commodity_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#13
0
def get_etf_historical_data(etf,
                            country,
                            from_date,
                            to_date,
                            as_json=False,
                            order='ascending',
                            debug=False):
    """
    Retrieve the historical data of an `etf` from Investing.com on the introduced date range.

    The etf is looked up in the packaged `etfs.csv` listing restricted to `country`. Date ranges whose start
    and end years are more than 20 years apart are split into consecutive windows of 20 years, one POST
    request is sent to the `HistoricalDataAjax` endpoint per window, and the rows of all windows are merged
    into a single result, returned either as a :obj:`pandas.DataFrame` or as a JSON string with `ascending`
    or `descending` order.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or JSON :obj:`str`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            The historical data of the specified etf for the whole requested range.

            With the default arguments the returned data will look like::

                date || open | high | low | close | currency | exchange
                -----||--------------------------------------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx | xxxxxxxx 

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x,
                            exchange: x,
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        FileNotFoundError: raised if the packaged etfs file does not exist.
        IOError: raised if the etfs listing could not be loaded.
        RuntimeError:
            raised if the country or the etf is not listed, or if a response contains no data table rows.
        ConnectionError: raised if a POST request does not return a 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency Exchange
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR   Madrid
            2011-12-08  23.53  23.60  23.15  23.04      EUR   Madrid
            2011-12-09  23.36  23.60  23.36  23.62      EUR   Madrid
            2011-12-12  23.15  23.26  23.00  22.88      EUR   Madrid
            2011-12-13  22.88  22.88  22.88  22.80      EUR   Madrid

    """

    # ---- argument validation -------------------------------------------------------------------------------
    if not etf:
        raise ValueError(
            "ERR#0031: etf parameter is mandatory and must be a valid etf name."
        )

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # Each date is parsed once; the parsed value doubles as the format validation.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # ---- split the range into request windows of at most 20 years ------------------------------------------
    windows = []

    while end_date.year - start_date.year > 20:
        window_end = start_date.replace(year=start_date.year + 20)

        windows.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': window_end.strftime('%d/%m/%Y'),
        })

        start_date = window_end

    windows.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    # ---- load the packaged etfs listing and locate the etf -------------------------------------------------
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    etfs = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    # Defensive check kept from the original implementation.
    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    normalized_country = unidecode.unidecode(country.lower())

    if normalized_country not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    etfs = etfs[etfs['country'] == normalized_country]

    etf = etf.strip().lower()

    # The lookup uses the same accent-insensitive normalization as the validation. With an exact-match
    # lookup a name differing only by accents would pass validation, produce an all-False mask and make
    # idxmax() silently select the first etf of the country.
    matches = etfs['name'].apply(
        lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(etf)

    if not matches.any():
        raise RuntimeError("ERR#0019: etf " + str(etf) + " not found in " +
                           str(country.lower()) + ", check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = debug is False

    logger.info('Searching introduced etf on Investing.com')

    if int(matches.sum()) > 1:
        warnings.warn(
            'Note that the displayed information can differ depending on the stock exchange.',
            Warning)

    # idxmax() on a boolean mask yields the label of the first matching row.
    row_label = matches.idxmax()

    symbol = etfs.loc[row_label, 'symbol']
    id_ = etfs.loc[row_label, 'id']
    name = etfs.loc[row_label, 'name']
    etf_currency = etfs.loc[row_label, 'currency']

    logger.info(str(etf) + ' found on Investing.com')

    def _to_float(text):
        # Values use '.' as thousands separator and ',' as decimal mark (es.investing.com formatting).
        return float(text.replace('.', '').replace(',', '.'))

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    # Rows of every window, merged in ascending date order.
    collected = []
    # Consecutive windows share their boundary day, so that day is requested twice; remember the dates
    # already collected to avoid emitting it twice.
    seen_dates = set()

    last_window = len(windows)

    # ---- request and parse every window --------------------------------------------------------------------
    for position, window in enumerate(windows, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": window['start'],
            "end_date": window['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        logger.info('Request sent to Investing.com!')

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        logger.info('Request to Investing.com data succeeded with code ' +
                    str(req.status_code) + '!')

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        logger.info('Data parsing process starting...')

        window_rows = []

        for elements_ in path_:
            info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

            if info[0] == 'No se encontraron resultados':
                # An empty window is tolerated unless it is the last one.
                if position < last_window:
                    continue

                raise IndexError(
                    "ERR#0010: etf information unavailable or not found."
                )

            # Cell order is: date (dd.mm.yyyy), close, open, high, low.
            etf_date = datetime.datetime.strptime(
                info[0].replace('.', '-'), '%d-%m-%Y')
            etf_close = _to_float(info[1])
            etf_open = _to_float(info[2])
            etf_high = _to_float(info[3])
            etf_low = _to_float(info[4])

            window_rows.append(
                (etf_date,
                 Data(etf_date, etf_open, etf_high, etf_low, etf_close,
                      None, etf_currency)))

        # The server returns each window in descending date order ("sort_ord": "DESC"); windows are
        # visited chronologically, so appending each reversed window keeps `collected` ascending overall.
        for etf_date, row in reversed(window_rows):
            if etf_date in seen_dates:
                continue

            seen_dates.add(etf_date)
            collected.append(row)

    logger.info('Data parsing process finished...')

    if not collected:
        raise IndexError(
            "ERR#0010: etf information unavailable or not found.")

    # Ordering is applied once over the merged rows so that it also holds across window boundaries.
    if order in ['descending', 'desc']:
        collected.reverse()

    if as_json is True:
        # All windows are serialized, not only the first one.
        json_ = {
            'name': name,
            'historical': [value.etf_as_json() for value in collected]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.etf_to_dict() for value in collected])
    df.set_index('Date', inplace=True)

    return df
示例#14
0
def get_currency_cross_recent_data(currency_cross,
                                   as_json=False,
                                   order='ascending',
                                   debug=False):
    """
    Retrieve the recent historical data of a `currency_cross` as indexed in Investing.com.

    The currency cross is looked up in the packaged `currency_crosses.csv` listing, its recent data is
    requested with a POST to the `HistoricalDataAjax` endpoint and every row of the returned HTML table is
    parsed. The result is returned either as a :obj:`pandas.DataFrame` or as a JSON string, with
    `ascending` or `descending` order.

    Args:
        currency_cross (:obj:`str`): name of the currency_cross to retrieve recent historical data from.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or JSON :obj:`str`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, `True` or `False`, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`str`:
            The recent data of the specified currency_cross. The currency reported is the value of the
            `second` column of the listing for the matched cross.

            With the default arguments the returned data will look like::

                date || open | high | low | close | volume | currency
                -----||------------------------------------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    recent: [
                        dd/mm/yyyy: {
                            'open': x,
                            'high': x,
                            'low': x,
                            'close': x,
                            'volume': x,
                            'currency' : x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if any of the introduced arguments was not valid or errored.
        FileNotFoundError: raised if the packaged currency_crosses file does not exist.
        IOError: raised if the currency_crosses listing could not be loaded.
        RuntimeError:
            raised if the introduced currency_cross does not match any of the listed ones, or if the
            response contains no data table rows.
        ConnectionError: raised if the POST request did not return a 200 status code.
        IndexError: raised if currency_cross information was unavailable or not found.

    Examples:
        >>> investpy.get_currency_cross_recent_data(currency_cross='EUR/USD')
                          Open    High     Low   Close  Volume Currency
            Date
            2019-08-27  1.1101  1.1116  1.1084  1.1091       0      USD
            2019-08-28  1.1090  1.1099  1.1072  1.1078       0      USD
            2019-08-29  1.1078  1.1093  1.1042  1.1057       0      USD
            2019-08-30  1.1058  1.1062  1.0963  1.0991       0      USD
            2019-09-02  1.0990  1.1000  1.0958  1.0968       0      USD

    """

    # ---- argument validation -------------------------------------------------------------------------------
    if not currency_cross or not isinstance(currency_cross, str):
        raise ValueError(
            "ERR#0052: currency_cross param is mandatory and should be a str.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # ---- load the packaged listing and locate the currency cross -------------------------------------------
    resource_package = 'investpy'
    resource_path = '/'.join(
        ('resources', 'currency_crosses', 'currency_crosses.csv'))
    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError(
            "ERR#0060: currency_crosses file not found or errored.")

    currency_crosses = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    # Defensive check kept from the original implementation.
    if currency_crosses is None:
        raise IOError(
            "ERR#0050: currency_crosses not found or unable to retrieve.")

    currency_cross = currency_cross.strip().lower()

    # One accent-insensitive mask serves both the validation and the row lookup, so a name accepted by
    # the validation can never produce an all-False lookup mask (on which idxmax() would silently return
    # the first row of the listing).
    matches = currency_crosses['name'].apply(
        lambda value: unidecode.unidecode(value.lower())) == unidecode.unidecode(currency_cross)

    if not matches.any():
        raise RuntimeError("ERR#0054: the introduced currency_cross " +
                           str(currency_cross) + " does not exists.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = debug is False

    logger.info('Searching introduced currency_cross on Investing.com')

    # idxmax() on a boolean mask yields the label of the first matching row.
    row_label = matches.idxmax()

    id_ = currency_crosses.loc[row_label, 'id']
    name = currency_crosses.loc[row_label, 'name']
    currency = currency_crosses.loc[row_label, 'second']

    logger.info(str(currency_cross) + ' found on Investing.com')

    # ---- request the data ----------------------------------------------------------------------------------
    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": "Datos históricos " + name,
        "interval_sec": "Daily",
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": user_agent.get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    logger.info('Request sent to Investing.com!')

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    logger.info('Request to Investing.com data succeeded with code ' +
                str(req.status_code) + '!')

    # ---- parse the returned HTML table ---------------------------------------------------------------------
    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    def _to_float(text):
        # Values use '.' as thousands separator and ',' as decimal mark (es.investing.com formatting).
        return float(text.replace('.', '').replace(',', '.'))

    # Volume suffixes and their multipliers, checked in this order.
    volume_factors = (('K', 1e3), ('M', 1e6), ('B', 1e9))

    logger.info('Data parsing process starting...')

    result = []

    for elements_ in path_:
        info = [nested_.text_content() for nested_ in elements_.xpath(".//td")]

        if info[0] == 'No se encontraron resultados':
            raise IndexError(
                "ERR#0055: currency_cross information unavailable or not found."
            )

        # Cell order is: date (dd.mm.yyyy), close, open, high, low, volume.
        currency_cross_date = datetime.datetime.strptime(
            info[0].replace('.', '-'), '%d-%m-%Y')
        currency_cross_close = _to_float(info[1])
        currency_cross_open = _to_float(info[2])
        currency_cross_high = _to_float(info[3])
        currency_cross_low = _to_float(info[4])

        # The volume stays 0 unless the cell carries one of the known magnitude suffixes.
        currency_cross_volume = 0

        for suffix, factor in volume_factors:
            if suffix in info[5]:
                currency_cross_volume = int(
                    _to_float(info[5].replace(suffix, '')) * factor)
                break

        result.append(
            Data(currency_cross_date, currency_cross_open,
                 currency_cross_high, currency_cross_low,
                 currency_cross_close, currency_cross_volume, currency))

    # Rows are requested in descending date order ("sort_ord": "DESC"), so ascending output needs a reversal.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    logger.info('Data parsing process finished...')

    if as_json is True:
        json_ = {
            'name': name,
            'recent': [value.currency_cross_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.currency_cross_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#15
0
def get_bond_recent_data(bond,
                         as_json=False,
                         order='ascending',
                         interval='Daily'):
    """
    Retrieve the recent historical data of a bond from Investing.com.

    The bond is looked up by name in the packaged ``bonds.csv`` resource, its
    recent OHLC rows are requested from the Investing.com historical data
    endpoint, and the parsed rows are returned either as a
    :obj:`pandas.DataFrame` indexed by date or as a JSON string.

    Args:
        bond (:obj:`str`): name of the bond to retrieve recent historical data from.
            The name is matched case-insensitively; if no exact match exists an
            accent-insensitive match is used instead.
        as_json (:obj:`bool`, optional):
            output format, a :obj:`pandas.DataFrame` if False and a :obj:`json` string if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either `ascending` (`asc`) or `descending` (`desc`).
        interval (:obj:`str`, optional):
            historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The recent data of the specified bond, containing the open, high, low
            and close values.

            With the default parameters the result looks like::

                Date || Open | High | Low | Close
                -----||------|------|-----|-------
                xxxx || xxxx | xxxx | xxx | xxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    recent: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid.
        FileNotFoundError: raised if the bonds resource file was not found.
        IOError: raised if the bonds object could not be retrieved.
        RuntimeError: raised if the introduced bond did not match any of the existing ones,
            or if no data rows could be scraped from the response.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if bond historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_recent_data(bond='Argentina 3Y')
                          Open    High     Low   Close
            Date
            2019-09-23  52.214  52.214  52.214  52.214
            2019-09-24  52.323  52.323  52.323  52.323
            2019-09-25  52.432  52.432  52.432  52.432
            2019-09-26  52.765  52.765  52.765  52.765
            2019-09-27  52.876  52.876  52.876  52.876

    """

    if not bond:
        raise ValueError(
            "ERR#0066: bond parameter is mandatory and must be a valid bond name."
        )

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        bonds = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")

    if bonds is None:
        raise IOError(
            "ERR#0065: bonds object not found or unable to retrieve.")

    bond = bond.strip()
    bond = bond.lower()

    # Resolve the row with a single mask. An exact (lower-cased) match wins;
    # otherwise fall back to an accent-insensitive match. Using the same mask
    # for validation and lookup matters because idxmax() on an all-False mask
    # returns the first row, which would silently select the wrong bond.
    matches = bonds['name'].str.lower() == bond
    if not matches.any():
        matches = bonds['name'].apply(
            lambda value: unidecode.unidecode(value.lower())
        ) == unidecode.unidecode(bond)

    if not matches.any():
        raise RuntimeError("ERR#0068: bond " + bond +
                           " not found, check if it is correct.")

    row_label = matches.idxmax()

    id_ = bonds.loc[row_label, 'id']
    name = bonds.loc[row_label, 'name']
    full_name = bonds.loc[row_label, 'full_name']

    header = full_name + " Bond Yield Historical Data"

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0069: bond information unavailable or not found.")

        # Each cell carries its raw value in the data-real-value attribute;
        # the columns are read as: timestamp, close, open, high, low.
        info = [cell.get('data-real-value') for cell in cells]

        bond_date = datetime.fromtimestamp(int(info[0])).date()

        bond_close = float(info[1].replace(',', ''))
        bond_open = float(info[2].replace(',', ''))
        bond_high = float(info[3].replace(',', ''))
        bond_low = float(info[4].replace(',', ''))

        result.append(
            Data(bond_date, bond_open, bond_high, bond_low, bond_close,
                 None, None))

    # Rows are requested with sort_ord=DESC, so ascending output needs a reversal.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.bond_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.bond_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#16
0
def get_index_recent_data(index,
                          country,
                          as_json=False,
                          order='ascending',
                          interval='Daily'):
    """
    Retrieve the recent historical data of an index from Investing.com.

    The index is looked up by name among the indices of the given country in
    the packaged ``indices.csv`` resource, its recent rows are requested from
    the Investing.com historical data endpoint, and the parsed rows are
    returned either as a :obj:`pandas.DataFrame` indexed by date or as a JSON
    string, in `ascending` or `descending` order.

    Args:
        index (:obj:`str`): name of the index to retrieve recent historical data from.
            The name is matched case-insensitively; if no exact match exists an
            accent-insensitive match is used instead.
        country (:obj:`str`): name of the country from where the index is.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The recent data of the specified index, containing the open, high,
            low, close and volume values plus the currency of the index.

            With the default arguments the result looks like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    recent: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        FileNotFoundError: raised if the indices resource file was not found.
        IOError: raised if the indices object could not be retrieved.
        RuntimeError: raised if the country or the index does not match any of the listed ones,
            or if no data rows could be scraped from the response.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_recent_data(index='ibex 35', country='spain')
                           Open     High      Low    Close   Volume Currency
            Date
            2019-08-26  12604.7  12646.3  12510.4  12621.3  4770000      EUR
            2019-08-27  12618.3  12723.3  12593.6  12683.8  8230000      EUR
            2019-08-28  12657.2  12697.2  12585.1  12642.5  7300000      EUR
            2019-08-29  12637.2  12806.6  12633.8  12806.6  5650000      EUR
            2019-08-30  12767.6  12905.9  12756.9  12821.6  6040000      EUR

    """

    if not index:
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        indices = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    if indices is None:
        raise IOError("ERR#0037: indices not found or unable to retrieve.")

    # Surrounding whitespace is ignored, as get_etf_historical_data does.
    country_key = unidecode.unidecode(country.strip().lower())

    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip()
    index = index.lower()

    # Resolve the row with a single mask. An exact (lower-cased) match wins;
    # otherwise fall back to an accent-insensitive match. Using the same mask
    # for validation and lookup matters because idxmax() on an all-False mask
    # returns the first row, which would silently select the wrong index.
    matches = indices['name'].str.lower() == index
    if not matches.any():
        matches = indices['name'].apply(
            lambda value: unidecode.unidecode(value.lower())
        ) == unidecode.unidecode(index)

    if not matches.any():
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    row_label = matches.idxmax()

    full_name = indices.loc[row_label, 'full_name']
    id_ = indices.loc[row_label, 'id']
    name = indices.loc[row_label, 'name']
    index_currency = indices.loc[row_label, 'currency']

    header = full_name + ' Historical Data'

    params = {
        "curr_id": id_,
        "smlID": str(randint(1000000, 99999999)),
        "header": header,
        "interval_sec": interval,
        "sort_col": "date",
        "sort_ord": "DESC",
        "action": "historical_data"
    }

    head = {
        "User-Agent": get_random(),
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "text/html",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    req = requests.post(url, headers=head, data=params)

    if req.status_code != 200:
        raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                              ", try again later.")

    root_ = fromstring(req.text)
    path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

    if not path_:
        raise RuntimeError("ERR#0004: data retrieval error while scraping.")

    result = list()

    for elements_ in path_:
        cells = elements_.xpath(".//td")

        if cells[0].text_content() == 'No results found':
            raise IndexError(
                "ERR#0046: index information unavailable or not found.")

        # Each cell carries its raw value in the data-real-value attribute;
        # the columns are read as: timestamp, close, open, high, low, volume.
        info = [cell.get('data-real-value') for cell in cells]

        index_date = datetime.strptime(
            str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

        index_close = float(info[1].replace(',', ''))
        index_open = float(info[2].replace(',', ''))
        index_high = float(info[3].replace(',', ''))
        index_low = float(info[4].replace(',', ''))

        index_volume = int(info[5])

        result.append(
            Data(index_date, index_open, index_high, index_low,
                 index_close, index_volume, index_currency))

    # Rows are requested with sort_ord=DESC, so ascending output needs a reversal.
    if order in ['ascending', 'asc']:
        result = result[::-1]

    if as_json:
        json_ = {
            'name': name,
            'recent': [value.index_as_json() for value in result]
        }

        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.index_to_dict() for value in result])
    df.set_index('Date', inplace=True)

    return df
示例#17
0
def get_etf_historical_data(etf,
                            country,
                            from_date,
                            to_date,
                            stock_exchange=None,
                            as_json=False,
                            order='ascending',
                            interval='Daily'):
    """
    Retrieve the historical data of an etf from Investing.com for a date range.

    The etf is looked up by name among the etfs of the given country in the
    packaged ``etfs.csv`` resource. The date range is split into chunks of at
    most 19 years, each chunk is requested from the Investing.com historical
    data endpoint, and the parsed rows of all chunks are combined and returned
    either as a :obj:`pandas.DataFrame` indexed by date or as a JSON string,
    in `ascending` or `descending` order.

    Args:
        etf (:obj:`str`): name of the etf to retrieve historical data from.
        country (:obj:`str`): name of the country from where the etf is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        stock_exchange (:obj:`str`, optional):
            name of the stock exchange whose listing of the etf should be used. When left to None, the etf's
            default stock exchange is used if it belongs to the given country, otherwise the first listing
            found in that country is used.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The historical data of the specified etf, containing the open, high,
            low and close values plus the currency and the stock exchange.

            With the default arguments the result looks like::

                Date || Open | High | Low | Close | Currency | Exchange
                -----||------|------|-----|-------|----------|---------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx | xxxxxxxx

            and when `as_json=True` the output is::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            currency: x,
                            exchange: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the arguments is not valid or errored.
        FileNotFoundError: raised if the etfs resource file was not found.
        IOError: raised if the etfs object could not be retrieved.
        RuntimeError: raised if the country or the etf does not match any of the listed ones,
            or if no data rows could be scraped from a response.
        ConnectionError: raised if the POST request does not return a 200 status code.
        IndexError: raised if etf information was unavailable or not found.

    Examples:
        >>> investpy.get_etf_historical_data(etf='bbva accion dj eurostoxx 50', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency Exchange
            Date
            2011-12-07  23.70  23.70  23.70  23.62      EUR   Madrid
            2011-12-08  23.53  23.60  23.15  23.04      EUR   Madrid
            2011-12-09  23.36  23.60  23.36  23.62      EUR   Madrid
            2011-12-12  23.15  23.26  23.00  22.88      EUR   Madrid
            2011-12-13  22.88  22.88  22.88  22.80      EUR   Madrid

    """

    # Local import: only this function needs timedelta.
    from datetime import timedelta

    if not etf:
        raise ValueError(
            "ERR#0031: etf parameter is mandatory and must be a valid etf name."
        )

    if not isinstance(etf, str):
        raise ValueError("ERR#0030: etf argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if stock_exchange is not None and not isinstance(stock_exchange, str):
        raise ValueError(
            "ERR#0125: specified stock_exchange value is not valid, it should be a str."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if (not interval or not isinstance(interval, str)
            or interval not in ['Daily', 'Weekly', 'Monthly']):
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # Split the requested range into chunks spanning at most 19 years each.
    intervals = []

    while end_date.year - start_date.year > 19:
        try:
            chunk_end = start_date.replace(year=start_date.year + 19)
        except ValueError:
            # 29 Feb never exists 19 years later; clamp to 28 Feb.
            chunk_end = start_date.replace(year=start_date.year + 19, day=28)

        intervals.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        # timedelta rolls over month/year ends, unlike replace(day=day + 1).
        start_date = chunk_end + timedelta(days=1)

    intervals.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    interval_limit = len(intervals)

    data_flag = False

    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'etfs', 'etfs.csv'))
    if pkg_resources.resource_exists(resource_package, resource_path):
        etfs = pd.read_csv(
            pkg_resources.resource_filename(resource_package, resource_path))
    else:
        raise FileNotFoundError("ERR#0058: etfs file not found or errored.")

    if etfs is None:
        raise IOError("ERR#0009: etfs object not found or unable to retrieve.")

    country = unidecode.unidecode(country.strip().lower())

    if country not in get_etf_countries():
        raise RuntimeError("ERR#0034: country " + country +
                           " not found, check if it is correct.")

    etf = unidecode.unidecode(etf.strip().lower())

    # Default listing of the etf across all countries; must be taken before
    # the frame is narrowed down to the requested country.
    def_exchange = etfs.loc[((etfs['name'].str.lower() == etf) &
                             (etfs['def_stock_exchange'] == True)).idxmax()]

    etfs = etfs[etfs['country'].str.lower() == country]

    if etf not in etfs['name'].str.lower().tolist():
        raise RuntimeError("ERR#0019: etf " + etf +
                           " not found, check if it is correct.")

    etfs = etfs[etfs['name'].str.lower() == etf]

    if def_exchange['country'] != country:
        warnings.warn(
            'Selected country does not contain the default stock exchange of the introduced ETF. ' + \
            'Default country is: \"' + def_exchange['country'] + '\" and default stock_exchange: \"' + \
            def_exchange['stock_exchange'] + '\".',
            Warning
        )

        if stock_exchange:
            if stock_exchange.lower() not in etfs['stock_exchange'].str.lower(
            ).tolist():
                raise ValueError(
                    "ERR#0126: introduced stock_exchange value does not exists, leave this parameter to None to use default stock_exchange."
                )

            etf_exchange = etfs.loc[(etfs['stock_exchange'].str.lower() ==
                                     stock_exchange.lower()).idxmax(),
                                    'stock_exchange']
        else:
            # etfs is already restricted to the requested name and country.
            if len(etfs) > 1:
                warnings.warn(
                    'Note that the displayed information can differ depending on the stock exchange. Available stock_exchange' + \
                    ' values for \"' + country + '\" are: \"' + '\", \"'.join(etfs['stock_exchange']) + '\".',
                    Warning
                )

            etf_exchange = etfs.loc[(etfs['name'].str.lower() == etf).idxmax(),
                                    'stock_exchange']
    else:
        if stock_exchange:
            if stock_exchange.lower() not in etfs['stock_exchange'].str.lower(
            ).tolist():
                raise ValueError(
                    "ERR#0126: introduced stock_exchange value does not exists, leave this parameter to None to use default stock_exchange."
                )

            if def_exchange['stock_exchange'].lower() != stock_exchange.lower(
            ):
                warnings.warn(
                    'Selected stock_exchange is not the default one of the introduced ETF. ' + \
                    'Default country is: \"' + def_exchange['country'] + '\" and default stock_exchange: \"' + \
                    def_exchange['stock_exchange'].lower() + '\".',
                    Warning
                )

            etf_exchange = etfs.loc[(etfs['stock_exchange'].str.lower() ==
                                     stock_exchange.lower()).idxmax(),
                                    'stock_exchange']
        else:
            etf_exchange = def_exchange['stock_exchange']

    # Row of the selected listing (name + stock exchange).
    listing = ((etfs['name'].str.lower() == etf) &
               (etfs['stock_exchange'].str.lower() == etf_exchange.lower())
               ).idxmax()

    symbol = etfs.loc[listing, 'symbol']
    id_ = etfs.loc[listing, 'id']
    name = etfs.loc[listing, 'name']
    etf_currency = etfs.loc[listing, 'currency']

    # One entry per chunk that returned data, in chronological chunk order:
    # a list of JSON records when as_json is True, a DataFrame otherwise.
    final = list()

    header = symbol + ' Historical Data'

    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    for interval_counter, chunk in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = list()

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0010: etf information unavailable or not found."
                )

            data_flag = True

            # Each cell carries its raw value in the data-real-value attribute;
            # the columns are read as: timestamp, close, open, high, low.
            info = [cell.get('data-real-value') for cell in cells]

            etf_date = datetime.strptime(
                str(datetime.fromtimestamp(int(info[0])).date()),
                '%Y-%m-%d')

            etf_close = float(info[1].replace(',', ''))
            etf_open = float(info[2].replace(',', ''))
            etf_high = float(info[3].replace(',', ''))
            etf_low = float(info[4].replace(',', ''))

            result.append(
                Data(etf_date, etf_open, etf_high, etf_low, etf_close,
                     None, etf_currency, etf_exchange))

        if data_flag is True:
            # Rows are requested with sort_ord=DESC, so ascending output
            # needs a reversal within the chunk.
            if order in ['ascending', 'asc']:
                result = result[::-1]

            if as_json:
                final.append([value.etf_as_json() for value in result])
            else:
                df = pd.DataFrame.from_records(
                    [value.etf_to_dict() for value in result])
                df.set_index('Date', inplace=True)

                final.append(df)

    if not final:
        # No chunk produced any row (e.g. every response body was empty).
        raise IndexError(
            "ERR#0010: etf information unavailable or not found.")

    # Chunks were collected oldest-first; flip them so that descending output
    # is descending across chunk boundaries as well.
    if order in ['descending', 'desc']:
        final.reverse()

    if as_json:
        json_ = {
            'name': name,
            'historical':
            [record for records in final for record in records]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#18
0
def get_index_historical_data(index,
                              country,
                              from_date,
                              to_date,
                              as_json=False,
                              order='ascending',
                              interval='Daily'):
    """
    This function retrieves historical data of the introduced `index` (from the specified country, note that both
    index and country should match since if the introduced index is not listed in the indices of that country, the
    function will raise an error). The retrieved historical data are the OHLC values plus the Volume and the Currency
    in which those values are specified, from the introduced date range if valid. The resulting data can either be
    returned as a :obj:`pandas.DataFrame` or as a :obj:`json` string.

    Date ranges spanning more than 20 calendar years are split into several requests; the rows of every request
    are merged into the returned object, for both output formats.

    Args:
        index (:obj:`str`): name of the index to retrieve historical data from.
        country (:obj:`str`): name of the country from where the index is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            optional argument to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            historical data from the specified index via argument. The dataset contains the open, high, low, close and
            volume values for the selected index on market days, additionally the currency in which those values are
            specified is returned.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised if there was an argument error.
        IOError: raised (as :obj:`FileNotFoundError`) if the indices file was not found.
        RuntimeError: raised if the introduced country or index does not match any of the listed ones, or if the
            response does not contain the expected table.
        ConnectionError: raised if the POST request does not return 200 status code.
        IndexError: raised if index information was unavailable or not found.

    Examples:
        >>> investpy.get_index_historical_data(index='ibex 35', country='spain', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close    Volume Currency
            Date
            2018-01-02  15128.2  15136.7  14996.6  15096.8  10340000      EUR
            2018-01-03  15145.0  15186.9  15091.9  15106.9  12800000      EUR
            2018-01-04  15105.5  15368.7  15103.7  15368.7  17070000      EUR
            2018-01-05  15353.9  15407.5  15348.6  15398.9  11180000      EUR
            2018-01-08  15437.1  15448.7  15344.0  15373.3  12890000      EUR

    """

    # ---- argument validation (order of the checks is part of the contract) ----
    if not index or not isinstance(index, str):
        raise ValueError(
            "ERR#0047: index param is mandatory and should be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect data format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    # ---- split the requested range into chunks of at most `max_years` years ----
    max_years = 20
    date_chunks = []

    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year.
            chunk_end = start_date.replace(year=start_date.year + max_years,
                                           day=28)

        date_chunks.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        start_date = chunk_end

    date_chunks.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    # ---- resolve the index in the packaged listing ----
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'indices', 'indices.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0059: indices file not found or errored.")

    indices = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_index_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    indices = indices[indices['country'] == country_key]

    index = index.strip().lower()

    listed_names = indices['name'].tolist()
    plain_names = [unidecode.unidecode(value.lower()) for value in listed_names]
    plain_index = unidecode.unidecode(index)

    if plain_index not in plain_names:
        raise RuntimeError("ERR#0045: index " + index +
                           " not found, check if it is correct.")

    # Prefer an exact (lower-cased) match; otherwise fall back to the
    # accent-insensitive match that the validation above accepted. Looking the
    # row up with a comparison that matches nothing would silently select the
    # first row of the listing.
    lowered_names = [value.lower() for value in listed_names]

    if index in lowered_names:
        position = lowered_names.index(index)
    else:
        position = plain_names.index(plain_index)

    full_name = indices['full_name'].iloc[position]
    id_ = indices['id'].iloc[position]
    name = indices['name'].iloc[position]
    index_currency = indices['currency'].iloc[position]

    header = full_name + ' Historical Data'

    # ---- request and parse every chunk ----
    url = "https://www.investing.com/instruments/HistoricalDataAjax"
    descending = order in ['descending', 'desc']
    total_chunks = len(date_chunks)

    final = []

    for chunk_number, chunk in enumerate(date_chunks, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []
        data_flag = False

        for row in path_:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if chunk_number < total_chunks:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0046: index information unavailable or not found.")

            data_flag = True

            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts using the machine's local
            # timezone; confirm the site's timestamps cannot shift the day.
            index_date = datetime.strptime(
                str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            index_close = float(info[1].replace(',', ''))
            index_open = float(info[2].replace(',', ''))
            index_high = float(info[3].replace(',', ''))
            index_low = float(info[4].replace(',', ''))

            index_volume = int(info[5])

            result.append(
                Data(index_date, index_open, index_high, index_low,
                     index_close, index_volume, index_currency))

        if not data_flag:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if not descending:
            result = result[::-1]

        if as_json is True:
            final.append([value.index_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records(
                [value.index_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    if not final:
        raise IndexError(
            "ERR#0046: index information unavailable or not found.")

    # Chunks were requested oldest-first; flip them so that a descending
    # result is newest-first across chunk boundaries too.
    if descending:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for part in final for entry in part]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#19
0
def get_crypto_historical_data(crypto,
                               from_date,
                               to_date,
                               as_json=False,
                               order='ascending',
                               interval='Daily'):
    """
    This function retrieves historical data from the introduced crypto from Investing.com. The historical data
    of the introduced crypto will be retrieved and returned as a :obj:`pandas.DataFrame` if the parameters are valid
    and the request to Investing.com succeeds. Note that additionally some optional parameters can be specified: as_json
    and order, which let the user decide if the data is going to be returned as a :obj:`json` or not, and if the historical
    data is going to be ordered ascending or descending (where the index is the date), respectively.

    Date ranges spanning more than 19 calendar years are split into several requests; the rows of every request
    are merged into the returned object, for both output formats.

    Args:
        crypto (:obj:`str`): name of the crypto currency to retrieve data from.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional): to define the order of the retrieved data which can either be ascending or descending.
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function can return either a :obj:`pandas.DataFrame` or a :obj:`json` string, containing the retrieved
            historical data of the specified crypto currency. The resulting dataframe contains the open, high,
            low, close and volume values for the selected crypto on market days and the currency in which those values are presented.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Volume | Currency
                -----||------|------|-----|-------|--------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: 'dd/mm/yyyy',
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored, or when the selected
            crypto currency is flagged as unavailable in the listing.
        IOError: raised (as :obj:`FileNotFoundError`) if the cryptos file was not found.
        RuntimeError: raised if the introduced crypto currency name was not found or did not match any of the
            existing ones, or if the response does not contain the expected table.
        ConnectionError: raised if the POST request to Investing.com does not return 200 status code.
        IndexError: raised if crypto historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_crypto_historical_data(crypto='bitcoin', from_date='01/01/2018', to_date='01/01/2019')
                           Open     High      Low    Close  Volume Currency
            Date
            2018-01-01  13850.5  13921.5  12877.7  13444.9   78425      USD
            2018-01-02  13444.9  15306.1  12934.2  14754.1  137732      USD
            2018-01-03  14754.1  15435.0  14579.7  15156.6  106543      USD
            2018-01-04  15156.5  15408.7  14244.7  15180.1  110969      USD
            2018-01-05  15180.1  17126.9  14832.4  16954.8  141960      USD

    """

    # ---- argument validation (order of the checks is part of the contract) ----
    if not crypto:
        raise ValueError(
            "ERR#0083: crypto parameter is mandatory and must be a valid crypto name."
        )

    if not isinstance(crypto, str):
        raise ValueError("ERR#0084: crypto argument needs to be a str.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'."
        )

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # ---- split the requested range into chunks of at most `max_years` years ----
    max_years = 19
    date_chunks = []

    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year, which
            # is always the case when stepping 19 years from a leap day.
            chunk_end = start_date.replace(year=start_date.year + max_years,
                                           day=28)

        date_chunks.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        start_date = chunk_end

    date_chunks.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    # ---- resolve the crypto currency in the packaged listing ----
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'crypto', 'cryptos.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0081: cryptos file not found or errored.")

    cryptos = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    crypto = crypto.strip().lower()

    listed_names = cryptos['name'].tolist()
    plain_names = [unidecode.unidecode(value.lower()) for value in listed_names]
    plain_crypto = unidecode.unidecode(crypto)

    if plain_crypto not in plain_names:
        raise RuntimeError("ERR#0085: crypto currency: " + crypto +
                           ", not found, check if it is correct.")

    # Prefer an exact (lower-cased) match; otherwise fall back to the
    # accent-insensitive match that the validation above accepted. Looking the
    # row up with a comparison that matches nothing would silently select the
    # first row of the listing.
    lowered_names = [value.lower() for value in listed_names]

    if crypto in lowered_names:
        position = lowered_names.index(crypto)
    else:
        position = plain_names.index(plain_crypto)

    status = cryptos['status'].iloc[position]

    if status == 'unavailable':
        raise ValueError(
            "ERR#0086: the selected crypto currency is not available for retrieval in Investing.com."
        )

    crypto_name = cryptos['name'].iloc[position]
    crypto_id = cryptos['id'].iloc[position]
    crypto_currency = cryptos['currency'].iloc[position]

    header = crypto_name + ' Historical Data'

    # ---- request and parse every chunk ----
    url = "https://www.investing.com/instruments/HistoricalDataAjax"
    descending = order in ['descending', 'desc']
    total_chunks = len(date_chunks)

    final = []

    for chunk_number, chunk in enumerate(date_chunks, start=1):
        params = {
            "curr_id": crypto_id,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []
        data_flag = False

        for row in path_:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if chunk_number < total_chunks:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0087: crypto information unavailable or not found.")

            data_flag = True

            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts using the machine's local
            # timezone; confirm the site's timestamps cannot shift the day.
            crypto_date = datetime.strptime(
                str(datetime.fromtimestamp(int(info[0])).date()), '%Y-%m-%d')

            crypto_close = float(info[1].replace(',', ''))
            crypto_open = float(info[2].replace(',', ''))
            crypto_high = float(info[3].replace(',', ''))
            crypto_low = float(info[4].replace(',', ''))

            crypto_volume = int(info[5])

            result.append(
                Data(crypto_date, crypto_open, crypto_high, crypto_low,
                     crypto_close, crypto_volume, crypto_currency))

        if not data_flag:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if not descending:
            result = result[::-1]

        if as_json is True:
            final.append([value.crypto_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records(
                [value.crypto_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    if not final:
        raise IndexError(
            "ERR#0087: crypto information unavailable or not found.")

    # Chunks were requested oldest-first; flip them so that a descending
    # result is newest-first across chunk boundaries too.
    if descending:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': crypto_name,
            'historical': [entry for part in final for entry in part]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#20
0
def get_fund_historical_data(fund,
                             country,
                             from_date,
                             to_date,
                             as_json=False,
                             order='ascending',
                             interval='Daily'):
    """
    This function retrieves historical data from the introduced `fund` from Investing
    via Web Scraping on the introduced date range. The resulting data can either be
    returned as a :obj:`pandas.DataFrame` or as a :obj:`json` string with `ascending` or `descending` order.

    Date ranges spanning more than 20 calendar years are split into several requests; the rows of every request
    are merged into the returned object, for both output formats.

    Args:
        fund (:obj:`str`): name of the fund to retrieve historical data from.
        country (:obj:`str`): name of the country from where the introduced fund is.
        from_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, from where data is going to be retrieved.
        to_date (:obj:`str`): date as `str` formatted as `dd/mm/yyyy`, until where data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data (:obj:`pandas.DataFrame` or :obj:`json`).
        order (:obj:`str`, optional):
            optional argument to define the order of the retrieved data (`ascending`, `asc` or `descending`, `desc`).
        interval (:obj:`str`, optional):
            value to define the historical data interval to retrieve, by default `Daily`, but it can also be `Weekly` or `Monthly`.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            The function returns either a :obj:`pandas.DataFrame` or a :obj:`json` string containing the retrieved
            historical data from the specified fund via argument. The dataset contains the open, high, low and close
            values for the selected fund on market days.

            The returned data in case we use default arguments will look like::

                Date || Open | High | Low | Close | Currency
                -----||------|------|-----|-------|----------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        {
                            date: dd/mm/yyyy,
                            open: x,
                            high: x,
                            low: x,
                            close: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: argument error.
        IOError: raised (as :obj:`FileNotFoundError`) if the funds file was not found.
        RuntimeError: introduced country or fund does not match any of the listed ones, or the response does not
            contain the expected table.
        ConnectionError: if the POST request does not return 200 status code.
        IndexError: if fund information was unavailable or not found.

    Examples:
        >>> investpy.get_fund_historical_data(fund='bbva multiactivo conservador pp', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close Currency
            Date
            2018-02-15  1.105  1.105  1.105  1.105      EUR
            2018-02-16  1.113  1.113  1.113  1.113      EUR
            2018-02-17  1.113  1.113  1.113  1.113      EUR
            2018-02-18  1.113  1.113  1.113  1.113      EUR
            2018-02-19  1.111  1.111  1.111  1.111      EUR

    """

    # ---- argument validation (order of the checks is part of the contract) ----
    if not fund:
        raise ValueError(
            "ERR#0029: fund parameter is mandatory and must be a valid fund name."
        )

    if not isinstance(fund, str):
        raise ValueError("ERR#0028: fund argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(interval, str) or interval not in ['Daily', 'Weekly', 'Monthly']:
        raise ValueError(
            "ERR#0073: interval value should be a str type and it can just be either 'Daily', 'Weekly' or 'Monthly'."
        )

    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect start date format, it should be 'dd/mm/yyyy'."
        )

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # ---- split the requested range into chunks of at most `max_years` years ----
    max_years = 20
    date_chunks = []

    while end_date.year - start_date.year > max_years:
        try:
            chunk_end = start_date.replace(year=start_date.year + max_years)
        except ValueError:
            # 29 February has no counterpart in a non-leap target year.
            chunk_end = start_date.replace(year=start_date.year + max_years,
                                           day=28)

        date_chunks.append({
            'start': start_date.strftime('%m/%d/%Y'),
            'end': chunk_end.strftime('%m/%d/%Y'),
        })

        start_date = chunk_end

    date_chunks.append({
        'start': start_date.strftime('%m/%d/%Y'),
        'end': end_date.strftime('%m/%d/%Y'),
    })

    # ---- resolve the fund in the packaged listing ----
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'funds', 'funds.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0057: funds file not found or errored.")

    funds = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_fund_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    funds = funds[funds['country'] == country_key]

    fund = fund.strip().lower()

    listed_names = funds['name'].tolist()
    plain_names = [unidecode.unidecode(value.lower()) for value in listed_names]
    plain_fund = unidecode.unidecode(fund)

    if plain_fund not in plain_names:
        raise RuntimeError("ERR#0019: fund " + fund +
                           " not found, check if it is correct.")

    # Prefer an exact (lower-cased) match; otherwise fall back to the
    # accent-insensitive match that the validation above accepted. Looking the
    # row up with a comparison that matches nothing would silently select the
    # first row of the listing.
    lowered_names = [value.lower() for value in listed_names]

    if fund in lowered_names:
        position = lowered_names.index(fund)
    else:
        position = plain_names.index(plain_fund)

    symbol = funds['symbol'].iloc[position]
    id_ = funds['id'].iloc[position]
    name = funds['name'].iloc[position]
    fund_currency = funds['currency'].iloc[position]

    header = symbol + ' Historical Data'

    # ---- request and parse every chunk ----
    url = "https://www.investing.com/instruments/HistoricalDataAjax"
    descending = order in ['descending', 'desc']
    total_chunks = len(date_chunks)

    final = []

    # The chunk counter starts at 1 so that the last chunk can be told apart:
    # "No results found" there means the fund has no data for the request.
    for chunk_number, chunk in enumerate(date_chunks, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": chunk['start'],
            "end_date": chunk['end'],
            "interval_sec": interval,
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        result = []
        data_flag = False

        for row in path_:
            cells = row.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty chunk is tolerated unless it is the last one.
                if chunk_number < total_chunks:
                    data_flag = False
                    continue

                raise IndexError(
                    "ERR#0008: fund information unavailable or not found.")

            data_flag = True

            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts using the machine's local
            # timezone; confirm the site's timestamps cannot shift the day.
            fund_date = datetime.fromtimestamp(int(info[0])).date()

            fund_close = float(info[1].replace(',', ''))
            fund_open = float(info[2].replace(',', ''))
            fund_high = float(info[3].replace(',', ''))
            fund_low = float(info[4].replace(',', ''))

            result.append(
                Data(fund_date, fund_open, fund_high, fund_low, fund_close,
                     None, fund_currency))

        if not data_flag:
            continue

        # Rows are requested newest-first ("sort_ord": "DESC").
        if not descending:
            result = result[::-1]

        if as_json is True:
            final.append([value.fund_as_json() for value in result])
        else:
            df = pd.DataFrame.from_records(
                [value.fund_to_dict() for value in result])
            df.set_index('Date', inplace=True)

            final.append(df)

    if not final:
        raise IndexError(
            "ERR#0008: fund information unavailable or not found.")

    # Chunks were requested oldest-first; flip them so that a descending
    # result is newest-first across chunk boundaries too.
    if descending:
        final.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [entry for part in final for entry in part]
        }

        return json.dumps(json_, sort_keys=False)

    return pd.concat(final)
示例#21
0
def get_bond_historical_data(bond,
                             country,
                             from_date,
                             to_date,
                             as_json=False,
                             order='ascending',
                             debug=False):
    """
    Retrieve the historical data of a bond from Investing.com.

    The requested date range is split into windows of at most ~20 years, one HTTP request is sent per window,
    and the rows of every window are merged into a single result that is ordered as requested via `order`.
    The result is returned either as a :obj:`pandas.DataFrame` indexed by date or as a JSON string.

    Args:
        bond (:obj:`str`): name of the bond to retrieve historical data from.
        country (:obj:`str`): name of the country from where the bond is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either ascending (`ascending`/`asc`) or descending (`descending`/`desc`) by date.
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, either True or False, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            Either a :obj:`pandas.DataFrame` or a JSON string containing the retrieved historical data of the
            specified bond, covering every requested window (not just the first one). The dataset contains the
            open, high, low and close values for the selected bond.

            The returned data in case we use default arguments will look like::

                date || open | high | low | close
                -----||---------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if the bonds file was not found (as :obj:`FileNotFoundError`).
        RuntimeError:
            raised if the introduced bond/country was not found or did not match any of the existing ones,
            or if the response did not contain the expected data table.
        ConnectionError: raised if Investing.com answered with a status code other than 200.
        IndexError: raised if bond historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_bond_historical_data(bond='Argentina 3Y', country='argentina', from_date='01/01/2010', to_date='01/01/2019')
                        Open  High   Low  Close
            Date
            2011-01-03  4.15  4.15  4.15   5.15
            2011-01-04  4.07  4.07  4.07   5.45
            2011-01-05  4.27  4.27  4.27   5.71
            2011-01-10  4.74  4.74  4.74   6.27
            2011-01-11  4.30  4.30  4.30   6.56

    """

    # --- argument validation (order of the checks is part of the contract) ---
    if not bond:
        raise ValueError(
            "ERR#0066: bond parameter is mandatory and must be a valid bond name."
        )

    if not isinstance(bond, str):
        raise ValueError("ERR#0067: bond argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # Parse each date exactly once; the parsed value doubles as the validation.
    try:
        start_date = datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'."
        )

    try:
        end_date = datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # --- split the range into windows spanning at most 20 calendar years ---
    intervals = []

    while end_date.year - start_date.year > 20:
        window_end = start_date.replace(year=start_date.year + 20)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': window_end.strftime('%d/%m/%Y'),
        })
        start_date = window_end

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    interval_limit = len(intervals)

    # --- resolve the bond in the packaged listing ---
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'bonds', 'bonds.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0064: bonds file not found or errored.")

    bonds = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_bond_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    bonds = bonds[bonds['country'] == country_key]

    bond = bond.strip().lower()
    bond_key = unidecode.unidecode(bond)

    normalized_names = [
        unidecode.unidecode(value.lower())
        for value in bonds['name'].tolist()
    ]

    if bond_key not in normalized_names:
        raise RuntimeError("ERR#0068: bond " + bond +
                           " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced bond on Investing.com')

    # Prefer an exact lowercase match. Fall back to the accent-insensitive match
    # that the membership test above accepted: `idxmax()` on an all-False mask
    # would otherwise silently pick the first row, i.e. the wrong bond.
    exact_match = bonds['name'].str.lower() == bond
    if exact_match.any():
        row_label = exact_match.idxmax()
    else:
        row_label = bonds.index[normalized_names.index(bond_key)]

    id_ = bonds.loc[row_label, 'id']
    name = bonds.loc[row_label, 'name']
    full_name = bonds.loc[row_label, 'full_name']

    logger.info(str(bond) + ' found on Investing.com')

    logger.info('Data parsing process starting...')

    header = full_name + " Bond Yield Historical Data"
    url = "https://www.investing.com/instruments/HistoricalDataAjax"

    # All rows of all windows, kept in ascending date order while collecting.
    records = []
    # Adjacent windows share their boundary date, so the same day may be
    # returned by both requests; keep only its first occurrence.
    seen_dates = set()

    for interval_counter, interval in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        # Rows of this window in the order sent by the server (requested DESC).
        window_rows = []

        for elements_ in path_:
            cells = elements_.xpath(".//td")

            if cells[0].text_content() == 'No results found':
                # An empty window is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    continue
                raise IndexError(
                    "ERR#0069: bond information unavailable or not found."
                )

            info = [cell.get('data-real-value') for cell in cells]

            # NOTE(review): fromtimestamp() converts using the local timezone;
            # presumably the site's timestamps are UTC midnights - confirm.
            bond_date = datetime.fromtimestamp(int(info[0])).date()

            if bond_date in seen_dates:
                continue
            seen_dates.add(bond_date)

            # Column order in the table is: date, close, open, high, low.
            bond_close = float(info[1])
            bond_open = float(info[2])
            bond_high = float(info[3])
            bond_low = float(info[4])

            window_rows.append(
                Data(bond_date, bond_open, bond_high, bond_low, bond_close,
                     None, None))

        # Windows are requested oldest first, so appending each one reversed
        # keeps `records` globally ascending.
        records.extend(reversed(window_rows))

    logger.info('Data parsing process finished...')

    if not records:
        raise IndexError(
            "ERR#0069: bond information unavailable or not found.")

    if order in ['descending', 'desc']:
        records.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.bond_as_json() for value in records]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.bond_to_dict() for value in records])
    df.set_index('Date', inplace=True)

    return df
示例#22
0
def get_stock_historical_data(stock,
                              country,
                              from_date,
                              to_date,
                              as_json=False,
                              order='ascending',
                              debug=False):
    """
    Retrieve the historical data of a stock from Investing.com (Spanish site).

    The requested date range is split into windows of at most ~20 years, one HTTP request is sent per window,
    and the rows of every window are merged into a single result that is ordered as requested via `order`.
    The result is returned either as a :obj:`pandas.DataFrame` indexed by date or as a JSON string.

    Args:
        stock (:obj:`str`): symbol of the stock to retrieve historical data from.
        country (:obj:`str`): name of the country from where the stock is.
        from_date (:obj:`str`): date formatted as `dd/mm/yyyy`, since when data is going to be retrieved.
        to_date (:obj:`str`): date formatted as `dd/mm/yyyy`, until when data is going to be retrieved.
        as_json (:obj:`bool`, optional):
            to determine the format of the output data, either a :obj:`pandas.DataFrame` if False and a :obj:`json` if True.
        order (:obj:`str`, optional):
            order of the retrieved data, either ascending (`ascending`/`asc`) or descending (`descending`/`desc`) by date.
        debug (:obj:`bool`, optional):
            optional argument to either show or hide debug messages on log, either True or False, respectively.

    Returns:
        :obj:`pandas.DataFrame` or :obj:`json`:
            Either a :obj:`pandas.DataFrame` or a JSON string containing the retrieved historical data of the
            specified stock, covering every requested window (not just the first one). The dataset contains the
            open, high, low, close and volume values for the selected stock, plus its currency.

            The returned data in case we use default arguments will look like::

                date || open | high | low | close | volume | currency
                -----||-----------------------------------------------
                xxxx || xxxx | xxxx | xxx | xxxxx | xxxxxx | xxxxxxxx

            but if we define `as_json=True`, then the output will be::

                {
                    name: name,
                    historical: [
                        dd/mm/yyyy: {
                            open: x,
                            high: x,
                            low: x,
                            close: x,
                            volume: x,
                            currency: x
                        },
                        ...
                    ]
                }

    Raises:
        ValueError: raised whenever any of the introduced arguments is not valid or errored.
        IOError: raised if the stocks file was not found (as :obj:`FileNotFoundError`).
        RuntimeError:
            raised if the introduced stock/country was not found or did not match any of the existing ones,
            or if the response did not contain the expected data table.
        ConnectionError: raised if Investing.com answered with a status code other than 200.
        IndexError: raised if stock historical data was unavailable or not found in Investing.com.

    Examples:
        >>> investpy.get_stock_historical_data(stock='bbva', country='spain', from_date='01/01/2010', to_date='01/01/2019')
                         Open   High    Low  Close  Volume Currency
            Date
            2010-01-04  12.73  12.96  12.73  12.96       0      EUR
            2010-01-05  13.00  13.11  12.97  13.09       0      EUR
            2010-01-06  13.03  13.17  13.02  13.12       0      EUR
            2010-01-07  13.02  13.11  12.93  13.05       0      EUR
            2010-01-08  13.12  13.22  13.04  13.18       0      EUR

    """

    # --- argument validation (order of the checks is part of the contract) ---
    if not stock:
        raise ValueError(
            "ERR#0013: stock parameter is mandatory and must be a valid stock name."
        )

    if not isinstance(stock, str):
        raise ValueError("ERR#0027: stock argument needs to be a str.")

    if country is None:
        raise ValueError(
            "ERR#0039: country can not be None, it should be a str.")

    if not isinstance(country, str):
        raise ValueError("ERR#0025: specified country value not valid.")

    if not isinstance(as_json, bool):
        raise ValueError(
            "ERR#0002: as_json argument can just be True or False, bool type.")

    if order not in ['ascending', 'asc', 'descending', 'desc']:
        raise ValueError(
            "ERR#0003: order argument can just be ascending (asc) or descending (desc), str type."
        )

    if not isinstance(debug, bool):
        raise ValueError(
            "ERR#0033: debug argument can just be a boolean value, either True or False."
        )

    # Parse each date exactly once; the parsed value doubles as the validation.
    try:
        start_date = datetime.datetime.strptime(from_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0011: incorrect from_date date format, it should be 'dd/mm/yyyy'."
        )

    try:
        end_date = datetime.datetime.strptime(to_date, '%d/%m/%Y')
    except ValueError:
        raise ValueError(
            "ERR#0012: incorrect to_date format, it should be 'dd/mm/yyyy'.")

    if start_date >= end_date:
        raise ValueError(
            "ERR#0032: to_date should be greater than from_date, both formatted as 'dd/mm/yyyy'."
        )

    # --- split the range into windows spanning at most 20 calendar years ---
    intervals = []

    while end_date.year - start_date.year > 20:
        window_end = start_date.replace(year=start_date.year + 20)
        intervals.append({
            'start': start_date.strftime('%d/%m/%Y'),
            'end': window_end.strftime('%d/%m/%Y'),
        })
        start_date = window_end

    intervals.append({
        'start': start_date.strftime('%d/%m/%Y'),
        'end': end_date.strftime('%d/%m/%Y'),
    })

    interval_limit = len(intervals)

    # --- resolve the stock in the packaged listing ---
    resource_package = 'investpy'
    resource_path = '/'.join(('resources', 'stocks', 'stocks.csv'))

    if not pkg_resources.resource_exists(resource_package, resource_path):
        raise FileNotFoundError("ERR#0056: stocks file not found or errored.")

    stocks = pd.read_csv(
        pkg_resources.resource_filename(resource_package, resource_path))

    country_key = unidecode.unidecode(country.lower())

    if country_key not in get_stock_countries():
        raise RuntimeError("ERR#0034: country " + country.lower() +
                           " not found, check if it is correct.")

    stocks = stocks[stocks['country'] == country_key]

    stock = stock.strip().lower()
    stock_key = unidecode.unidecode(stock)

    normalized_symbols = [
        unidecode.unidecode(value.lower())
        for value in stocks['symbol'].tolist()
    ]

    if stock_key not in normalized_symbols:
        raise RuntimeError("ERR#0018: stock " + stock +
                           " not found, check if it is correct.")

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('investpy')
    logger.disabled = not debug

    logger.info('Searching introduced stock on Investing.com')

    # Prefer an exact lowercase match. Fall back to the accent-insensitive match
    # that the membership test above accepted: `idxmax()` on an all-False mask
    # would otherwise silently pick the first row, i.e. the wrong stock.
    exact_match = stocks['symbol'].str.lower() == stock
    if exact_match.any():
        row_label = exact_match.idxmax()
    else:
        row_label = stocks.index[normalized_symbols.index(stock_key)]

    symbol = stocks.loc[row_label, 'symbol']
    id_ = stocks.loc[row_label, 'id']
    name = stocks.loc[row_label, 'name']
    stock_currency = stocks.loc[row_label, 'currency']

    logger.info(str(stock) + ' found on Investing.com')

    logger.info('Data parsing process starting...')

    header = "Datos históricos " + symbol
    url = "https://es.investing.com/instruments/HistoricalDataAjax"

    # Suffixes used by the site to abbreviate volumes, with their multipliers.
    volume_units = (('K', 1e3), ('M', 1e6), ('B', 1e9))

    # All rows of all windows, kept in ascending date order while collecting.
    records = []
    # Adjacent windows share their boundary date, so the same day may be
    # returned by both requests; keep only its first occurrence.
    seen_dates = set()

    for interval_counter, interval in enumerate(intervals, start=1):
        params = {
            "curr_id": id_,
            "smlID": str(randint(1000000, 99999999)),
            "header": header,
            "st_date": interval['start'],
            "end_date": interval['end'],
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }

        head = {
            "User-Agent": user_agent.get_random(),
            "X-Requested-With": "XMLHttpRequest",
            "Accept": "text/html",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
        }

        req = requests.post(url, headers=head, data=params)

        if req.status_code != 200:
            raise ConnectionError("ERR#0015: error " + str(req.status_code) +
                                  ", try again later.")

        if not req.text:
            continue

        root_ = fromstring(req.text)
        path_ = root_.xpath(".//table[@id='curr_table']/tbody/tr")

        if not path_:
            raise RuntimeError(
                "ERR#0004: data retrieval error while scraping.")

        # Rows of this window in the order sent by the server (requested DESC).
        window_rows = []

        for elements_ in path_:
            info = [
                nested_.text_content() for nested_ in elements_.xpath(".//td")
            ]

            if info[0] == 'No se encontraron resultados':
                # An empty window is tolerated unless it is the last one.
                if interval_counter < interval_limit:
                    continue
                raise IndexError(
                    "ERR#0007: stock information unavailable or not found."
                )

            stock_date = datetime.datetime.strptime(
                info[0].replace('.', '-'), '%d-%m-%Y')

            if stock_date in seen_dates:
                continue
            seen_dates.add(stock_date)

            # Numbers use '.' as thousands separator and ',' as decimal mark.
            # Column order in the table is: date, close, open, high, low, volume.
            stock_close = float(info[1].replace('.', '').replace(',', '.'))
            stock_open = float(info[2].replace('.', '').replace(',', '.'))
            stock_high = float(info[3].replace('.', '').replace(',', '.'))
            stock_low = float(info[4].replace('.', '').replace(',', '.'))

            volume_text = info[5]

            for suffix, multiplier in volume_units:
                if suffix in volume_text:
                    stock_volume = int(
                        float(
                            volume_text.replace(suffix, '').replace(
                                '.', '').replace(',', '.')) * multiplier)
                    break
            else:
                # No unit suffix: keep a plain number as it is and fall back
                # to 0 for anything that is not numeric (e.g. a placeholder).
                try:
                    stock_volume = int(
                        float(
                            volume_text.replace('.', '').replace(',', '.')))
                except (ValueError, OverflowError):
                    stock_volume = 0

            window_rows.append(
                Data(stock_date, stock_open, stock_high, stock_low,
                     stock_close, stock_volume, stock_currency))

        # Windows are requested oldest first, so appending each one reversed
        # keeps `records` globally ascending.
        records.extend(reversed(window_rows))

    logger.info('Data parsing process finished...')

    if not records:
        raise IndexError(
            "ERR#0007: stock information unavailable or not found.")

    if order in ['descending', 'desc']:
        records.reverse()

    if as_json is True:
        json_ = {
            'name': name,
            'historical': [value.stock_as_json() for value in records]
        }
        return json.dumps(json_, sort_keys=False)

    df = pd.DataFrame.from_records(
        [value.stock_to_dict() for value in records])
    df.set_index('Date', inplace=True)

    return df