Example no. 1
from datetime import date, timedelta
from typing import Optional

from alpha_vantage.timeseries import TimeSeries as AlphaVantageTimeSeries
from pandas import to_datetime
from ratelimit import limits, sleep_and_retry

# project-internal names assumed importable: config, HistoricDataPoint, TimeSeries


class AlphaVantageScraper:
    api_key = config.get('scraper.alpha_vantage.api_key')
    lookback_days = config.get('scraper.alpha_vantage.lookback_days')
    concurrency = config.get('scraper.alpha_vantage.concurrency')
    rate_limit = config.get('scraper.alpha_vantage.rate_limit')

    @sleep_and_retry
    @limits(calls=rate_limit['calls'], period=rate_limit['period'])
    def get_time_series(
        self,
        yahoo_ticker: str,
        start_date: Optional[date] = None,
        end_date: Optional[date] = None
    ) -> TimeSeries:
        """
        Returns the daily time series for the given ticker in Yahoo Finance format (e.g. HSBA.L).
        :param yahoo_ticker: ticker in Yahoo Finance format, e.g. HSBA.L
        :param start_date: first day to include; defaults to lookback_days before today
        :param end_date: last day to include; defaults to today
        :return: list of HistoricDataPoint
        """
        # Resolve the date defaults at call time: putting date.today() in the
        # signature would freeze it at class-definition time.
        if start_date is None:
            start_date = date.today() - timedelta(days=self.lookback_days)
        if end_date is None:
            end_date = date.today()
        ts = AlphaVantageTimeSeries(key=self.api_key, output_format='pandas')
        data, meta_data = ts.get_daily(symbol=yahoo_ticker, outputsize='full')
        data.index = to_datetime(data.index)
        data_truncated = data.truncate(before=start_date, after=end_date)
        return [
            HistoricDataPoint(time=dt.to_pydatetime(),
                              open=row['1. open'],
                              high=row['2. high'],
                              low=row['3. low'],
                              close=row['4. close'],
                              volume=row['5. volume'])
            for dt, row in data_truncated.iterrows()
        ]
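A hedged usage sketch (assumes the scraper.alpha_vantage.* config keys are populated; HSBA.L follows the docstring's own example):

scraper = AlphaVantageScraper()
series = scraper.get_time_series('HSBA.L')
for point in series[-5:]:
    print(point.time, point.close)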
Example no. 2
def test_get():
    # the db.mongo section must expose every key the connection code reads
    mongo_config = config.get('db.mongo')
    assert 'host' in mongo_config
    assert 'port' in mongo_config
    assert 'user' in mongo_config
    assert 'pass' in mongo_config
    assert 'name' in mongo_config
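For reference, a config mapping that would satisfy this test; only the key names come from the assertions, the nesting and placeholder values are assumptions:

SAMPLE_CONFIG = {
    'db': {
        'mongo': {
            'host': 'localhost',
            'port': 27017,
            'user': 'scraper',     # placeholder
            'pass': 'secret',      # placeholder
            'name': 'market_data'  # placeholder
        }
    }
}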
Example no. 3
from urllib.parse import quote_plus

from pymongo import MongoClient

# module-level names assumed defined elsewhere in the project: config, _db, _log


def init_connection() -> None:
    global _db
    user = config.get('db.mongo.user')
    password = config.get('db.mongo.pass')
    host = config.get('db.mongo.host')
    port = config.get('db.mongo.port')
    db_name = config.get('db.mongo.name')
    # percent-escape credentials: PyMongo rejects URIs whose username or
    # password contain reserved characters such as ':' or '@'
    client = MongoClient(
        'mongodb://{user}:{password}@{host}:{port}/{db_name}'.format(
            user=quote_plus(user),
            password=quote_plus(password),
            host=host,
            port=port,
            db_name=db_name))
    _db = client[db_name]

    # MongoClient connects lazily; listing collections forces a round trip so
    # a bad connection fails here rather than on first use
    _db.list_collection_names()
    _log.info('Connected to MongoDB')
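If enumerating collections is undesirable, PyMongo also supports a lightweight server ping as a connectivity check; a sketch against the same client object:

# equivalent health check that does not touch any collection
client.admin.command('ping')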
Example no. 4
from logging import DEBUG, ERROR, FATAL, INFO, WARN, WARNING

# module-level name assumed defined elsewhere in the project: config


def _get_log_level_from_config() -> int:
    """Maps the configured level name to a logging constant, falling back to DEBUG."""
    lookup = {
        'debug': DEBUG,
        'info': INFO,
        'warn': WARN,
        'warning': WARNING,
        'error': ERROR,
        'fatal': FATAL
    }
    fallback = DEBUG
    try:
        level = config.get('log.level')
    except KeyError:
        # no level configured at all
        return fallback
    else:
        # unrecognised names also fall back rather than raising
        return lookup.get(str(level).lower(), fallback)
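The project presumably feeds this into its logger setup; a minimal sketch of how that could look (the _log name mirrors the one used in init_connection above):

import logging

logging.basicConfig(level=_get_log_level_from_config())
_log = logging.getLogger(__name__)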
Example no. 5
import re
from concurrent.futures import ThreadPoolExecutor
from typing import List

import requests
from bs4 import BeautifulSoup

# project-internal names assumed importable: config, Index, IndexConstituent


class LSEScraper:
    concurrency = config.get('scraper.lse.concurrency')

    def get_constituents(self, index: Index) -> List[IndexConstituent]:
        """
        Scrapes and returns the list of (ticker, name) pairs for an index.
        :param index: the index whose constituents to fetch
        :return: list of IndexConstituent
        """
        def open_page(page: int) -> str:
            url = 'http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/summary/' \
                  'summary-indices-constituents.html?index={index_ticker}&page={page}' \
                .format(index_ticker=index.index_ticker, page=page)
            response = requests.get(url)
            return response.text

        def get_total_pages() -> int:
            # the page count is printed as "Page 1 of N" above the table
            html = open_page(1)
            soup = BeautifulSoup(html, 'lxml')
            text = soup.select_one(
                '#pi-colonna1-display > div:nth-of-type(1) > p.floatsx').text
            p = re.compile(r'Page 1 of (\d+)')
            m = p.search(text)
            return int(m.group(1))

        def get_constituents_in_page(page: int) -> List[IndexConstituent]:
            html = open_page(page)
            soup = BeautifulSoup(html, 'lxml')
            rows = soup.select('#pi-colonna1-display > table > tbody > tr')
            constituents = []
            for row in rows:
                # the first two cells of each row hold ticker and company name
                ticker, name = [cell.text.strip() for cell in row.find_all('td')[:2]]
                constituent = IndexConstituent(ticker=ticker, name=name)
                constituents.append(constituent)
            return constituents

        total_pages = get_total_pages()

        # fetch all pages concurrently, then flatten the per-page lists
        with ThreadPoolExecutor(
                max_workers=LSEScraper.concurrency) as executor:
            futures = executor.map(get_constituents_in_page,
                                   range(1, total_pages + 1))
            constituents = [c for cs in futures for c in cs]
        return constituents
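A hedged usage sketch (Index is a project type; its construction here and the UKX ticker for the FTSE 100 are assumptions):

scraper = LSEScraper()
ftse_100 = Index(index_ticker='UKX', name='FTSE 100')  # hypothetical construction
for constituent in scraper.get_constituents(ftse_100):
    print(constituent.ticker, constituent.name)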
Example no. 6
import pytest


def test_get_error():
    # unknown keys raise rather than returning a default
    with pytest.raises(KeyError):
        config.get('random.key')
Example no. 7
import json
from datetime import datetime
from typing import List
from warnings import simplefilter, warn

import requests
from pandas import to_datetime

# project-internal names assumed importable: config, MWBasicStockInfo, TimeSeries, EarningsEvent


class MarketWatchScraper:
    ckey = config.get('scraper.market_watch.ckey')
    entitlement_token = config.get('scraper.market_watch.entitlement_token')
    default_step = config.get('scraper.market_watch.step')
    default_timeframe = config.get('scraper.market_watch.timeframe')

    def get_basic_stock_info(self, ticker: str, country_code: str) -> MWBasicStockInfo:
        """
        Returns the basic stock information for a ticker and country code (e.g. common name, CUSIP, ISIN).
        :param ticker: e.g. HSBA
        :param country_code: e.g. uk
        :return: MWBasicStockInfo with identifiers and exchange codes
        """

        def open_page() -> str:
            url = 'https://api.wsj.net/api/dylan/quotes/v2/comp/quote'
            params = {
                'needed': 'TradingRange|Meta',
                'id': '{ticker}|{country_code}|||'.format(ticker=ticker, country_code=country_code),
                'maxInstrumentMatches': 1,
                'ckey': MarketWatchScraper.ckey,
                'EntitlementToken': MarketWatchScraper.entitlement_token,
                'accept': 'application/json'
            }
            response = requests.get(url, params=params)
            return response.text

        def parse_response(response_json: str) -> MWBasicStockInfo:
            data = json.loads(response_json)
            instrument = data['GetInstrumentResponse']['InstrumentResponses'][0]['Matches'][0]['Instrument']
            return MWBasicStockInfo(
                ticker=instrument['Ticker'],
                name=instrument['CommonName'],
                cusip=instrument['Cusip'],
                sedol=instrument['Sedol'],
                isin=instrument['Isin'],
                country_code=instrument['Exchange']['CountryCode'],
                iso_code=instrument['Exchange']['IsoCode']
            )

        response = open_page()
        basic_stock_info = parse_response(response)
        return basic_stock_info

    def get_time_series(self, ticker: str, country_code: str, iso_code: str, step: str = default_step,
                        timeframe: str = default_timeframe) -> TimeSeries:
        """
        Returns time series for ticker (e.g. HSBA) given step and timeframe
        :param ticker: e.g. HSBA
        :param country_code: for ticker, e.g. uk
        :param iso_code: for ticker, e.g. xlon
        :param step: e.g. P1D for daily, if not supplied default will be used
        :param timeframe: e.g. P5Y for 5 years, if not supplied default will be used
        :return: list of HistoricDataPoint covering the timeframe
        """
        PRICE_SERIES_ID = 'price'
        VOLUME_SERIES_ID = 'volume'

        def open_page() -> str:
            url = 'https://api-secure.wsj.net/api/michelangelo/timeseries/history'
            options = {
                'Step': step,
                'TimeFrame': timeframe,
                'EntitlementToken': MarketWatchScraper.entitlement_token,
                'Series': [
                    {
                        'Key': 'STOCK/{country_code}/{iso_code}/{ticker}'.format(country_code=country_code,
                                                                                 iso_code=iso_code, ticker=ticker),
                        'Dialect': 'Charting',
                        'Kind': 'Ticker',
                        'SeriesId': PRICE_SERIES_ID,
                        'DataTypes': ['Open', 'High', 'Low', 'Last'],
                        'Indicators': [
                            {'Parameters': [], 'Kind': 'Volume', 'SeriesId': VOLUME_SERIES_ID}
                        ]
                    }
                ]
            }
            params = {
                'json': json.dumps(options),
                'ckey': MarketWatchScraper.ckey
            }
            headers = {
                'Dylan2010.EntitlementToken': MarketWatchScraper.entitlement_token
            }
            response = requests.get(url, params=params, headers=headers)
            return response.text

        def parse_response(response_json: str) -> TimeSeries:
            data = json.loads(response_json)

            def extract_series(series_id: str) -> List[List[float]]:
                return next(s['DataPoints'] for s in data['Series'] if s['SeriesId'] == series_id)

            time_axis = data['TimeInfo']['Ticks']
            price_series = extract_series(PRICE_SERIES_ID)
            volume_series = extract_series(VOLUME_SERIES_ID)
            # each price entry is [open, high, low, last]; volume arrives as a
            # single-element list per tick
            time_series = [HistoricDataPoint(
                time=MarketWatchScraper._unix_to_datetime(unix_timestamp),
                open=open_price,
                high=high,
                low=low,
                close=last,
                volume=volume
            ) for unix_timestamp, (open_price, high, low, last), (volume,)
                in zip(time_axis, price_series, volume_series)]
            return time_series

        response = open_page()
        time_series = parse_response(response)
        return time_series

    def get_quarterly_earnings(self, ticker: str, country_code: str, iso_code: str,
                               timeframe: str = default_timeframe) -> List[EarningsEvent]:
        """
        Returns the quarterly earnings series for a ticker (e.g. HSBA) over the given timeframe.
        :param ticker: e.g. HSBA
        :param country_code: for ticker, e.g. uk
        :param iso_code: for ticker, e.g. xlon
        :param timeframe: e.g. P5Y for 5 years, if not supplied default will be used
        :return: list of EarningsEvent
        """
        simplefilter('always', DeprecationWarning)
        warn("This method is deprecated as data is missing for many UK stocks", DeprecationWarning)

        EARNINGS_SERIES_ID = 'earnings'

        def open_page() -> str:
            url = 'https://api-secure.wsj.net/api/michelangelo/timeseries/history'
            options = {
                # a large step keeps the series payload small, since only the
                # earnings events matter here
                'Step': 'P10Y',
                'TimeFrame': timeframe,
                'EntitlementToken': MarketWatchScraper.entitlement_token,
                'Series': [
                    {
                        'Key': 'STOCK/{country_code}/{iso_code}/{ticker}'.format(country_code=country_code,
                                                                                 iso_code=iso_code, ticker=ticker),
                        'Dialect': 'Charting',
                        'Kind': 'Ticker',
                        'SeriesId': 's1',
                        'Indicators': [
                            {
                                'Parameters': [{'Name': 'YearOverYear'}],
                                'Kind': 'EarningsEvents',
                                'SeriesId': EARNINGS_SERIES_ID
                            }
                        ]
                    }
                ]
            }
            params = {
                'json': json.dumps(options),
                'ckey': MarketWatchScraper.ckey
            }
            headers = {
                'Dylan2010.EntitlementToken': MarketWatchScraper.entitlement_token
            }
            response = requests.get(url, params=params, headers=headers)
            return response.text

        def parse_response(response_json: str) -> List[EarningsEvent]:
            data = json.loads(response_json)
            events = data['Events'][0]['DataPoints']
            quarterly_earnings = [EarningsEvent(
                time=MarketWatchScraper._unix_to_datetime(event['EventDate']),
                value=event['Value']
            ) for event in events]
            return quarterly_earnings

        response = open_page()
        quarterly_earnings = parse_response(response)
        return quarterly_earnings

    def get_annual_earnings(self, ticker: str, country_code: str):
        pass

    @staticmethod
    def _unix_to_datetime(millis: int) -> datetime:
        # MarketWatch timestamps are Unix epoch milliseconds
        return to_datetime(millis, unit='ms').to_pydatetime()
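A hedged usage sketch chaining the two lookups (ticker and country code follow the docstring examples; the config-driven ckey and entitlement token must be valid):

scraper = MarketWatchScraper()
info = scraper.get_basic_stock_info('HSBA', 'uk')
series = scraper.get_time_series(info.ticker, info.country_code, info.iso_code)
print(info.isin, len(series), 'data points')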