def __bulk_dl_and_save(self, url_contexts, process_data_update_function,
                       open_files_function, batch_size=None, sleep=None):
    # download, process and update/save
    for url_contexts in utils.batch(url_contexts, size=batch_size, sleep=sleep):
        # pipeline for downloading data and preprocessing it
        data_contexts = \
            self._data_downloader.process_downloaded_data(
                self._data_downloader.verify_download(
                    utils.bulk_download(url_contexts)))
        # if necessary, convert downloaded format into a new storage format
        if self._data_downloader.name != self._data_writer.name:
            data_contexts = \
                self._data_downloader.convert_data(data_contexts,
                                                   self._data_writer)
        # drain for opening files and saving/updating downloaded data
        for context in self._data_writer.save_data(
                process_data_update_function(
                    open_files_function(data_contexts))):
            yield context
    yield None  # poison pill to signal end of downloads
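# Every pipeline in this module splits its work through a `utils.batch`
# helper whose implementation is not shown here. A minimal sketch consistent
# with the call sites in this module (size passed positionally or as a
# keyword, size=None meaning "one batch", and an optional sleep between
# batches) could look like the following -- this is an assumption, not the
# actual helper, which may also handle dicts or generators differently.
import time


def batch(iterable, size=None, sleep=None):
    # Yield successive slices of `iterable` of at most `size` items,
    # optionally pausing `sleep` seconds between slices to throttle requests.
    items = list(iterable)
    size = size or len(items) or 1
    for start in range(0, len(items), size):
        yield items[start:start + size]
        if sleep and start + size < len(items):
            time.sleep(sleep)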
def _bulk_download(self, urls):
    results = []
    for i, batched_urls in enumerate(batch(urls, self.batch_size, self.sleep)):
        for r in self.__bulk_download(batched_urls):
            results.append((r.url, r.content))
        logger.debug("Downloaded %d of %d urls" % (
            (i * self.batch_size) + self.batch_size, len(urls)))
    logger.debug("got %d results" % len(results))
    return results
def run_integrity_check():
    index = index_.Factory()
    instruments = index.get_instruments()
    #instruments = index.get_instruments()[:1000]
    batch_size = len(instruments) / 100
    for i, instrument_batch in enumerate(utils.batch(instruments, size=batch_size)):
        instrument_batch = dict(instrument_batch)
        filterFromDate = datetime.datetime.now() - datetime.timedelta(days=99999)
        bar_filter = barfeed.DateRangeFilter(filterFromDate)
        feed = instrumentfeed.Feed(instrument_batch, bar_filter)
        feed.add_bars_from_instruments(instrument_batch)
        print 'batch %i/%i done' % (i + 1, len(instruments) / batch_size + 1)
def run_batched_screener(batch_size):
    index = index_.Factory()
    # run against all instruments
    instruments = index.get_instruments()[:200]
    for instrument in instruments:
        instrument.update_stats()
    for i, batch in enumerate(utils.batch(instruments, batch_size)):
        print 'starting batch %i/%i' % (i, (len(instruments) / batch_size) + 1)
        filterFromDate = datetime.datetime.now() - datetime.timedelta(days=100)
        bar_filter = barfeed.DateRangeFilter(filterFromDate)
        symbol_screener = MyScreener('test_list_%i' % i, batch, bar_filter=bar_filter)
        symbol_screener.run(save=True)
        del symbol_screener
def get_symbols_info(symbols, keys=None):
    if not isinstance(symbols, (list, tuple)):
        symbols = list(symbols)
    keys = keys or ["Symbol", "Name", "StockExchange", "LastTradeDate"]
    yql = "select %(keys)s from yahoo.finance.quotes where symbol in (%(symbols)s)"
    urls = []
    for batched_symbols in batch(symbols, 100):
        csv_symbols = ",".join(['"%s"' % s.upper() for s in batched_symbols])
        urls.append(get_yql_url(yql % {"keys": ",".join(keys),
                                       "symbols": csv_symbols}))
    downloader = Downloader()
    results = []
    for url, text in downloader.download(urls):
        json_ = json.loads(text)
        for result in json_["query"]["results"]["quote"]:
            results.append(_convert_result(result))
    return results
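# A brief usage sketch of get_symbols_info(). The symbols and keys below are
# illustrative only; each returned row is whatever _convert_result() produces
# for one quote in the YQL response.
rows = get_symbols_info(["AAPL", "MSFT", "GOOG"], keys=["Symbol", "Name"])
for row in rows:
    print row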
def get_data(self):
    '''
    Returns a dict with the following key/value pairs:
        'sectors': ['sector names']
        'industry_sectors': [('industry names', 'sector names')]
        'symbols': [{ keys: symbol, name, industry }]
    '''
    ret = {
        'sectors': [],
        'industry_sectors': [],
        'symbols': [],
    }
    yql = 'select * from yahoo.finance.sectors'
    json_sector_list = execute_yql(yql, 'sector')
    ret = self.__parse_sector_industries(json_sector_list, ret)
    yql_queries = []
    for ids in utils.batch(ret.pop('industry_ids'), size=10):
        yql = 'select * from yahoo.finance.industry where id in (%s)' % ','.join(
            ['"%s"' % i for i in ids])
        yql_queries.append(yql)
    json_industry_list = execute_yql(yql_queries, 'industry')
    return self.__parse_industry_symbols(json_industry_list, ret)
def get_data(self, symbols):
    '''
    Returns a dict where the keys are symbols and the values are dicts with
    keys of keystats property names and values of results.

    :param symbols: the symbol(s) to download key stats for.
    :type symbols: string or list of strings
    '''
    if not isinstance(symbols, list):
        symbols = [symbols]
    ret = {}
    yql_queries = []
    properties = self.__custom_key_stats_properties or self.__key_stats_properties
    # build one YQL query per batch of symbols
    for symbol_batch in utils.batch(symbols, size=300):
        yql = 'select %s from yahoo.finance.quotes where symbol in (%s)' % (
            ','.join(properties),
            ','.join(['"%s"' % symbol.upper() for symbol in symbol_batch]))
        yql_queries.append(yql)
    for symbol_key_stats in execute_yql(yql_queries, 'quote'):
        if self.__verify_key_stats(symbol_key_stats):
            ret[symbol_key_stats['symbol']] = self.__parse_key_stats(symbol_key_stats)
    return ret
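# Illustrative call. `KeyStatsDownloader` is a hypothetical name for the
# class this method belongs to; the real class name is not shown in this
# snippet. The returned mapping mirrors the docstring above.
downloader = KeyStatsDownloader()
stats = downloader.get_data(['AAPL', 'MSFT'])
# stats == {'AAPL': {<property name>: <value>, ...}, 'MSFT': {...}}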
def get_urls(self, ids, context=None):
    return [self.get_url(x, context) for x in utils.batch(ids, size=10)]
def get_urls(self, symbols):
    if not isinstance(symbols, list):
        symbols = [symbols]
    ret = [self.get_url(x) for x in utils.batch(symbols, size=20)]
    return ret
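# Rough sketch of the expected behaviour, assuming get_url() builds one query
# URL per batch of symbols. The `downloader` instance and the symbol list are
# placeholders, not names from this codebase.
urls = downloader.get_urls(['SYM%d' % n for n in range(45)])
# with size=20, the 45 symbols are split into batches of 20, 20 and 5,
# so three URLs are produced
assert len(urls) == 3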