def __bulk_dl_and_save(self, url_contexts, process_data_update_function,
                       open_files_function, batch_size=None, sleep=None):
    # download, process and update/save
    for url_contexts in utils.batch(url_contexts, size=batch_size,
                                    sleep=sleep):
        # pipeline for downloading data and preprocessing it
        data_contexts = \
            self._data_downloader.process_downloaded_data(
                self._data_downloader.verify_download(
                    utils.bulk_download(url_contexts)))
        # if necessary, convert downloaded format into a new storage format
        if self._data_downloader.name != self._data_writer.name:
            data_contexts = \
                self._data_downloader.convert_data(data_contexts,
                                                   self._data_writer)
        # drain for opening files and saving/updating downloaded data
        for context in self._data_writer.save_data(
                process_data_update_function(
                    open_files_function(data_contexts))):
            yield context
    yield None  # poison pill to signal end of downloads
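# The generator above leans on utils.batch to yield url_contexts in
# fixed-size chunks, optionally sleeping between chunks to throttle the
# downloads. The project's own implementation isn't shown here; below is
# a minimal sketch under those assumed semantics, matching the call
# site's signature.

import itertools
import time

def batch(iterable, size=None, sleep=None):
    # yield successive chunks of `size` items; size=None passes the
    # whole iterable through as a single chunk
    if size is None:
        yield list(iterable)
        return
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, size))
        if not chunk:
            break
        yield chunk
        if sleep:
            time.sleep(sleep)  # throttle between batches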
def update_stats(self, symbols):
    urls = self._downloader.get_urls(symbols)
    symbol_stats = \
        self._downloader.process_downloaded_data(
            self._downloader.verify_download(
                utils.bulk_download(urls)))
    self._db.insert_or_update_stats(list(symbol_stats))
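# utils.bulk_download appears throughout these snippets; its call sites
# suggest it takes an iterable of URLs and yields (data, info) pairs,
# where info['tag'] carries the request URL and an 'error' key is
# present only on failure. A hedged sketch under those assumptions,
# using the standard library rather than the project's actual HTTP
# stack:

from concurrent.futures import ThreadPoolExecutor
from urllib.request import urlopen

def bulk_download(urls, max_workers=8):
    # fetch each URL concurrently, preserving input order in the output
    def fetch(url):
        try:
            with urlopen(url) as response:
                return response.read(), {'tag': url}
        except Exception as exc:
            return None, {'tag': url, 'error': str(exc)}

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for pair in pool.map(fetch, urls):
            yield pair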
def execute_yql(yql_queries, validate_field):
    '''
    Downloads, verifies, and returns the JSON data for the supplied
    YQL query or queries.

    :param yql_queries: the YQL query string(s) to be downloaded;
                        they must share the same validate field
    :type yql_queries: string or list of strings
    :param validate_field: the key used to retrieve valid results data
                           from the downloaded JSON
    :type validate_field: string
    '''
    if not isinstance(yql_queries, list):
        yql_queries = [yql_queries]
    ret = []
    for data, tag_error_dict in utils.bulk_download(
            [__get_yql_url(yql) for yql in yql_queries]):
        url = tag_error_dict['tag']
        if 'error' not in tag_error_dict:
            json_data = __verify_json_data(json.loads(data), validate_field)
            if json_data:
                if isinstance(json_data, list):
                    ret.extend(json_data)
                else:
                    ret.append(json_data)
        else:
            print('Error downloading "%s": %s'
                  % (url, tag_error_dict['error']))
    return ret
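# execute_yql depends on two private helpers that aren't shown:
# __get_yql_url, which turns a query string into a request URL, and
# __verify_json_data, which digs the validated payload out of the
# response. The sketches below assume Yahoo's public YQL endpoint
# (retired in 2019) and its standard {'query': {'results': ...}}
# response envelope; both bodies are assumptions, not code from this
# project.

import json
from urllib.parse import urlencode

_YQL_BASE_URL = 'https://query.yahooapis.com/v1/public/yql'  # assumed

def __get_yql_url(yql):
    # build a YQL request URL asking for JSON output
    return '%s?%s' % (_YQL_BASE_URL,
                      urlencode({'q': yql, 'format': 'json'}))

def __verify_json_data(json_data, validate_field):
    # pull the named field out of the YQL results envelope, returning
    # None when it is missing or empty
    try:
        return json_data['query']['results'][validate_field]
    except (KeyError, TypeError):
        return None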
def update_sectors_and_industries():
    _db = db.Database()
    _downloader = Sectors()
    urls = _downloader.get_urls()
    sectors_and_industries = \
        _downloader.process_downloaded_data(
            _downloader.verify_download(
                utils.bulk_download(urls)))
    final_context = \
        [x for x in _downloader.save_data(sectors_and_industries)][0]
def update_symbol_index():
    _db = db.Database()
    _downloader = Industries()
    ids = _db.get_industry_ids()
    urls = _downloader.get_urls(ids)
    symbols = \
        _downloader.process_downloaded_data(
            _downloader.verify_download(
                utils.bulk_download(urls)))
    final_context = [x for x in _downloader.save_data(symbols)][0]
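# Run back to back, the two functions above rebuild the symbol index
# from scratch: sectors and industries are fetched first to populate
# the industry ids that update_symbol_index then iterates over. A
# possible driver, assuming both functions live in the same module:

if __name__ == '__main__':
    update_sectors_and_industries()
    update_symbol_index()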
if ex_dividend_date:
    if '-' not in ex_dividend_date:
        ex_dividend_date = get_date_from_month_and_day(ex_dividend_date)
        # an ex-dividend date must precede the pay date; walk it back
        # a year at a time until it does
        while ex_dividend_date >= dividend_pay_date:
            ex_dividend_date = adjust_year(ex_dividend_date, -1)
    else:
        ex_dividend_date = datetime.datetime.strptime(
            stats['ExDividendDate'], '%d-%b-%y').date()
    stats['DividendPayDate'] = dividend_pay_date
    stats['ExDividendDate'] = ex_dividend_date
else:
    stats['DividendPayDate'] = None
    stats['ExDividendDate'] = None
    stats['DividendShare'] = None
    stats['DividendYield'] = None

def save_data(self, data_context):
    # drain the (stats, context) pairs, saving the stats in one bulk
    # insert and yielding the final context as the completion signal
    stats = []
    context = None
    for stat, context in data_context:
        stats.append(stat)
    self._db.insert_or_update_stats(stats)
    yield context

if __name__ == '__main__':
    _db = db.Database()
    _downloader = Provider()
    urls = _downloader.get_urls(_db.get_symbols())
    symbol_stats = \
        _downloader.process_downloaded_data(
            _downloader.verify_download(
                utils.bulk_download(urls)))
    final_context = [d for d in symbol_stats][0]
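# The ex-dividend normalization above assumes two date helpers that
# aren't shown. Minimal sketches under assumed semantics:
# get_date_from_month_and_day parses a month/day string such as
# 'Feb 7' against the current year, and adjust_year shifts a date by
# whole years. Both names come from the call sites; the bodies are
# guesses.

import datetime

def get_date_from_month_and_day(date_str):
    # parse e.g. 'Feb 7' into a date in the current year; the caller
    # then walks the year back until the date precedes the pay date
    parsed = datetime.datetime.strptime(date_str, '%b %d')
    return parsed.date().replace(year=datetime.date.today().year)

def adjust_year(date, delta):
    # shift a date by `delta` whole years, clamping Feb 29 to Feb 28
    # when the target year is not a leap year
    try:
        return date.replace(year=date.year + delta)
    except ValueError:
        return date.replace(year=date.year + delta, day=28)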