def _retrieve_data(self, main_params, extra_params, output_data_format):
    """Query the Wind data provider for one factor and reformat the result.

    The API name is read from ``main_params[Header.API]``:

    * ``'w.wsi'``: a single query covering ``self.start_date`` to
      ``self.end_date``.  The raw result is reformatted with
      ``date=self.end_date``; ``output_data_format`` is not forwarded on
      this path.
    * ``'w.wsd'`` / ``'w.wss'``: one query per date returned by
      ``WIND_DATA_PROVIDER.biz_days_list``.  The per-date results are
      reformatted and concatenated along axis 0.

    :param main_params: indexable by ``Header.API`` and ``Header.INDICATOR``.
    :param extra_params: indexable by ``Header.MULTIFACTORS`` and, on the
        per-date path, ``Header.REPORTADJ`` (plus ``Header.TENOR`` for
        ``'w.wss'``); also forwarded to ``self._merge_query_params``.
    :param output_data_format: forwarded to
        ``WIND_QUERY_HELPER.reformat_wind_data`` on the per-date path.
    :return: the reformatted data; an empty ``pd.DataFrame`` when the
        per-date path has no dates to query.
    :raises ValueError: on the per-date path, when a date is processed and
        the API is neither ``'w.wsd'`` nor ``'w.wss'``.  ``py_assert`` is
        also called with ``ValueError`` when the tenor is null for
        ``'w.wss'``.
    """
    api = main_params[Header.API]
    indicator = main_params[Header.INDICATOR]

    if api == 'w.wsi':
        merged_extra_params = self._merge_query_params(extra_params,
                                                       date=self.end_date)
        raw_data = WIND_DATA_PROVIDER.query_data(
            api=api,
            sec_id=self.sec_id,
            indicator=indicator,
            extra_params=merged_extra_params,
            start_date=self.start_date,
            end_date=self.end_date)
        multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'
        return WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw_data,
            date=self.end_date,
            multi_factors=multi_factors)

    dates = WIND_DATA_PROVIDER.biz_days_list(start_date=self.start_date,
                                             end_date=self.end_date,
                                             freq=self.freq)
    # These two flags do not depend on the date, so evaluate them once.
    use_report_date = not pd.isnull(extra_params[Header.REPORTADJ])
    multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'

    # Collect the per-date frames and concatenate once at the end instead of
    # re-copying the accumulated result on every iteration.
    frames = []
    for fetch_date in dates:
        if use_report_date:
            date = WIND_QUERY_HELPER.latest_report_date(fetch_date)
        else:
            date = fetch_date
        date = date_convert_2_str(date)

        if self.is_index:
            sec_id = WIND_DATA_PROVIDER.get_universe(self.sec_id, date=date)
        else:
            sec_id = self.sec_id

        if api == 'w.wsd':
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=indicator,
                extra_params=merged_extra_params,
                start_date=date,
                end_date=date)
        elif api == 'w.wss':
            py_assert(not pd.isnull(extra_params[Header.TENOR]), ValueError,
                      'tenor must be given for query factor {0}'.format(self.factor_name))
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=indicator,
                extra_params=merged_extra_params)
        else:
            raise ValueError('FactorLoader._retrieve_data: unacceptable value of parameter api')

        # The result is labelled with the business day being fetched, not
        # the (possibly adjusted) date that was sent to the query.
        frames.append(WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw_data,
            date=fetch_date,
            output_data_format=output_data_format,
            multi_factors=multi_factors))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)
def _merge_query_params(self, params, date=None):
    """Build the ``key=value;...`` option string from the non-null params.

    Entries of ``params`` whose value is null (per ``pd.isnull``) are
    skipped.  The remaining entries are rendered as follows:

    * ``Header.TENOR``: ``startDate=<start>;endDate=<date>``, where
      ``<start>`` is
      ``WIND_DATA_PROVIDER.forward_date(date, value, self.date_format)``.
    * ``Header.FREQ`` with a value starting with ``'min'``:
      ``BarSize=<rest of the value>``.
    * anything else: ``<key>=<str(value)>``.

    The pieces are joined with ``';'`` and the result of
    ``FactorLoader._check_industry_params(params.name)`` is appended.

    :param params: object exposing ``items()`` yielding ``(key, value)``
        pairs and a ``name`` attribute (presumably a ``pd.Series`` row;
        confirm against the caller).
    :param date: end date as a string; required when a tenor is present.
    :return: the merged option string.
    """
    parts = []
    # ``items()`` rather than ``iteritems()``: the latter was removed from
    # pandas in 2.0 and does not exist on Python 3 mappings.
    for key, value in params.items():
        if pd.isnull(value):
            continue
        if key == Header.TENOR:
            py_assert(date is not None, ValueError, 'date must given if tenor is not None')
            start = WIND_DATA_PROVIDER.forward_date(date, value, self.date_format)
            parts.append('startDate=' + start + ';endDate=' + date)
        elif key == Header.FREQ and value[:3] == 'min':
            parts.append('BarSize=' + value[3:])
        else:
            parts.append(key + '=' + str(value))
    return ';'.join(parts) + FactorLoader._check_industry_params(params.name)
def _merge_query_params(params, date=None):
    """Build the ``key=value;...`` option string from the non-null params.

    Entries of ``params`` whose value is null (per ``pd.isnull``) are
    skipped.  The remaining entries are rendered as follows:

    * ``Header.TENOR``: the leading digits of the tenor are taken as the
      unit count and the remainder is passed to ``FreqType``; the entry
      becomes ``startDate=<start>;endDate=<date>``, where ``<start>`` is
      ``WIND_DATA_PROVIDER.advance_date(date, unit, freq)`` formatted as
      ``%Y%m%d``.
    * ``Header.FREQ`` with a value starting with ``'min'``:
      ``BarSize=<rest of the value>``.
    * anything else: ``<index>=<str(value)>``.

    The pieces are joined with ``';'`` and the result of
    ``FactorLoader._check_industry_params(params.name)`` is appended.

    :param params: object exposing ``items()`` yielding ``(index, value)``
        pairs and a ``name`` attribute (presumably a ``pd.Series`` row;
        confirm against the caller).
    :param date: end date as a string; required when a tenor is present.
    :return: the merged option string.
    """
    parts = []
    # ``items()`` rather than ``iteritems()``: the latter was removed from
    # pandas in 2.0 and does not exist on Python 3 mappings.
    for index, value in params.items():
        if pd.isnull(value):
            continue
        if index == Header.TENOR:
            py_assert(date is not None, ValueError, 'date must given if tenor is not None')
            # ``unit`` stays a string of digits; it is passed on unchanged.
            unit = ''.join(re.findall('[0-9]+', value))
            freq = FreqType(value[len(unit):])
            start = WIND_DATA_PROVIDER.advance_date(date, unit, freq).strftime('%Y%m%d')
            parts.append('startDate=' + start + ';endDate=' + date)
        elif index == Header.FREQ and value[:3] == 'min':
            parts.append('BarSize=' + value[3:])
        else:
            parts.append(index + '=' + str(value))
    return ';'.join(parts) + FactorLoader._check_industry_params(params.name)
def _retrieve_data(self, main_params, extra_params, output_data_format):
    """Query the Wind data provider for one factor and reformat the result.

    The API name is read from ``main_params[Header.API]``:

    * ``'w.wsq'``: ``self.sec_id`` is queried in slices of
      ``self.block_size`` codes.  Each ``raw_data.Data`` is turned into a
      transposed DataFrame, the slices are concatenated along axis 0 and
      the columns are renamed to the fixed quote column names.
    * ``'w.wsi'``: a single query covering ``self.start_date`` to
      ``self.end_date``.  The raw result is reformatted with
      ``date=self.end_date``; ``output_data_format`` is not forwarded on
      this path.
    * ``'w.wsd'`` / ``'w.wss'``: one query per date returned by
      ``WIND_DATA_PROVIDER.biz_days_list``.  The per-date results are
      reformatted and concatenated along axis 0.

    :param main_params: indexable by ``Header.API`` and ``Header.INDICATOR``.
    :param extra_params: indexable by ``Header.MULTIFACTORS`` and, on the
        per-date path, ``Header.REPORTADJ`` (plus ``Header.TENOR`` for
        ``'w.wss'``); also forwarded to ``self._merge_query_params``.
        Not read on the ``'w.wsq'`` path.
    :param output_data_format: forwarded to
        ``WIND_QUERY_HELPER.reformat_wind_data`` on the per-date path.
    :return: the retrieved data; an empty ``pd.DataFrame`` when there is
        nothing to query (with the quote columns on the ``'w.wsq'`` path).
    :raises ValueError: on the per-date path, when a date is processed and
        the API is neither ``'w.wsd'`` nor ``'w.wss'``.  ``py_assert`` is
        also called with ``ValueError`` when the tenor is null for
        ``'w.wss'``.
    """
    api = main_params[Header.API]
    indicator = main_params[Header.INDICATOR]

    if api == 'w.wsq':
        columns = [
            'open', 'high', 'low', 'last', 'vol', 'amt', 'vol_ratio',
            'pct_chg_5min'
        ]
        frames = []
        # Step through the codes block by block.  Stepping with ``range``
        # avoids the extra query with an empty code list that a
        # ``len // block_size + 1`` loop count issues whenever the number of
        # codes is an exact multiple of the block size.
        for offset in range(0, len(self.sec_id), self.block_size):
            code_set = self.sec_id[offset:offset + self.block_size]
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=code_set,
                indicator=indicator)
            frames.append(pd.DataFrame(raw_data.Data).T)
        if not frames:
            return pd.DataFrame(columns=columns)
        output_data = pd.concat(frames, axis=0)
        output_data.columns = columns
        return output_data

    if api == 'w.wsi':
        merged_extra_params = self._merge_query_params(extra_params,
                                                       date=self.end_date)
        raw_data = WIND_DATA_PROVIDER.query_data(
            api=api,
            sec_id=self.sec_id,
            indicator=indicator,
            extra_params=merged_extra_params,
            start_date=self.start_date,
            end_date=self.end_date)
        multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'
        return WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw_data,
            date=self.end_date,
            multi_factors=multi_factors)

    dates = WIND_DATA_PROVIDER.biz_days_list(start_date=self.start_date,
                                             end_date=self.end_date,
                                             freq=self.freq)
    # These two flags do not depend on the date, so evaluate them once.
    use_report_date = not pd.isnull(extra_params[Header.REPORTADJ])
    multi_factors = extra_params[Header.MULTIFACTORS] == 'Y'

    # Collect the per-date frames and concatenate once at the end instead of
    # re-copying the accumulated result on every iteration.
    frames = []
    for fetch_date in dates:
        if use_report_date:
            date = WIND_QUERY_HELPER.latest_report_date(fetch_date)
        else:
            date = fetch_date
        date = date_convert_2_str(date)

        if self.is_index:
            sec_id = WIND_DATA_PROVIDER.get_universe(self.sec_id, date=date)
        else:
            sec_id = self.sec_id

        if api == 'w.wsd':
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=indicator,
                extra_params=merged_extra_params,
                start_date=date,
                end_date=date)
        elif api == 'w.wss':
            py_assert(
                not pd.isnull(extra_params[Header.TENOR]), ValueError,
                'tenor must be given for query factor {0}'.format(
                    self.factor_name))
            merged_extra_params = self._merge_query_params(extra_params,
                                                           date=date)
            raw_data = WIND_DATA_PROVIDER.query_data(
                api=api,
                sec_id=sec_id,
                indicator=indicator,
                extra_params=merged_extra_params)
        else:
            raise ValueError(
                'FactorLoader._retrieve_data: unacceptable value of parameter api'
            )

        # The result is labelled with the business day being fetched, not
        # the (possibly adjusted) date that was sent to the query.
        frames.append(WIND_QUERY_HELPER.reformat_wind_data(
            raw_data=raw_data,
            date=fetch_date,
            output_data_format=output_data_format,
            multi_factors=multi_factors))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)