def clean_seas_df_for_min_max_average(seas, range): """ Given a seasonalised dataframe, clean to handle missing data :param seas: :return: """ seas = seas.dropna(how='all', axis=1) seasf = seas.rename(columns=dates.find_year(seas)) # only consider when we have full(er) data for a given range fulldata = pd.DataFrame(seasf.isna().sum()) # count non-na values if not (fulldata == 0).all( ).iloc[0]: # line below doesn't apply when we have full data for all columns fulldata = fulldata[fulldata.apply( lambda x: np.abs(x - x.mean()) / x.std() < 1.5).all( axis=1)] # filter columns with high emtply values seasf = seasf[fulldata.index] # use these column names only if isinstance(range, int): end_year = dates.curyear - 1 start_year = end_year - (range - 1) else: start_year, end_year = range[0], range[1] r = seasf[[x for x in seasf.columns if x >= start_year and x <= end_year]] return r
def reindex_year(df): """ Reindex a dataframe containing prices to the current year. eg dataframe with brent Jan 19, Jan 18, Jan 17 so that 18 is shifted +1 year and 17 is shifted +2 years """ dfs = [] colyearmap = dates.find_year(df) for colname in df.columns: if df[colname].isnull().all(): continue # logic below wont work on all empty NaN columns # determine year colyear = colyearmap[colname] delta = dates.curyear - colyear w = df[[colname]] if delta == 0: dfs.append(w) else: # reindex winew = [x + pd.DateOffset(years=delta) for x in w.index] w.index = winew dfs.append(w) # merge all series into one dataframe, concat doesn't quite do the job res = reduce( lambda left, right: pd.merge( left, right, left_index=True, right_index=True, how='outer'), dfs) res = res.dropna(how='all') # drop uneeded columns out into future res = pandasutil.fillna_downbet( res) # use this as above ffills incorrectly at end of timeseries return res
def reindex_year_df_rel_col(df): """ Given a reindexed year dataframe, figure out which column to use for change summary Basic algorithm is use current year, unless you are 10 days from end of dataframe :param df: :return: """ res_col = df.columns[0] years = dates.find_year(df) last_val_date = df.index[-1] colyears = [x for x in df if str(dates.curyear) in str(x)] if len(colyears) > 0: res_col = colyears[0] relyear = (pd.to_datetime('{}-01-01'.format(years.get(res_col))) ) # year of this column dft = df[colyears].dropna() if len(dft) > 0: relcol_date = df[res_col].dropna().index[ -1] # last date of this column delta = last_val_date - relcol_date if delta.days < 10: relyear1 = (relyear + pd.DateOffset(years=1)).year relyear1 = [x for x in df if str(relyear1) in x] if len(relyear1) > 0: return relyear1[0] else: return res_col return res_col
def time_spreads_quarterly(contracts, m1, m2): """ Given a dataframe of daily values for monthly contracts (eg Brent Jan 15, Brent Feb 15, Brent Mar 15) with columns headings as '2020-01-01', '2020-02-01' Return a dataframe of time spreads (eg m1 = Q1, m2 = Q2 gives Q1-Q2 spread) """ qtrcontracts = quarterly_contracts(contracts) qtrcontracts_years = dates.find_year(qtrcontracts) cf = [x for x in qtrcontracts if x.startswith(m1)] dfs = [] for c1 in cf: year1, year2 = qtrcontracts_years[c1], qtrcontracts_years[c1] if int(m1[-1]) >= int( m2[-1]): # eg Q1-Q1 or Q4-Q1, then do Q419 - Q120 (year ahead) year2 = year1 + 1 c2 = [ x for x in qtrcontracts if x.startswith(m2) and qtrcontracts_years[x] == year2 ] if len(c2) == 1: c2 = c2[0] s = qtrcontracts[c1] - qtrcontracts[c2] s.name = year1 dfs.append(s) res = pd.concat(dfs, 1) res = res.dropna(how='all', axis='rows') return res
def std_yr_col(df, asdict=False): """ Given a dataframe with yearly columns, determine the line colour to use """ if isinstance(df, pd.Series): df = pd.DataFrame(df) yearmap = dates.find_year(df, use_delta=True) colmap = {} for colname, delta in yearmap.items(): colmap[colname] = year_col_map.get(delta, default_line_col) if asdict: return colmap # return array of colours to use - this can be passed into cufflift iplot method return [colmap[x] for x in df]
def test_find_year(self): df = pd.DataFrame(columns=['Q1 2020', 'Q2 2022']) res = dates.find_year(df) self.assertEqual(res['Q1 2020'], 2020) self.assertEqual(res['Q2 2022'], 2022)
def test_find_year4(self): df = pd.DataFrame(columns=['FB', 'FP 2021']) res = dates.find_year(df) self.assertEqual(res['FB'], 'FB') self.assertEqual(res['FP 2021'], 2021)
def test_find_year2(self): df = pd.DataFrame(columns=['CAL 2020-2021']) res = dates.find_year(df) self.assertEqual(res['CAL 2020-2021'], 2020)
def spread_combination(contracts, combination_type): """ Convenience method to access functionality in forwards using a combination_type keyword :param contracts: :param combination_type: :return: """ combination_type = combination_type.lower() contracts = contracts.dropna(how='all', axis='rows') if combination_type == "calendar": c_contracts = cal_contracts(contracts) colmap = dates.find_year(c_contracts) c_contracts = c_contracts.rename( columns={x: colmap[x] for x in c_contracts.columns}) return c_contracts if combination_type == "calendar spread": c_contracts = cal_spreads(cal_contracts(contracts)) colmap = dates.find_year(c_contracts) c_contracts = c_contracts.rename( columns={x: colmap[x] for x in c_contracts.columns}) return c_contracts if combination_type.startswith('q'): q_contracts = quarterly_contracts(contracts) m = re.search('q\d-q\d', combination_type) if m: q_spreads = quarterly_spreads(q_contracts) q_spreads = q_spreads[[ x for x in q_spreads.columns if x.startswith(combination_type.upper()) ]] colmap = dates.find_year(q_spreads) q_spreads = q_spreads.rename( columns={x: colmap[x] for x in q_spreads.columns}) return q_spreads m = re.search('q\dq\dq\d', combination_type) if m: q_spreads = fly_quarterly(q_contracts, x=int(combination_type[1]), y=int(combination_type[3]), z=int(combination_type[5])) colmap = dates.find_year(q_spreads) q_spreads = q_spreads.rename( columns={x: colmap[x] for x in q_spreads.columns}) return q_spreads m = re.search('q\d', combination_type) if m: q_contracts = q_contracts[[ x for x in q_contracts.columns if x.startswith(combination_type.upper()) ]] colmap = dates.find_year(q_contracts) q_contracts = q_contracts.rename( columns={x: colmap[x] for x in q_contracts.columns}) return q_contracts # handle monthly, spread and fly inputs month_abbr_inv = { month.lower(): index for index, month in enumerate(month_abbr) if month } months = [x.lower() for x in month_abbr] if len(combination_type) == 3 and combination_type in months: c = contracts[[ x for x in contracts if x.month == month_abbr_inv[combination_type] ]] c = c.rename(columns={x: x.year for x in c.columns}) return c if len(combination_type) == 6: m1, m2 = combination_type[0:3], combination_type[3:6] if m1 in months and m2 in months: c = time_spreads(contracts, month_abbr_inv[m1], month_abbr_inv[m2]) return c if len(combination_type) == 9: m1, m2, m3 = combination_type[0:3], combination_type[ 3:6], combination_type[6:9] if m1 in months and m2 in months and m3 in months: c = fly(contracts, month_abbr_inv[m1], month_abbr_inv[m2], month_abbr_inv[m3]) return c