def add_history(month, panel_store=None, frame=None):
    """
    Attach three-month employment-history items to one month's panel.

    The panel is ``frame`` when one is supplied; otherwise it is read from
    ``panel_store`` under the key ``'m' + month``.  The history is computed
    on a reduced copy (via ``get_useful``) and the resulting 'unemployed',
    'nonemployed' and 'either' items are written back onto the panel as
    'unemployed_history', 'nonemployed_history' and 'either_history'.

    Per the original author's notes, each history value is one of
    {1, 0, -1}, to be read as {True, False, NaN}:

    - (1)  True if `kind` at any time in the past 3 months and employed
           today (new hire)
    - (0)  False if employed in the past 3 months and employed today
    - (-1) NaN if un/non employed today.

    Returns the panel (``frame`` or the one selected from the store) with
    the three history items added.
    """
    # TODO: check this...
    panel = frame if frame is not None else panel_store.select('m' + month)
    scratch = get_useful(panel.copy())

    # The helper's return value is ignored; it is expected to modify
    # ``scratch`` in place, one employment kind at a time.
    for kind in ('either', 'unemployed', 'nonemployed'):
        _add_employment_status_last_period(scratch, kind=kind)

    for kind in ('unemployed', 'nonemployed', 'either'):
        panel[kind + '_history'] = scratch[kind]
    return panel
def is_recently_unemployed(wp, month='both'):
    """
    Group workers employed in month 4 and/or 8 by their status in months 1-3.

    After reducing the panel with ``get_useful`` and keeping only people
    whose age is between 22 and 65 (inclusive) in at least one column, the
    function looks at the 'labor_status' item.  For each requested month it
    takes the rows whose status in that month is 1 or 2 and splits them
    into three groups according to the codes found in columns 1 through 3:

    - 'employed'    : all of columns 1-3 are in {1, 2}
    - 'unemployed'  : all of columns 1-3 are in {3, 4}
    - 'nonemployed' : all of columns 1-3 are in {5, 6, 7}

    (I wonder if groupby().filter could handle this...)

    Parameters
    ----------
    wp : Panel
        Panel with at least the items 'age', 'labor_status', 'year' and
        'month', and month-in-sample labels on the minor axis.
    month : {'both', 4, '4', 8, '8'}
        Which month(s) to evaluate.  If 8 is absent from the minor axis the
        request falls back to month 4 only.

    Returns
    -------
    dict or None
        ``{timestamp: {group_name: selection}}`` with one timestamp (first
        day of the calendar month read from the 'year'/'month' items) per
        evaluated month, and ``selection = wp.loc[:, group_index, m]``.
        ``None`` when 4 is absent from the minor axis.

    Raises
    ------
    ValueError
        If ``month`` is not one of the accepted values.
    """
    wp = get_useful(wp)
    working_age = (wp['age'] >= 22) & (wp['age'] <= 65)
    wp = wp.loc[:, working_age.any(1)]

    if month == 'both':
        months = [4, 8]
    elif month in (4, '4'):
        # Assignment, not comparison: `months == [4]` left `months` unbound.
        months = [4]
    elif month in (8, '8'):
        months = [8]
    else:
        raise ValueError(
            "month must be 4, 8 or 'both'; got {!r}".format(month))

    if 8 not in wp.minor_axis:
        months = [4]
    if 4 not in wp.minor_axis:
        return None

    status = wp['labor_status']
    # One frame per requested month: rows employed (codes 1/2) in that month.
    employed_now = [status[status[m].isin([1, 2])] for m in months]

    # NOTE(review): the history window is columns 1-3 for every requested
    # month, including month 8 -- confirm that 5-7 was not intended there.
    def _history_index(codes):
        return [x[x.loc[:, 1:3].isin(codes).all(1)].index
                for x in employed_now]

    # A concrete list (not a bare zip iterator) so it can be walked once per
    # month; on Python 3 a zip object is exhausted after the first pass.
    groups = [
        ('employed', _history_index([1, 2])),
        ('unemployed', _history_index([3, 4])),
        ('nonemployed', _history_index([5, 6, 7])),
    ]

    res = {}
    for i, m in enumerate(months):
        year = int(wp['year'][m].dropna().values[0])
        cal_month = int(wp['month'][m].dropna().values[0])
        stamp = pd.Timestamp(pd.datetime(year, cal_month, 1))
        res[stamp] = {name: wp.loc[:, idx[i], m] for name, idx in groups}
    return res
def add_flows(month, panel_store=None, frame=None):
    """
    Add the *monthly* labor-market flow item to one month's panel.

    The original notes describe one flow per worker for each month
    (2 :: 8), taking the values ee, eu, en, ue, uu, un, ne, nu, nn.

    The panel is ``frame`` when supplied, otherwise it is read from
    ``panel_store`` under the key ``'m' + month``.  Flows are computed on a
    reduced copy (``get_useful``) by ``_add_flows_panel`` and the resulting
    'flow' item is copied back onto the panel, which is returned.

    Any exception raised while computing or copying the flows is reported
    on stdout and re-raised as a ``KeyError`` wrapping the original error.
    """
    panel = panel_store.select('m' + month) if frame is None else frame
    scratch = get_useful(panel.copy())

    try:
        _add_flows_panel(scratch, inplace=True)
        panel['flow'] = scratch['flow']
    except Exception as e:
        print("Skipping {}, because of {}".format(month, e))
        raise KeyError(e)
    return panel