def apply_sequencing(self, d=0.1, n=40, distance=False, fixed=False,
                     dwelling=False, enumerator=False, tuned=False,
                     enumerator_dist=False):
    """Build the requested sequence tables and left-merge them onto ``self.data``.

    Every enabled flag contributes one table. The tables are merged, in the
    order distance / fixed / dwelling / enumerator / enumerator_dist, onto
    ``self.data`` with a left join on ``[self.ward_col, self.dwelling_col]``;
    the result is stored in ``self.df``.

    Args:
        d: Forwarded to ``self.get_dwellings_dist_seq`` and
            ``sequences.get_dist_seq`` (presumably a distance threshold --
            confirm against ``sequences``).
        n: Forwarded to ``self.get_dwellings_fixed_seq``.
        distance: Include the distance-based sequence table.
        fixed: Include the table from ``self.get_dwellings_fixed_seq(n)``.
        dwelling: Include the table from
            ``self.get_dwellings_dwellings_seq()``.
        enumerator: Include the table from ``self.get_enum_seq()``.
        tuned: Optional precomputed table used in place of
            ``self.get_dwellings_dist_seq(d)`` when ``distance`` is set.
            A pandas DataFrame/Series is accepted as-is; any other value is
            used only when it is truthy.
        enumerator_dist: Include a distance sequence computed separately
            within each ``(ward, "enum_seq")`` group, with its columns
            renamed to ``enum_*`` so they do not clash with the plain
            distance sequence.

    Returns:
        None. The merged table is written to ``self.df``.
    """
    print('d: ', d)
    merge_keys = [self.ward_col, self.dwelling_col]
    sequences_dfs = []

    if distance:
        # A DataFrame/Series has no unambiguous truth value (pandas raises
        # ValueError), so detect a supplied table by type before falling
        # back to plain truthiness for other values such as False/None.
        tuned_supplied = isinstance(tuned, (pd.DataFrame, pd.Series)) or bool(tuned)
        if tuned_supplied:
            dwellings_dist = tuned
        else:
            dwellings_dist = self.get_dwellings_dist_seq(d)
        sequences_dfs.append(dwellings_dist)

    if fixed:
        sequences_dfs.append(self.get_dwellings_fixed_seq(n))

    if dwelling:
        sequences_dfs.append(self.get_dwellings_dwellings_seq())

    if enumerator:
        sequences_dfs.append(self.get_enum_seq())

    if enumerator_dist:
        enum_group_keys = [self.ward_col, "enum_seq"]
        dwellings_dist = self.get_enum_seq(all=True)
        # In-place drop is kept deliberately: it also affects the object
        # returned by get_enum_seq, exactly as before.
        dwellings_dist.dropna(subset=[self.block_col], inplace=True)
        dwellings_dist = dwellings_dist.groupby(
            enum_group_keys, as_index=False).apply(
                lambda x: sequences.col_for_seq(
                    x, X=self.x_col, Y=self.y_col))
        dwellings_dist = dwellings_dist.groupby(
            enum_group_keys, as_index=False).apply(
                lambda x: sequences.get_dist_seq(x, d))
        dwellings_dist.rename(columns={
            "sequence_id": "enum_dist_id",
            "sequence_order_enum": "enum_dist_order",
            "dist": "enum_dist",
            "sequence_len": "enum_sequence_len",
        }, inplace=True)
        sequences_dfs.append(dwellings_dist.loc[:, [
            self.ward_col, self.dwelling_col, "enum_dist_id",
            "enum_dist_order", "enum_dist", "enum_sequence_len",
        ]].copy())

    # With no table requested, reduce() simply returns self.data itself.
    self.df = reduce(
        lambda left, right: pd.merge(
            left, right, how="left", on=merge_keys),
        sequences_dfs, self.data)

    # NOTE: a per-ward ``self.fill_within`` pass over 'sequence_id' (for
    # ``distance``) and over 'enum_dist_id' (for ``enumerator_dist``) used to
    # run here and is currently disabled.
    if enumerator_dist:
        # Rows without a page number are dropped only on this path.
        self.df = self.df.dropna(subset=[self.pagenum])
def sequence_datasets(census_1850, census_1880):
    """Derive per-dwelling tables and sequenced person-level tables for both censuses.

    For 1850 a dwelling is one ``(WARD_NUM, CENSUS_DWELLING_NUM)`` pair with a
    non-null ``CD_ADDRESS``; for 1880 it is one distinct ``CENSUS_ADDRESS``.
    Both dwelling tables are run through ``sequences.col_for_seq``,
    ``sequences.get_dist_seq`` (threshold 0.15) and
    ``sequences.sequence_order``, then joined back onto the full census
    tables via ``dataprocessing.dwellings_to_all``.

    Args:
        census_1850: Table with at least ``WARD_NUM``,
            ``CENSUS_DWELLING_NUM``, ``CD_ADDRESS``, ``CD_X`` and ``CD_Y``.
        census_1880: Table with at least ``CENSUS_ADDRESS``, ``POINT_X`` and
            ``POINT_Y`` (``CENSUS_MATCH_ADDR`` is used for the join back --
            presumably supplied by the input or by the sequencing helpers;
            confirm).

    Returns:
        ``[dwellings_1850, dwellings_1880, census_1850_model,
        census_1880_model]``.
    """
    ward = "WARD_NUM"
    dwelling_num = "CENSUS_DWELLING_NUM"
    address_1850 = "CD_ADDRESS"
    address_1880 = "CENSUS_ADDRESS"
    match_addr = "CENSUS_MATCH_ADDR"
    max_dist = 0.15

    # Only 1850 rows that carry a dwelling number take part at all.
    census_1850 = census_1850.dropna(subset=[dwelling_num]).copy()

    # One row per dwelling for each census year.
    per_dwelling_1850 = census_1850.groupby(
        [ward, dwelling_num], as_index=False).first()
    per_dwelling_1850 = per_dwelling_1850.dropna(subset=[address_1850]).copy()
    per_dwelling_1880 = census_1880.drop_duplicates(subset=[address_1880])
    per_dwelling_1880 = per_dwelling_1880.reset_index(drop=True).copy()

    # Return values are ignored here, so col_for_seq presumably prepares the
    # frames in place -- confirm against ``sequences``.
    sequences.col_for_seq(per_dwelling_1850, "CD_X", "CD_Y")
    sequences.col_for_seq(per_dwelling_1880, "POINT_X", "POINT_Y")

    # The sequenced table is the third element of get_dist_seq's result.
    dist_result_1850 = sequences.get_dist_seq(per_dwelling_1850, max_dist)
    per_dwelling_1850 = dist_result_1850[2]
    dist_result_1880 = sequences.get_dist_seq(per_dwelling_1880, max_dist)
    per_dwelling_1880 = dist_result_1880[2]

    per_dwelling_1850 = per_dwelling_1850.groupby("sequence_id").apply(
        sequences.sequence_order)
    per_dwelling_1880 = per_dwelling_1880.groupby("sequence_id").apply(
        sequences.sequence_order)

    # NOTE(review): the original author flagged this step as unclear. It
    # collapses each table back to one row per dwelling after the
    # per-sequence apply -- confirm it is still required.
    per_dwelling_1850 = per_dwelling_1850.groupby(
        [ward, dwelling_num], as_index=False).first()
    per_dwelling_1880 = per_dwelling_1880.drop_duplicates(subset=[address_1880])
    per_dwelling_1880 = per_dwelling_1880.reset_index(drop=True).copy()

    # Attach the sequence columns to every census record.
    census_1880_model = dataprocessing.dwellings_to_all(
        census_1880,
        per_dwelling_1880,
        [match_addr, "sequence_id", "sequence_order", "num_between"],
        [match_addr])
    census_1850_model = dataprocessing.dwellings_to_all(
        census_1850,
        per_dwelling_1850,
        [ward, dwelling_num, "sequence_id", "sequence_order",
         "num_between", "sequence_order_enum"],
        [ward, dwelling_num])

    # Same call order as before; return values are unused, so these
    # presumably modify the frames in place.
    street_house_targets = (
        (per_dwelling_1880, address_1880),
        (per_dwelling_1850, address_1850),
        (census_1880_model, address_1880),
        (census_1850_model, address_1850),
    )
    for frame, address_col in street_house_targets:
        dataprocessing.create_street_house(frame, address_col)

    return [
        per_dwelling_1850,
        per_dwelling_1880,
        census_1850_model,
        census_1880_model,
    ]
def get_dwellings_dist_seq(self, d=0.1):
    """Return the per-ward distance sequence for every dwelling with a block.

    Dwellings from ``self.get_dwellings()`` that lack ``self.block_col`` are
    dropped (in place), then each ward is passed through
    ``sequences.col_for_seq`` followed by ``sequences.get_dist_seq``.

    Args:
        d: Forwarded to ``sequences.get_dist_seq`` (presumably a distance
            threshold -- confirm against ``sequences``).

    Returns:
        A copy holding the ward and dwelling key columns plus
        ``sequence_id``, ``num_between``, ``sequence_order_enum``, ``dist``
        and ``sequence_len``.
    """

    def add_seq_columns(ward_rows):
        return sequences.col_for_seq(ward_rows, X=self.x_col, Y=self.y_col)

    def build_dist_seq(ward_rows):
        return sequences.get_dist_seq(ward_rows, d)

    output_cols = [
        self.ward_col,
        self.dwelling_col,
        "sequence_id",
        "num_between",
        "sequence_order_enum",
        "dist",
        "sequence_len",
    ]

    all_dwellings = self.get_dwellings()
    # In-place on purpose: the object handed back by get_dwellings() is
    # modified too, exactly as before.
    all_dwellings.dropna(subset=[self.block_col], inplace=True)

    prepared = all_dwellings.groupby(
        self.ward_col, as_index=False).apply(add_seq_columns)
    sequenced = prepared.groupby(
        self.ward_col, as_index=False).apply(build_dist_seq)

    return sequenced.loc[:, output_cols].copy()