def _preprocess_order_data(self, order, need_unitize=True):
    """Filter, clean and normalise the raw order table.

    Steps, in order:
      1. keep rows whose ``first_cate_code`` is in ``self._all_cate_codes``;
      2. keep rows with ``order_date >= '2015-09-01'``;
      3. discard rows whose ``customer_code`` contains the ``\\N`` marker;
      4. drop columns that are not used downstream;
      5. sort by ``order_date`` and rebuild the index;
      6. clean the string key columns and replace ``\\N`` markers in the
         descriptive columns with '未知';
      7. optionally rescale quantity/amount columns.

    Args:
        order: pandas DataFrame holding the raw order records.
        need_unitize: When True, ``ord_qty`` and ``ord_amount`` are divided
            by 10000 (i.e. expressed in units of ten thousand).

    Returns:
        A new, cleaned DataFrame sorted by ``order_date``.
    """
    # Literal backslash followed by "N" (presumably the NULL placeholder of
    # the upstream export -- confirm with the data source).
    null_marker = '\\N'
    unknown = '未知'

    order = order.loc[order.first_cate_code.isin(self._all_cate_codes)]
    order = order.loc[order.order_date >= '2015-09-01']
    order = order.loc[
        ~order.customer_code.str.contains(null_marker, regex=False)
    ]

    # Non-inplace drop: ``order`` is a ``.loc`` slice here, and an in-place
    # drop on a slice triggers pandas' chained-assignment warnings.
    order = order.drop(columns=[
        'org_code', 'region_code', 'region_name', 'road',
        'fin_segment1_code', 'fin_segment1_name',
        'fin_segment2_code', 'fin_segment2_name',
        'received_qty', 'return_qty',
    ])
    order = order.sort_values(by='order_date').reset_index(drop=True)

    # ``remove_whitespace`` is defined elsewhere; its return value is not
    # used, so it presumably cleans the listed columns in place.
    remove_whitespace(order, ['sales_cen_code', 'customer_code', 'item_code'])

    # ``regex=False`` is passed explicitly: the default of ``str.replace``
    # changed between pandas versions, and a plain literal match of the
    # marker is what is wanted here.
    order['district'] = order.district.str.replace(
        null_marker, unknown, regex=False)
    order['channel_name'] = order.channel_name.apply(transform_channel)
    order['channel_name'] = order.channel_name.str.replace(
        null_marker, unknown, regex=False)
    order['sales_chan_name'] = order.sales_chan_name.str.replace(
        null_marker, unknown, regex=False)

    # Assign the filled column back instead of calling ``fillna`` in place
    # on ``order.project_flag``: the chained in-place form does not update
    # the frame when pandas Copy-on-Write is active.
    order['project_flag'] = (
        order.project_flag.fillna(unknown).apply(transform_project_flag)
    )

    if need_unitize:
        # Express quantities and amounts in units of ten thousand.
        order['ord_qty'] = order.ord_qty / 10000
        order['ord_amount'] = order.ord_amount / 10000
    return order
def _preprocess_dis_data(self, dis, need_unitize=True):
    """Filter, clean and normalise the raw ``dis`` table.

    Steps, in order:
      1. drop columns that are not used downstream;
      2. keep rows whose ``first_cate_code`` is in ``self._all_cate_codes``;
      3. keep rows whose ``customer_code`` starts with 'C';
      4. sort by ``order_date`` and rebuild the index;
      5. cast the key columns to ``str`` and strip surrounding whitespace;
      6. replace ``\\N`` markers in descriptive columns with '未知';
      7. round ``dis_qty`` to an integer, parse ``item_price`` as float and
         derive ``dis_amount = dis_qty * item_price``;
      8. optionally rescale quantity/amount columns.

    Args:
        dis: pandas DataFrame holding the raw records. It is not modified;
            the column drop is applied to a copy so the method can be
            called repeatedly on the same frame.
        need_unitize: When True, ``dis_qty`` and ``dis_amount`` are divided
            by 10000 (i.e. expressed in units of ten thousand).

    Returns:
        A new, cleaned DataFrame sorted by ``order_date``.
    """
    # Literal backslash followed by "N" (presumably the NULL placeholder of
    # the upstream export -- confirm with the data source).
    null_marker = '\\N'
    unknown = '未知'

    # Non-inplace drop so the caller's frame keeps its columns.
    dis = dis.drop(columns=[
        'bu_code', 'bu_name', 'region_code', 'region_name', 'road',
    ])
    dis = dis.loc[dis.first_cate_code.isin(self._all_cate_codes)]
    dis = dis.loc[dis.customer_code.str.startswith('C')]
    dis = dis.sort_values(by='order_date').reset_index(drop=True)

    for key_column in ('sales_cen_code', 'customer_code', 'item_code'):
        dis[key_column] = dis[key_column].astype(str).str.strip()

    # ``regex=False`` is passed explicitly: the default of ``str.replace``
    # changed between pandas versions, and a plain literal match of the
    # marker is what is wanted here.
    dis['district'] = dis.district.str.replace(
        null_marker, unknown, regex=False)
    dis['customer_type'] = dis.customer_type.str.replace(
        null_marker, unknown, regex=False)
    dis['is_usable'] = dis.is_usable.astype(str).str.replace(
        null_marker, unknown, regex=False)
    dis['channel_name'] = dis.channel_name.apply(transform_channel)

    dis['dis_qty'] = np.round(dis.dis_qty).astype(int)
    # Missing prices become the sentinel -1.0.
    # NOTE(review): rows with the sentinel therefore get a negative
    # ``dis_amount`` below -- confirm that downstream code expects this.
    dis['item_price'] = dis.item_price.astype(str).str.replace(
        null_marker, '-1.0', regex=False).astype(float)
    dis['dis_amount'] = dis.dis_qty * dis.item_price

    if need_unitize:
        # Express quantities and amounts in units of ten thousand.
        dis['dis_qty'] = dis.dis_qty / 10000
        dis['dis_amount'] = dis.dis_amount / 10000
    return dis
def _preprocess_inv_data(self, inv, need_unitize=True):
    """Filter and normalise the raw inventory table.

    NOTE(review): another ``_preprocess_inv_data`` definition follows this
    one. If both live in the same class, the later definition replaces this
    one at class-creation time and this version is dead code -- confirm and
    remove whichever is obsolete.

    Args:
        inv: pandas DataFrame holding the raw inventory records.
        need_unitize: When True, ``inv_qty`` and ``inv_amount`` are divided
            by 10000 (i.e. expressed in units of ten thousand).

    Returns:
        A new DataFrame restricted to ``self._all_cate_codes``, sorted by
        ``order_date``, with ``\\N`` markers in ``district`` and
        ``sales_chan_name`` replaced by '未知' and ``channel_name`` mapped
        through ``transform_channel``.
    """
    # Literal backslash followed by "N" (presumably the NULL placeholder of
    # the upstream export -- confirm with the data source).
    null_marker = '\\N'
    unknown = '未知'

    inv = inv.loc[inv.first_cate_code.isin(self._all_cate_codes)]
    inv = inv.sort_values(by='order_date').reset_index(drop=True)

    if need_unitize:
        # Express quantities and amounts in units of ten thousand.
        inv['inv_qty'] = inv.inv_qty / 10000
        inv['inv_amount'] = inv.inv_amount / 10000

    # ``regex=False`` is passed explicitly: the default of ``str.replace``
    # changed between pandas versions, and a plain literal match of the
    # marker is what is wanted here.
    inv['district'] = inv.district.str.replace(
        null_marker, unknown, regex=False)
    inv['channel_name'] = inv.channel_name.apply(transform_channel)
    inv['sales_chan_name'] = inv.sales_chan_name.str.replace(
        null_marker, unknown, regex=False)
    return inv
def _preprocess_inv_data(self, inv, need_unitize=True):
    """Filter, clean and normalise the raw inventory table.

    Steps, in order:
      1. drop columns that are not used downstream;
      2. keep rows whose ``first_cate_code`` is in ``self._all_cate_codes``;
      3. sort by ``order_date`` and rebuild the index;
      4. cast the key columns to ``str`` and strip surrounding whitespace;
      5. replace ``\\N`` markers in ``district`` and ``sales_chan_name`` with
         '未知' and map ``channel_name`` through ``transform_channel``;
      6. optionally rescale quantity/amount columns.

    Args:
        inv: pandas DataFrame holding the raw inventory records. It is not
            modified; the column drop is applied to a copy so the method
            can be called repeatedly on the same frame.
        need_unitize: When True, ``inv_qty`` and ``inv_amount`` are divided
            by 10000 (i.e. expressed in units of ten thousand).

    Returns:
        A new, cleaned DataFrame sorted by ``order_date``.
    """
    # Literal backslash followed by "N" (presumably the NULL placeholder of
    # the upstream export -- confirm with the data source).
    null_marker = '\\N'
    unknown = '未知'

    # Non-inplace drop so the caller's frame keeps its columns.
    inv = inv.drop(
        columns=['bu_code', 'bu_name', 'region_code', 'region_name'])
    inv = inv.loc[inv.first_cate_code.isin(self._all_cate_codes)]
    inv = inv.sort_values(by='order_date').reset_index(drop=True)

    for key_column in ('sales_cen_code', 'customer_code', 'item_code'):
        inv[key_column] = inv[key_column].astype(str).str.strip()

    # ``regex=False`` is passed explicitly: the default of ``str.replace``
    # changed between pandas versions, and a plain literal match of the
    # marker is what is wanted here.
    inv['district'] = inv.district.str.replace(
        null_marker, unknown, regex=False)
    inv['channel_name'] = inv.channel_name.apply(transform_channel)
    inv['sales_chan_name'] = inv.sales_chan_name.str.replace(
        null_marker, unknown, regex=False)

    if need_unitize:
        # Express quantities and amounts in units of ten thousand.
        inv['inv_qty'] = inv.inv_qty / 10000
        inv['inv_amount'] = inv.inv_amount / 10000
    return inv