def valid_redemption(self, grouping_set, column_set, measure):
     """Aggregate 'RDM' rows whose identity/offer pair also has an 'EXP' row.

     Rows of ``self.base_redemption_df`` with ``contact_stage_code == 'RDM'``
     are inner-joined against the distinct (identity, ``offer_code``) pairs
     found on rows with ``contact_stage_code == 'EXP'``.  The joined rows
     are then handed to ``utils.count`` together with the identity column
     named by ``self.config_dict['identity_type_code']``.

     :param grouping_set: forwarded unchanged to ``utils.count``.
     :param column_set: forwarded unchanged to ``utils.count``.
     :param measure: forwarded unchanged to ``utils.count``.
     :return: the result of ``utils.count`` for the joined rows.
     """
     identity_col = self.config_dict['identity_type_code']
     pair_keys = [identity_col, 'offer_code']
     source = self.base_redemption_df

     # Distinct (identity, offer) pairs seen on 'EXP' rows.
     exp_pairs = source.filter(
         col('contact_stage_code') == 'EXP'
     ).select(*pair_keys).dropDuplicates()

     # Keep only the 'RDM' rows that have a matching 'EXP' pair.
     rdm_rows = source.filter(col('contact_stage_code') == 'RDM')
     matched_rows = rdm_rows.join(exp_pairs, pair_keys, 'inner')

     return utils.count(
         self.sqlContext,
         matched_rows,
         grouping_set,
         column_set,
         measure,
         identity_col
     )
 def digital_trigered(self, grouping_set, column_set, measure):
     """Aggregate 'ACT' rows whose channel code mentions "digital".

     Filters ``self.base_redemption_df`` to rows where the lower-cased
     ``channel_code`` matches ``'%digital%'`` and ``contact_stage_code``
     equals ``'ACT'``, then passes them to ``utils.count`` with
     ``'offer_code'`` as the final argument.

     :param grouping_set: forwarded unchanged to ``utils.count``.
     :param column_set: forwarded unchanged to ``utils.count``.
     :param measure: forwarded unchanged to ``utils.count``.
     :return: the result of ``utils.count`` for the filtered rows.
     """
     # Lower-casing first makes the LIKE match case-insensitive.
     is_digital_channel = lower(col('channel_code')).like('%digital%')
     is_act_stage = col('contact_stage_code') == 'ACT'

     digital_rows = self.base_redemption_df.filter(
         is_digital_channel & is_act_stage
     )
     return utils.count(
         self.sqlContext,
         digital_rows,
         grouping_set,
         column_set,
         measure,
         'offer_code'
     )
 def number_of_redemptions(self, grouping_set, column_set, measure):
     """Return the stored redemption counts, computing them when absent.

     If ``self.df_redemptions.head(1)`` is non-empty, that stored frame is
     returned as-is -- note that the arguments are not consulted in that
     case.  Otherwise ``utils.count`` is run over ``self.df_redeem`` with
     ``'offer_code'`` as the final argument, and the result is stored in
     ``self.df_redemptions`` before being returned.

     :param grouping_set: forwarded to ``utils.count`` when computing.
     :param column_set: forwarded to ``utils.count`` when computing.
     :param measure: forwarded to ``utils.count`` when computing.
     :return: the stored or freshly computed frame.
     """
     stored = self.df_redemptions
     if stored.head(1):
         # Already populated: reuse it.
         return stored

     computed = utils.count(
         self.sqlContext,
         self.df_redeem,
         grouping_set,
         column_set,
         measure,
         'offer_code'
     )
     self.df_redemptions = computed
     return computed
示例#4
0
    def mis_redemptions(self, grouping_set, column_set, measure):
        """Aggregate redemption rows whose product/offer pair is not in table3.

        The distinct (``prod_code``, ``offer_code``) pairs of
        ``self.df_redeem`` minus those of ``self.df_dict('table3')`` are
        joined back onto ``self.df_redeem``; the resulting rows are passed
        to ``utils.count`` with the identity column named by
        ``self.config_dict['identity_type_code']``.

        :param grouping_set: forwarded unchanged to ``utils.count``.
        :param column_set: forwarded unchanged to ``utils.count``.
        :param measure: forwarded unchanged to ``utils.count``.
        :return: the result of ``utils.count`` for the unmatched rows.
        """
        pair_cols = ['prod_code', 'offer_code']

        redeemed_pairs = self.df_redeem.select(*pair_cols).dropDuplicates()
        table3_pairs = self.df_dict('table3').select(
            *pair_cols
        ).dropDuplicates()

        # Pairs that were redeemed but are absent from table3.
        unmatched_pairs = redeemed_pairs.subtract(table3_pairs)
        unmatched_rows = unmatched_pairs.join(self.df_redeem, pair_cols)

        # A count aggregate skips nulls, so null customers are replaced
        # with the literal string 'null' before counting.
        # NOTE(review): only 'prsn_code' is filled, while the counted column
        # comes from config_dict['identity_type_code'] -- confirm these are
        # always the same column.
        unmatched_rows = unmatched_rows.fillna('null', ['prsn_code'])

        return utils.count(
            self.sqlContext,
            unmatched_rows,
            grouping_set,
            column_set,
            measure,
            self.config_dict['identity_type_code']
        )
    def redemption_rate(self, grouping_set, column_set, measure):
        """Compute the ratio of 'RDM' counts to 'EXP' counts as a string.

        ``utils.count`` is run twice over ``self.base_redemption_df`` --
        once on rows with ``contact_stage_code == 'RDM'`` (measure name
        ``'redem_coupon'``) and once on rows with ``'EXP'`` (measure name
        ``'mailed_coupon'``), both with ``'offer_code'`` as the final
        argument.  The two results are joined on ``column_set`` plus
        ``'grouping_level'``, the ratio is stored under ``measure``, the two
        helper columns are dropped and ``measure`` is cast to a string.

        :param grouping_set: forwarded unchanged to ``utils.count``.
        :param column_set: list of column names; forwarded to
            ``utils.count`` and used (with ``'grouping_level'``) as join keys.
        :param measure: name of the output ratio column.
        :return: the joined frame with ``measure`` as a string column and
            without ``'redem_coupon'`` / ``'mailed_coupon'``.
        """
        source = self.base_redemption_df
        rdm_rows = source.filter(col('contact_stage_code') == 'RDM')
        exp_rows = source.filter(col('contact_stage_code') == 'EXP')

        rdm_counts = utils.count(
            self.sqlContext, rdm_rows, grouping_set, column_set,
            'redem_coupon', 'offer_code'
        )
        exp_counts = utils.count(
            self.sqlContext, exp_rows, grouping_set, column_set,
            'mailed_coupon', 'offer_code'
        )

        # Both count frames are matched on the same grouping columns.
        join_keys = column_set + ['grouping_level']
        joined = rdm_counts.join(exp_counts, join_keys)

        ratio = joined.redem_coupon / joined.mailed_coupon
        with_ratio = joined.withColumn(measure, ratio)

        # The helper counts are no longer needed once the ratio exists.
        ratio_only = with_ratio.drop('redem_coupon').drop('mailed_coupon')

        return ratio_only.withColumn(
            measure,
            ratio_only[measure].cast(StringType())
        )
示例#6
0
    def coupons_allocated(self, grouping_set, column_set, measure):
        """Aggregate the 'DLV' rows of the allocation frame by offer code.

        Filters ``self.base_allocation_df`` to rows with
        ``contact_stage_code == 'DLV'`` and passes them to ``utils.count``
        with ``'offer_code'`` as the final argument.

        :param grouping_set: forwarded unchanged to ``utils.count``.
        :param column_set: forwarded unchanged to ``utils.count``.
        :param measure: forwarded unchanged to ``utils.count``.
        :return: the result of ``utils.count`` for the filtered rows.
        """
        dlv_rows = self.base_allocation_df.filter(
            col('contact_stage_code') == 'DLV'
        )
        return utils.count(
            self.sqlContext,
            dlv_rows,
            grouping_set,
            column_set,
            measure,
            'offer_code'
        )
    def buy_in_category_but_not_product(self, grouping_set, column_set, measure):
        """Aggregate identities active in a featured product's hierarchy group.

        Steps, as implemented:

        1. ``self.detail_offer_prod`` is left-outer-joined to the
           ``prod_dim`` frame on ``prod_code``, and ``prod_code`` is renamed
           to ``featured_prod_code``.
        2. The ``pre_period`` and ``post_period`` frames are each
           left-outer-joined to ``prod_dim`` on ``prod_code``; the
           (identity, ``prod_hier_l20_code``) pairs present in *both*
           periods are kept (``intersect``).
        3. The frame from step 1 is joined to those pairs on the identity
           column and ``prod_hier_l20_code`` (presumably the category
           level -- confirm against the product dimension).
        4. Rows where ``featured_prod_code`` differs from ``prod_code`` are
           passed to ``utils.count`` with the identity column named by
           ``self.config_dict['identity_type_code']``.

        :param grouping_set: forwarded unchanged to ``utils.count``.
        :param column_set: forwarded unchanged to ``utils.count``.
        :param measure: forwarded unchanged to ``utils.count``.
        :return: the result of ``utils.count`` for the filtered rows.
        """
        identity_col = self.config_dict['identity_type_code']

        exp_df = self.detail_offer_prod
        prod_dim_df = self.df_dict('prod_dim')

        # The join key and join type must be separate arguments.  They were
        # previously adjacent string literals, which Python concatenates
        # into the single (non-existent) join key 'prod_codeleft_outer'.
        exp_cat_df = exp_df.join(
            prod_dim_df,
            'prod_code',
            'left_outer'
        )
        exp_cat_df = exp_cat_df.withColumnRenamed(
            'prod_code', 'featured_prod_code'
        )

        pre_prod_df = self.df_dict('pre_period').join(
            self.df_dict('prod_dim'),
            'prod_code',
            'left_outer'
        )
        post_prod_df = self.df_dict('post_period').join(
            self.df_dict('prod_dim'),
            'prod_code',
            'left_outer'
        )

        # (identity, hierarchy code) pairs seen in both periods.
        pre_post_df = pre_prod_df.select(
            identity_col,
            'prod_hier_l20_code'
        ).intersect(
            post_prod_df.select(
                identity_col,
                'prod_hier_l20_code'
            )
        )

        pre_post_exp_cat_df = exp_cat_df.join(
            pre_post_df,
            [identity_col, 'prod_hier_l20_code']
        )

        # NOTE(review): `prod_code` was renamed to `featured_prod_code`
        # above and `pre_post_df` only carries the identity and hierarchy
        # columns, so no `prod_code` column appears to remain here -- confirm
        # which purchased-product column this comparison should use.
        df = pre_post_exp_cat_df.filter(
            pre_post_exp_cat_df.featured_prod_code
            != pre_post_exp_cat_df.prod_code
        )

        buy_in_category_but_not_product_df = utils.count(
            self.sqlContext,
            df,
            grouping_set,
            column_set,
            measure,
            identity_col
        )
        return buy_in_category_but_not_product_df