def main(client):
    """Extract sponsored-ad details from every saved mobile page source.

    For each region in ``S3_REGION_LIST`` the ``page_source`` directory
    under the client's mobile batch output directory is scanned. Each file
    found there is parsed with BeautifulSoup and passed, together with the
    region's ``sponsored_result_computed`` directory, to
    ``get_sponsored_ad_details``. A region without a ``page_source``
    directory is reported on stdout and skipped.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    for region in S3_REGION_LIST:
        print(region)
        client_root = BATCH_OUTPUT_DIR_MOBILE.format(client)

        pages_dir = os.path.join(client_root, region, 'page_source')
        results_dir = os.path.join(client_root, region,
                                   'sponsored_result_computed')

        if not os.path.exists(pages_dir):
            print('{} -- No sponsored results for Mobile step 4.a'.format(
                region))
        else:
            create_dir(results_dir)
            for page_name in os.listdir(pages_dir):
                page_path = os.path.join(pages_dir, page_name)
                with open(page_path, 'r') as page_file:
                    markup = page_file.read()

                    soup = BeautifulSoup(markup, 'html.parser')
                    # The query name is everything before the first dot.
                    query_clean_name = page_name.split('.')[0]
                    get_sponsored_ad_details(results_dir, soup,
                                             query_clean_name)

        print('--------------------------------------------')
def main(client):
    """Extract showcase-ad details from every saved mobile page source.

    For each region in ``S3_REGION_LIST`` the ``page_source`` directory
    under the client's mobile batch output directory is scanned. Each file
    found there is parsed with BeautifulSoup and passed, together with the
    region's ``showcase_result_computed`` directory, to
    ``get_showcase_ad_details``. A region without a ``page_source``
    directory is reported on stdout and skipped.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    for region in S3_REGION_LIST:
        print(region)
        client_root = BATCH_OUTPUT_DIR_MOBILE.format(client)
        pages_dir = os.path.join(client_root, region, 'page_source')
        results_dir = os.path.join(client_root, region,
                                   'showcase_result_computed')

        if not os.path.exists(pages_dir):
            print('{} -- No showcase results for Mobile step 5.a'.format(region))
        else:
            create_dir(results_dir)
            for page_name in os.listdir(pages_dir):
                page_path = os.path.join(pages_dir, page_name)
                with open(page_path, 'r') as page_file:
                    markup = page_file.read()

                    soup = BeautifulSoup(markup, 'html.parser')
                    # The query name is everything before the first dot.
                    query_clean_name = page_name.split('.')[0]
                    get_showcase_ad_details(results_dir, soup,
                                            query_clean_name)

        print('--------------------------------------------')
示例#3
0
def main(client):
    """Combine each region's computed showcase results into one TSV.

    For every region in ``S3_REGION_LIST`` the files in that region's
    ``showcase_result_computed`` directory are loaded through
    ``get_computed_df`` and concatenated. A non-empty result is written to
    ``ShowcaseAds_Combined<region>.tsv`` in the region directory; otherwise
    a message is printed.

    A file whose loading raises is reported on stdout and skipped, so one
    bad file does not abort the region. File names containing ``~`` are
    ignored.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    output_dir = BATCH_OUTPUT_DIR_MOBILE.format(client)

    for region in S3_REGION_LIST:
        input_dir = os.path.join(output_dir, region,
                                 'showcase_result_computed')
        region_dir = os.path.join(output_dir, region)
        print(region)

        if not os.path.exists(input_dir):
            print('{} -- No showcase results for Mobile step 5.b'.format(
                region))
            continue

        # Frames are gathered in a list and concatenated once:
        # DataFrame.append no longer exists in pandas >= 2.0 and re-copied
        # the accumulated data on every call.
        frames = []
        for file_name in os.listdir(input_dir):
            if '~' in file_name:
                continue
            file_path = os.path.join(input_dir, file_name)

            try:
                # NOTE(review): get_computed_df is assumed to return a
                # DataFrame -- confirm against its definition.
                frames.append(get_computed_df(file_path, file_name, region))
            except Exception as e:
                # Best effort: report the failing file and carry on.
                print(str(e))
                print('Exception for file: {}'.format(file_path))

        # pd.concat raises on an empty list, so fall back to an empty frame.
        output_df = pd.concat(frames) if frames else pd.DataFrame()

        if len(output_df) > 0:
            output_df.to_csv(
                os.path.join(region_dir,
                             'ShowcaseAds_Combined{}.tsv'.format(region)),
                index=False,
                sep='\t')
        else:
            print('{} -- No showcase results for Mobile step 5.b'.format(
                region))
def main(client):
    """Merge the per-region showcase TSVs into a single all-region TSV.

    Each region in ``S3_REGION_LIST`` may have a
    ``ShowcaseAds_Combined<region>.tsv`` file in its directory under the
    client's mobile batch output directory. The files that exist are read
    and concatenated; a non-empty result is written to
    ``ShowcaseAds_Combined_allregion.tsv`` in the client directory,
    otherwise a message is printed.

    Args:
        client: Value substituted into ``BATCH_OUTPUT_DIR_MOBILE``.
    """
    output_dir = BATCH_OUTPUT_DIR_MOBILE.format(client)

    # Frames are gathered in a list and concatenated once: DataFrame.append
    # no longer exists in pandas >= 2.0.
    frames = []
    for region in S3_REGION_LIST:
        print(region)

        region_related_path = os.path.join(
            output_dir, region, 'ShowcaseAds_Combined{}.tsv'.format(region))
        if os.path.exists(region_related_path):
            frames.append(pd.read_csv(region_related_path, sep='\t'))

    # pd.concat raises on an empty list, so fall back to an empty frame.
    output_df = pd.concat(frames) if frames else pd.DataFrame()

    if len(output_df) > 0:
        output_df.to_csv(
            os.path.join(output_dir, 'ShowcaseAds_Combined_allregion.tsv'),
            index=False,
            sep='\t')
    else:
        print('No showcase for all regions.')
示例#5
0
def main(client, client_file_name):
    """Merge the PC and mobile all-region ad TSVs into one prospect file.

    Up to three inputs are read if they exist: the PC sponsored results,
    the mobile sponsored ads and the mobile showcase ads. Their rows are
    concatenated in that order, passed through ``process_prospect_df``,
    normalised, prefixed with a ``Prospect Name`` column and written as a
    TSV to ``MERGED_OUTPUT_PATH.format(client)``. When no rows are found a
    message is printed and nothing is written.

    Args:
        client: Substituted into the output-directory and output-path
            templates, and written into the ``Prospect Name`` column.
        client_file_name: Forwarded unchanged to ``process_prospect_df``.
    """
    output_dir_pc = BATCH_OUTPUT_DIR_PC.format(client)
    output_dir_mobile = BATCH_OUTPUT_DIR_MOBILE.format(client)

    # Order matters: rows are concatenated in this sequence.
    candidate_paths = (
        os.path.join(output_dir_pc, 'SponsoredResult_Combined_allregion.tsv'),
        os.path.join(output_dir_mobile, 'SponsoredAds_Combined_allregion.tsv'),
        os.path.join(output_dir_mobile, 'ShowcaseAds_Combined_allregion.tsv'),
    )

    # Frames are gathered in a list and concatenated once: DataFrame.append
    # no longer exists in pandas >= 2.0.
    frames = []
    for path in candidate_paths:
        if os.path.exists(path):
            frames.append(pd.read_csv(path, sep='\t'))

    # pd.concat raises on an empty list, so fall back to an empty frame.
    output_df = pd.concat(frames) if frames else pd.DataFrame()

    output_df_count = len(output_df)
    print(output_df_count)

    if output_df_count > 0:
        # NOTE(review): presumably this keeps only the top-ranked items, as
        # the original comment suggested -- confirm in process_prospect_df.
        output_df = process_prospect_df(output_df, client, client_file_name)
        output_df['PLA Rank'] = output_df['PLA Rank'].astype(int)

        # Normalise the sale-tag field: missing values become '' and
        # everything else is upper-cased.
        output_df['PLA Has Sale Tag'] = output_df['PLA Has Sale Tag'].fillna(
            value='').str.upper()

        # Capture the column order before adding 'Prospect Name' so that it
        # can be placed first.
        column_list = output_df.columns.tolist()
        output_df['Prospect Name'] = client
        output_df = output_df[['Prospect Name'] + column_list]
        output_df.to_csv(MERGED_OUTPUT_PATH.format(client),
                         index=False,
                         sep='\t')
    else:
        print('No output of step6')
示例#6
0
 def __init__(self, thread_name, client_name):
     # Store the thread and client names and derive the client's mobile
     # batch output directory.
     #
     # thread_name: kept as self.thread_name.
     # client_name: kept as self.client_name and substituted into
     #     BATCH_OUTPUT_DIR_MOBILE to build self.batch_output_dir.
     #
     # NOTE(review): the enclosing class is not visible here; it presumably
     # subclasses Thread, given the explicit base initialiser call below.
     Thread.__init__(self)
     self.thread_name = thread_name
     self.client_name = client_name
     self.batch_output_dir = BATCH_OUTPUT_DIR_MOBILE.format(
         self.client_name)