示例#1
0
def get_computed_df(file_path, file, region):
    """Build the showcase-ads output frame from one tab-separated file.

    ``file`` is cut at its first ``.`` and is then expected to look like
    ``<query>--<DD_MM_YYYY__HH_MM_SS>_showcase_ads...``. The part before
    ``--`` fills the ``Query`` column; the timestamp, moved back five hours,
    fills ``Search Datetime``.

    Args:
        file_path: Path of the tab-separated file to load.
        file: File name used to recover the query and the timestamp.
        region: Value handed to ``get_region_name`` for ``Search Region``.

    Returns:
        The frame restricted to ``OUTPUT_COLUMN_LIST``.

    Raises:
        IndexError: If ``file`` contains no ``--`` separator.
        ValueError: If the timestamp is not in ``%d_%m_%Y__%H_%M_%S`` form.
    """
    file = file.split('.')[0]
    df = pd.read_csv(file_path, sep='\t')
    name_parts = file.split('--')
    df['Query'] = name_parts[0]

    # Convert GMT to EST (fixed five-hour offset, no daylight saving).
    date_time_stamp = name_parts[1].split('_showcase_ads')[0]
    date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
    est_date_time_obj = date_time_obj - timedelta(hours=5)
    df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

    # Add a blank column for every rename source / extra showcase column
    # that the loaded file does not already carry.
    existing_columns = set(df.columns)
    expected = list(TITLE_CONVERSION_DICT) + list(EXTRA_COLUMNS_FOR_SHOWCASE)
    for col in expected:
        if col not in existing_columns:
            df[col] = ''

    # A missing price is read as NaN (a float) and a purely numeric column is
    # read as numbers; neither has .split(). Derive the bounds from a text
    # view so those rows get usable values instead of raising, while the
    # 'price' column itself is left exactly as loaded.
    price_text = df['price'].fillna('').astype(str)
    df['Min Price'] = price_text.apply(lambda x: x.split('-')[0])
    df['Max Price'] = price_text.apply(
        lambda x: x.split('-')[-1] if '-' in x else '')

    df['Store_Name'] = df['url'].apply(get_domain_url)
    df.rename(columns=TITLE_CONVERSION_DICT, inplace=True)
    df['PLA Rank'] = range(1, len(df) + 1)
    df['Search Region'] = get_region_name(region)
    df['PLA/Showcase'] = 'Showcase'
    df['Search From'] = 'Mobile'
    return df[OUTPUT_COLUMN_LIST]
	def get_computed_df(self, file_path, file, region):
		"""Assemble the brands-related result table from one TSV file.

		``file`` is cut at its first ``.`` and then split on ``--``: the
		leading piece is stored as the search keyword, and the text before
		``_brands_related`` in the second piece is parsed as a
		``%d_%m_%Y__%H_%M_%S`` timestamp.

		Returns the frame reduced to ``OUTPUT_COLUMN_LIST``.
		"""
		stem = file.split('.')[0]
		frame = pd.read_csv(file_path, sep='\t')
		name_parts = stem.split('--')
		frame['Query'] = name_parts[0]

		# Convert GMT to EST by subtracting a fixed five hours.
		raw_stamp = name_parts[1].split('_brands_related')[0]
		searched_at = datetime.strptime(raw_stamp, "%d_%m_%Y__%H_%M_%S") - timedelta(hours=5)
		frame['Search Datetime'] = searched_at.strftime('%m-%d-%Y:%H:%M:%S')

		def asin_from(link):
			# Float cells (e.g. NaN for a missing URL) are handed back untouched.
			if type(link) == float:
				return link
			# The ASIN is whatever sits between 'asins=' and the next '&'.
			return link.split('asins=')[1].split('&')[0]

		frame['ASIN'] = frame['url'].apply(asin_from)

		frame = frame.rename(columns={
			'name': 'Brand',
			'text': 'Brand Tagline',
			'Query': 'Search Keyword',
		})

		frame['Result Rank'] = range(1, len(frame) + 1)
		frame['Search Region'] = get_region_name(region)
		frame['Search From'] = 'Desktop'
		return frame[OUTPUT_COLUMN_LIST]
	def get_computed_df(self, file_path, file, region):
		"""Build the today's-deals output frame from one tab-separated file.

		``file`` is cut at its first ``.`` and is then expected to look like
		``<query>--<DD_MM_YYYY__HH_MM_SS>_today_deals...``. Every row is
		marked as sponsored and ranked in file order.

		Args:
			file_path: Path of the tab-separated file to load.
			file: File name used to recover the query and the timestamp.
			region: Value handed to ``get_region_name`` for ``Search Region``.

		Returns:
			The frame restricted to ``OUTPUT_COLUMN_LIST``.

		Raises:
			IndexError: If ``file`` has no ``--`` separator, or a URL string
				contains no ``dp/`` segment.
			ValueError: If the timestamp is not in ``%d_%m_%Y__%H_%M_%S`` form.
		"""
		file = file.split('.')[0]
		df = pd.read_csv(file_path, sep='\t')
		name_parts = file.split('--')
		df['Query'] = name_parts[0]
		df['Product Sponsored'] = 'Yes'

		# Convert GMT to EST (fixed five-hour offset, no daylight saving).
		date_time_stamp = name_parts[1].split('_today_deals')[0]
		date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
		est_date_time_obj = date_time_obj - timedelta(hours=5)
		df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

		# Keep only the first space-separated token of the rating text;
		# non-string cells (NaN, numbers) pass through unchanged.
		df['rating'] = df['rating'].apply(lambda x: x.split(' ')[0] if isinstance(x, str) else x)

		# Blank out missing counts BEFORE casting to str: the reverse order
		# turns NaN into the text 'nan', which fillna can no longer see.
		df['total_ratings'] = df['total_ratings'].fillna(value='').astype(str)
		df['total_ratings'] = df['total_ratings'].str.replace(',', '', regex=False)

		# The ASIN is the path segment right after 'dp/'. Rows without a URL
		# (NaN) keep their missing value instead of aborting the whole file.
		df['ASIN'] = df['url'].apply(lambda x: x.split('dp/')[1].split('/')[0] if isinstance(x, str) else x)

		df.rename(
			columns={
				'Query': 'Search Keyword',
				'name': 'Product Title',
				'rating': 'Product Ratings',
				'total_ratings': 'Product Reviews',
				'sale_price': 'Product Sale Price',
				'marked_price': 'Product Marked Price',
				'prime_info': 'Product Is Prime',
			},
			inplace=True,
		)

		df['Result Rank'] = range(1, len(df) + 1)
		df['Search Region'] = get_region_name(region)
		df['Search From'] = 'Desktop'
		return df[OUTPUT_COLUMN_LIST]
示例#4
0
	def get_computed_df(self, file_path, file, region):
		"""Build the search-result (SERP) output frame from one TSV file.

		``file`` is cut at its first ``.`` and is then expected to look like
		``<query>--<DD_MM_YYYY__HH_MM_SS>_serp_result...``. Rows are ranked
		in file order.

		Args:
			file_path: Path of the tab-separated file to load.
			file: File name used to recover the query and the timestamp.
			region: Value handed to ``get_region_name`` for ``Search Region``.

		Returns:
			The frame restricted to ``OUTPUT_COLUMN_LIST``.

		Raises:
			IndexError: If ``file`` contains no ``--`` separator.
			ValueError: If the timestamp is not in ``%d_%m_%Y__%H_%M_%S`` form.
		"""
		file = file.split('.')[0]
		df = pd.read_csv(file_path, sep='\t')
		name_parts = file.split('--')
		df['Query'] = name_parts[0]

		# Convert GMT to EST (fixed five-hour offset, no daylight saving).
		date_time_stamp = name_parts[1].split('_serp_result')[0]
		date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
		est_date_time_obj = date_time_obj - timedelta(hours=5)
		df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

		# Keep only the first space-separated token of the rating text;
		# non-string cells (NaN, numbers) pass through unchanged.
		df['rating'] = df['rating'].apply(lambda x: x.split(' ')[0] if isinstance(x, str) else x)

		# Blank out missing counts BEFORE casting to str: the reverse order
		# turns NaN into the text 'nan', which fillna can no longer see.
		df['total_ratings'] = df['total_ratings'].fillna(value='').astype(str)
		df['total_ratings'] = df['total_ratings'].str.replace(',', '', regex=False)

		# Redirection needed -- NOTE(review): get_asin_from_url is defined
		# outside this block; presumably it resolves redirect links, confirm.
		df['ASIN'] = df['url'].apply(self.get_asin_from_url)

		df.rename(
			columns={
				'Query': 'Search Keyword',
				'name': 'Product Title',
				'rating': 'Product Ratings',
				'total_ratings': 'Product Reviews',
				'sale_price': 'Product Sale Price',
				'marked_price': 'Product Marked Price',
				'prime_info': 'Product Is Prime',
				'sponsored': 'Product Sponsored',
				'stock_info': 'Product Stock Info',
				'coupon_info': 'Product Coupon Info',
				'shipping_info': 'Product Shipping Info',
				'more_buy_info': 'Product More Buy Info',
			},
			inplace=True,
		)

		df['Result Rank'] = range(1, len(df) + 1)
		df['Search Region'] = get_region_name(region)
		df['Search From'] = 'Desktop'
		return df[OUTPUT_COLUMN_LIST]
示例#5
0
	def get_computed_df(self, file_path, file, region):
		"""Build the sponsored-PLA output frame from one tab-separated file.

		``file`` is cut at its first ``.`` and is then expected to look like
		``<query>--<DD_MM_YYYY__HH_MM_SS>_sponsored_...``. The ``image`` and
		``return_policy`` columns are dropped, rows are ranked in file order,
		and rows whose URL is blank are removed afterwards (so ranks of the
		remaining rows are not renumbered).

		Args:
			file_path: Path of the tab-separated file to load.
			file: File name used to recover the query and the timestamp.
			region: Value handed to ``get_region_name`` for ``Search Region``.

		Returns:
			The frame restricted to ``self.output_column_list``.

		Raises:
			KeyError: If the file lacks the ``image`` or ``return_policy`` column.
			IndexError: If ``file`` contains no ``--`` separator.
			ValueError: If the timestamp is not in ``%d_%m_%Y__%H_%M_%S`` form.
		"""
		file = file.split('.')[0]
		df = pd.read_csv(file_path, sep='\t')
		del df['image']
		del df['return_policy']

		# Blank out missing text up front so the string handling below
		# never meets NaN in these columns.
		df['url'] = df['url'].fillna(value='')
		df['price'] = df['price'].fillna(value='')
		df['review'] = df['review'].fillna(value='')

		name_parts = file.split('--')
		df['Query'] = name_parts[0]

		# Convert GMT to EST (fixed five-hour offset, no daylight saving).
		date_time_stamp = name_parts[1].split('_sponsored_')[0]
		date_time_obj = datetime.strptime(date_time_stamp, "%d_%m_%Y__%H_%M_%S")
		est_date_time_obj = date_time_obj - timedelta(hours=5)
		df['Search Datetime'] = est_date_time_obj.strftime('%m-%d-%Y:%H:%M:%S')

		# Domain is taken from the full URL, before the query string is cut.
		df['Store_Name'] = df['url'].apply(self.get_domain_url)

		# Strip the query string from google.com links, except ad-click
		# redirects, which are kept whole.
		df['url'] = df['url'].apply(lambda x: x.split('?')[0] if 'google.com' in x and 'www.googleadservices.com/pagead/aclk' not in x else x)
		df['price_drop'] = df['price_drop'].apply(lambda x: x.split('%')[0] if isinstance(x, str) else x)

		# A price without '-' is a single value: it becomes the minimum and
		# the maximum stays blank.
		df['Min Price'] = df['price'].apply(lambda x: x.split('-')[0])
		df['Max Price'] = df['price'].apply(lambda x: x.split('-')[-1] if '-' in x else '')

		# The rating value is the second space-separated token of the text.
		df['rating'] = df['rating'].apply(lambda x: x.split(' ')[1] if isinstance(x, str) else x)
		df['review'] = df['review'].apply(lambda x: x.split(' ')[0].replace('(', '').replace(')', '') if isinstance(x, str) else x)
		df['review'] = df['review'].fillna(value='')
		# Series.str.replace changed its default from regex=True to
		# regex=False in pandas 2.0, so spell the mode out. 'k+' is kept as
		# the regex it was under the older default (a run of 'k' -> '000'),
		# which normalises both '2k+' and '2k'; the bare '+' must be literal
		# because on its own it is not a valid regular expression.
		# NOTE(review): confirm regex (not literal 'k+') is the intended mode.
		df['review'] = df['review'].str.replace('k+', '000', regex=True).str.replace('+', '', regex=False)

		df.rename(
			columns={
				'brand': 'PLA Store',
				'name': 'PLA Title',
				'url': 'PLA URL',
				'price': 'PLA Price Text',
				'Min Price': 'PLA Price MIN',
				'Max Price': 'PLA Price MAX',
				'review': 'PLA Reviews',
				'rating': 'PLA Ratings',
				'price_drop': 'PLA PriceDrop Percentage',
				'Hour': 'Search Hour',
				'Date': 'Search Date',
				'Query': 'Search Keyword',
				'PRICEC DROP TAG': 'PLA Has Price Drop Tag',
				'SALE TAG': 'PLA Has Sale Tag',
				'Store_Name': 'PLA Domain',
				'In Store/Pick up Today': 'PLA Has Special Tags',
			},
			inplace=True,
		)

		# Rank is assigned before blank-URL rows are filtered out below.
		df['PLA Rank'] = range(1, len(df) + 1)
		df['Search Region'] = get_region_name(region)
		df['PLA/Showcase'] = 'PLA'
		df['Search From'] = 'Desktop'
		df = df[df['PLA URL'] != '']

		return df[self.output_column_list]