def p_content_owner_video_metadata_a2(file_name, dataset_id, _content_owner,
                                      _table, _date):
    """Load a content-owner video-metadata CSV report into a BigQuery table.

    The report is read with ``pd.read_table(..., sep=",")`` and NaN cells are
    replaced by empty strings.  Every CSV row becomes one tuple made of the
    report date (``-`` characters removed) followed by the columns listed in
    ``columns`` below, each passed through its paired converter.  Rows are
    sent with ``insert_rows`` on the module-level ``client`` in batches of
    1500, with a 5 second pause after each call.

    Args:
        file_name: Passed straight to ``pd.read_table``.
        dataset_id: Dataset id handed to ``client.dataset``.
        _content_owner: Not used by this function; kept so existing callers
            keep working.
        _table: Table name looked up inside ``dataset_id``.
        _date: Report date; stored as the first field of every row after
            ``-`` characters are stripped.

    Raises:
        AssertionError: If ``client.insert_rows`` returns a non-empty error
            list for any batch.
        KeyError: If the report lacks one of the expected columns.
        ValueError: If ``video_length``, ``views`` or ``comments`` holds a
            value ``int()`` cannot convert (for example a blanked NaN).
    """
    # (CSV column, converter) pairs in destination-row order.  The report
    # date is prepended to each row separately.
    columns = (
        ('video_id', str),
        ('channel_id', str),
        ('channel_display_name', str),
        ('time_uploaded', str),
        ('time_published', str),
        ('video_title', str),
        ('video_length', int),
        ('views', int),
        ('comments', int),
        ('video_privacy_status', str),
        ('video_url', str),
        ('category', str),
        ('embedding_allowed', str),
        ('ratings_allowed', str),
        ('comments_allowed', str),
        ('claim_origin', str),
        ('content_type', str),
        ('upload_source', str),
        ('claimed_by_this_owner', str),
        ('claimed_by_another_owner', str),
        ('other_owners_claiming', str),
        ('offweb_syndicatable', str),
        ('claim_id', str),
        ('asset_id', str),
        ('custom_id', str),
        ('effective_policy', str),
        ('third_party_video_id', str),
        ('third_party_ads_enabled', str),
        ('display_ads_enabled', str),
        ('sponsored_cards_enabled', str),
        ('overlay_ads_enabled', str),
        ('nonskippable_video_ads_enabled', str),
        ('long_nonskippable_video_ads_enabled', str),
        ('skippable_video_ads_enabled', str),
        ('prerolls_enabled', str),
        ('postrolls_enabled', str),
        ('isrc', str),
        ('eidr', str),
    )
    batch_size = 1500  # rows sent per insert_rows call
    # Pause after every insert call; presumably throttling for the
    # streaming-insert API -- confirm before changing.
    pause_seconds = 5

    data = pd.read_table(file_name, sep=",")
    data = data.replace(np.nan, '', regex=True)
    table_ref = client.dataset(dataset_id).table(_table)
    table = client.get_table(table_ref)
    start_time = time.time()

    # Loop-invariant, so normalise the date once rather than once per row.
    report_date = str(_date).replace("-", "")
    total_rows = len(data)
    converters = [convert for _, convert in columns]
    # Whole-column arrays zipped row-wise replace one chained
    # data[col][i] lookup per cell.
    column_values = [data[name].values for name, _ in columns]

    def insert_batch(rows):
        """Send one batch and fail loudly if BigQuery rejects any row."""
        errors = client.insert_rows(table, rows)
        if errors:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is unchanged for callers.
            raise AssertionError(
                "BigQuery insert_rows reported errors: %r" % (errors,))

    pending = []
    uploaded = 0
    for raw_row in zip(*column_values):
        pending.append((report_date,) + tuple(
            convert(value) for convert, value in zip(converters, raw_row)))
        if len(pending) >= batch_size:
            insert_batch(pending)
            uploaded += len(pending)
            print(
                round(uploaded / float(total_rows) * 100, 2), "%",
                "------- %s seconds -------" % round(
                    time.time() - start_time, 2))
            time.sleep(pause_seconds)
            pending = []

    has_tail = bool(pending)
    if has_tail:
        insert_batch(pending)
    # Reported even when the row count is an exact multiple of batch_size.
    print("------- Done! 100% uploaded. -------")
    if has_tail:
        time.sleep(pause_seconds)
def p_content_owner_ad_revenue_raw_a1(file_name, dataset_id, _content_owner,
                                      _table, _date):
    """Load a content-owner raw ad-revenue CSV report into a BigQuery table.

    The report is read with ``pd.read_table(..., sep=",")`` and NaN cells are
    replaced by empty strings.  Every CSV row becomes one tuple holding the
    columns listed in ``columns`` below, each passed through its paired
    converter.  Rows are sent with ``insert_rows`` on the module-level
    ``client`` in batches of 1500, with a 5 second pause after each call.

    Args:
        file_name: Passed straight to ``pd.read_table``.
        dataset_id: Dataset id handed to ``client.dataset``.
        _content_owner: Not used by this function; kept so existing callers
            keep working.
        _table: Table name looked up inside ``dataset_id``.
        _date: Not used; each row's date comes from the report's own
            ``date`` column.  Kept so existing callers keep working.

    Raises:
        AssertionError: If ``client.insert_rows`` returns a non-empty error
            list for any batch.
        KeyError: If the report lacks one of the expected columns.
        ValueError: If a numeric column holds a value that ``int()`` or
            ``float()`` cannot convert (for example a blanked NaN).
    """
    # (CSV column, converter) pairs in destination-row order.
    columns = (
        ('adjustment_type', str),
        ('date', str),
        ('country_code', str),
        ('video_id', str),
        ('video_title', str),
        ('video_duration_sec', int),
        ('category', str),
        ('channel_id', str),
        ('uploader', str),
        ('channel_display_name', str),
        ('content_type', str),
        ('policy', str),
        ('owned_views', int),
        ('youtube_revenue_split_auction', float),
        ('youtube_revenue_split_reserved', float),
        ('youtube_revenue_split_partner_sold_youtube_served', float),
        ('youtube_revenue_split_partner_sold_partner_served', float),
        ('youtube_revenue_split', float),
        ('partner_revenue_auction', float),
        ('partner_revenue_reserved', float),
        ('partner_revenue_partner_sold_youtube_served', float),
        ('partner_revenue_partner_sold_partner_served', float),
        ('partner_revenue', float),
    )
    batch_size = 1500  # rows sent per insert_rows call
    # Pause after every insert call; presumably throttling for the
    # streaming-insert API -- confirm before changing.
    pause_seconds = 5

    data = pd.read_table(file_name, sep=",")
    data = data.replace(np.nan, '', regex=True)
    table_ref = client.dataset(dataset_id).table(_table)
    table = client.get_table(table_ref)
    start_time = time.time()

    total_rows = len(data)
    converters = [convert for _, convert in columns]
    # Whole-column arrays zipped row-wise replace one chained
    # data[col][i] lookup per cell.
    column_values = [data[name].values for name, _ in columns]

    def insert_batch(rows):
        """Send one batch and fail loudly if BigQuery rejects any row."""
        errors = client.insert_rows(table, rows)
        if errors:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is unchanged for callers.
            raise AssertionError(
                "BigQuery insert_rows reported errors: %r" % (errors,))

    pending = []
    uploaded = 0
    for raw_row in zip(*column_values):
        pending.append(tuple(
            convert(value) for convert, value in zip(converters, raw_row)))
        if len(pending) >= batch_size:
            insert_batch(pending)
            uploaded += len(pending)
            print(
                "------- ",
                round(uploaded / float(total_rows) * 100, 2), "%",
                " took %s seconds -------" % round(
                    time.time() - start_time, 2))
            time.sleep(pause_seconds)
            pending = []

    has_tail = bool(pending)
    if has_tail:
        insert_batch(pending)
    # Reported even when the row count is an exact multiple of batch_size.
    print("------- Done! 100% Uploaded. -------")
    if has_tail:
        time.sleep(pause_seconds)
def p_content_owner_basic_a3(file_name, dataset_id, _content_owner, _table,
                             _date):
    """Load a content-owner basic-stats CSV report into a BigQuery table.

    The report is read with ``pd.read_table(..., sep=",")`` and NaN cells are
    replaced by empty strings.  Every CSV row becomes one tuple holding the
    columns listed in ``columns`` below, each passed through its paired
    converter.  Rows are sent with ``insert_rows`` on the module-level
    ``client`` in batches of 1500, with a 5 second pause after each call.

    Args:
        file_name: Passed straight to ``pd.read_table``.
        dataset_id: Dataset id handed to ``client.dataset``.
        _content_owner: Not used by this function; kept so existing callers
            keep working.
        _table: Table name looked up inside ``dataset_id``.
        _date: Not used; each row's date comes from the report's own
            ``date`` column.  Kept so existing callers keep working.

    Raises:
        AssertionError: If ``client.insert_rows`` returns a non-empty error
            list for any batch.
        KeyError: If the report lacks one of the expected columns.
        ValueError: If a numeric column holds a value that ``int()`` or
            ``float()`` cannot convert (for example a blanked NaN).
    """
    # (CSV column, converter) pairs in destination-row order.
    columns = (
        ('date', str),
        ('channel_id', str),
        ('video_id', str),
        ('claimed_status', str),
        ('uploader_type', str),
        ('live_or_on_demand', str),
        ('subscribed_status', str),
        ('country_code', str),
        ('views', int),
        ('comments', int),
        ('shares', int),
        ('watch_time_minutes', float),
        ('average_view_duration_seconds', float),
        ('average_view_duration_percentage', float),
        ('annotation_impressions', int),
        ('annotation_clickable_impressions', int),
        ('annotation_clicks', int),
        ('annotation_click_through_rate', float),
        ('annotation_closable_impressions', int),
        ('annotation_closes', int),
        ('annotation_close_rate', float),
        ('card_teaser_impressions', int),
        ('card_teaser_clicks', int),
        ('card_teaser_click_rate', float),
        ('card_impressions', int),
        ('card_clicks', int),
        ('card_click_rate', float),
        ('subscribers_gained', int),
        ('subscribers_lost', int),
        ('videos_added_to_playlists', int),
        ('videos_removed_from_playlists', int),
        ('likes', int),
        ('dislikes', int),
        ('red_views', int),
        ('red_watch_time_minutes', float),
    )
    batch_size = 1500  # rows sent per insert_rows call
    # Pause after every insert call; presumably throttling for the
    # streaming-insert API -- confirm before changing.
    pause_seconds = 5

    data = pd.read_table(file_name, sep=",")
    data = data.replace(np.nan, '', regex=True)
    table_ref = client.dataset(dataset_id).table(_table)
    table = client.get_table(table_ref)
    start_time = time.time()

    total_rows = len(data)
    converters = [convert for _, convert in columns]
    # Whole-column arrays zipped row-wise replace one chained
    # data[col][i] lookup per cell.
    column_values = [data[name].values for name, _ in columns]

    def insert_batch(rows):
        """Send one batch and fail loudly if BigQuery rejects any row."""
        errors = client.insert_rows(table, rows)
        if errors:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is unchanged for callers.
            raise AssertionError(
                "BigQuery insert_rows reported errors: %r" % (errors,))

    pending = []
    uploaded = 0
    for raw_row in zip(*column_values):
        pending.append(tuple(
            convert(value) for convert, value in zip(converters, raw_row)))
        if len(pending) >= batch_size:
            insert_batch(pending)
            uploaded += len(pending)
            print(
                round(uploaded / float(total_rows) * 100, 2), "%",
                "------- %s seconds -------" % round(
                    time.time() - start_time, 2))
            time.sleep(pause_seconds)
            pending = []

    has_tail = bool(pending)
    if has_tail:
        insert_batch(pending)
    # Reported even when the row count is an exact multiple of batch_size.
    print("------- Done! 100% Uploaded. -------")
    if has_tail:
        time.sleep(pause_seconds)
def p_content_owner_estimated_revenue_a1(file_name, dataset_id,
                                         _content_owner, _table, _date):
    """Load a content-owner estimated-revenue CSV report into BigQuery.

    The report is read with ``pd.read_table(..., sep=",")`` and NaN cells are
    replaced by empty strings.  Every CSV row becomes one tuple holding the
    columns listed in ``columns`` below, each passed through its paired
    converter.  Rows are sent with ``insert_rows`` on the module-level
    ``client`` in batches of 1500, with a 5 second pause after each call.

    Args:
        file_name: Passed straight to ``pd.read_table``.
        dataset_id: Dataset id handed to ``client.dataset``.
        _content_owner: Not used by this function; kept so existing callers
            keep working.
        _table: Table name looked up inside ``dataset_id``.
        _date: Not used; each row's date comes from the report's own
            ``date`` column.  Kept so existing callers keep working.

    Raises:
        AssertionError: If ``client.insert_rows`` returns a non-empty error
            list for any batch.
        KeyError: If the report lacks one of the expected columns.
        ValueError: If a numeric column holds a value that ``int()`` or
            ``float()`` cannot convert (for example a blanked NaN).
    """
    # (CSV column, converter) pairs in destination-row order.
    columns = (
        ('date', str),
        ('channel_id', str),
        ('video_id', str),
        ('claimed_status', str),
        ('uploader_type', str),
        ('country_code', str),
        ('estimated_partner_revenue', float),
        ('estimated_partner_ad_revenue', float),
        ('estimated_partner_ad_auction_revenue', float),
        ('estimated_partner_ad_reserved_revenue', float),
        ('estimated_youtube_ad_revenue', float),
        ('estimated_monetized_playbacks', int),
        ('estimated_playback_based_cpm', float),
        ('ad_impressions', int),
        ('estimated_cpm', float),
        ('estimated_partner_red_revenue', float),
        ('estimated_partner_transaction_revenue', float),
    )
    batch_size = 1500  # rows sent per insert_rows call
    # Pause after every insert call; presumably throttling for the
    # streaming-insert API -- confirm before changing.
    pause_seconds = 5

    data = pd.read_table(file_name, sep=",")
    data = data.replace(np.nan, '', regex=True)
    table_ref = client.dataset(dataset_id).table(_table)
    table = client.get_table(table_ref)
    start_time = time.time()

    total_rows = len(data)
    converters = [convert for _, convert in columns]
    # Whole-column arrays zipped row-wise replace one chained
    # data[col][i] lookup per cell.
    column_values = [data[name].values for name, _ in columns]

    def insert_batch(rows):
        """Send one batch and fail loudly if BigQuery rejects any row."""
        errors = client.insert_rows(table, rows)
        if errors:
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type is unchanged for callers.
            raise AssertionError(
                "BigQuery insert_rows reported errors: %r" % (errors,))

    pending = []
    uploaded = 0
    for raw_row in zip(*column_values):
        pending.append(tuple(
            convert(value) for convert, value in zip(converters, raw_row)))
        if len(pending) >= batch_size:
            insert_batch(pending)
            uploaded += len(pending)
            print(
                round(uploaded / float(total_rows) * 100, 2), "%",
                "------- %s seconds -------" % round(
                    time.time() - start_time, 2))
            time.sleep(pause_seconds)
            pending = []

    has_tail = bool(pending)
    if has_tail:
        insert_batch(pending)
    # Reported even when the row count is an exact multiple of batch_size.
    print("------- Done! 100% Uploaded. -------")
    if has_tail:
        time.sleep(pause_seconds)
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from google.cloud import bigquery
import json
import csv
from datetime import datetime, date, time, timedelta
import schedule
import re

# BigQuery handles used by the rest of the script.
# NOTE(review): this rebinds ``client``, which was imported from oauth2client
# just above; from here on ``client`` is the BigQuery client.
client = bigquery.Client(project='XXXX-XXXX')
dataset = client.dataset('DATASET_NAME')
table = dataset.table('TABLE_NAME')


def get_service(api_name, api_version, scope, key_file_location,
                service_account_email):
    """Return a Google API service object authorised by a service account.

    Credentials are built from the P12 key file at ``key_file_location`` for
    ``service_account_email`` with the requested ``scope``; they authorise a
    fresh ``httplib2.Http`` object, which is handed to ``build`` together
    with ``api_name`` and ``api_version``.
    """
    service_credentials = ServiceAccountCredentials.from_p12_keyfile(
        service_account_email, key_file_location, scopes=scope)
    authorized_http = service_credentials.authorize(httplib2.Http())
    return build(api_name, api_version, http=authorized_http)


# Init necessary dimensions and metrics
industry = 'XXXXXXX'