def getInsightViews(start_date, end_date):
    """Copy one window of view counts from the insight DB into the target DB.

    Selects the rows of ``matchedVideoViewCountCompletion`` whose
    ``report_at`` satisfies ``start_date < report_at <= end_date`` and inserts
    them into the table of the same name on the target connection.

    The ``insight_*`` and ``target_*`` connection settings are not assigned
    in this function; they are read from the enclosing scope.

    Any exception is logged and mailed through ``sendToMe``, after which the
    process terminates via ``sys.exit(0)``.
    """
    logger.info(" extral data from Dashboard.matchedVideoViewCountCompletion start")
    window = {"start_date": start_date, "end_date": end_date}
    try:
        select_template = (
            ' select matchedVideo_id, trackingWebsite_id, trackingMeta_id,'
            ' company_id, report_at, view_count, current_timestamp as ETLDate'
            ' from matchedVideoViewCountCompletion'
            ' where report_at > "%(start_date)s" and report_at <= "%(end_date)s" '
        )
        get_data_sql = select_template % window

        source_db = MySQLHelper(host=insight_host, user=insight_user,
                                passwd=insight_passwd, port=insight_port,
                                db_name=insight_db)
        rows = source_db.queryCMD(get_data_sql)

        sink_db = MySQLHelper(host=target_host, user=target_user,
                              passwd=target_passwd, db_name=target_db,
                              port=target_port, charset='utf8')
        insert_sql = (
            'insert into matchedVideoViewCountCompletion'
            ' (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id,'
            ' report_at, view_count, ETLDate)'
            ' values(%s, %s, %s, %s, %s, %s, %s) '
        )
        sink_db.executeManyCMD(insert_sql, rows)
        sink_db.commit()
    except Exception as e:
        logger.debug("extral data from Dashboard.matchedVideoViewCountCompletion, %s" % e)
        # Quotes and "!" are stripped from the message before it is mailed.
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="titleBased_matchedVideoViewCountCompletio ERROR", body=mail_body)
        sys.exit(0)
def updateTrackingWebsite():
    """Refresh website attributes on three tables from TitleBasedTrackingWebsite.

    Runs three multi-table UPDATE statements on the connection configured in
    the ``target_server_staging`` section, committing after each one:

    * ``SiteBased``: websiteName, websiteDomain and websiteType
    * ``TitleBased1``: websiteName and websiteType
    * ``TitleBasedRemoveNum1``: websiteName and websiteType

    Rows are matched on ``trackingWebsite_id``.  A start notification is
    mailed first.  On failure the error is logged and mailed and the function
    returns normally; it does not exit the process.
    """
    sendToMe(subject="update TrackingWebsite start",
             body="update TrackingWebsite table: SiteBased TitleBased1 TitleBasedRemoveNum1")
    logger.info("update TrackingWebsite start")

    update_SiteBased_SQL = (
        ' update SiteBased as a, TitleBasedTrackingWebsite as b'
        ' set a.websiteName = b.websiteName,'
        ' a.websiteDomain = b.websiteDomain,'
        ' a.websiteType = b.websiteType'
        ' where a.trackingWebsite_id = b.trackingWebsite_id '
    )
    update_TitleBased1_SQL = (
        ' update TitleBased1 as a, TitleBasedTrackingWebsite as b'
        ' set a.websiteName = b.websiteName, a.websiteType = b.websiteType'
        ' where a.trackingWebsite_id = b.trackingWebsite_id '
    )
    update_TitleBasedRemoveNum1_SQL = (
        ' update TitleBasedRemoveNum1 as a, TitleBasedTrackingWebsite as b'
        ' set a.websiteName = b.websiteName, a.websiteType = b.websiteType'
        ' where a.trackingWebsite_id = b.trackingWebsite_id '
    )

    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = \
            getConfMysqlInfo(target_server_section)
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        # Each statement is committed on its own, so an error in a later
        # statement leaves the earlier updates in place.
        for statement in (update_SiteBased_SQL,
                          update_TitleBased1_SQL,
                          update_TitleBasedRemoveNum1_SQL):
            target_mysql.queryCMD(statement)
            target_mysql.commit()
    except Exception as e:
        # Logger instances expose debug(); there is no DEBUG method, so the
        # lower-case call is required for the handler itself not to raise.
        logger.debug("update TrackingWebsite data %s" % e)
        # Mail plain text with quotes and "!" removed, matching the other
        # jobs in this file that mail error details.
        mail_body = str(e).replace("'", "").replace('"', "").replace("!", "")
        sendToMe(subject="update TrackingWebsite ERROR", body=mail_body)
def dataTo_matchedVideoTmp():
    """Load matched-video rows into ``FOX_DASHBOARD.matchedVideoTmp``.

    The latest ``created_at`` and ``updated_at`` values already present in
    ``matchedVideoTmp`` are looked up with ``getMinDatePara`` (falling back to
    ``"2015-03-01"`` and ``"0000-00-00 00:00:00"`` when the lookup yields a
    falsy value) and handed to ``get_matchedVideo``.  The rows it returns are
    upserted through ``dataToTarget``.  Start/end notifications are mailed
    and logged.
    """
    logger.info("load data target matchedVideoTmp start")
    sendToMe(subject="matchedVideoTmp start", body="matchedVideoTmp start")

    target_db = "FOX_DASHBOARD"
    # Both lookups run before either default is applied.
    created_mark = getMinDatePara("matchedVideoTmp", "created_at", db=target_db)
    updated_mark = getMinDatePara("matchedVideoTmp", "updated_at", db=target_db)
    created_mark = created_mark or "2015-03-01"
    updated_mark = updated_mark or "0000-00-00 00:00:00"

    data = get_matchedVideo(created_mark, updated_mark)

    insert_sql = (
        ' insert into matchedVideoTmp'
        ' (id, company_id, trackingMeta_id, trackingWebsite_id, view_count,'
        ' count_send_notice, first_send_notice_date, takeoff_time, hide_flag,'
        ' clip_duration, matchedFile_id, meta_title, created_at, updated_at)'
        ' values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' trackingMeta_id = values(trackingMeta_id),'
        ' trackingWebsite_id = values(trackingWebsite_id),'
        ' view_count = values(view_count),'
        ' count_send_notice = values(count_send_notice),'
        ' first_send_notice_date = values(first_send_notice_date),'
        ' takeoff_time = values(takeoff_time),'
        ' hide_flag = values(hide_flag),'
        ' clip_duration = values(clip_duration),'
        ' matchedFile_id = values(matchedFile_id),'
        ' meta_title = values(meta_title),'
        ' created_at = values(created_at),'
        ' updated_at = values(updated_at) '
    )
    dataToTarget(data, target_db, insert_sql)

    sendToMe(subject="matchedVideoTmp end", body="matchedVideoTmp end")
    logger.info("load data target matchedVideoTmp end")
def getDataFromVT():
    """Fetch the tracking-website dimension rows from the vtweb database.

    Joins ``trackingWebsiteExtraInfo`` with ``mddb.trackingWebsite`` on the
    website id and selects, in this order: trackingWebsite_id, websiteType,
    websiteName, websiteDomain, country_id, ETLDate.

    Returns:
        Whatever ``MySQLHelper.queryCMD`` returns for the query.

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(": extract data from tracker2 start")
    vt_TitleBasedTrackingWebsite_SQL = (
        ' select a.trackingWebsite_id, b.website_type as websiteType,'
        ' a.display_name as websiteName, b.website_domain as websiteDomain,'
        ' b.country_id, CURRENT_TIMESTAMP as ETLDate'
        ' from trackingWebsiteExtraInfo as a, mddb.trackingWebsite as b'
        ' where a.trackingWebsite_id = b.id '
    )
    # An alternative section name, "vtweb_tracker2", was previously used here.
    vtweb_tracker2_section = "vtweb_staging"
    try:
        vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section)
        vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd,
                                  port=vt_port, db_name=vt_db)
        result = vtweb_mysql.queryCMD(vt_TitleBasedTrackingWebsite_SQL)
    except Exception as e:
        logger.debug(": extract data from vt for dimension trackingWebsite, %s" % e)
        sendToMe(subject="TitleBasedTrackingWebsite ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
    # Hand the rows back; without this the fetched data was discarded.
    return result
def dataTo_matchedFileItem():
    """Load matchedFileItem rows into ``FOX_DASHBOARD.matchedFileItem``.

    Source rows are the ``tracker2.matchedFileItem`` records joined to matched
    videos of company 10 created on or after 2015-03-01 whose tracking website
    type is ``cyberlocker`` or ``hybrid``.  The first nine columns of each row
    are forwarded positionally; a falsy value in column index 7 (bound to
    ``takeoff_time`` by the insert) is replaced with
    ``"0000-00-00 00:00:00"``.  Rows are upserted through ``dataToTarget``.
    """
    logger.info("load data target matchedFileItem start")
    sendToMe(subject="matchedFileItem start", body="matchedFileItem start")

    get_data_sql = (
        ' select b.*'
        ' from tracker2.matchedVideo as a, tracker2.matchedFileItem as b,'
        ' mddb.trackingWebsite as c'
        ' where a.matchedFile_id = b.matchedFile_id'
        ' and (c.website_type = "cyberlocker" or c.website_type = "hybrid")'
        ' and a.trackingWebsite_id = c.id'
        ' and a.company_id = 10'
        ' and a.created_at >= "2015-03-01" '
    )
    insert_sql = (
        ' insert into matchedFileItem'
        ' (id, matchedFile_id, trackingWebsite_id, key_id, file_name,'
        ' file_size, clip_url, takeoff_time, takeoff_type)'
        ' values(%s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' file_name = values(file_name),'
        ' file_size = values(file_size),'
        ' clip_url = values(clip_url),'
        ' takeoff_time = values(takeoff_time),'
        ' takeoff_type = values(takeoff_type) '
    )
    target_db = "FOX_DASHBOARD"

    fetched = getData(get_data_sql)
    data = []
    for item in fetched:
        takeoff_time = item[7] or "0000-00-00 00:00:00"
        data.append((item[0], item[1], item[2], item[3], item[4],
                     item[5], item[6], takeoff_time, item[8]))

    dataToTarget(data, target_db, insert_sql)
    sendToMe(subject="matchedFileItem end", body="matchedFileItem end")
    logger.info("load data target matchedFileItem end")
def dataTo_matchedVideo():
    """Rebuild ``FOX_DASHBOARD.matchedVideo`` from the Tmp and Sequence tables.

    Empties ``matchedVideo`` and refills it with the ``matchedVideoTmp`` rows
    that have a matching ``(id, company_id)`` pair in
    ``matchedVideoSequence``, then commits.  The connection always uses the
    ``FOX_DASHBOARD`` database; the database name from the configuration
    section is read but not used.

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info("load data target matchedVideo start")
    sendToMe(subject="matchedVideo start", body="matchedVideo start")

    rebuild_sql = (
        ' insert into FOX_DASHBOARD.matchedVideo'
        ' (id, company_id, trackingMeta_id, trackingWebsite_id, view_count,'
        ' count_send_notice, first_send_notice_date, takeoff_time, hide_flag,'
        ' clip_duration, matchedFile_id, meta_title, created_at, updated_at)'
        ' select a.id, a.company_id, trackingMeta_id, trackingWebsite_id, view_count,'
        ' count_send_notice, first_send_notice_date, takeoff_time, hide_flag,'
        ' clip_duration, matchedFile_id, meta_title, created_at, updated_at'
        ' from FOX_DASHBOARD.matchedVideoTmp as a, FOX_DASHBOARD.matchedVideoSequence as b'
        ' where a.id = b.id and a.company_id = b.company_id '
    )
    try:
        host, user, passwd, port, _configured_db = getConfMysqlInfo("target_server_staging")
        connection = MySQLHelper(host=host, user=user, passwd=passwd,
                                 db_name="FOX_DASHBOARD", port=port, charset='utf8')
        connection.queryNoData("delete from FOX_DASHBOARD.matchedVideo")
        connection.queryNoData(rebuild_sql)
        connection.commit()
    except Exception as e:
        logger.debug("extract data from 192.168.111.235 tracker2 error, %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="extract data from 192.168.111.235 tracker2", body=mail_body)
        sys.exit(0)
def updateSiteBasedAlexa():
    """Copy the latest Alexa rank/country from SiteBasedAlexa onto SiteBased.

    Two UPDATE statements are run on the ``target_server_staging``
    connection, each followed by a commit.  Both take values from the
    ``SiteBasedAlexa`` rows carrying the maximum ``reportDate``:

    1. ``SiteBased`` rows whose ``reportDate`` is later than that maximum.
    2. ``SiteBased`` rows whose ``alexaTopCountry`` is ``"unknown"`` and
       whose ``alexaGlobalRank`` is 0.

    A start notification is mailed first.  On failure the error is logged and
    mailed and the function returns normally.
    """
    sendToMe(subject="update alexa data start", body="update alexa data start")
    logger.info(" updateSiteBasedAlexa start")

    update_SQL1 = (
        ' update SiteBased as a, SiteBasedAlexa as b'
        ' set a.alexaGlobalRank = b.alexaGlobalRank,'
        ' a.alexaTopCountry = b.alexaTopCountry'
        ' where a.trackingWebsite_id = b.trackingWebsite_id'
        ' and b.reportDate = (select max(reportDate) from SiteBasedAlexa)'
        ' and a.reportDate > (select max(reportDate) from SiteBasedAlexa); '
    )
    update_SQL2 = (
        ' update SiteBased as a, SiteBasedAlexa as b'
        ' set a.alexaGlobalRank = b.alexaGlobalRank,'
        ' a.alexaTopCountry = b.alexaTopCountry'
        ' where a.trackingWebsite_id = b.trackingWebsite_id'
        ' and b.reportDate = (select max(reportDate) from SiteBasedAlexa)'
        ' and a.alexaTopCountry = "unknown" and a.alexaGlobalRank = 0; '
    )

    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = \
            getConfMysqlInfo(target_server_section)
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        for statement in (update_SQL1, update_SQL2):
            target_mysql.queryCMD(statement)
            target_mysql.commit()
    except Exception as e:
        # Logger instances expose debug(); there is no DEBUG method, so the
        # lower-case call is required for the handler itself not to raise.
        logger.debug("update alexa data %s" % e)
        # Mail plain text with quotes and "!" removed, matching the other
        # jobs in this file that mail error details.
        mail_body = str(e).replace("'", "").replace('"', "").replace("!", "")
        sendToMe(subject="update alexa data ERROR", body=mail_body)
def dataTo_country():
    """Mirror ``mddb.country`` into the ``country`` table of FOX_DASHBOARD.

    All rows are read with ``getData`` and upserted through ``dataToTarget``;
    on a duplicate key every non-id column is overwritten.  Start/end
    notifications are mailed and logged.
    """
    logger.info("load data target country start")
    sendToMe(subject="country start", body="country start")

    get_data_sql = (
        ' select id, region, country_code, country_name, language_id,'
        ' national_flag, longitude, latitude, created_at, updated_at'
        ' from mddb.country; '
    )
    insert_sql = (
        ' insert into country'
        ' (id, region, country_code, country_name, language_id,'
        ' national_flag, longitude, latitude, created_at, updated_at)'
        ' values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' region = values(region),'
        ' country_code = values(country_code),'
        ' country_name = values(country_name),'
        ' language_id = values(language_id),'
        ' national_flag = values(national_flag),'
        ' longitude = values(longitude),'
        ' latitude = values(latitude),'
        ' created_at = values(created_at),'
        ' updated_at = values(updated_at) '
    )

    target_db = "FOX_DASHBOARD"
    rows = getData(get_data_sql)
    dataToTarget(rows, target_db, insert_sql)

    sendToMe(subject="country end", body="country end")
    logger.info("load data target country end")
def main():
    """Run ``updateSiteBasedAlexa`` between dated start/end notification mails.

    The local date (YYYY-MM-DD) is taken once, before the update starts, and
    appended to both mail subjects.
    """
    today = time.strftime("%Y-%m-%d")

    sendToMe(subject="update alexa data start %s" % today,
             body="update alexa data (SiteBased table) start")

    updateSiteBasedAlexa()

    # The subject is passed positionally here, unlike the start mail.
    sendToMe("update alexa data end %s" % today,
             body="update alexa data (SiteBased table) end")
def dataToTarget(data, db):
    """Upsert matched-video rows into the ``Test`` table of database ``db``.

    Args:
        data: rows with 14 values each, in the column order of the INSERT
            statement below.
        db: name of the database to open on the target server.

    The rows are handed to ``commitInTurn`` together with the connection's
    ``executeManyCMD``/``commit`` methods and ``commit_num=50000``
    (presumably the number of rows per commit; confirm against
    ``commitInTurn``).

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info("load data target matchedVideo start")

    upsert_sql = (
        ' insert into Test'
        ' (id, company_id, trackingMeta_id, trackingWebsite_id, view_count,'
        ' count_send_notice, first_send_notice_date, takeoff_time, hide_flag,'
        ' clip_duration, matchedFile_id, meta_title, created_at, updated_at)'
        ' values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' trackingMeta_id = values(trackingMeta_id),'
        ' trackingWebsite_id = values(trackingWebsite_id),'
        ' view_count = values(view_count),'
        ' count_send_notice = values(count_send_notice),'
        ' first_send_notice_date = values(first_send_notice_date),'
        ' takeoff_time = values(takeoff_time),'
        ' hide_flag = values(hide_flag),'
        ' clip_duration = values(clip_duration),'
        ' matchedFile_id = values(matchedFile_id),'
        ' meta_title = values(meta_title),'
        ' created_at = values(created_at),'
        ' updated_at = values(updated_at) '
    )
    try:
        # The configured database name is ignored in favour of ``db``.
        host, user, passwd, port, _configured_db = getConfMysqlInfo("target_server_staging")
        connection = MySQLHelper(host=host, user=user, passwd=passwd,
                                 db_name=db, port=port, charset='utf8')
        commitInTurn(commit_num=50000,
                     data=data,
                     executeFun=connection.executeManyCMD,
                     commitFun=connection.commit,
                     executeSQL=upsert_sql)
    except Exception as e:
        logger.debug("extral data from tracker2.matchedVideo, %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="matchedVideo ERROR", body=mail_body)
        sys.exit(0)
def main():
    """Backfill matched videos, then insight view counts, one day per pass.

    Each phase starts from the value ``getMinDatePara`` reports for its table
    (``"2015-02-28"`` when the first lookup is falsy) and repeats while that
    value, compared as a string, is earlier than the local date 365 days ago.
    Every pass calls the loader with ``start_date`` and the following
    calendar day as ``end_date``, then re-reads the start value.

    NOTE(review): termination relies on each load moving the value returned
    by ``getMinDatePara`` forward; confirm for days without source data.
    """
    day_seconds = 24 * 60 * 60

    def cutoff_day():
        # Local date 365 days before "now"; re-evaluated on every check.
        return str(time.strftime('%Y-%m-%d',
                                 time.localtime(time.time() - day_seconds * 365)))

    def following_day(day):
        # Add one day's worth of seconds to the local epoch of ``day``.
        parsed = time.strptime(str(day), "%Y-%m-%d")
        return time.strftime("%Y-%m-%d",
                             time.localtime(time.mktime(parsed) + 1 * day_seconds))

    sendToMe(subject="titleBased_infringAllViews start",
             body="titleBased_infringAllViews start")

    # Phase 1: matched videos.
    matchedVideo_start_date = getMinDatePara("matchedVideo", "firstSendNoticeDate") or "2015-02-28"
    while str(matchedVideo_start_date) < cutoff_day():
        matchedVideo_end_date = following_day(matchedVideo_start_date)
        getMatchedVideo(start_date=matchedVideo_start_date,
                        end_date=matchedVideo_end_date)
        matchedVideo_start_date = getMinDatePara("matchedVideo", "firstSendNoticeDate")

    # Phase 2: insight view counts.
    insight_start_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at") or "2015-02-28"
    while str(insight_start_date) < cutoff_day():
        insight_end_date = following_day(insight_start_date)
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)
        insight_start_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")

    sendToMe(subject="titleBased_infringAllViews end",
             body="titleBased_infringAllViews end")
def getInsightViews(start_date, end_date):
    """Copy one window of ``matchedVideoViewCountCompletionAll`` rows.

    Rows with ``start_date < report_at <= end_date`` are read from the server
    configured in the ``insight`` section and inserted into the table of the
    same name on the ``target_server_staging`` server via ``commitInTurn``
    (``commit_num=100000``).

    Both configuration sections are read before the guarded block, so a
    configuration error propagates to the caller.  Any exception raised while
    querying or inserting is logged and mailed, and the process exits via
    ``sys.exit(0)``.
    """
    logger.info(" extral data from Dashboard.matchedVideoViewCountCompletionAll start")
    window = {"start_date": start_date, "end_date": end_date}

    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo("target_server_staging")
    insight_host, insight_user, insight_passwd, insight_port, insight_db = \
        getConfMysqlInfo("insight")

    try:
        select_template = (
            ' select matchedVideo_id, trackingWebsite_id, trackingMeta_id,'
            ' company_id, report_at, view_count, hide_flag,'
            ' current_timestamp as ETLDate'
            ' from matchedVideoViewCountCompletionAll'
            ' where report_at > "%(start_date)s" and report_at <= "%(end_date)s" '
        )
        get_data_sql = select_template % window

        source_db = MySQLHelper(host=insight_host, user=insight_user,
                                passwd=insight_passwd, port=insight_port,
                                db_name=insight_db)
        rows = source_db.queryCMD(get_data_sql)

        sink_db = MySQLHelper(host=target_host, user=target_user,
                              passwd=target_passwd, db_name=target_db,
                              port=target_port, charset='utf8')
        insert_sql = (
            ' insert into matchedVideoViewCountCompletionAll'
            ' (matchedVideo_id, trackingWebsite_id, trackingMeta_id, company_id,'
            ' report_at, view_count, hide_flag, ETLDate)'
            ' values(%s, %s, %s, %s, %s, %s, %s, %s) '
        )
        commitInTurn(commit_num=100000,
                     data=rows,
                     executeFun=sink_db.executeManyCMD,
                     commitFun=sink_db.commit,
                     executeSQL=insert_sql)
    except Exception as e:
        logger.debug("extral data from Dashboard.matchedVideoViewCountCompletionAll, %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="titleBased_matchedVideoViewCountCompletionAll ERROR", body=mail_body)
        sys.exit(0)
def loadDataToTitleBasedRemoveNum1():
    """Rebuild ``TitleBasedRemoveNum`` from ``TitleBasedRemoveNumTmp``.

    The aggregate query sums ``removedNum`` and ``complianceTime`` per
    (reportDate, takeoffDate, trackingWebsite_id, websiteName, websiteType,
    title).  Titles are replaced by ``MetaTitleMapTitle.mapTitle`` and website
    names by ``SiteMap.displayName`` where such a mapping row exists.

    The target table is emptied only after the aggregate query has returned,
    so a failing query does not leave ``TitleBasedRemoveNum`` wiped.  The
    result is then upserted through ``commitInTurn`` (``commit_num=50000``).

    The configuration section is read before the guarded block.  Any
    exception raised afterwards is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(":extract data from TitleBasedRemoveNum start")
    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo(target_server_section)

    aggregate_SQL = (
        ' select a.reportDate, a.takeoffDate, a.trackingWebsite_id,'
        ' ifnull(c.displayName, a.websiteName) as websiteName, a.websiteType,'
        ' ifnull(b.mapTitle, a.title) as title,'
        ' sum(removedNum) as removedNum, sum(complianceTime) as complianceTime,'
        ' current_timestamp as ETLDate'
        ' from (select a.reportDate, a.takeoffDate, a.trackingWebsite_id,'
        ' c.websiteName, c.websiteType, b.title,'
        ' sum(removedNum) as removedNum, sum(complianceTime) as complianceTime'
        ' from TitleBasedRemoveNumTmp as a, TitleBasedMeta as b,'
        ' TitleBasedTrackingWebsite as c'
        ' where a.trackingWebsite_id = c.trackingWebsite_id'
        ' and a.trackingMeta_id = b.trackingMeta_id'
        ' group by 1, 2, 3, 4, 5, 6) as a'
        ' left join MetaTitleMapTitle as b on a.title = b.metaTitle'
        ' left join SiteMap as c on a.trackingWebsite_id = c.trackingWebsite_id'
        ' group by 1, 2, 3, 4, 5, 6 '
    )
    insertUpdate_SQL = (
        ' INSERT INTO TitleBasedRemoveNum'
        ' (reportDate, takeoffDate, trackingWebsite_id, websiteName,'
        ' websiteType, title, removedNum, complianceTime, ETLDate)'
        ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' removedNum = VALUES(removedNum),'
        ' complianceTime = VALUES(complianceTime),'
        ' ETLDate = VALUES(ETLDate) '
    )

    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        # Read first: the aggregate does not touch TitleBasedRemoveNum, so
        # there is no need to clear the target before the query succeeds.
        aggregate_result = target_mysql.queryCMD(aggregate_SQL)

        target_mysql.queryNoData("delete from TitleBasedRemoveNum")
        commitInTurn(commit_num=50000,
                     data=aggregate_result,
                     executeFun=target_mysql.insertUpdateCMD,
                     commitFun=target_mysql.commit,
                     executeSQL=insertUpdate_SQL)
    except Exception as e:
        logger.debug(" load data to TitleBasedRemoveNum, %s" % e)
        sendToMe(subject="TitleBasedRemove ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
def main():
    """Run the metaExtraInfo job: fetch with ``getData``, load into ``tracker2``.

    ``judgeFileExist`` is called with the configuration file path and the job
    name before any data is fetched.  Start and end notifications are mailed
    around the job.
    """
    sendToMe(subject="metaExtraInfo start", body="metaExtraInfo start")

    judgeFileExist("/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "metaExtraInfo")

    rows = getData()
    dataToTarget(rows, "tracker2")

    sendToMe(subject="metaExtraInfo end", body="metaExtraInfo end")
def updateViews(start_date, end_date):
    """Aggregate UGC view counts for one date window into ``TitleBased1``.

    Reads ``matchedVideoViewCountCompletionAll`` rows with
    ``start_date < report_at <= end_date`` for websites of type ``ugc`` and
    sums, per (reportDate, website, title):

    * ``infringingViews``: ``view_count`` of rows with ``hide_flag = 2``
    * ``reportedViews``: ``view_count`` of all rows

    Titles and website names are replaced through ``MetaTitleMapTitle`` and
    ``SiteMap`` where a mapping row exists.  The result is upserted into
    ``TitleBased1`` via ``commitInTurn`` (``commit_num=100000``).

    The configuration section is read before the guarded block.  Any
    exception raised afterwards is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(" aggregate data from matchedVideoViewCountCompletion start")
    host, user, passwd, port, db_name = getConfMysqlInfo("target_server_staging")
    window = {"start_date": start_date, "end_date": end_date}

    try:
        select_template = (
            " select a.reportDate, a.trackingWebsite_id,"
            " ifnull(c.displayName, a.websiteName) as websiteName, a.websiteType,"
            " ifnull(b.mapTitle, a.title) title,"
            " sum(a.infringingViews) as infringingViews,"
            " sum(a.reportedViews) as reportedViews, current_timestamp as ETLDate"
            " from (select a.report_at as reportDate, a.trackingWebsite_id,"
            " c.websiteName, c.websiteType, b.title,"
            " sum(if(a.hide_flag = 2, a.view_count, 0)) as infringingViews,"
            " sum(a.view_count) as reportedViews"
            " from matchedVideoViewCountCompletionAll as a, TitleBasedMeta as b,"
            " TitleBasedTrackingWebsite as c"
            " where a.trackingWebsite_id = c.trackingWebsite_id"
            " and a.trackingMeta_id = b.trackingMeta_id"
            " and c.websiteType = 'ugc'"
            ' and a.report_at > "%(start_date)s" and a.report_at <= "%(end_date)s"'
            " group by 1, 2, 3, 4, 5) as a"
            " left join MetaTitleMapTitle as b on a.title = b.metaTitle"
            " left join SiteMap as c on a.trackingWebsite_id = c.trackingWebsite_id"
            " group by 1, 2, 3, 4, 5 "
        )
        get_data_sql = select_template % window

        connection = MySQLHelper(host=host, user=user, passwd=passwd,
                                 db_name=db_name, port=port, charset='utf8')
        rows = connection.queryCMD(get_data_sql)

        upsert_sql = (
            'insert into TitleBased1'
            ' (reportDate, trackingWebsite_id, websiteName, websiteType, title,'
            ' infringingViews, reportedViews, ETLDate)'
            ' values(%s, %s, %s, %s, %s, %s, %s, %s)'
            ' ON DUPLICATE KEY UPDATE'
            ' infringingViews = values(infringingViews),'
            ' reportedViews = values(reportedViews),'
            ' ETLDate = values(ETLDate),'
            ' websiteName = VALUES(websiteName) '
        )
        commitInTurn(commit_num=100000,
                     data=rows,
                     executeFun=connection.executeManyCMD,
                     commitFun=connection.commit,
                     executeSQL=upsert_sql)
    except Exception as e:
        logger.debug("aggregate data to TitleBased1 ERROR , %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="titleBased_infringAllViews ERROR", body=mail_body)
        sys.exit(0)
def getData(sql):
    """Run ``sql`` on the server configured in the ``vtweb`` section.

    Args:
        sql: complete SELECT statement to execute.

    Returns:
        Whatever ``MySQLHelper.queryCMD`` returns for the statement.

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(" extract data from tracker2.metaExtraInfo start")
    try:
        vtweb_tracker2_section = "vtweb"
        vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section)
        vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd,
                                  port=vt_port, db_name=vt_db)
        data = vtweb_mysql.queryCMD(sql)
    except Exception as e:
        logger.debug("extract data from tracker2.metaExtraInfo, %s" % e)
        sendToMe(subject="metaExtraInfo ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
    # Callers assign the result of getData(); hand the rows back to them.
    return data
def getMinDatePara(table_name, date_para, db = "FOX_DASHBOARD"):
    """Return ``max(date_para)`` of ``table_name`` on the target server.

    Despite the name, the query selects the column's maximum; callers use it
    as the point from which the next load resumes.

    Args:
        table_name: table to inspect (interpolated into the SQL text).
        date_para: column whose maximum is wanted (interpolated into the SQL
            text).
        db: database to connect to; defaults to ``"FOX_DASHBOARD"``.

    Returns:
        The first column of the first row returned by ``queryCMD``, or
        ``None`` when the lookup raises.  A failure is logged and mailed but
        does not exit the process.
    """
    target_server_section = "target_server_staging"
    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo(target_server_section)

    # Stays None when the lookup fails, so callers' "if not value" fallbacks
    # apply.
    min_date = None
    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, port=target_port,
                                   db_name=db)
        sql = "select max(%s) from %s" % (date_para, table_name)
        min_date = target_mysql.queryCMD(sql)[0][0]
    except Exception as e:
        logger.debug(": get last max report date from %s, %s" % (table_name, e))
        sendToMe(subject=table_name, body=re.sub(r'\'|"|!', "", str(e)))
    return min_date
def main():
    """Look up Alexa data for every listed site and upsert it into the DB.

    Steps:

    1. Exit (status 0) after mailing an error when the configuration file is
       missing.
    2. Read the file ``id_dis_domain`` from the working directory.  Each
       non-blank line is comma-separated; fields 1, 2 and 3 (zero-based) are
       used as website id, display name and domain.
    3. For each site call ``getAlexaInfo`` on the Alexa site-info URL.  While
       the rank is 0 or the country is ``"unknown"`` the lookup is repeated,
       up to three more times, sleeping 5-8 seconds between retries.
    4. Unless both values are still unset (rank 0 and country ``"unknown"``),
       upsert one row into ``Website_Alexa_Info`` and commit.  A
       ``MySQLdb.Error`` on a single row is logged and mailed and processing
       continues with the next site.

    Any other exception is logged and mailed, and the process exits via
    ``sys.exit(0)``.
    """
    cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
    if not os.path.exists(cfg_file):
        logging.debug(": config file not exists; file_name %s" % cfg_file)
        sendToMe(subject="SiteBased_alexa ERROR", body="config file not exists")
        sys.exit(0)

    logger.info(": extract data from siteBased start")
    socket.setdefaulttimeout(10.0)
    sendToMe(subject="SiteBased_alexa start", body="extract data from siteBased start")

    target_server_section = "staging"
    target_host, target_user, target_passwd, target_port, target_db = \
        getConfMysqlInfo(target_server_section)

    base_url = "http://www.alexa.com/siteinfo/"
    # Loop-invariant, so built once rather than per site.
    insert_SiteBasedAlexa_SQL = (
        ' insert into Website_Alexa_Info'
        ' (CreateDate, WebsiteId, DisplayName, Rank, TopOneCountry, IsEndOfMonth)'
        ' values (%s, %s, %s, %s, %s, %s)'
        ' on duplicate key update'
        ' Rank = values(Rank), TopOneCountry = values(TopOneCountry) '
    )

    try:
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')

        # Read everything up front so the file handle is closed before the
        # slow, sleep-heavy scraping loop starts.
        with open("id_dis_domain", "r") as site_file:
            site_lines = site_file.readlines()

        for line in site_lines:
            line = line.strip("\n")
            if not line.strip():
                # A blank (e.g. trailing) line carries no site to look up.
                continue

            fields = line.split(",")
            site_id, display_name, domain = fields[1], fields[2], fields[3]
            url = base_url + domain

            alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
            for attempt in range(3):
                if alexaGlobalRank != 0 and alexaTopCountry != "unknown":
                    break
                alexaGlobalRank, alexaTopCountry = getAlexaInfo(url)
                if attempt < 2:
                    # Pause between retries, but not after the last one.
                    time.sleep(random.randint(5, 8))
            # Pause before moving on to the next site.
            time.sleep(random.randint(5, 8))

            if alexaGlobalRank == 0 and alexaTopCountry == "unknown":
                # Nothing usable was retrieved for this site.
                continue

            # CreateDate is a fixed literal and IsEndOfMonth is always 1.
            alexa_info_tuple = [("2016-05-31", site_id, display_name,
                                 alexaGlobalRank, alexaTopCountry, 1)]
            try:
                target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple)
                print(alexa_info_tuple)
                target_mysql.commit()
            except MySQLdb.Error as e:
                logger.debug(e)
                sendToMe(subject="update SiteBasedAlexa Error",
                         body=re.sub(r'\'|"|!', "", str(e)))
    except Exception as e:
        logger.debug(": load data to SiteBasedAlexa %s" % e)
        sendToMe(subject="SiteBasedAlexa ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
def addAlexa(d):
    """Duplicate the latest SiteBasedAlexa snapshot under report date ``d``.

    Selects every ``SiteBasedAlexa`` row carrying the maximum ``reportDate``,
    substituting ``d`` for the report date and the current timestamp for
    ``ETLDate``, and inserts the result back into ``SiteBasedAlexa``.

    Args:
        d: report date for the copied rows.  It is interpolated into the
            SELECT as a quoted literal, so it must come from a trusted
            source.

    A start notification is mailed first.  On failure the error is logged and
    mailed and the function returns normally.
    """
    sendToMe(subject="add alexa data start", body="add alexa data start")
    logger.info(" addSiteBasedAlexa start")

    get_alexa_sql = (
        "select '%s', trackingWebsite_id, websiteDomain, alexaGlobalRank,"
        " alexaTopCountry, current_timestamp() from SiteBasedAlexa"
        " where reportDate = (select max(reportDate) from SiteBasedAlexa);"
    ) % d
    insert_alexa_sql = (
        "insert into SiteBasedAlexa(reportDate,trackingWebsite_id, websiteDomain,"
        " alexaGlobalRank, alexaTopCountry,ETLDate) VALUES(%s,%s,%s,%s,%s,%s);"
    )

    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = \
            getConfMysqlInfo(target_server_section)
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        alexa_result = target_mysql.queryCMD(get_alexa_sql)
        target_mysql.insertUpdateCMD(insert_alexa_sql, alexa_result)
        target_mysql.commit()
    except Exception as e:
        # Logger instances expose debug(); there is no DEBUG method, so the
        # lower-case call is required for the handler itself not to raise.
        logger.debug("add alexa data %s" % e)
        # Mail plain text with quotes and "!" removed, matching the other
        # jobs in this file that mail error details.
        mail_body = str(e).replace("'", "").replace('"', "").replace("!", "")
        sendToMe(subject="add alexa data ERROR", body=mail_body)
def main():
    """Catch ``matchedVideoViewCountCompletion`` up to two days ago.

    Starting from the value ``getMinDatePara`` reports for ``report_at``
    (``"2015-02-28"`` when the first lookup is falsy), ``getInsightViews`` is
    called for one-day windows while the start value, compared as a string,
    is earlier than the local date two days ago.  The start value is re-read
    from the table after each window.

    NOTE(review): termination relies on each load moving the value returned
    by ``getMinDatePara`` forward; confirm for days without source data.
    """
    day_seconds = 24 * 60 * 60

    def cutoff_day():
        # Local date two days before "now"; re-evaluated on every check.
        return str(time.strftime('%Y-%m-%d',
                                 time.localtime(time.time() - 2 * day_seconds)))

    sendToMe(subject="matchedVideoViewCountCompletion start",
             body="matchedVideoViewCountCompletion start")

    insight_start_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at") or "2015-02-28"
    while str(insight_start_date) < cutoff_day():
        window_open = time.mktime(time.strptime(str(insight_start_date), "%Y-%m-%d"))
        insight_end_date = time.strftime("%Y-%m-%d",
                                         time.localtime(window_open + 1 * day_seconds))
        getInsightViews(start_date=insight_start_date, end_date=insight_end_date)
        insight_start_date = getMinDatePara("matchedVideoViewCountCompletion", "report_at")

    sendToMe(subject="matchedVideoViewCountCompletion end",
             body="matchedVideoViewCountCompletion end")
def dataToTarget(data, db, insert_sql):
    """Write ``data`` to database ``db`` on the target server.

    Args:
        data: rows to write; each row must match the placeholders of
            ``insert_sql``.
        db: name of the database to open on the target server.
        insert_sql: parameterised INSERT statement to execute.

    The rows are handed to ``commitInTurn`` together with the connection's
    ``executeManyCMD``/``commit`` methods and ``commit_num=50000``
    (presumably the number of rows per commit; confirm against
    ``commitInTurn``).

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info("load data target metaExtraInfo start")
    try:
        # The configured database name is ignored in favour of ``db``.
        host, user, passwd, port, _configured_db = getConfMysqlInfo("target_server_staging")
        connection = MySQLHelper(host=host, user=user, passwd=passwd,
                                 db_name=db, port=port, charset='utf8')
        commitInTurn(commit_num=50000,
                     data=data,
                     executeFun=connection.executeManyCMD,
                     commitFun=connection.commit,
                     executeSQL=insert_sql)
    except Exception as e:
        logger.debug("extral data from tracker2.metaExtraInfo, %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="metaExtraInfo ERROR", body=mail_body)
        sys.exit(0)
def getData():
    """Fetch the ``tracker2.metaExtraInfo`` rows of company 14.

    The query runs on the server configured in the ``vtweb`` section and
    selects meta_id, company_id, display_name, priority_type, created_at and
    updated_at.

    Returns:
        Whatever ``MySQLHelper.queryCMD`` returns for the query.

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(" extract data from tracker2.metaExtraInfo start")
    try:
        get_data_sql = (
            ' select meta_id, company_id, display_name, priority_type,'
            ' created_at, updated_at'
            ' from tracker2.metaExtraInfo where company_id = 14 '
        )
        vtweb_tracker2_section = "vtweb"
        vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section)
        vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd,
                                  port=vt_port, db_name=vt_db)
        data = vtweb_mysql.queryCMD(get_data_sql)
    except Exception as e:
        logger.debug("extract data from tracker2.metaExtraInfo, %s" % e)
        sendToMe(subject="metaExtraInfo ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
    # The caller assigns the result of getData(); hand the rows back.
    return data
def updateCountry():
    """Fill ``SiteBased.hostCountry`` from ``TitleBasedCountry``.

    Runs one multi-table UPDATE on the ``target_server_staging`` connection,
    matching rows on ``country_id``, then commits.

    A start notification is mailed first.  On failure the error is logged and
    mailed and the function returns normally.
    """
    sendToMe(subject="update Country start", body="update Country table: SiteBased")
    logger.info("update Country for table SiteBased start")

    update_SiteBased_SQL = (
        ' update SiteBased as a, TitleBasedCountry as b'
        ' set a.hostCountry = b.countryName'
        ' where a.country_id = b.country_id '
    )

    try:
        target_server_section = "target_server_staging"
        target_host, target_user, target_passwd, target_port, target_db = \
            getConfMysqlInfo(target_server_section)
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        target_mysql.queryCMD(update_SiteBased_SQL)
        target_mysql.commit()
    except Exception as e:
        # Logger instances expose debug(); there is no DEBUG method, so the
        # lower-case call is required for the handler itself not to raise.
        # The messages name this job (Country) so a failure is not mistaken
        # for one in the TrackingWebsite update.
        logger.debug("update Country data %s" % e)
        # Mail plain text with quotes and "!" removed, matching the other
        # jobs in this file that mail error details.
        mail_body = str(e).replace("'", "").replace('"', "").replace("!", "")
        sendToMe(subject="update Country ERROR", body=mail_body)
def main():
    """Run the matchedVideo job against the ``Test`` table in ``tracker2``.

    ``judgeFileExist`` is called with the configuration file path and the job
    name first.  The latest ``created_at`` and ``updated_at`` values in
    ``Test`` are then looked up, but the report date actually passed to
    ``getMatchedVideo`` is the fixed value ``'2016-05-08'``; only the update
    date (default ``"0000-00-00 00:00:00"``) comes from the lookup.  The
    fetched rows are loaded with ``dataToTarget``.
    """
    sendToMe(subject="matchedVideo start", body="matchedVideo start")

    judgeFileExist("/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg",
                   "matchedVideo")

    target_db = "tracker2"
    # The created_at lookup is still performed, but its result is discarded
    # because the report date is pinned just below.
    getMinDatePara("Test", "created_at", db=target_db)
    min_updateDate = getMinDatePara("Test", "updated_at", db=target_db) or "0000-00-00 00:00:00"
    min_reportDate = '2016-05-08'

    rows = getMatchedVideo(min_reportDate, min_updateDate)
    dataToTarget(rows, target_db)

    sendToMe(subject="matchedVideo end", body="matchedVideo end")
def loadDataTo123(vtweb_data):
    """Upsert meta dimension rows into ``TitleBasedMeta``.

    Args:
        vtweb_data: rows of (trackingMeta_id, metaType, title, ETLDate).

    The rows are written on the ``target_server_staging`` connection through
    ``commitInTurn`` (``commit_num=50000``); on a duplicate key the title,
    ETLDate and metaType are overwritten.

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(":load data to TitleBasedMeta start")
    target_server_section = "target_server_staging"
    try:
        target_host, target_user, target_passwd, target_port, target_db = \
            getConfMysqlInfo(target_server_section)
        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        insertUpdate_SQL = (
            ' INSERT INTO TitleBasedMeta(trackingMeta_id, metaType, title, ETLDate)'
            ' VALUES(%s, %s, %s, %s)'
            ' on duplicate key update'
            ' title = values(title), ETLDate = values(ETLDate),'
            ' metaType = values(metaType) '
        )
        commitInTurn(commit_num=50000,
                     data=vtweb_data,
                     executeFun=target_mysql.insertUpdateCMD,
                     commitFun=target_mysql.commit,
                     executeSQL=insertUpdate_SQL)
    except Exception as e:
        logger.debug(": load data to TitleBasedMeta, %s" % e)
        # Mail plain text with quotes and "!" removed, as the sibling loader
        # for TitleBasedTrackingWebsite does, instead of the raw exception
        # object.
        mail_body = str(e).replace("'", "").replace('"', "").replace("!", "")
        sendToMe(subject="titleBased_meta ERROR", body=mail_body)
        sys.exit(0)
def dataToMetaExtraInfo():
    """Copy the company-14 ``metaExtraInfo`` rows into the ``tracker2`` target.

    Rows are read with ``getData`` and upserted through ``dataToTarget``; on
    a duplicate key display_name, priority_type, created_at and updated_at
    are overwritten.  Start and end notifications are mailed.
    """
    sendToMe(subject="metaExtraInfo start", body="metaExtraInfo start")

    get_data_sql = (
        ' select meta_id, company_id, display_name, priority_type,'
        ' created_at, updated_at'
        ' from tracker2.metaExtraInfo where company_id = 14 '
    )
    insert_sql = (
        ' insert into metaExtraInfo'
        ' (meta_id, company_id, display_name, priority_type, created_at, updated_at)'
        ' values(%s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' display_name = values(display_name),'
        ' priority_type = values(priority_type),'
        ' created_at = values(created_at),'
        ' updated_at = values(updated_at) '
    )

    target_db = "tracker2"
    rows = getData(get_data_sql)
    dataToTarget(rows, target_db, insert_sql)

    sendToMe(subject="metaExtraInfo end", body="metaExtraInfo end")
def dataTo_trackingWebsiteExtraInfo():
    """Mirror ``tracker2.trackingWebsiteExtraInfo`` into FOX_DASHBOARD.

    All rows are read with ``getData`` and upserted through ``dataToTarget``;
    on a duplicate key display_name, created_at and updated_at are
    overwritten.  Start/end notifications are mailed and logged.
    """
    logger.info("load data target trackingWebsiteExtraInfo start")
    sendToMe(subject="trackingWebsiteExtraInfo start",
             body="trackingWebsiteExtraInfo start")

    get_data_sql = (
        ' select trackingWebsite_id, display_name, created_at, updated_at'
        ' from tracker2.trackingWebsiteExtraInfo '
    )
    insert_sql = (
        ' insert into trackingWebsiteExtraInfo'
        ' (trackingWebsite_id, display_name, created_at, updated_at)'
        ' values(%s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' display_name = values(display_name),'
        ' created_at = values(created_at),'
        ' updated_at = values(updated_at) '
    )

    target_db = "FOX_DASHBOARD"
    rows = getData(get_data_sql)
    dataToTarget(rows, target_db, insert_sql)

    sendToMe(subject="trackingWebsiteExtraInfo end",
             body="trackingWebsiteExtraInfo end")
    logger.info("load data target trackingWebsiteExtraInfo end")
def main():
    """Load matched videos one report day at a time until yesterday is reached.

    On every pass the latest ``reportDate`` and ``updateDate`` of
    ``matchedVideo`` are looked up (each defaulting to ``"2015-02-28"`` when
    falsy) and the day after the report date is computed.  The loop stops as
    soon as that next day, compared as a string, is on or after the local
    date one day ago; otherwise ``getMatchedVideo`` is called with the report
    date, the next day and the update date.

    NOTE(review): termination relies on each load moving the value returned
    by ``getMinDatePara`` forward; confirm for days without source data.
    """
    day_seconds = 24 * 60 * 60
    sendToMe(subject="matchedVideo start", body="matchedVideo start")

    while True:
        min_reportDate = getMinDatePara("matchedVideo", "reportDate") or "2015-02-28"
        report_epoch = time.mktime(time.strptime(str(min_reportDate), "%Y-%m-%d"))
        max_reportDate = time.strftime("%Y-%m-%d",
                                       time.localtime(report_epoch + 1 * day_seconds))

        min_updateDate = getMinDatePara("matchedVideo", "updateDate") or "2015-02-28"

        yesterday = time.strftime('%Y-%m-%d',
                                  time.localtime(time.time() - 1 * day_seconds))
        if str(max_reportDate) >= str(yesterday):
            break

        getMatchedVideo(min_reportDate, max_reportDate, min_updateDate)

    sendToMe(subject="matchedVideo end", body="matchedVideo end")
def loadDataTo123(vtweb_data):
    """Upsert tracking-website dimension rows into ``TitleBasedTrackingWebsite``.

    Args:
        vtweb_data: rows whose first six values map, in order, to
            trackingWebsite_id, websiteType, websiteName, websiteDomain,
            country_id and ETLDate.

    Before loading, the substrings ``_ugc``, ``_hybrid``, ``_Hybrid``,
    ``_cyberlocker``, ``_UGC``, ``_Cy`` and ``_cy`` are removed from the
    value at index 3 of every row.  The cleaned rows are written on the
    ``target_server_staging`` connection through ``commitInTurn``
    (``commit_num=50000``).

    On any exception the error is logged and mailed, and the process exits
    via ``sys.exit(0)``.
    """
    logger.info(":load data to TitleBasedTrackingWebsite start")
    upsert_sql = (
        ' INSERT INTO TitleBasedTrackingWebsite'
        ' (trackingWebsite_id, websiteType, websiteName, websiteDomain,'
        ' country_id, ETLDate)'
        ' VALUES(%s, %s, %s, %s, %s, %s)'
        ' on duplicate key update'
        ' websiteDomain = values(websiteDomain),'
        ' websiteType = values(websiteType),'
        ' ETLDate = values(ETLDate),'
        ' websiteName = values(websiteName),'
        ' country_id = values(country_id) '
    )
    type_suffix = re.compile(r"_ugc|_hybrid|_Hybrid|_cyberlocker|_UGC|_Cy|_cy")

    try:
        host, user, passwd, port, db_name = getConfMysqlInfo("target_server_staging")
        connection = MySQLHelper(host=host, user=user, passwd=passwd,
                                 db_name=db_name, port=port, charset='utf8')

        cleaned_rows = []
        for item in vtweb_data:
            cleaned_rows.append((item[0], item[1], item[2],
                                 type_suffix.sub('', item[3]),
                                 item[4], item[5]))

        commitInTurn(commit_num=50000,
                     data=cleaned_rows,
                     executeFun=connection.insertUpdateCMD,
                     commitFun=connection.commit,
                     executeSQL=upsert_sql)
    except Exception as e:
        logger.debug(": load data to TitleBasedTrackingWebsite, %s" % e)
        mail_body = re.sub(r'\'|"|!', "", str(e))
        sendToMe(subject="TitleBasedTrackingWebsite ERROR", body=mail_body)
        sys.exit(0)
def getDataFromVT():
    """Query the vtweb staging database for company 14's meta titles.

    Connection settings come from the "vtweb_staging" config section.  On any
    exception the error is logged, mailed through ``sendToMe`` and the process
    ends via ``sys.exit(0)``.
    """
    # NOTE(review): the visible code stores the query output in ``result`` but
    # never returns it, so callers receive None -- confirm whether a trailing
    # ``return result`` was lost.
    logger.info(": extract data from tracker2 start")

    # Meta rows that also have a metaExtraInfo record for the same company.
    vt_TitleBasedMeta_SQL = (
        " select a.id as trackingMeta_id, a.meta_type, a.meta_title as title,"
        " CURRENT_TIMESTAMP as ETLDate"
        " from mddb.meta as a, tracker2.metaExtraInfo as b"
        " where a.company_id = 14 and b.company_id = 14 and a.id = b.meta_id "
    )

    try:
        host, user, passwd, port, db = getConfMysqlInfo("vtweb_staging")
        vtweb_mysql = MySQLHelper(host=host, user=user, passwd=passwd,
                                  port=port, db_name=db)
        result = vtweb_mysql.queryCMD(vt_TitleBasedMeta_SQL)
    except Exception as e:
        logger.debug(": extract data from vt for dimension meta, %s" % e)
        sendToMe(subject="titleBased_meta ERROR", body=e)
        sys.exit(0)
logger = logging.getLogger("titleBased_CMS") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBased/log/titleBased_CMS.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5 * 1024 * 1024, backupCount=10, mode='a') formatter = logging.Formatter( '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject="titleBased_CMS ERROR", body="config file not exists") sys.exit(0) ################################################################################################################################# target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) ################################################################################################################################# sendToMe( subject="titleBased_CMS start", body= "aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) start" ) ##CMS data and reportedViews (UGC) from SelfService_Aggregate_ByNoticedDate aggregate to TitleBased1 #-------------------------------------------------------------------------------------------------------------------------------- logger.info( " aggregate data from DM_VIACOM.SelfService_Aggregate_ByNoticedDate (CMS data) start"
from titleBased import getConfMysqlInfo, getMinDatePara from sendMail import sendToMe import re logger = logging.getLogger("titleBased_matchedVideo") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBased/log/titleBased_matchedVideo.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a') formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject = "titleBased_matchedVideo ERROR", body = "config file not exists") sys.exit(0) ################################################################################################################################# target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) insight_server_section = "insight" insight_host, insight_user, insight_passwd, insight_port, insight_db= getConfMysqlInfo(insight_server_section) vtweb_tracker2_section = "vtweb_staging" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo(vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user,passwd=vt_passwd, port = vt_port, db_name = vt_db) ################################################################################################################################# def getMatchedVideo(min_reportDate, max_reportDate, min_updateDate): logger.info(" extral data from tracker2.matchedVideo start")
def main(): cfg_file = "/Job/HBO/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" %cfg_file) sendToMe(subject = "SiteBased_alexa ERROR", body = "config file not exists") sys.exit(0) logger.info(": extract data from siteBased start") socket.setdefaulttimeout(10.0) sendToMe(subject = "SiteBased_alexa start", body = "extract data from siteBased start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') alexa_date_min = getMinDatePara("SiteBasedAlexa", "reportDate") alexa_date_min = '2016-04-12' if alexa_date_min == None: alexa_date_min = time.strftime("%Y-%m-%d", time.localtime(time.time() - 10 * 24 * 60 * 60)) alexa_date_max = time.strftime("%Y-%m-%d", time.localtime(time.time() - 1 * 24 * 60 * 60)) delt = time.mktime(time.strptime(str(alexa_date_max), "%Y-%m-%d")) - time.mktime(time.strptime(str(alexa_date_min), "%Y-%m-%d")) if delt >= 10*24*60*60: alexa_date_dict = {"alexa_date_min": alexa_date_min, "alexa_date_max": alexa_date_max} site_SQL = """ select distinct trackingWebsite_id, websiteDomain from SiteBased where reportDate <= "%(alexa_date_max)s" and reportDate > "%(alexa_date_min)s" and alexaGlobalRank = 0 and alexaTopCountry = "unknown" """ %alexa_date_dict site_info = target_mysql.queryCMD(site_SQL) if site_info: base_url = "http://www.alexa.com/siteinfo/" for site in site_info: domain = site[1] url = base_url + domain run_time = 0 alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) while True: run_time += 1 if alexaGlobalRank == 0 or alexaTopCountry == "unknown": alexaGlobalRank, alexaTopCountry = getAlexaInfo(url) else: break if run_time == 3: break time.sleep(random.randint(5, 8)) time.sleep(random.randint(5, 8)) 
alexa_info_tuple = [(alexa_date_max, site[0], site[1], alexaGlobalRank, alexaTopCountry, time.strftime("%Y-%m-%d %H:%M:%S"))] if not (alexaGlobalRank == 0 and alexaTopCountry == "unknown"): insert_SiteBasedAlexa_SQL = """ insert into SiteBasedAlexa (reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" %alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" %e) sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0)
import time from titleBased import getConfMysqlInfo, commitInTurn from sendMail import sendToMe logger = logging.getLogger("titleBased_meta") logger.setLevel(logging.DEBUG) log_file = '/Job/FOX/Dashboard/TitleBased/log/titleBased_meta.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a') formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject = "titleBased_meta Error", body = "config file not exists") sys.exit(0) ################################################################################################################################# def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedMeta_SQL = """ select a.id as trackingMeta_id, a.meta_type, a.meta_title as title, CURRENT_TIMESTAMP as ETLDate from meta as a, metaExtraInfo as b where a.company_id = 10 and b.company_id = 10 and a.id = b.meta_id """ #vtweb_tracker2_section = "vtweb_tracker2" vtweb_tracker2_section = "vtweb_staging" try:
import time
from titleBased import getConfMysqlInfo, getMinDatePara
from sendMail import sendToMe

# Job-specific logger writing to a rotating file (5 MiB per file, 10 backups).
logger = logging.getLogger("titleBased_views")
logger.setLevel(logging.DEBUG)
log_file = '/Job/HBO/Dashboard/TitleBased/log/titleBased_views.log'
filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a')
formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
filehandler.setFormatter(formatter)
logger.addHandler(filehandler)

# Abort early (mail + exit status 0) when the shared config file is missing.
cfg_file = "/Job/HBO/Dashboard/TitleBased/conf/viacom_dashboard.cfg"
if not os.path.exists(cfg_file):
    # NOTE(review): this goes to the root ``logging`` module, not the
    # ``logger`` configured above, so it does not use the file handler.
    logging.debug(": config file not exists")
    sendToMe(subject = "titleBased_views ERROR", body = "config file not exists")
    sys.exit(0)
#################################################################################################################################
# Connection settings for the target database, read at import time.
target_server_section = "target_server_staging"
target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section)
#################################################################################################################################
## CMS data and reportedViews (UGC) from SelfService_Aggregate_ByNoticedDate aggregate to TitleBased1
#--------------------------------------------------------------------------------------------------------------------------------
logger.info(" aggregate data from DM_HBO.SelfService_Aggregate_ByNoticedDate, extract reportedViews start")
sendToMe(subject = "titleBased_views start", body = "aggregate data from DM_HBO.SelfService_Aggregate_ByNoticedDate, extract reportedViews start")
# Lower bound: first computed as "65 days ago" ...
date_para_reportedViews_min = time.strftime("%Y-%m-%d", time.localtime(time.time()- (65) * 24 * 60 * 60))
# ... then unconditionally replaced by a fixed date, so the value above is
# never used.  NOTE(review): looks like a leftover backfill override -- confirm.
date_para_reportedViews_min = "2015-03-01"
# Upper bound: the local date 24 hours before now.
date_para_reportedViews_max = time.strftime("%Y-%m-%d", time.localtime(time.time()- 1 * 24 * 60 * 60))
logger.setLevel(logging.DEBUG) log_file = '/Job/FOX/Dashboard/TitleBased/log/titleBased.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5 * 1024 * 1024, backupCount=10, mode='a') formatter = logging.Formatter( '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logger.debug(": config file not exists") sendToMe(subject="titleBasedTmp ERROR", body="config file not exists") sys.exit(0) def getConfMysqlInfo(section_name): conf = CfgParser(cfg_file).parse() try: host = conf[section_name]["host"] user = conf[section_name]["user"] passwd = conf[section_name]["passwd"] port = conf[section_name]["port"] db = conf[section_name]["db"] return host, user, passwd, port, db except KeyError, e: logger.debug(": mysql config file section or option not exists, %s" %
logger = logging.getLogger("titleBased_matchedVideoViewCountCompletio") logger.setLevel(logging.DEBUG) log_file = '/Job/FOX/Dashboard/TitleBased/log/titleBased_matchedVideoViewCountCompletion.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5 * 1024 * 1024, backupCount=10, mode='a') formatter = logging.Formatter( '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/FOX/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject="titleBased_matchedVideoViewCountCompletio ERROR", body="config file not exists") sys.exit(0) ################################################################################################################################# target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) insight_server_section = "insight" insight_host, insight_user, insight_passwd, insight_port, insight_db = getConfMysqlInfo( insight_server_section) vtweb_tracker2_section = "vtweb_staging" vt_host, vt_user, vt_passwd, vt_port, vt_db = getConfMysqlInfo( vtweb_tracker2_section) vtweb_mysql = MySQLHelper(host=vt_host,
logger = logging.getLogger("titleBased_trackingWebsite") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBased/log/titleBased_trackingWebsite.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5 * 1024 * 1024, backupCount=10, mode='a') formatter = logging.Formatter( '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists; file_name %s" % cfg_file) sendToMe(subject="titleBased_trackingWebsite ERROR", body="config file not exists") sys.exit(0) def getDataFromVT(): logger.info(": extract data from tracker2 start") # extract dat from vtweb vt_TitleBasedTrackingWebsite_SQL = """ select a.trackingWebsite_id, b.website_type as websiteType, a.display_name as websiteName, b.website_domain as websiteDomain, b.country_id, CURRENT_TIMESTAMP as ETLDate
(reportDate, trackingWebsite_id, websiteDomain, alexaGlobalRank, alexaTopCountry, ETLDate) values (%s, %s, %s, %s, %s, %s) on duplicate key update alexaGlobalRank = values(alexaGlobalRank), alexaTopCountry = values(alexaTopCountry), ETLDate = values(ETLDate) """ try: target_mysql.insertUpdateCMD(insert_SiteBasedAlexa_SQL, alexa_info_tuple) print alexa_info_tuple target_mysql.commit() except MySQLdb.Error, e: logger.debug(e) sendToMe(subject = "update SiteBasedAlexa Error", body = re.sub(r'\'|"|!', "", str(e))) continue else: logger.info("has no data %s" %alexa_date_max) except Exception, e: logger.debug(": load data to SiteBasedAlexa %s" %e) sendToMe(subject = "SiteBasedAlexa ERROR", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0) finally: target_mysql.closeCur() target_mysql.closeConn() logger.info(":load data to SiteBasedAlexa end") sendToMe(subject = "SiteBasedAlexa End", body = "load data to SiteBasedAlexa end") if __name__ == "__main__": main() ################################################################################################################################# #sed "/\/home\/vobile\/cwj\/ViacomProject\/dashboard\/job/\/Job\/HBO\/Dashboard\/TitleBased/g"
def judgeFileExist(cfg_file, job_name):
    """Stop the job when its configuration file is missing.

    Returns None without side effects when ``cfg_file`` exists.  Otherwise a
    debug record is emitted on the root ``logging`` module, a mail with the
    subject "<job_name> ERROR" is sent through ``sendToMe`` and the process
    ends via ``sys.exit(0)``.
    """
    if os.path.exists(cfg_file):
        return

    logging.debug(": config file not exists")
    sendToMe(subject=job_name + " ERROR", body="config file not exists")
    sys.exit(0)
logger = logging.getLogger("monitorComplianceRate") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBased/log/monitorComplianceRate.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5 * 1024 * 1024, backupCount=10, mode='a') formatter = logging.Formatter( '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject="monitorComplianceRate ERROR", body="config file not exists") sys.exit(0) ################################################################################################################################# def monitor(): logger.info("monitor complaince rate start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db = getConfMysqlInfo( target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name=target_db, port=target_port,
clip_duration, matchedFile_id, meta_title, created_at, updated_at from HBO_DASHBOARD.matchedVideoTmp as a, HBO_DASHBOARD.matchedVideoSequence as b where a.id = b.id and a.company_id = b.company_id """ target_mysql.queryNoData("delete from HBO_DASHBOARD.matchedVideo") target_mysql.queryNoData(sql) target_mysql.commit() except Exception, e: logger.debug("extract data from 192.168.111.235 tracker2 error, %s" %e) sendToMe(subject = "extract data from 192.168.111.235 tracker2", body = re.sub(r'\'|"|!', "", str(e))) sys.exit(0) finally: target_mysql.closeConn() target_mysql.closeCur() sendToMe(subject = "matchedVideo end", body = "matchedVideo end") logger.info("load data target matchedVideo end") def dataTo_metaExtraInfo(): logger.info("load data target metaExtraInfo start") sendToMe(subject = "metaExtraInfo start", body = "metaExtraInfo start") get_data_sql = """ select meta_id, company_id, display_name, priority_type, created_at, updated_at from tracker2.metaExtraInfo where company_id = 34 """ insert_sql = """ insert into metaExtraInfo (meta_id, company_id, display_name, priority_type, created_at, updated_at) values(%s, %s, %s, %s, %s, %s)
target_mysql.commit() target_mysql.queryCMD(update_TitleBased1_SQL) target_mysql.commit() target_mysql.queryCMD(update_TitleBasedRemoveNum1_SQL) target_mysql.commit() except Exception, e: sendToMe(subject="update TrackingWebsite ERROR", body=e) logger.DEBUG("update TrackingWebsite data %s" % e) finally: target_mysql.closeCur() target_mysql.closeConn() sendToMe( subject="update TrackingWebsite end", body= "update TrackingWebsite table: SiteBased TitleBased1 TitleBasedRemoveNum1" ) logger.info(" update TrackingWebsite end") def updateCountry(): sendToMe(subject="update Country start", body="update Country table: SiteBased") logger.info("update Country for table SiteBased start") update_SiteBased_SQL = """ update SiteBased as a, TitleBasedCountry as b set a.hostCountry = b.countryName where a.country_id = b.country_id """
def main():
    """Run the titleBased_meta job: extract from tracker2, then load.

    Sends a start mail, passes whatever ``getDataFromVT`` returns straight to
    ``loadDataTo123``, and sends an end mail afterwards.
    """
    sendToMe(subject="titleBased_meta start",
             body="extract data from tracker2 start")

    loadDataTo123(getDataFromVT())

    sendToMe(subject="titleBased_meta End",
             body="load data to TitleBasedMeta end")
def getMatchedVideo(min_reportDate, max_reportDate, min_updateDate):
    """Copy one window of matchedVideo rows from vtweb into the target DB.

    The source query is a UNION ALL of two selections for company 14:

    * rows created after ``min_reportDate`` and up to ``max_reportDate``
      (and updated no later than ``max_reportDate``);
    * rows created on or before ``min_reportDate`` whose update date lies
      after ``min_updateDate`` and up to ``max_reportDate``.

    The source session time zone is set to '-8:00' before querying.  Results
    are upserted into the target ``matchedVideo`` table and committed.  Any
    exception is logged, mailed through ``sendToMe`` and ends the process via
    ``sys.exit(0)``.
    """
    logger.info(" extral data from tracker2.matchedVideo start")

    # Column list and FROM clause shared by both halves of the UNION ALL.
    # "%%" is a literal percent sign once the template is %-formatted below.
    # NOTE(review): neither WHERE clause contains a predicate relating alias
    # ``a`` to alias ``b`` -- confirm the join condition is not missing.
    select_from = (
        ' select a.id as matchedVideo_id, a.trackingMeta_id, a.trackingWebsite_id,'
        ' date_format(a.first_send_notice_date, "%%Y-%%m-%%d") as firstSendNoticeDate,'
        ' a.hide_flag as hideFlag, count_send_notice as countSendNotice,'
        ' date_format(a.created_at, "%%Y-%%m-%%d") as reportDate,'
        ' date_format(a.updated_at, "%%Y-%%m-%%d") as updateDate,'
        ' CURRENT_TIMESTAMP as ETLDate'
        ' from matchedVideo as a, mddb.trackingWebsite as b'
    )
    newly_created_filter = (
        ' where date_format(a.created_at, "%%Y-%%m-%%d") > "%(min_reportDate)s"'
        ' and date_format(a.created_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s"'
        ' and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s"'
        ' and a.company_id =14 and b.website_type = "ugc"'
        ' and a.created_at >= "2015-03-01"'
    )
    recently_updated_filter = (
        ' where date_format(a.created_at, "%%Y-%%m-%%d") <= "%(min_reportDate)s"'
        ' and date_format(a.updated_at, "%%Y-%%m-%%d") > "%(min_updateDate)s"'
        ' and date_format(a.updated_at, "%%Y-%%m-%%d") <= "%(max_reportDate)s"'
        ' and a.company_id =14 and b.website_type = "ugc"'
        ' and a.created_at >= "2015-03-01" '
    )
    query_template = (
        select_from + newly_created_filter
        + ' union all'
        + select_from + recently_updated_filter
    )

    insert_sql = (
        ' insert into matchedVideo'
        ' (matchedVideo_id, trackingMeta_id, trackingWebsite_id, firstSendNoticeDate,'
        ' hideFlag, countSendNotice, reportDate, updateDate, ETLDate)'
        ' values(%s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' firstSendNoticeDate = values(firstSendNoticeDate),'
        ' hideFlag = values(hideFlag),'
        ' countSendNotice = values(countSendNotice),'
        ' reportDate = values(reportDate),'
        ' updateDate = values(updateDate),'
        ' ETLDate =values(ETLDate) '
    )

    window = {
        "min_reportDate": min_reportDate,
        "max_reportDate": max_reportDate,
        "min_updateDate": min_updateDate,
    }

    try:
        get_data_sql = query_template % window

        vtweb_mysql = MySQLHelper(host=vt_host, user=vt_user, passwd=vt_passwd,
                                  port=vt_port, db_name=vt_db)
        vtweb_mysql.queryCMD("set time_zone = '-8:00'")
        data = vtweb_mysql.queryCMD(get_data_sql)

        target_mysql = MySQLHelper(host=target_host, user=target_user,
                                   passwd=target_passwd, db_name=target_db,
                                   port=target_port, charset='utf8')
        target_mysql.executeManyCMD(insert_sql, data)
        target_mysql.commit()
    except Exception as e:
        logger.debug("extral data from tracker2.matchedVideo, %s" % e)
        # NOTE(review): the subject names a different job (infringAllViews);
        # kept as-is because it is a runtime string.
        sendToMe(subject="titleBased_infringAllViews ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
def main():
    """Run the TitleBased -> TitleBased1 aggregation with start/end mails."""
    job_label = "titleBased_titleBased"
    description = "aggregate data from TitleBased to TitleBased1"

    sendToMe(subject="%s start" % job_label, body="%s start" % description)
    aggregateDataToTitleBased1()
    sendToMe(subject="%s end" % job_label, body="%s end" % description)
from titleBased import getConfMysqlInfo, getMinDatePara, commitInTurn from sendMail import sendToMe import re logger = logging.getLogger("titleBased_titleBased") logger.setLevel(logging.DEBUG) log_file = '/Job/VIACOM/Dashboard/TitleBased/log/titleBased_titleBased.log' filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a') formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sendToMe(subject = "titleBased_titleBased ERROR", body = "config file not exists") sys.exit(0) ################################################################################################################################# #target_server_section = "target_server_staging" #target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) ################################################################################################################################# def aggregateDataToTitleBased1(): logger.info(" aggregate data from TitleBased to TitleBased1 start") target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_TitleBased1_SQL = """ select a.reportDate,
def aggregateDataToTitleBased1():
    """Aggregate TitleBasedTmp into TitleBased1 on the target database.

    The inner query joins TitleBasedTmp with TitleBasedMeta and
    TitleBasedTrackingWebsite and groups by date, website and title; the
    outer query replaces each title with its MetaTitleMapTitle.mapTitle when
    one exists (``ifnull``) and groups again.  The result is upserted into
    TitleBased1 through ``commitInTurn`` with ``commit_num=50000``.

    Config lookup happens before the ``try`` block, so only failures from
    connecting onwards are logged, mailed through ``sendToMe`` and turned
    into ``sys.exit(0)``.
    """
    logger.info(" aggregate data from TitleBased to TitleBased1 start")

    # Summed measure columns, identical in the inner and the outer SELECT.
    measure_columns = (
        ' sum(matchedNum) as matchedNum,'
        ' sum(matchedNumDurationNoZero) as matchedNumDurationNoZero,'
        ' sum(infringingNum) as infringingNum,'
        ' sum(infringingNumDurationNoZero) as infringingNumDurationNoZero,'
        ' sum(clipDurationSum) as clipDurationSum,'
        ' sum(clipDurationInfringingSum) as clipDurationInfringingSum,'
        ' current_timestamp as ETLDate'
    )
    aggregate_TitleBased1_SQL = (
        ' select a.reportDate, a.trackingWebsite_id, a.websiteName, a.websiteType,'
        ' ifnull(b.mapTitle, a.title) title, min(a.tier) as tier,'
        + measure_columns +
        ' from (select a.reportDate, a.trackingWebsite_id, c.websiteName,'
        ' c.websiteType, b.title, min(a.tier) tier,'
        + measure_columns +
        ' from TitleBasedTmp as a, TitleBasedMeta as b, TitleBasedTrackingWebsite as c'
        ' where a.trackingWebsite_id = c.trackingWebsite_id'
        ' and a.trackingMeta_id = b.trackingMeta_id'
        ' group by 1, 2, 3, 4, 5) as a'
        ' left join MetaTitleMapTitle as b on a. title = b.metaTitle'
        ' group by 1, 2, 3, 4, 5 '
    )

    # NOTE(review): matchedNumDurationNoZero is assigned twice in the update
    # list and ``tier`` is not refreshed on duplicates -- confirm intent.
    insert_TitleBased1_SQL = (
        ' INSERT INTO TitleBased1'
        ' (reportDate, trackingWebsite_id, websiteName, websiteType, title, tier,'
        ' matchedNum, matchedNumDurationNoZero, infringingNum,'
        ' infringingNumDurationNoZero, clipDurationSum, clipDurationInfringingSum,'
        ' ETLDate)'
        ' VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        ' ON DUPLICATE KEY UPDATE'
        ' matchedNum = VALUES(matchedNum),'
        ' clipDurationSum = VALUES(clipDurationSum),'
        ' infringingNum = VALUES(infringingNum),'
        ' ETLDate = VALUES(ETLDate),'
        ' matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero),'
        ' matchedNumDurationNoZero = VALUES(matchedNumDurationNoZero),'
        ' infringingNumDurationNoZero = VALUES(infringingNumDurationNoZero),'
        ' clipDurationInfringingSum = VALUES(clipDurationInfringingSum) '
    )

    host, user, passwd, port, db = getConfMysqlInfo("target_server_staging")

    try:
        target_mysql = MySQLHelper(host=host, user=user, passwd=passwd,
                                   db_name=db, port=port, charset='utf8')
        TitleBased1_result = target_mysql.queryCMD(aggregate_TitleBased1_SQL)
        commitInTurn(
            commit_num=50000,
            data=TitleBased1_result,
            executeFun=target_mysql.insertUpdateCMD,
            commitFun=target_mysql.commit,
            executeSQL=insert_TitleBased1_SQL,
        )
    except Exception as e:
        logger.debug(": load data to TitleBased1, %s" % e)
        sendToMe(subject="titleBased_titleBased ERROR",
                 body=re.sub(r'\'|"|!', "", str(e)))
        sys.exit(0)
filehandler = logging.handlers.RotatingFileHandler(filename=log_file, maxBytes=5*1024*1024, backupCount=10, mode='a') formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') filehandler.setFormatter(formatter) logger.addHandler(filehandler) cfg_file = "/Job/VIACOM/Dashboard/TitleBased/conf/viacom_dashboard.cfg" if not os.path.exists(cfg_file): logging.debug(": config file not exists") sys.exit(0) ################################################################################################################################# target_server_section = "target_server_staging" target_host, target_user, target_passwd, target_port, target_db= getConfMysqlInfo(target_server_section) ################################################################################################################################# #-------------------------------------------------------------------------------------------------------------------------------- logger.info(" aggregate data from TitleBased1 to SiteBased start") sendToMe(subject = "SiteBased start", body = "aggregate data from TitleBased1 to SiteBased start") try: target_mysql = MySQLHelper(host=target_host, user=target_user, passwd=target_passwd, db_name = target_db, port = target_port, charset = 'utf8') aggregate_TitleBased1_SQL = """ select a.reportDate, a.trackingWebsite_id, b.websiteName, b.websiteType, b.websiteDomain, b.country_id, c.countryName as hostCountry, a.title, sum(matchedNum) as matchedNum, sum(matchedNumDurationNoZero) as matchedNumDurationNoZero,