from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys, os

# Create a batch for a customer, then launch crawl_and_generate_results.py
# against it in the background, appending its output to a log file.
api_address = "http://127.0.0.1/rest_api/"

file_name = sys.argv[1]
batch_name = sys.argv[2]
customer_name = sys.argv[3]
secret_key = sys.argv[4]
access_id = sys.argv[5]

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)
batch_id = ca.create_batch(batch_name, customer_name)
print(batch_id)

# create_batch returns -1 on failure; only start the crawl for a real batch.
if batch_id != -1:
    os.system("python /var/www/tmeditor/python/crawl_and_generate_results.py "
              + str(batch_id) + " \"" + file_name + "\" \"" + access_id
              + "\" \"" + secret_key + "\" >> /tmp/crawl_generate_results.log 2>&1 &")
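# A hedged alternative (not in the original script) to the os.system() call
# above: subprocess.Popen with an argument list sidesteps the shell-quoting
# problems of building a command string from file_name, and still leaves the
# child running in the background. Same script path and log file as the
# original; a sketch, not a verified drop-in replacement.
import subprocess

log = open("/tmp/crawl_generate_results.log", "a")
subprocess.Popen(
    ["python", "/var/www/tmeditor/python/crawl_and_generate_results.py",
     str(batch_id), file_name, access_id, secret_key],
    stdout=log, stderr=subprocess.STDOUT)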
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys
import HelperMethods as h

# Upload a CSV, create a batch, attach the uploaded file to it, and crawl
# the batch via HelperMethods.
api_address = "http://127.0.0.1/rest_api/"
csv_upload_dir = "/opt/trillionmonkeys.com/tm/date/uploads/"

secret_key = sys.argv[1]
access_id = sys.argv[2]
csv_dir = sys.argv[3]
batch_name = sys.argv[4]
workflow = sys.argv[5]
run_instances = sys.argv[6]

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)

file_array = ca.UploadCsv(csv_dir)
batch_id = ca.create_batch(batch_name)
file_name = file_array['files'][0]['name']
status = ca.upload_batch_file(csv_upload_dir + file_name, file_name,
                              batch_id, workflow, run_instances)
print(batch_id)
print(status)

h.crawl_batch(batch_id, ca)
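# A minimal sketch (not part of the original scripts) of how a caller could
# wait for the crawl started above to finish. It assumes, as the crawl
# script below does, that get_batch_crawl_status_from_id() returns a dict
# whose "queued" field is 0 once no crawl work remains; the 60-second poll
# interval is an arbitrary choice.
import time

def wait_for_crawl(ca, batch_id, poll_seconds=60):
    """Block until the batch reports no queued crawl work."""
    while True:
        status = ca.get_batch_crawl_status_from_id(batch_id)
        if int(status["queued"]) == 0:
            return
        time.sleep(poll_seconds)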
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
from datetime import datetime
import sys, logging
import HelperMethods as h

# Crawl a batch and generate its results, unless a crawl for this batch is
# already queued; in that case flag the failure to Zabbix and exit.
secret_key = sys.argv[2]
access_id = sys.argv[3]
api_address = "http://127.0.0.1/rest_api/"

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)

log_file = "/tmp/crawl_generate_results.log"
logging.basicConfig(filename=log_file, level=logging.DEBUG,
                    format="%(levelname)-5s [%(asctime)s] %(message)s")

batch_id = int(sys.argv[1])
print(str(datetime.now()) + ": Start batch " + str(batch_id))

response = ca.get_batch_crawl_status_from_id(batch_id)
if int(response["queued"]) == 0:
    h.crawl_batch(batch_id, ca)
    #h.process_batch(batch_id, ca)
    #h.call_zabbix_sender(batch_id, 0)
else:
    logging.error("Already crawling/generating results for this batch. Exiting.")
    h.call_zabbix_sender(batch_id, 1)
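# A hedged guess (purely illustrative) at what HelperMethods.call_zabbix_sender()
# might do: push the batch's failure flag to Zabbix through the standard
# zabbix_sender CLI. The server address, host name, and item key below are
# assumptions, not values taken from the original code.
import subprocess

def call_zabbix_sender(batch_id, failed):
    subprocess.call([
        "zabbix_sender",
        "-z", "127.0.0.1",                           # assumed Zabbix server
        "-s", "content-analytics",                   # assumed monitored host
        "-k", "batch.crawl.failed[%d]" % batch_id,   # assumed item key
        "-o", str(failed),
    ])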
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys

api_address = "http://127.0.0.1/rest_api/"

primary_batch = sys.argv[1]
customer_name = sys.argv[2]
secret_key = sys.argv[3]
access_id = sys.argv[4]

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)
response = ca.getBatchesByCustomerName(primary_batch, customer_name)
print(response)
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys

# Fetch filters for a batch/chronicle. brand_id, category_id, and filters
# are optional positional arguments that default to 0 when omitted.
api_address = "http://127.0.0.1/rest_api/"

secret_key = sys.argv[1]
access_id = sys.argv[2]
batch_id = sys.argv[3]
chronicle_id = sys.argv[4]

brand_id = sys.argv[5] if len(sys.argv) > 5 else 0
category_id = sys.argv[6] if len(sys.argv) > 6 else 0
filters = sys.argv[7] if len(sys.argv) > 7 else 0

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)
response = ca.getFilters(batch_id, chronicle_id, brand_id, category_id, filters)
print(response)
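# The same optional-argument handling sketched with argparse, for clarity;
# equivalent in effect to the sys.argv checks above, but shown only as an
# alternative, not a change made to the original script.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("secret_key")
parser.add_argument("access_id")
parser.add_argument("batch_id")
parser.add_argument("chronicle_id")
parser.add_argument("brand_id", nargs="?", default=0)
parser.add_argument("category_id", nargs="?", default=0)
parser.add_argument("filters", nargs="?", default=0)
args = parser.parse_args()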
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys

api_address = "http://127.0.0.1/rest_api/"

batch_id = sys.argv[1]
secret_key = sys.argv[2]
access_id = sys.argv[3]

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)
response = ca.getAllDatesForBatch(batch_id)
print(response)
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
from datetime import datetime
import HelperMethods as h
import sys, logging, os, time

secret_key = sys.argv[1]
access_id = sys.argv[2]
api_address = "http://127.0.0.1/rest_api/"

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)

# Logging is already configured under the same name inside
# ContentAnalyticsAPI, so there is no basicConfig() call here.
logging.info("Checking to see if there are any batches scheduled to be crawled.")
print(str(datetime.now()) + ": Start crawling batches")

# strftime('%w') numbers the days of the week 0-6 (Sunday is 0), while the
# scheduler stores them as 1-7, so shift by one.
week_day = str(int(datetime.now().strftime('%w')) + 1)

# lstrip('0') drops the leading zero that %d pads with (e.g. '07' -> '7').
month_day = datetime.now().strftime('%d').lstrip('0')

logging.info("Week day: " + week_day)
logging.info("Month day: " + month_day)

logging.info("Checking weekly batches.")
batches_weekly = ca.get_all_batches_to_crawl(week_day, 1)
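# A quick sanity check (illustrative only, not part of the cron script) of
# the weekday shift above: %w reports Wednesday as '3', which the scheduler
# counts as day 4. The date used is just an arbitrary known Wednesday.
from datetime import datetime
assert str(int(datetime(2024, 1, 3).strftime('%w')) + 1) == '4'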
from ContentAnalyticsAPIWrapper import ContentAnalyticsAPI
import sys

api_address = "http://127.0.0.1/rest_api/"

secret_key = sys.argv[1]
access_id = sys.argv[2]

ca = ContentAnalyticsAPI(access_id, secret_key, api_address)

print("Median recalculation started.")
if ca.recalc_median_content_health_score():
    print("Median recalculation finished.")
else:
    print("Median recalculation failed.")