def handle(self, *args, **options):
    """Upload every PDF_File that has a local copy but no S3 backup.

    For each matching row: push the local file to the
    'politicaladsleuth-assets' bucket under media/fcc_backup/ with a
    public-read ACL, then mark the row backed up and record the S3 key.
    Upload failures are logged and skipped so one bad file does not
    abort the run.
    """
    my_logger = fcc_logger()
    my_logger.info("starting backup run...")
    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    b = conn.get_bucket('politicaladsleuth-assets')
    k = Key(b)
    # Pull only the ids; each row is re-fetched inside the loop so the
    # is_backed_up flag is re-checked fresh before uploading.
    pdfs_to_backup = PDF_File.objects.filter(
        local_file_path__isnull=False, is_backed_up=False).values('id')
    num_to_process = len(pdfs_to_backup)
    print("Processing %s files" % num_to_process)
    count = 0
    for this_pdf_id in pdfs_to_backup:
        this_pdf = PDF_File.objects.get(pk=this_pdf_id['id'])
        if this_pdf.is_backed_up:
            print("already backed up!")
            continue
        count += 1
        if count % 100 == 0:
            # Lightweight progress indicator for long runs.
            print("Processed %s" % count)
        local_file_path = this_pdf.local_file_path
        full_file_path = SCRAPER_LOCAL_DOC_DIR + "/" + local_file_path
        # Local filenames encode directory separators as "%%"; restore
        # them so the S3 key mirrors the original directory layout.
        local_file_path = local_file_path.replace("%%", "/")
        s3_string = "media/fcc_backup/%s" % local_file_path
        k.key = s3_string
        try:
            k.set_contents_from_filename(full_file_path, policy='public-read')
        except Exception:
            # Narrowed from a bare `except:`, which would also have
            # swallowed KeyboardInterrupt/SystemExit. Best-effort: log
            # the failure and move on to the next file.
            tb = traceback.format_exc()
            message = "*BACKUP ERROR:* Error uploading %s\n%s" % (
                local_file_path, tb)
            print(message)
            my_logger.warn(message)
            continue
        # Only reached on a successful upload.
        this_pdf.is_backed_up = True
        this_pdf.s3_full_url = s3_string
        this_pdf.save()
def handle(self, *args, **options):
    """Back up local FCC PDF files to S3.

    Scans PDF_File rows that have a local_file_path but are not yet
    backed up, uploads each file to the 'politicaladsleuth-assets'
    bucket with a public-read ACL, and records success on the row.
    A failed upload is logged and skipped rather than aborting the run.
    """
    my_logger = fcc_logger()
    my_logger.info("starting backup run...")
    conn = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket('politicaladsleuth-assets')
    key = Key(bucket)
    # values('id') keeps the initial query light; rows are re-fetched
    # one at a time below so is_backed_up is checked against fresh data.
    pdfs_to_backup = PDF_File.objects.filter(
        local_file_path__isnull=False,
        is_backed_up=False,
    ).values('id')
    num_to_process = len(pdfs_to_backup)
    print("Processing %s files" % num_to_process)
    count = 0
    for this_pdf_id in pdfs_to_backup:
        this_pdf = PDF_File.objects.get(pk=this_pdf_id['id'])
        if this_pdf.is_backed_up:
            print("already backed up!")
            continue
        count += 1
        if count % 100 == 0:
            # Progress ping every 100 uploads.
            print("Processed %s" % count)
        local_file_path = this_pdf.local_file_path
        full_file_path = SCRAPER_LOCAL_DOC_DIR + "/" + local_file_path
        # "%%" is the on-disk stand-in for "/"; restore real separators
        # for the destination key.
        local_file_path = local_file_path.replace("%%", "/")
        s3_string = "media/fcc_backup/%s" % local_file_path
        key.key = s3_string
        try:
            key.set_contents_from_filename(
                full_file_path, policy='public-read')
        except Exception:
            # Was a bare `except:`; narrowed so KeyboardInterrupt and
            # SystemExit still propagate. Log and continue with the
            # remaining files.
            tb = traceback.format_exc()
            message = "*BACKUP ERROR:* Error uploading %s\n%s" % (
                local_file_path, tb)
            print(message)
            my_logger.warn(message)
            continue
        # Mark the row done only after the upload succeeded.
        this_pdf.is_backed_up = True
        this_pdf.s3_full_url = s3_string
        this_pdf.save()
""" This takes the place of the folder scraping routines that were built before there was an rss file available. """ from django.core.management.base import BaseCommand, CommandError from django.conf import settings from scraper.api_scraper import parse_api_feed from scraper.models import PDF_File, StationData from broadcasters.models import Broadcaster FCC_SCRAPER_LOG_DIRECTORY = getattr(settings, 'FCC_SCRAPER_LOG') from scraper.local_log import fcc_logger my_logger = fcc_logger() my_logger.info("starting fcc rss scrape...") class Command(BaseCommand): def handle(self, *args, **options): political_files = None if args: start_date = args[0] end_date = args[1] print "start_date=%s end_date=%s" % (start_date, end_date) political_files = parse_api_feed(start_date, end_date) else: political_files = parse_api_feed() for thisfile in political_files: if not thisfile: # if there's no details, continue
""" This takes the place of the folder scraping routines that were built before there was an rss file available. """ from django.core.management.base import BaseCommand, CommandError from django.conf import settings from scraper.rss_scraper import parse_xml_from_text, get_rss_from_web, get_rss_from_file from scraper.models import PDF_File, StationData from broadcasters.models import Broadcaster FCC_SCRAPER_LOG_DIRECTORY = getattr(settings, 'FCC_SCRAPER_LOG') from scraper.local_log import fcc_logger my_logger=fcc_logger() my_logger.info("starting fcc rss scrape...") def handle_file(thisfile): print thisfile [callsign, nielsen_dma, dma_id, community_state] = [None, None, None, None] try: thisbroadcaster = Broadcaster.objects.get(facility_id=thisfile['facility_id']) callsign = thisbroadcaster.callsign nielsen_dma = thisbroadcaster.nielsen_dma community_state = thisbroadcaster.community_state dma_id = thisbroadcaster.dma_id except Broadcaster.DoesNotExist: pass if not callsign: callsign = thisfile['callsign'] if thisfile['href']: