def handle_row_data(this_data, create_new_ads):
    """Fill in blank PoliticalBuy fields from one row of scraped data.

    Resolves the PDF_File that ``this_data['raw_url']`` points at, fetches the
    PoliticalBuy related to that file, and copies each truthy value in
    ``this_data`` onto the ad buy attribute of the same name, but only when
    that attribute is currently falsy. Values already set on the ad buy are
    never overwritten.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one Python ends up binding.

    Args:
        this_data: dict describing one row. The 'raw_url' key is always read
            and 'total_spent_raw' is read once a PoliticalBuy has been found.
            A truthy 'total_spent_raw' is replaced in place with the result
            of clean_numeric().
        create_new_ads: when truthy, a PDF_File that is not in the database
            is created via enter_pdf_file() and an ad buy is made for it;
            when falsy, the missing file is reported and skipped.

    Returns:
        Always None.
    """
    # print(...) with a single argument behaves the same whether it is parsed
    # as the Python 2 print statement or called as the Python 3 function.
    pdffile = None
    raw_url = this_data['raw_url']

    # Does it exist as a pdf file?
    if raw_url:
        fcc_id = get_fcc_id(raw_url)
        fac_id, _path_parts = parse_file_url(raw_url)
        if fcc_id and fac_id:
            try:
                pdffile = PDF_File.objects.get(file_id=fcc_id,
                                               facility_id=fac_id)
            except PDF_File.DoesNotExist:
                if create_new_ads:
                    print("Missing file %s -- now creating" % raw_url)
                    pdffile = enter_pdf_file(this_data)
                    if pdffile:
                        # Called for its side effect; the ad buy itself is
                        # looked up again below.
                        make_ad_buy_from_pdf_file(pdffile.pk)
                else:
                    print("Missing file %s -- skipping" % raw_url)

    if not pdffile:
        # No file could be resolved, so there is no ad buy to update. Bail
        # out here rather than calling save() on a missing ad buy.
        return None

    try:
        adbuy = PoliticalBuy.objects.get(related_FCC_file=pdffile)
    except PoliticalBuy.DoesNotExist:
        # This shouldn't really happen...
        print("No PoliticalBuy found for ad buy %s" % pdffile)
        return None

    if this_data['total_spent_raw']:
        this_data['total_spent_raw'] = clean_numeric(
            this_data['total_spent_raw'])

    for key, new_value in this_data.items():
        try:
            current_value = getattr(adbuy, key)
        except AttributeError:
            # Row keys with no matching ad buy attribute are ignored.
            continue
        if not current_value and new_value:
            setattr(adbuy, key, new_value)
            print("Setting %s %s in %s" % (key, new_value, adbuy))

    # `auser` is not defined in this function -- presumably a module-level
    # user object expected by PoliticalBuy.save(); TODO confirm.
    adbuy.save(auser)
    return None
def handle_row_data(this_data, create_new_ads):
    """Fill in blank PoliticalBuy fields from one row of scraped data.

    Resolves the PDF_File that ``this_data['raw_url']`` points at, fetches the
    PoliticalBuy related to that file, and copies each truthy value in
    ``this_data`` onto the ad buy attribute of the same name, but only when
    that attribute is currently falsy. Values already set on the ad buy are
    never overwritten.

    NOTE(review): this function is defined twice in this file; the later
    definition is the one Python ends up binding.

    Args:
        this_data: dict describing one row. The 'raw_url' key is always read
            and 'total_spent_raw' is read once a PoliticalBuy has been found.
            A truthy 'total_spent_raw' is replaced in place with the result
            of clean_numeric().
        create_new_ads: when truthy, a PDF_File that is not in the database
            is created via enter_pdf_file() and an ad buy is made for it;
            when falsy, the missing file is reported and skipped.

    Returns:
        Always None.
    """
    # print(...) with a single argument behaves the same whether it is parsed
    # as the Python 2 print statement or called as the Python 3 function.
    pdffile = None
    raw_url = this_data['raw_url']

    # Does it exist as a pdf file?
    if raw_url:
        fcc_id = get_fcc_id(raw_url)
        fac_id, _path_parts = parse_file_url(raw_url)
        if fcc_id and fac_id:
            try:
                pdffile = PDF_File.objects.get(file_id=fcc_id,
                                               facility_id=fac_id)
            except PDF_File.DoesNotExist:
                if create_new_ads:
                    print("Missing file %s -- now creating" % raw_url)
                    pdffile = enter_pdf_file(this_data)
                    if pdffile:
                        # Called for its side effect; the ad buy itself is
                        # looked up again below.
                        make_ad_buy_from_pdf_file(pdffile.pk)
                else:
                    print("Missing file %s -- skipping" % raw_url)

    if not pdffile:
        # No file could be resolved, so there is no ad buy to update. Bail
        # out here rather than calling save() on a missing ad buy.
        return None

    try:
        adbuy = PoliticalBuy.objects.get(related_FCC_file=pdffile)
    except PoliticalBuy.DoesNotExist:
        # This shouldn't really happen...
        print("No PoliticalBuy found for ad buy %s" % pdffile)
        return None

    if this_data['total_spent_raw']:
        this_data['total_spent_raw'] = clean_numeric(
            this_data['total_spent_raw'])

    for key, new_value in this_data.items():
        try:
            current_value = getattr(adbuy, key)
        except AttributeError:
            # Row keys with no matching ad buy attribute are ignored.
            continue
        if not current_value and new_value:
            setattr(adbuy, key, new_value)
            print("Setting %s %s in %s" % (key, new_value, adbuy))

    # `auser` is not defined in this function -- presumably a module-level
    # user object expected by PoliticalBuy.save(); TODO confirm.
    adbuy.save(auser)
    return None
def enter_pdf_file(thisfile):
    """Create a PDF_File row for one scraped file entry.

    Metadata is derived from the pieces returned by parse_file_url() and,
    when a Broadcaster with the same facility_id exists, from that
    broadcaster. The row is keyed on ``raw_url`` via get_or_create().

    NOTE(review): this function is defined twice in this file; the later
    definition is the one Python ends up binding.

    Args:
        thisfile: dict describing the file. 'raw_url' is always read;
            'file_size' is read when 'raw_url' is truthy; 'datefound' is
            optional and is only used to work out the upload time.

    Returns:
        The newly created PDF_File, or None when a PDF_File with this
        raw_url already existed or when 'raw_url' is falsy.

    Raises:
        IndexError: if the parsed path has fewer components than the
            indexing below expects.
    """
    upload_time = None
    try:
        timefound = thisfile['datefound']
        # `todays_date` and `dateparse` come from outside this function --
        # presumably a module-level date string and a date parser; TODO
        # confirm.
        timefound = timefound.replace('Today at', todays_date)
        upload_time = dateparse(timefound)
    except Exception:
        # Best effort: a missing or unparseable 'datefound' just leaves
        # upload_time as None. Exception (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        pass

    raw_url = thisfile['raw_url']
    if not raw_url:
        message = "couldn't parse pdf file %s" % thisfile
        # Single-argument print(...) is valid in both Python 2 and 3.
        print(message)
        return None

    (facility_id, details) = parse_file_url(raw_url)

    ad_type = details[1]
    office = None
    district = None
    if ad_type == 'Federal':
        office = details[2]
        if office == 'US House':
            # They're not very consistent about this...
            district = details[3]

    # The second-to-last component after the leading one is used as the
    # name guess.
    name = details[1:][-2]

    # Hard truncate. This data's a mess.
    federal_office = office[:31] if office else None
    federal_district = district[:31] if district else None
    raw_name_guess = name[:255]

    callsign = None
    nielsen_dma = None
    community_state = None
    dma_id = None
    try:
        thisbroadcaster = Broadcaster.objects.get(facility_id=facility_id)
    except Broadcaster.DoesNotExist:
        # Unknown facility: the broadcaster-derived fields stay None.
        pass
    else:
        callsign = thisbroadcaster.callsign
        nielsen_dma = thisbroadcaster.nielsen_dma
        community_state = thisbroadcaster.community_state
        dma_id = thisbroadcaster.dma_id

    (pdffile, created) = PDF_File.objects.get_or_create(
        raw_url=raw_url,
        defaults={
            'size': thisfile['file_size'],
            'upload_time': upload_time,
            'ad_type': ad_type,
            'federal_office': federal_office,
            'federal_district': federal_district,
            'facility_id': facility_id,
            'callsign': callsign,
            'nielsen_dma': nielsen_dma,
            'dma_id': dma_id,
            'community_state': community_state,
            'raw_name_guess': raw_name_guess,
        })

    # Only a freshly created row is handed back; an existing row yields None.
    return pdffile if created else None
def enter_pdf_file(thisfile):
    """Create a PDF_File row for one scraped file entry.

    Metadata is derived from the pieces returned by parse_file_url() and,
    when a Broadcaster with the same facility_id exists, from that
    broadcaster. The row is keyed on ``raw_url`` via get_or_create().

    NOTE(review): this function is defined twice in this file; the later
    definition is the one Python ends up binding.

    Args:
        thisfile: dict describing the file. 'raw_url' is always read;
            'file_size' is read when 'raw_url' is truthy; 'datefound' is
            optional and is only used to work out the upload time.

    Returns:
        The newly created PDF_File, or None when a PDF_File with this
        raw_url already existed or when 'raw_url' is falsy.

    Raises:
        IndexError: if the parsed path has fewer components than the
            indexing below expects.
    """
    upload_time = None
    try:
        timefound = thisfile['datefound']
        # `todays_date` and `dateparse` come from outside this function --
        # presumably a module-level date string and a date parser; TODO
        # confirm.
        timefound = timefound.replace('Today at', todays_date)
        upload_time = dateparse(timefound)
    except Exception:
        # Best effort: a missing or unparseable 'datefound' just leaves
        # upload_time as None. Exception (rather than a bare except) keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        pass

    raw_url = thisfile['raw_url']
    if not raw_url:
        message = "couldn't parse pdf file %s" % thisfile
        # Single-argument print(...) is valid in both Python 2 and 3.
        print(message)
        return None

    (facility_id, details) = parse_file_url(raw_url)

    ad_type = details[1]
    office = None
    district = None
    if ad_type == 'Federal':
        office = details[2]
        if office == 'US House':
            # They're not very consistent about this...
            district = details[3]

    # The second-to-last component after the leading one is used as the
    # name guess.
    name = details[1:][-2]

    # Hard truncate. This data's a mess.
    federal_office = office[:31] if office else None
    federal_district = district[:31] if district else None
    raw_name_guess = name[:255]

    callsign = None
    nielsen_dma = None
    community_state = None
    dma_id = None
    try:
        thisbroadcaster = Broadcaster.objects.get(facility_id=facility_id)
    except Broadcaster.DoesNotExist:
        # Unknown facility: the broadcaster-derived fields stay None.
        pass
    else:
        callsign = thisbroadcaster.callsign
        nielsen_dma = thisbroadcaster.nielsen_dma
        community_state = thisbroadcaster.community_state
        dma_id = thisbroadcaster.dma_id

    (pdffile, created) = PDF_File.objects.get_or_create(
        raw_url=raw_url,
        defaults={
            'size': thisfile['file_size'],
            'upload_time': upload_time,
            'ad_type': ad_type,
            'federal_office': federal_office,
            'federal_district': federal_district,
            'facility_id': facility_id,
            'callsign': callsign,
            'nielsen_dma': nielsen_dma,
            'dma_id': dma_id,
            'community_state': community_state,
            'raw_name_guess': raw_name_guess,
        })

    # Only a freshly created row is handed back; an existing row yields None.
    return pdffile if created else None