Пример #1
0
def handle_row_data(this_data, create_new_ads):
    """Fill in blank fields on the PoliticalBuy that belongs to a scraped row.

    The row's 'raw_url' is resolved to an FCC file id and a facility id, the
    matching PDF_File is looked up (and optionally created), and every key of
    ``this_data`` that names an attribute of the related PoliticalBuy is
    copied onto it -- but only where the existing attribute is falsy and the
    incoming value is truthy.

    Args:
        this_data: dict for one scraped row. The keys 'raw_url' and
            'total_spent_raw' are read directly; 'total_spent_raw' is
            replaced in place with its clean_numeric() form when truthy.
        create_new_ads: when true, a missing PDF_File is created through
            enter_pdf_file() and an ad buy is made for it; when false the row
            is reported and skipped.

    Returns:
        Always None. The function is called for its database side effects.

    Raises:
        KeyError: if 'raw_url' is absent from ``this_data``, or if
            'total_spent_raw' is absent once a PDF_File has been found.
    """
    # NOTE: print(...) with a single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    pdffile = None
    # Both ids start out unset so that a row with an empty raw_url is skipped
    # by the check below instead of raising UnboundLocalError on fcc_id.
    fcc_id = None
    fac_id = None

    raw_url = this_data['raw_url']
    if raw_url:
        fcc_id = get_fcc_id(raw_url)
        fac_id, _path_parts = parse_file_url(raw_url)
    if not (fcc_id and fac_id):
        return None

    try:
        pdffile = PDF_File.objects.get(file_id=fcc_id, facility_id=fac_id)
    except PDF_File.DoesNotExist:
        if create_new_ads:
            print("Missing file %s -- now creating" % (raw_url))
            pdffile = enter_pdf_file(this_data)
            if pdffile:
                # Called for its side effect only; the buy is re-fetched
                # below so both code paths share one lookup.
                make_ad_buy_from_pdf_file(pdffile.pk)
        else:
            print("Missing file %s -- skipping" % (raw_url))

    if not pdffile:
        return None

    try:
        adbuy = PoliticalBuy.objects.get(related_FCC_file=pdffile)
    except PoliticalBuy.DoesNotExist:
        # This shouldn't really happen...
        print("No PoliticalBuy found for ad buy %s" % (pdffile))
        return None

    if this_data['total_spent_raw']:
        this_data['total_spent_raw'] = clean_numeric(
            this_data['total_spent_raw'])

    for key, new_value in this_data.items():
        try:
            current_value = getattr(adbuy, key)
        except AttributeError:
            # The row carries keys that are not PoliticalBuy attributes.
            continue
        # Never overwrite a value that is already populated.
        if not current_value and new_value:
            setattr(adbuy, key, new_value)
            print("Setting %s %s in %s" % (key, new_value, adbuy))

    # NOTE(review): `auser` is not defined in this function; presumably a
    # module-level user object expected by PoliticalBuy.save() -- confirm.
    adbuy.save(auser)
    return None
def handle_row_data(this_data, create_new_ads):
    """Fill in blank fields on the PoliticalBuy that belongs to a scraped row.

    The row's 'raw_url' is resolved to an FCC file id and a facility id, the
    matching PDF_File is looked up (and optionally created), and every key of
    ``this_data`` that names an attribute of the related PoliticalBuy is
    copied onto it -- but only where the existing attribute is falsy and the
    incoming value is truthy.

    Args:
        this_data: dict for one scraped row. The keys 'raw_url' and
            'total_spent_raw' are read directly; 'total_spent_raw' is
            replaced in place with its clean_numeric() form when truthy.
        create_new_ads: when true, a missing PDF_File is created through
            enter_pdf_file() and an ad buy is made for it; when false the row
            is reported and skipped.

    Returns:
        Always None. The function is called for its database side effects.

    Raises:
        KeyError: if 'raw_url' is absent from ``this_data``, or if
            'total_spent_raw' is absent once a PDF_File has been found.
    """
    # NOTE: print(...) with a single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    pdffile = None
    # Both ids start out unset so that a row with an empty raw_url is skipped
    # by the check below instead of raising UnboundLocalError on fcc_id.
    fcc_id = None
    fac_id = None

    raw_url = this_data['raw_url']
    if raw_url:
        fcc_id = get_fcc_id(raw_url)
        fac_id, _path_parts = parse_file_url(raw_url)
    if not (fcc_id and fac_id):
        return None

    try:
        pdffile = PDF_File.objects.get(file_id=fcc_id, facility_id=fac_id)
    except PDF_File.DoesNotExist:
        if create_new_ads:
            print("Missing file %s -- now creating" % (raw_url))
            pdffile = enter_pdf_file(this_data)
            if pdffile:
                # Called for its side effect only; the buy is re-fetched
                # below so both code paths share one lookup.
                make_ad_buy_from_pdf_file(pdffile.pk)
        else:
            print("Missing file %s -- skipping" % (raw_url))

    if not pdffile:
        return None

    try:
        adbuy = PoliticalBuy.objects.get(related_FCC_file=pdffile)
    except PoliticalBuy.DoesNotExist:
        # This shouldn't really happen...
        print("No PoliticalBuy found for ad buy %s" % (pdffile))
        return None

    if this_data['total_spent_raw']:
        this_data['total_spent_raw'] = clean_numeric(
            this_data['total_spent_raw'])

    for key, new_value in this_data.items():
        try:
            current_value = getattr(adbuy, key)
        except AttributeError:
            # The row carries keys that are not PoliticalBuy attributes.
            continue
        # Never overwrite a value that is already populated.
        if not current_value and new_value:
            setattr(adbuy, key, new_value)
            print("Setting %s %s in %s" % (key, new_value, adbuy))

    # NOTE(review): `auser` is not defined in this function; presumably a
    # module-level user object expected by PoliticalBuy.save() -- confirm.
    adbuy.save(auser)
    return None
Пример #3
0
def enter_pdf_file(thisfile):
    """Create a PDF_File record for one scraped file listing.

    The file's 'raw_url' is split by parse_file_url() into a facility id and
    a list of path segments; the ad type, federal office/district and a
    guessed name are taken from those segments, station details are copied
    from the matching Broadcaster when one exists, and the record is stored
    with PDF_File.objects.get_or_create() keyed on 'raw_url'.

    Args:
        thisfile: dict for one scraped file. Reads 'raw_url' and 'file_size',
            and 'datefound' when it is present.

    Returns:
        The PDF_File when this call created it. None when a record with the
        same raw_url already existed, or when 'raw_url' is empty.

    Raises:
        KeyError: if 'raw_url' is missing, or 'file_size' is missing for a
            non-empty url.
        IndexError: if the parsed url has fewer path segments than the
            fields read from it.
    """
    # The upload time is best-effort: a missing or unparseable 'datefound'
    # leaves it as None. Catch Exception rather than using a bare except so
    # KeyboardInterrupt and SystemExit are not swallowed.
    upload_time = None
    try:
        timefound = thisfile['datefound'].replace('Today at', todays_date)
        upload_time = dateparse(timefound)
    except Exception:
        pass

    raw_url = thisfile['raw_url']
    if not raw_url:
        message = "couldn't parse pdf file %s" % thisfile
        # Single-argument print(...) prints the same under Python 2 and 3.
        print(message)
        return None

    (facility_id, details) = parse_file_url(raw_url)

    ad_type = details[1]
    office = None
    district = None
    if ad_type == 'Federal':
        office = details[2]
        if office == 'US House':
            district = details[3]

    # They're not very consistent about this: the name guess is taken from
    # the second-to-last segment after the leading one is dropped.
    path = details[1:]
    name = path[-2]

    # hard truncate. This data's a mess.
    federal_office = office[:31] if office else None
    federal_district = district[:31] if district else None
    raw_name_guess = name[:255]

    # Station details stay None when the facility is not a known Broadcaster.
    callsign = None
    nielsen_dma = None
    community_state = None
    dma_id = None
    try:
        thisbroadcaster = Broadcaster.objects.get(facility_id=facility_id)
    except Broadcaster.DoesNotExist:
        pass
    else:
        callsign = thisbroadcaster.callsign
        nielsen_dma = thisbroadcaster.nielsen_dma
        community_state = thisbroadcaster.community_state
        dma_id = thisbroadcaster.dma_id

    (pdffile, created) = PDF_File.objects.get_or_create(
        raw_url=raw_url,
        defaults={
            'size': thisfile['file_size'],
            'upload_time': upload_time,
            'ad_type': ad_type,
            'federal_office': federal_office,
            'federal_district': federal_district,
            'facility_id': facility_id,
            'callsign': callsign,
            'nielsen_dma': nielsen_dma,
            'dma_id': dma_id,
            'community_state': community_state,
            'raw_name_guess': raw_name_guess,
        })

    # Only a newly created record is handed back; an already-existing one
    # yields None. No ad buy is made here -- that is left to the caller.
    if created:
        return pdffile
    return None
def enter_pdf_file(thisfile):
    """Create a PDF_File record for one scraped file listing.

    The file's 'raw_url' is split by parse_file_url() into a facility id and
    a list of path segments; the ad type, federal office/district and a
    guessed name are taken from those segments, station details are copied
    from the matching Broadcaster when one exists, and the record is stored
    with PDF_File.objects.get_or_create() keyed on 'raw_url'.

    Args:
        thisfile: dict for one scraped file. Reads 'raw_url' and 'file_size',
            and 'datefound' when it is present.

    Returns:
        The PDF_File when this call created it. None when a record with the
        same raw_url already existed, or when 'raw_url' is empty.

    Raises:
        KeyError: if 'raw_url' is missing, or 'file_size' is missing for a
            non-empty url.
        IndexError: if the parsed url has fewer path segments than the
            fields read from it.
    """
    # The upload time is best-effort: a missing or unparseable 'datefound'
    # leaves it as None. Catch Exception rather than using a bare except so
    # KeyboardInterrupt and SystemExit are not swallowed.
    upload_time = None
    try:
        timefound = thisfile['datefound'].replace('Today at', todays_date)
        upload_time = dateparse(timefound)
    except Exception:
        pass

    raw_url = thisfile['raw_url']
    if not raw_url:
        message = "couldn't parse pdf file %s" % thisfile
        # Single-argument print(...) prints the same under Python 2 and 3.
        print(message)
        return None

    (facility_id, details) = parse_file_url(raw_url)

    ad_type = details[1]
    office = None
    district = None
    if ad_type == 'Federal':
        office = details[2]
        if office == 'US House':
            district = details[3]

    # They're not very consistent about this: the name guess is taken from
    # the second-to-last segment after the leading one is dropped.
    path = details[1:]
    name = path[-2]

    # hard truncate. This data's a mess.
    federal_office = office[:31] if office else None
    federal_district = district[:31] if district else None
    raw_name_guess = name[:255]

    # Station details stay None when the facility is not a known Broadcaster.
    callsign = None
    nielsen_dma = None
    community_state = None
    dma_id = None
    try:
        thisbroadcaster = Broadcaster.objects.get(facility_id=facility_id)
    except Broadcaster.DoesNotExist:
        pass
    else:
        callsign = thisbroadcaster.callsign
        nielsen_dma = thisbroadcaster.nielsen_dma
        community_state = thisbroadcaster.community_state
        dma_id = thisbroadcaster.dma_id

    (pdffile, created) = PDF_File.objects.get_or_create(
        raw_url=raw_url,
        defaults={
            'size': thisfile['file_size'],
            'upload_time': upload_time,
            'ad_type': ad_type,
            'federal_office': federal_office,
            'federal_district': federal_district,
            'facility_id': facility_id,
            'callsign': callsign,
            'nielsen_dma': nielsen_dma,
            'dma_id': dma_id,
            'community_state': community_state,
            'raw_name_guess': raw_name_guess,
        })

    # Only a newly created record is handed back; an already-existing one
    # yields None. No ad buy is made here -- that is left to the caller.
    if created:
        return pdffile
    return None