示例#1
0
    # Import CFDA programs from the CSV file at path `file`.
    #
    # For every data row this finds (or creates, unsaved) the Program with the
    # row's program number, attaches the Agency whose code is the integer value
    # of the number's first two characters, and then walks self.FIELD_MAPPINGS
    # to run extra parsing on the 'obligations' and 'types_of_assistance'
    # columns.
    # NOTE(review): the visible part of this function ends inside the
    # FIELD_MAPPINGS loop; whether `date`, `re_writer`, `new_program_count`
    # and the open file `f` are used or closed later cannot be seen from here.
    def import_programs(self, file):
        date = datetime.today()
        new_program_count = 0
        f = open(file, 'rU')
        # file[-9:-4] is the five characters that precede the last four of the
        # path (e.g. a ".csv" suffix); int() raises ValueError if they are not digits.
        this_version = int(file[-9:-4]) #pull the date off of the programs csv file
        #updated kevin's regex to include 2010 funding
        # Groups: (two-character year, " est. " / " est " / " " separator,
        # amount made of digits and commas).
        # NOTE(review): the class "[0,6-9]" also accepts a literal comma, and the
        # "." in " est. " is unescaped so it matches any character - confirm.
        re_funding = re.compile('FY ([0-1][0,6-9]{1,1})( est. | est | )[\$]([0-9,]+)')
        # Captures the text between each pair of parentheses (non-greedy).
        re_funding_type = re.compile('\((.*?)\)')
        # Keyword probes applied to the parenthesised funding-type label below.
        re_exclude = re.compile('[sS]alaries')
        re_loan = re.compile('[lL]oan')
        re_guar = re.compile('[gG]uarantee')
        re_insur = re.compile('[iI]nsur')
        
        #regex to pull account numbers ONLY out of free text
        account = re.compile('[\d]{2}[-][\d]{4}[-][\d]{1}[-][\d]{1}[-][\d]{3}')

        # Opening with 'w' creates/truncates csv/regex_check.csv immediately;
        # the writer itself is not used in the visible part of the function.
        re_writer = csv.writer(open('csv/regex_check.csv', 'w')) 

        reader = csv.reader(f)
        reader.next() # skip headers
        while True:
            try:
                row = reader.next()
            # Bare except: StopIteration at end of file ends the loop, but so
            # does any other error raised while reading a row.
            except:
                break

            # An empty row terminates the whole import (break, not continue).
            if not row:
                break
            
            # Rows with fewer than 10 columns are skipped. The len(row) == 0
            # half can never be true here because empty rows already broke out.
            if len(row) == 0 or len(row) < 10:
                continue 

            program_number = row[1].strip()
            matching_programs = Program.objects.filter(program_number=program_number)
           
            # No existing program: start a new, not-yet-saved Program and count it.
            if len(matching_programs)==0:
                matching_program = Program()
                new_program_count += 1
            else:
                matching_program = matching_programs[0]
       
            # Agency lookup is best effort: on any failure (non-numeric prefix,
            # no such agency, ...) the error is printed and the import continues
            # without assigning an agency.
            try:
                agency = Agency.objects.get(code=int(program_number[:2]))
                matching_program.agency = agency
            
            except Exception,e:
                print "cfda program: %s, %s" % (program_number, e)
        
            # i is the CSV column index, s the mapped field name for that column.
            for (i,s) in enumerate(self.FIELD_MAPPINGS):
                # Unmapped columns and column index 2 get no special handling.
                if s is None or i==2:
                    continue

                elif s == 'obligations':
                    # do obligations parsing
                    try:
                        clean_obs = smart_unicode(un.kill_gremlins(row[i]))
                        matches = re_funding.findall(clean_obs)
                        type_matches = re_funding_type.findall(clean_obs)
                        # Obligation objects already written for this cell, so
                        # repeated (year, type) amounts are summed, not replaced.
                        edited = []
                        # curr_type is the parenthesised label currently in
                        # effect: the first one found, or 'default' if none.
                        type_iter = iter(type_matches)
                        if type_matches:
                            curr_type = type_iter.next()
                        else:
                            curr_type = 'default'
                        curr_year = '2000'
                        # NOTE: `tuple` (and `type` below) shadow the builtins
                        # for the remainder of this function.
                        for tuple in matches:
                            year = '20' + tuple[0]

                            # Years are compared as strings. A year lower than
                            # the previous match advances to the next label
                            # (presumably each label's figures are listed in
                            # ascending year order - verify against the data).
                            if year < curr_year:
                                try:
                                    curr_type = type_iter.next()

                                # Out of labels: keep using the last one.
                                except StopIteration:
                                    pass
                                                                    
                            curr_year = year
                                 
                            # Amounts under a label mentioning salaries are ignored.
                            if len(re_exclude.findall(curr_type)) <= 0:
                                obligation = tuple[2].replace(",", "")
                                
                                # Labels mentioning guarantee / loan / insur all map to type 2.
                                if len(re_guar.findall(curr_type)) > 0: type = 2  #guarantees and insurance had their own types but it was getting complicated so I collapsed them
                                elif len(re_loan.findall(curr_type)) > 0: type = 2
                                elif len(re_insur.findall(curr_type)) > 0: type = 2
                                else:
                                    # Otherwise decide from the program's first
                                    # assistance type: codes 5, 6 and 7 give
                                    # type 2, anything else type 1. Any error
                                    # (e.g. no assistance types) also gives 1.
                                    try: 
                                        assist_types = matching_program.types_of_assistance.all()
                                        if assist_types[0].code == 6:
                                            type = 2
                                        elif assist_types[0].code == 7:
                                            type = 2
                                        elif assist_types[0].code == 5:
                                            type = 2
                                        else:
                                            type = 1

                                    except Exception:
                                        type = 1
                                    
                                matching_obligation = ProgramObligation.objects.filter(program=matching_program, fiscal_year=int(year), type=type)
                                # Write only when there is no stored obligation, or the
                                # stored one comes from the same or an older CFDA version.
                                if len(matching_obligation) == 0 or matching_obligation[0].cfda_version <= this_version:
                                    try:
                                        #either it doesn't exist yet or this is a newer version of cfda
                                        if len(matching_obligation) == 0:
                                            matching_ob = ProgramObligation(program=matching_program, fiscal_year=int(year), type=type)
                                        else:
                                            matching_ob = matching_obligation[0]
                                        
                                        if not matching_ob.corrected: #if it's been corrected don't update it
                                            matching_ob.cfda_version = this_version
                                            # Membership uses the model's equality
                                            # (presumably primary-key based - verify).
                                            if matching_ob in edited:
                                                #there are multiple line items for this type, year and program in the obligation text, so we add instead of replacing
                                                matching_ob = edited[edited.index(matching_ob)]
                                                matching_ob.obligation += int(obligation)
                                            else:
                                                matching_ob.obligation = int(obligation)
                                                edited.append(matching_ob)
                                            
                                            # delta = USASpending figure minus CFDA figure,
                                            # with a missing value on either side counted as 0.
                                            matching_ob.delta = (matching_ob.usaspending_obligation or 0) - (matching_ob.obligation or 0)
                                            # weighted_delta = delta relative to the CFDA figure;
                                            # any failure (e.g. a zero obligation) stores 0.
                                            # NOTE(review): if both operands are ints, "/" floors
                                            # under Python 2 - confirm the field types.
                                            try:
                                                matching_ob.weighted_delta = matching_ob.delta / matching_ob.obligation
                                            except:
                                                matching_ob.weighted_delta = 0
    
                                            matching_ob.save()

                                    # Per-amount failures are printed and the next match is tried.
                                    except Exception, e:
                                        print "in obs %s" % e


                    # Any failure while parsing the cell as a whole is printed
                    # and the import moves on to the next column.
                    except Exception, e:
                        print "in obs exception %s" % e
                        print "\n"

                elif s == 'types_of_assistance':
                    # do extra assistance classifying
                    # `test` records whether an entry was recognised. It is set once per
                    # cell and only cleared after a miss is reported, so after the first
                    # hit in a cell later unrecognised entries are not reported.
                    test = ''
                    try:
                        # The cell is a ';'-separated list; leading/trailing '.' are dropped first.
                        asst_types = smart_unicode(un.kill_gremlins(row[i])).strip('.').split(';')
                        for asst in asst_types:

                            clean_asst = asst.lower().strip().replace("\n", "")

                            # Each option is (code, name) optionally followed by a
                            # sequence of alternate names at index 2.
                            for type_tuple in AssistanceType.CODE_OPTIONS:
                                if clean_asst == type_tuple[1].lower():
                                            
                                    matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0])
                                    if len(matching_assistance_relations) == 0:
                                        #need to add
                                        matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0]))
                                        matching_program.save()
                                    
                                    test = 'match'

                                elif len(type_tuple) > 2:
                                    for other_name in type_tuple[2]:
                                        if clean_asst == other_name.lower():
                                            matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0])
                                            if len(matching_assistance_relations) == 0:
                                                #need to add
                                                matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0]))
                                                matching_program.save()
                                    # NOTE(review): this sits at the `elif` level, not inside the
                                    # alternate-name comparison, so it marks a match for every
                                    # option that has alternates even when none of them matched.
                                    test = 'match'

                            if test != 'match':
                                print "Assistance type didn't match: %s" % asst
                                test = ''

                    # Failures in this column are printed (with a 'bla' suffix) and ignored.
                    except Exception, e:
                        print str(e) + 'bla'
示例#2
0
                                                matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0]))
                                                matching_program.save()
                                    test = 'match'

                            if test != 'match':
                                print "Assistance type didn't match: %s" % asst
                                test = ''

                    except Exception, e:
                        print str(e) + 'bla'

                elif s == 'account_identification':
                    # do extra accounts parsing
                    try:
                        #account is a regex described above 
                        accts = account.findall(un.kill_gremlins(row[i]))

                        for a in accts:
                            matching_accounts = ProgramAccount.objects.filter(account_number=a)
                            if len(matching_accounts) == 0:
                                matching_account = ProgramAccount(account_number=a)
                                matching_account.save()

                            else:
                                matching_account = matching_accounts[0]
                            
                            if matching_account not in matching_program.account_identification.all():
                                matching_program.account_identification.add(matching_account)
                                matching_program.save()

                    except Exception, e:
示例#3
0
                                            matching_ob.weighted_delta = str(matching_ob.weighted_delta)
                                            matching_ob.save()

                                    except Exception, e:
                                        print "in obs %s" % e


                    except Exception, e:
                        print "in obs exception %s" % e
                        print "\n"

                elif s == 'types_of_assistance':
                    # do extra assistance classifying
                    test = ''
                    try:
                        asst_types = smart_unicode(un.kill_gremlins(row[i])).strip('.').split(';')
                        for asst in asst_types:

                            clean_asst = asst.lower().strip().replace("\n", "")

                            for type_tuple in AssistanceType.CODE_OPTIONS:
                                if clean_asst == type_tuple[1].lower():
                                            
                                    matching_assistance_relations = matching_program.types_of_assistance.filter(code=type_tuple[0])
                                    if len(matching_assistance_relations) == 0:
                                        #need to add
                                        matching_program.types_of_assistance.add(AssistanceType.objects.get(code=type_tuple[0]))
                                        matching_program.save()
                                    
                                    test = 'match'
示例#4
0
    def import_programs(self, file):
        date = datetime.today()
        new_program_count = 0
        f = open(file, 'rU')
        this_version = int(
            file[-9:-4])  #pull the date off of the programs csv file
        #updated kevin's regex to include 2010 funding
        re_funding = re.compile(
            'FY ([20]*[0-1][0,1,6-9]{1,1})( est. | est | )[\$]([0-9,]+)')
        re_funding_type = re.compile('\((.*?)\)')
        re_exclude = re.compile('[sS]alaries')
        re_loan = re.compile('[lL]oan')
        re_guar = re.compile('[gG]uarantee')
        re_insur = re.compile('[iI]nsur')

        #regex to pull account numbers ONLY out of free text
        account = re.compile('[\d]{2}[-][\d]{4}[-][\d]{1}[-][\d]{1}[-][\d]{3}')

        re_writer = csv.writer(open('csv/regex_check.csv', 'w'))

        reader = csv.reader(f)
        reader.next()  # skip headers
        while True:
            try:
                row = reader.next()
            except:
                break

            if not row:
                break

            if len(row) == 0 or len(row) < 10:
                continue

            program_number = row[1].strip()
            matching_programs = Program.objects.filter(
                program_number=program_number)

            if len(matching_programs) == 0:
                matching_program = Program()
                new_program_count += 1
            else:
                matching_program = matching_programs[0]

            try:
                agency = Agency.objects.get(code=int(program_number[:2]))
                matching_program.agency = agency

            except Exception, e:
                print "cfda program: %s, %s" % (program_number, e)

            for (i, s) in enumerate(self.FIELD_MAPPINGS):
                if s is None or i == 2:
                    continue

                elif s == 'obligations':
                    # do obligations parsing
                    try:
                        clean_obs = smart_unicode(un.kill_gremlins(row[i]))
                        matches = re_funding.findall(clean_obs)
                        type_matches = re_funding_type.findall(clean_obs)
                        edited = []
                        type_iter = iter(type_matches)
                        if type_matches:
                            curr_type = type_iter.next()
                        else:
                            curr_type = 'default'
                        curr_year = '2000'
                        for tuple in matches:
                            if len(tuple[0]) == 2:
                                year = '20' + tuple[0]

                            if year < curr_year:
                                try:
                                    curr_type = type_iter.next()

                                except StopIteration:
                                    pass

                            curr_year = year

                            if len(re_exclude.findall(curr_type)) <= 0:
                                obligation = tuple[2].replace(",", "")

                                if len(re_guar.findall(curr_type)) > 0:
                                    type = 2  #guarantees and insurance had their own types but it was getting complicated so I collapsed them
                                elif len(re_loan.findall(curr_type)) > 0:
                                    type = 2
                                elif len(re_insur.findall(curr_type)) > 0:
                                    type = 1
                                else:
                                    try:
                                        assist_types = matching_program.types_of_assistance.all(
                                        )
                                        if assist_types[0].code == 6:
                                            type = 2
                                        elif assist_types[0].code == 7:
                                            type = 2
                                        elif assist_types[0].code == 5:
                                            type = 2
                                        else:
                                            type = 1

                                    except Exception:
                                        type = 1

                                matching_obligation = ProgramObligation.objects.filter(
                                    program=matching_program,
                                    fiscal_year=int(year),
                                    type=type)
                                if len(matching_obligation
                                       ) == 0 or matching_obligation[
                                           0].cfda_version <= this_version:
                                    try:
                                        #either it doesn't exist yet or this is a newer version of cfda
                                        if len(matching_obligation) == 0:
                                            matching_ob = ProgramObligation(
                                                program=matching_program,
                                                fiscal_year=int(year),
                                                type=type)
                                        else:
                                            matching_ob = matching_obligation[
                                                0]

                                        if not matching_ob.corrected:  #if it's been corrected don't update it
                                            matching_ob.cfda_version = this_version
                                            if matching_ob in edited:
                                                #there are multiple line items for this type, year and program in the obligation text, so we add instead of replacing
                                                matching_ob = edited[
                                                    edited.index(matching_ob)]
                                                matching_ob.obligation += int(
                                                    obligation)
                                            else:
                                                matching_ob.obligation = int(
                                                    obligation)
                                                edited.append(matching_ob)

                                            matching_ob.delta = (
                                                matching_ob.
                                                usaspending_obligation
                                                or 0) - (matching_ob.obligation
                                                         or 0)

                                            try:
                                                matching_ob.weighted_delta = float(
                                                    matching_ob.delta) / float(
                                                        matching_ob.obligation)
                                            except (ZeroDivisionError,
                                                    DivisionByZero):
                                                if fabs(matching_ob.delta) > 0:
                                                    matching_ob.weighted_delta = float(
                                                        1.0)
                                                else:
                                                    matching_ob.weighted_delta = float(
                                                        0.0)
                                            except Exception, e:
                                                print "Generic exception: %s" % str(
                                                    e)
                                                matching_ob.weighted_delta = float(
                                                    0.0)
                                            except:
                                                print "Untyped exception caught."

                                            matching_ob.weighted_delta = str(
                                                matching_ob.weighted_delta)
                                            matching_ob.save()

                                    except Exception, e:
                                        print "in obs %s" % e
示例#5
0
                                            matching_ob.weighted_delta = str(
                                                matching_ob.weighted_delta)
                                            matching_ob.save()

                                    except Exception, e:
                                        print "in obs %s" % e

                    except Exception, e:
                        print "in obs exception %s" % e
                        print "\n"

                elif s == 'types_of_assistance':
                    # do extra assistance classifying
                    test = ''
                    try:
                        asst_types = smart_unicode(un.kill_gremlins(
                            row[i])).strip('.').split(';')
                        for asst in asst_types:

                            clean_asst = asst.lower().strip().replace("\n", "")

                            for type_tuple in AssistanceType.CODE_OPTIONS:
                                if clean_asst == type_tuple[1].lower():

                                    matching_assistance_relations = matching_program.types_of_assistance.filter(
                                        code=type_tuple[0])
                                    if len(matching_assistance_relations) == 0:
                                        #need to add
                                        matching_program.types_of_assistance.add(
                                            AssistanceType.objects.get(
                                                code=type_tuple[0]))
                                        matching_program.save()
示例#6
0
    def import_programs(self, file_name):
        """Import CFDA program descriptions from a CSV file and mail a summary.

        Each data row is matched to a ProgramDescription by its program
        number (column 1); a new, unsaved one is created when none exists.
        The agency is looked up from the first two characters of the program
        number. Every column is then copied onto the attribute named at the
        same position in ``self.FIELD_MAPPINGS``, column 24 is additionally
        passed to ``self.parseObligations``, and the program is saved.

        After the file has been processed an e-mail is sent to the addresses
        in ``settings.ADMINS``: either the list of newly added programs or a
        notice that nothing new was found.

        Args:
            file_name: Path to the programs CSV. The five characters in front
                of the final four (e.g. in front of ".csv") must be digits;
                they are used as the CFDA version number.

        Raises:
            ValueError: If those five characters are not an integer.

        Errors raised by the ORM lookups/saves, by ``parseObligations`` and by
        ``send_mail`` (called with ``fail_silently=False``) are not caught
        here; the input file is closed in every case.
        """
        new_programs = []

        # The context manager guarantees the file is closed even when a row
        # fails part-way through the import.
        with open(file_name, 'rU') as f:
            this_version = int(
                file_name[-9:-4])  # pull the date off of the programs csv file

            reader = csv.reader(f)
            next(reader, None)  # skip headers; tolerate a completely empty file
            while True:
                try:
                    row = next(reader)
                except StopIteration:
                    break
                except csv.Error as e:
                    # Still best effort (a malformed line ends the import),
                    # but the reason is reported instead of being swallowed.
                    print("csv error, stopping import: %s" % e)
                    break

                # A blank line marks the end of the data.
                if not row:
                    break

                # Too few columns to be a program row.
                if len(row) < 10:
                    continue

                program_number = row[1].strip()
                program_title = row[0].strip()
                matching_programs = ProgramDescription.objects.filter(
                    program_number=program_number)

                if matching_programs:
                    matching_program = matching_programs[0]
                else:
                    matching_program = ProgramDescription()
                    new_programs.append("%s - %s" %
                                        (program_number, program_title))
                    print("new program: %s" % program_number)

                matching_program.agency = Agency.objects.get(
                    cfda_code=program_number[:2])

                for (i, s) in enumerate(self.FIELD_MAPPINGS):
                    prepared_string = smart_unicode(un.kill_gremlins(row[i]),
                                                    errors='ignore')
                    setattr(matching_program, s, prepared_string)
                    if i == 1:
                        # we have the program vitals, save so we can use as
                        # foreign key for other attributes
                        matching_program.save()
                    if i == 24:
                        # column 24 is the text handed to the obligations parser
                        self.parseObligations(prepared_string,
                                              matching_program, this_version)

                matching_program.save()

        admins = [ad[1] for ad in settings.ADMINS]

        if new_programs:
            subject = "New CFDA Programs"
            mail_text = "CFDA programs added on %s\n" % datetime.now()
            mail_text += "".join("%s\n" % n for n in new_programs)
        else:
            subject = "No New CFDA Programs - Cron ran successfully"
            mail_text = ""

        send_mail(subject,
                  mail_text,
                  '*****@*****.**',
                  admins,
                  fail_silently=False)

        print("Run complete. \n%s new programs were added" %
              len(new_programs))