示例#1
0
def load(source, s3=False):
    """
    Loads program data from a local or S3 file.

    For a local file, 'source' should be a CSV file path.
    For an s3 file, 'source' should be the file name of a CSV
    in the 'validated_program_data' folder on s3.

    Every row is passed through clean() and validated with
    ProgramSerializer. Valid rows are written to the Program identified by
    the row's school and program code (created when it does not exist yet).
    Rows with an invalid program code, or for which get_school() reports an
    error, are printed and skipped.

    Returns a tuple (failed, endmsg): 'failed' is a list with one message
    per invalid field, 'endmsg' summarizes how many programs were created
    and updated. When no data could be read, returns a one-item error list
    and an empty string.
    """
    new_programs = 0
    updated_programs = 0
    failed = []  # validation failure messages
    if s3:
        s3_url = ('https://files.consumerfinance.gov'
                  '/pb/paying_for_college/csv/validated_program_data/{}')
        raw_data = read_in_s3(s3_url.format(source))
    else:
        raw_data = read_in_data(source)
    # An empty result must be reported too, not only an empty first row;
    # indexing an empty list would raise IndexError.
    if not raw_data or not raw_data[0]:
        return (["ERROR: could not read data from {0}".format(source)], "")

    # enumerate() yields the real 1-based row position, even when two rows
    # have identical content (list.index() would report the first match).
    for row_number, row in enumerate(raw_data, start=1):
        # The test flag is decided per row, so a single test row does not
        # mark every row that follows it as a test program.
        test_value = row.get('test')
        test_program = bool(test_value) and test_value.lower() == 'true'

        serializer = ProgramSerializer(data=clean(row))
        if not serializer.is_valid():
            for key, error_list in serializer.errors.items():
                fail_msg = 'ERROR on row {}: {}: '.format(row_number, key)
                fail_msg += ''.join(' {},'.format(e) for e in error_list)
                failed.append(fail_msg)
            continue

        data = serializer.validated_data
        if not validate_pid(data['program_code']):
            print("ERROR: invalid program code: "
                  "{}".format(data['program_code']))
            continue
        (school, error) = get_school(data['ipeds_unit_id'])
        if error:
            print(error)
            continue

        program, created = Program.objects.get_or_create(
            institution=school, program_code=data['program_code'])
        if created:
            new_programs += 1
        else:
            updated_programs += 1

        program.accreditor = data['accreditor']
        program.cip_code = data['cip_code']
        program.completion_rate = data['completion_rate']
        program.default_rate = data['default_rate']
        program.mean_student_loan_completers = data[
            'mean_student_loan_completers']
        program.median_student_loan_completers = data[
            'median_student_loan_completers']
        program.program_code = data['program_code']
        # Free-text fields are stripped of control characters before saving.
        program.program_name = strip_control_chars(data['program_name'])
        program.program_length = data['program_length']
        # program.soc_codes = data['soc_codes']
        program.total_cost = data['total_cost']

        program.campus = strip_control_chars(data['campus_name'])
        program.level = data['program_level']
        program.time_to_complete = data['average_time_to_complete']
        program.salary = data['median_salary']
        program.job_rate = data['job_placement_rate']
        program.job_note = data['job_placement_note']
        program.tuition = data['tuition_fees']
        program.books = data['books_supplies']
        program.completers = data['completers']
        program.completion_cohort = data['completion_cohort']
        program.test = test_program
        program.save()

    endmsg = ('{} programs created. '
              '{} programs updated.'.format(new_programs, updated_programs))

    return (failed, endmsg)
示例#2
0
 def test_validate_pid(self):
     # Sample codes containing the characters ';', '<', '>', '{' or '}'
     # must all be rejected.
     for rejected in ('490<script>', '{value}', 'DROP TABLE;'):
         self.assertFalse(validate_pid(rejected))
     # A plain alphanumeric code must be accepted.
     self.assertTrue(validate_pid('108b'))
 def test_validate_pid(self):
     # Each rejected sample contains at least one of: ; < > { }
     rejected_codes = ["490<script>", "{value}", "DROP TABLE;"]
     accepted_code = "108b"

     for code in rejected_codes:
         self.assertFalse(validate_pid(code))
     self.assertTrue(validate_pid(accepted_code))
示例#4
0
def load(source, s3=False):
    """
    Loads program data from a local or S3 file.

    For a local file, 'source' should be a CSV file path.
    For an s3 file, 'source' should be the file name of a CSV
    in the 'validated_program_data' folder on s3.

    Every row is passed through clean() and validated with
    ProgramSerializer. Valid rows are written to the Program identified by
    the row's school and program code (created when it does not exist yet).
    Rows with an invalid program code, or for which get_school() reports an
    error, are printed and skipped.

    Returns a tuple (failed, endmsg): 'failed' is a list with one message
    per invalid field, 'endmsg' summarizes how many programs were created
    and updated. When no data could be read, returns a one-item error list
    and an empty string.
    """
    new_programs = 0
    updated_programs = 0
    failed = []  # validation failure messages
    if s3:
        # NOTE(review): this is a plain-HTTP endpoint; confirm whether an
        # HTTPS URL is available and should be used instead.
        s3_url = ('http://files.consumerfinance.gov.s3.amazonaws.com'
                  '/pb/paying_for_college/csv/validated_program_data/{}')
        raw_data = read_in_s3(s3_url.format(source))
    else:
        raw_data = read_in_data(source)
    # An empty result must be reported too, not only an empty first row;
    # indexing an empty list would raise IndexError.
    if not raw_data or not raw_data[0]:
        return (["ERROR: could not read data from {0}".format(source)], "")

    # enumerate() yields the real 1-based row position, even when two rows
    # have identical content (list.index() would report the first match).
    for row_number, row in enumerate(raw_data, start=1):
        # The test flag is decided per row, so a single test row does not
        # mark every row that follows it as a test program.
        test_value = row.get('test')
        test_program = bool(test_value) and test_value.lower() == 'true'

        serializer = ProgramSerializer(data=clean(row))
        if not serializer.is_valid():
            # .items() works on both Python 2 and 3; .iteritems() does not
            # exist on Python 3 dictionaries.
            for key, error_list in serializer.errors.items():
                fail_msg = 'ERROR on row {}: {}: '.format(row_number, key)
                fail_msg += ''.join(' {},'.format(e) for e in error_list)
                failed.append(fail_msg)
            continue

        data = serializer.validated_data
        if not validate_pid(data['program_code']):
            print("ERROR: invalid program code: "
                  "{}".format(data['program_code']))
            continue
        (school, error) = get_school(data['ipeds_unit_id'])
        if error:
            print(error)
            continue

        program, created = Program.objects.get_or_create(
            institution=school,
            program_code=data['program_code']
        )
        if created:
            new_programs += 1
        else:
            updated_programs += 1

        program.accreditor = data['accreditor']
        program.cip_code = data['cip_code']
        program.completion_rate = data['completion_rate']
        program.default_rate = data['default_rate']
        program.mean_student_loan_completers = data[
            'mean_student_loan_completers']
        program.median_student_loan_completers = data[
            'median_student_loan_completers']
        program.program_code = data['program_code']
        # Free-text fields are stripped of control characters before saving.
        program.program_name = strip_control_chars(data['program_name'])
        program.program_length = data['program_length']
        # program.soc_codes = data['soc_codes']
        program.total_cost = data['total_cost']

        program.campus = strip_control_chars(data['campus_name'])
        program.level = data['program_level']
        program.time_to_complete = data['average_time_to_complete']
        program.salary = data['median_salary']
        program.job_rate = data['job_placement_rate']
        program.job_note = data['job_placement_note']
        program.tuition = data['tuition_fees']
        program.books = data['books_supplies']
        program.completers = data['completers']
        program.completion_cohort = data['completion_cohort']
        program.test = test_program
        program.save()

    endmsg = ('{} programs created. '
              '{} programs updated.'.format(new_programs, updated_programs))

    return (failed, endmsg)
示例#5
0
 def test_validate_pid(self):
     # Characters the rejected samples exercise: ; < > { }
     unsafe_samples = ('490<script>', '{value}', 'DROP TABLE;')
     for sample in unsafe_samples:
         self.assertFalse(validate_pid(sample))

     # A short alphanumeric program code passes validation.
     self.assertTrue(validate_pid('108b'))