def main(root):
    """Load admissions figures from every ``*.pdf.html`` file under *root*.

    Each matching file is parsed with ``Parser``. For every institution in
    the parsed data, acceptance and enrollment rates are derived with
    ``derive_rate`` and the ``PublicAdmissions`` record for that institution
    and ``parser.year`` is created or updated. One progress line is printed
    per institution.

    Args:
        root: Directory that is searched (non-recursively) for files whose
            names end in ``.pdf.html``.
    """
    for path in glob.glob(os.path.join(root, '*.pdf.html')):
        parser = Parser(path)
        parser.parse()

        # NOTE(review): a matcher is built once per file. Hoisting it above
        # the loop looks possible but depends on InstitutionFuzzyMatcher
        # being stateless -- confirm before changing.
        matcher = InstitutionFuzzyMatcher()

        for name, data in parser.data.items():
            # Derive acceptance and enrollment rates
            acceptance_rate = derive_rate(data['accepted'], data['applied'])
            enrollment_rate = derive_rate(data['enrolled'], data['accepted'])

            # Resolve the parsed name to an institution object; kept in a
            # separate variable so the raw name is not overwritten.
            institution = matcher.match(name)

            # Create or update institution admissions for this year
            defaults = {
                'year_type': 'fall',
                'number_of_applicants': data['applied'],
                'number_admitted': data['accepted'],
                'number_admitted_who_enrolled': data['enrolled'],
                'percent_of_applicants_admitted': acceptance_rate,
                'percent_of_admitted_who_enrolled': enrollment_rate,
            }
            obj, _row_count = create_or_update(
                PublicAdmissions.objects,
                institution=institution,
                year=parser.year,
                defaults=defaults)

            # A truthy first element is reported as a creation, anything
            # else as an update.
            if obj:
                print('created %s %d admissions...' % (
                    institution.name, parser.year))
            else:
                print('updated %s %d admissions...' % (
                    institution.name, parser.year))
def main(path):
    """Load 2011 6-year graduation rates from the JSON file at *path*.

    The file is read as ISO-8859-1 text and parsed as JSON. The ``"text"``
    entries of the first top-level element are fed to ``Parser`` one by one.
    Every ``(name, bachelor_6yr)`` pair the parser yields is matched to an
    institution, and that institution's ``PublicGraduationRates`` record for
    2011 is created or updated. One progress line is printed per institution.

    Args:
        path: Path of the JSON file to import.
    """
    # Local import keeps this block self-contained; io.open decodes while
    # reading and the ``with`` block guarantees the handle is closed.
    import io

    # Parse 2011 6-year graduation rates
    with io.open(path, encoding="iso-8859-1") as json_file:
        page_data = json.load(json_file)[0]

    parser = Parser()
    for el in page_data["text"]:
        parser.feed(el)

    # Match institutions by name and create or update
    matcher = InstitutionFuzzyMatcher()
    for name, bachelor_6yr in parser.iter_results():
        institution = matcher.match(name)
        defaults = dict(bachelor_6yr=bachelor_6yr)
        obj, _row_count = create_or_update(
            PublicGraduationRates.objects,
            institution=institution,
            year=2011,
            defaults=defaults
        )

        # A truthy first element is reported as a creation, anything else
        # as an update.
        if obj:
            print("created %s graduation rates..." % institution.name)
        else:
            print("updated %s graduation rates..." % institution.name)
def main(path):
    """Import 2011 6-year graduation rates from the JSON file at *path*.

    The file is decoded as ISO-8859-1 and parsed as JSON; the items under the
    ``'text'`` key of the first top-level element are passed to
    ``Parser.feed`` in order. For each ``(name, bachelor_6yr)`` pair from
    ``parser.iter_results()``, the name is matched to an institution and the
    ``PublicGraduationRates`` record for that institution and year 2011 is
    created or updated. One progress line is printed per institution.

    Args:
        path: Path of the JSON file to import.
    """
    # Imported locally so the block does not rely on a file-level import;
    # io.open handles the decoding and the ``with`` block closes the file.
    import io

    # Parse 2011 6-year graduation rates
    with io.open(path, encoding='iso-8859-1') as source:
        page_data = json.load(source)[0]

    parser = Parser()
    for el in page_data['text']:
        parser.feed(el)

    # Match institutions by name and create or update
    matcher = InstitutionFuzzyMatcher()
    for name, bachelor_6yr in parser.iter_results():
        institution = matcher.match(name)
        defaults = dict(bachelor_6yr=bachelor_6yr)
        obj, _row_count = create_or_update(PublicGraduationRates.objects,
                                           institution=institution,
                                           year=2011,
                                           defaults=defaults)

        # The first element of the result decides which message is shown:
        # truthy means "created", otherwise "updated".
        if obj:
            print("created %s graduation rates..." % institution.name)
        else:
            print("updated %s graduation rates..." % institution.name)