def ingest(cls, package="osp.fields", path="data/fields.csv"):

    """
    Ingest fields.

    Args:
        package (str): Dotted package that bundles the CSV resource.
        path (str): Path of the CSV file inside the package.
    """

    reader = read_csv(package, path)

    for row in reader:

        # Sanitize field names.
        pf = clean_field_name(row["Primary Field"])
        sf = clean_field_name(row["Secondary Field"])

        # Parse abbreviations.
        abbrs = parse_abbrs(row["ABBRV"])
        if abbrs:
            abbrs = filter_abbrs(abbrs)

        # A non-empty "Alpha Category" cell marks the start of a new
        # parent field — write the parent row. (`bool()` wrapper was
        # redundant; truthiness of the string is enough.)
        if row["Alpha Category"]:
            Field.create(name=pf)

        # Query for the parent field the subfield hangs off of.
        field = Field.select().where(Field.name == pf).first()

        if field:
            Subfield.create(name=sf, abbreviations=abbrs, field=field)
def ingest_world(cls, package='osp.institutions', path='data/world.csv', ):

    """
    Insert world universities.
    """

    for row in read_csv(package, path):

        # US rows are handled by the USA ingest.
        if row['country'] == 'US':
            continue

        # Normalize the URL and derive its domain.
        stripped_url = row['url'].strip()
        site_domain = parse_domain(stripped_url)

        try:
            cls.create(
                name=row['name'].strip(),
                url=stripped_url,
                domain=site_domain,
                state=None,
                country=row['country'].strip(),
            )

        # Ignore duplicate institutions.
        except IntegrityError:
            pass
def ingest(cls, package='osp.fields', path='data/fields.csv'):

    """
    Ingest fields.

    Args:
        package (str): Dotted package that bundles the CSV resource.
        path (str): Path of the CSV file inside the package.
    """

    reader = read_csv(package, path)

    for row in reader:

        # Sanitize field names.
        pf = clean_field_name(row['Primary Field'])
        sf = clean_field_name(row['Secondary Field'])

        # Parse abbreviations.
        abbrs = parse_abbrs(row['ABBRV'])
        if abbrs:
            abbrs = filter_abbrs(abbrs)

        # A non-empty 'Alpha Category' cell marks the start of a new
        # parent field — write the parent row. (`bool()` wrapper was
        # redundant; truthiness of the string is enough.)
        if row['Alpha Category']:
            Field.create(name=pf)

        # Query for the parent field the subfield hangs off of.
        field = Field.select().where(Field.name == pf).first()

        if field:
            Subfield.create(
                name=sf,
                abbreviations=abbrs,
                field=field,
            )
def ingest_usa(cls, package='osp.institutions', path='data/usa.csv', ):

    """
    Insert US universities.
    """

    for row in read_csv(package, path):

        # Only ingest rows flagged as US institutions.
        if row['e_country'] != 'USA':
            continue

        # Normalize the URL and derive its domain.
        clean_url = row['web_url'].strip()
        site_domain = parse_domain(clean_url)

        try:
            cls.create(
                name=row['biz_name'].strip(),
                url=clean_url,
                domain=site_domain,
                state=row['e_state'].strip(),
                country='US',
            )

        # Ignore duplicate institutions.
        except IntegrityError:
            pass
def insert_institutions(cls):

    """
    Write institution rows into the database.

    Loads the domestic and international institution CSVs and bulk-
    inserts each file's rows, wrapping every raw CSV row in a
    {"metadata": row} dict. The original body duplicated the whole
    read/insert sequence for each file; loop over the paths instead.
    """

    paths = ("data/institutions.csv", "data/institutions_intl.csv")

    for path in paths:

        reader = read_csv("osp.institutions", path)

        rows = [{"metadata": row} for row in reader]

        # Insert each file's rows in a single transaction, matching
        # the original's per-file transaction boundaries.
        with cls._meta.database.transaction():
            cls.insert_many(rows).execute()
def insert_institutions(cls):

    """
    Write institution rows into the database.
    """

    reader = read_csv(
        'osp.institutions',
        'data/institutions.csv',
    )

    # Wrap each raw CSV row under a 'metadata' key.
    records = [{'metadata': row} for row in reader]

    # Bulk-insert inside a single transaction.
    with cls._meta.database.transaction():
        cls.insert_many(records).execute()
def ingest_world(
    cls,
    package='osp.institutions',
    path='data/world.csv',
):

    """
    Insert world universities.

    Args:
        package (str): Dotted package that bundles the CSV resource.
        path (str): Path of the CSV file inside the package.
    """

    reader = read_csv(package, path)

    for row in map(strip_csv_row, reader):

        if row['country'] != 'US':

            try:
                cls.create(
                    name=row['name'],
                    url=row['url'],
                    country=row['country'],
                )

            # Only report duplicate rows, matching the sibling ingest
            # methods; a blanket `except Exception` here would hide
            # real bugs (bad keys, broken connections) behind a print.
            except IntegrityError as e:
                print(e)
def ingest_usa(
    cls,
    package='osp.institutions',
    path='data/usa.csv',
):

    """
    Insert US universities.

    Args:
        package (str): Dotted package that bundles the CSV resource.
        path (str): Path of the CSV file inside the package.
    """

    reader = read_csv(package, path)

    for row in map(strip_csv_row, reader):

        if row['e_country'] == 'USA':

            try:
                cls.create(
                    name=row['biz_name'],
                    url=row['web_url'],
                    state=row['e_state'],
                    country='US',
                )

            # Only report duplicate rows, matching the sibling ingest
            # methods; a blanket `except Exception` here would hide
            # real bugs (bad keys, broken connections) behind a print.
            except IntegrityError as e:
                print(e)