Пример #1
0
from vivint.db.core import connect, session_factory
from vivint.db.models import School
from vivint.grab.ratemyprof import get_driver, get_school_url

# Install a stdout log handler, then create this module's logger.
# NOTE(review): this looks like Logbook's API (application-wide handler);
# the imports for StreamHandler, Logger and sys are not visible in this
# chunk -- confirm.
StreamHandler(sys.stdout).push_application()
logger = Logger(__name__)

# process constants
# NOTE(review): MAX_WORKERS and TIMEOUT_SECS are not used in the visible code;
# presumably a worker-pool size and a timeout in seconds -- confirm at the
# call sites.
MAX_WORKERS = 15
TIMEOUT_SECS = 15
# Database URL pointing at a SQLite file under ./data.
CONN_STRING = 'sqlite:///./data/school-data.original.db'
# DuckDuckGo search URL template; `{query}` must be filled in before use.
SEARCH_URL = 'https://duckduckgo.com/?q={query}&t=h_&ia=web'

# setup the database
# `get_session` is a factory: find_school_url_on_rmp() calls it with no
# arguments to obtain a session and closes that session itself.
# NOTE(review): connect/session_factory come from vivint.db.core; echo and
# pool_recycle look like SQLAlchemy create_engine options (SQL echoing and
# connection recycling after 3600 s) -- confirm against that module.
engine = connect(CONN_STRING, echo=True, pool_recycle=3600)
get_session = session_factory(engine)


def find_school_url_on_rmp(row):
    driver = get_driver()
    session = get_session()

    name, sid = row

    if driver is None:
        logger.error('SKIPPING ' + name)
        session.close()
        return

    try:
Пример #2
0
    'Institution_City', 'Institution_State', 'Institution_Zip',
    'Institution_Phone', 'Institution_OPEID', 'Institution_IPEDS_UnitID',
    'Institution_Web_Address', 'Campus_ID', 'Campus_Name', 'Campus_Address',
    'Campus_City', 'Campus_State', 'Campus_Zip', 'Campus_IPEDS_UnitID',
    'Accreditation_Type', 'Agency_Name', 'Agency_Status', 'Program_Name',
    'Accreditation_Status', 'Accreditation_Date_Type', 'Periods', 'Last Action'
]

# NOTE(review): not referenced in the visible code; presumably matched against
# the 'Last Action' column to drop schools -- confirm at the call site.
skip_actions = ['Resigned', 'Terminated']

# Record type with one field per CSV column: each header name is turned into
# an attribute name by replacing spaces with underscores and lower-casing it
# (e.g. 'Last Action' -> 'last_action').
School = namedtuple(
    'School',
    ' '.join(column.replace(' ', '_').lower() for column in csv_header),
)

# setup the database
# `get_session` is a factory: calling it with no arguments returns a session.
# NOTE(review): connect/session_factory are project helpers whose bodies are
# not visible here; echo and pool_recycle look like SQLAlchemy create_engine
# options (SQL echoing and connection recycling after 3600 s) -- confirm.
engine = connect('sqlite:///./data/school-data.original.db', echo=True, pool_recycle=3600)
get_session = session_factory(engine)


# grab the raw list of schools from the CSV file
def get_schools(from_file=None):
    """Yield one ``School`` record per data row of a CSV file.

    The first row is treated as the header and skipped; every later row is
    unpacked positionally into ``School``. This is a generator, so the file
    is only opened (and any error raised) once iteration starts.

    Args:
        from_file: Path of the comma-separated file to read. The ``None``
            default is not a usable value: ``open()`` rejects it with
            ``TypeError``.

    Yields:
        School: one record per data row, in file order.

    Raises:
        OSError: if the file cannot be opened.
        TypeError: if ``from_file`` is ``None`` or a row's field count does
            not match the fields of ``School``.
    """
    # The csv module requires newline='' so the reader handles line endings
    # itself; otherwise "\r\n" inside quoted fields is silently rewritten.
    with open(from_file, newline='') as csv_in:
        reader = csv.reader(csv_in, delimiter=',', quotechar='"')
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration inside the generator.
        next(reader, None)
        for row in reader:
            yield School(*row)


# get all the database rows where there is no URL set
session = get_session()