Пример #1
0
def update_salaries(jobs=None, cities=None, df=None, skiprows=0,
                    table='salary', verbose_=True):
    """Scrape and store a salary row for each (job, city, state) missing
    from `table`.

    Parameters
    ----------
    jobs : pandas.DataFrame, optional
        Frame exposing a ``job`` column. When not supplied (``None`` or
        empty) it is read from ``PATH + 'jobs.txt'`` and the first
        `skiprows` rows are dropped.
    cities : pandas.DataFrame, optional
        Frame whose rows unpack as ``(city, state)``. When not supplied it
        is fetched with ``db.get_cities_from_db()``.
    df : pandas.DataFrame, optional
        Accumulator handed to ``scrape_indeed``. When not supplied an empty
        frame with columns ``job``, ``city``, ``state``, ``salary`` is used.
    skiprows : int
        Leading rows of the jobs file to skip; only used when `jobs` is
        loaded from the file.
    table : str
        Table name passed to ``db.queryNotInDb`` and ``db.to_sql``.
    verbose_ : bool
        When true, print the last row of the accumulator after each scrape.

    Returns
    -------
    The accumulator as returned by the last ``scrape_indeed`` call, or the
    initial accumulator when nothing was scraped.
    """
    # NOTE: `any()` is used instead of plain truthiness because a DataFrame
    # raises ValueError in a boolean context. For a DataFrame `any()` walks
    # the column labels, for a list its elements; `None` is checked first
    # because `any(None)` raises TypeError.

    # get jobs from text file
    if jobs is None or not any(jobs):
        jobs = pd.read_csv(PATH + 'jobs.txt')[skiprows:]
        #jobs.job = jobs.job.str.title()

    # get unique cities from postings
    if cities is None or not any(cities):
        cities = db.get_cities_from_db()

    if df is None or not any(df):
        columns = ['job', 'city', 'state', 'salary']
        #columns += ['n_postings', 'state_name']
        #columns += ['relative_salary', 'salaries_max', 'salaries_median',
        #            'trend_last2first', 'trend_median', 'trend_max']
        df = pd.DataFrame(columns=columns)

    for job, location in itertools.product(jobs.job.values, cities.values):
        city, state = location
        if db.queryNotInDb(job, city, state, table):
            df = scrape_indeed(job, city, state, df)
            newest_row = df.tail(1)
            if verbose_:
                # Single-argument print(...) parses on Python 2 and 3 alike.
                print(newest_row)
            # Only the last row of the accumulator is appended to the table.
            db.to_sql(newest_row, table, 'append', null=0)

    return df
Пример #2
0
def update_postings(jobs=None, cities=None, skiprows=0, force=False,
                    table='postings'):
    """Call ``indeed_api`` for each (job, city, state) missing from `table`.

    Parameters
    ----------
    jobs : pandas.DataFrame, optional
        Frame exposing a ``job`` column. When not supplied (``None`` or
        empty) it is read from ``PATH + 'jobs.txt'`` and the first
        `skiprows` rows are dropped.
    cities : pandas.DataFrame, optional
        Frame whose rows unpack as ``(city, state)``. When not supplied it
        is fetched with ``db.get_cities_from_db()``.
    skiprows : int
        Leading rows of the jobs file to skip; only used when `jobs` is
        loaded from the file.
    force : bool
        When true, ``indeed_api`` is called for every combination, even
        those ``db.queryNotInDb`` reports as already present.
    table : str
        Table name passed to ``db.queryNotInDb`` and ``indeed_api``.
    """
    # NOTE: `any()` is used instead of plain truthiness because a DataFrame
    # raises ValueError in a boolean context. For a DataFrame `any()` walks
    # the column labels, for a list its elements; `None` is checked first
    # because `any(None)` raises TypeError.

    # get jobs from text file
    if jobs is None or not any(jobs):
        jobs = pd.read_csv(PATH + 'jobs.txt')[skiprows:]
        #jobs.job = jobs.job.str.title()

    # get unique cities from postings
    if cities is None or not any(cities):
        cities = db.get_cities_from_db()

    for job, location in itertools.product(jobs.job.values, cities.values):
        city, state = location
        # Progress line; %-formatting keeps the space-separated output
        # identical whether print is a statement (Py2) or a function (Py3).
        print('%s %s %s' % (job, city, state))
        # The DB check stays first so it runs on every iteration, as before.
        if db.queryNotInDb(job, city, state, table) or force:
            indeed_api(job, city, state, table)