def update_salaries(jobs=None, cities=None, df=None, skiprows=0, table='salary', verbose_=True):
    """Scrape and store a salary row for each job/location pair missing from ``table``.

    Args:
        jobs: DataFrame with a ``job`` column. When not supplied (``None``,
            an empty list, or a frame without columns) it is read from
            ``PATH + 'jobs.txt'``.
        cities: DataFrame whose rows unpack into ``(city, state)``. When not
            supplied it is obtained from ``db.get_cities_from_db()``.
        df: DataFrame handed to ``scrape_indeed``. When not supplied an empty
            frame with the columns job, city, state, salary is created.
        skiprows: Number of leading rows dropped from the jobs file; only
            applied when ``jobs`` is loaded from the file.
        table: Table name passed to ``db.queryNotInDb`` and ``db.to_sql``.
        verbose_: When true, print the last row of ``df`` after each scrape.

    Returns:
        The frame returned by the last ``scrape_indeed`` call, or ``df``
        itself when no pair needed scraping.
    """
    columns = ['job', 'city', 'state', 'salary']
    # Extra columns that are currently disabled, kept for reference:
    #   'n_postings', 'state_name', 'relative_salary', 'salaries_max',
    #   'salaries_median', 'trend_last2first', 'trend_median', 'trend_max'

    def _unset(value):
        # any(frame) would test the truthiness of the column *labels*, so a
        # frame with a column named 0 looked "missing". Check for None or an
        # empty container instead (for a DataFrame: no columns at all).
        return value is None or len(getattr(value, 'columns', value)) == 0

    # get jobs from text file
    if _unset(jobs):
        jobs = pd.read_csv(PATH + 'jobs.txt')[skiprows:]
        # NOTE: title-casing of job names (jobs.job.str.title()) is disabled.

    # get unique cities from postings
    if _unset(cities):
        cities = db.get_cities_from_db()

    if _unset(df):
        df = pd.DataFrame(columns=columns)

    for job, location in itertools.product(jobs.job.values, cities.values):
        city, state = location
        # presumably true when this job/city/state is not yet stored in
        # `table` -- confirm against db.queryNotInDb
        if not db.queryNotInDb(job, city, state, table):
            continue
        df = scrape_indeed(job, city, state, df)
        if verbose_:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(df.tail(1))
        # Only the last row of df is written; assumes scrape_indeed put the
        # new result there -- TODO confirm.
        db.to_sql(df.tail(1), table, 'append', null=0)
    return df
def update_postings(jobs=None, cities=None, skiprows=0, force=False, table='postings'):
    """Call ``indeed_api`` for each job/location pair missing from ``table``.

    Args:
        jobs: DataFrame with a ``job`` column. When not supplied (``None``,
            an empty list, or a frame without columns) it is read from
            ``PATH + 'jobs.txt'``.
        cities: DataFrame whose rows unpack into ``(city, state)``. When not
            supplied it is obtained from ``db.get_cities_from_db()``.
        skiprows: Number of leading rows dropped from the jobs file; only
            applied when ``jobs`` is loaded from the file.
        force: When true, call ``indeed_api`` for every pair regardless of
            what ``db.queryNotInDb`` reports.
        table: Table name passed to ``db.queryNotInDb`` and ``indeed_api``.

    Returns:
        None.
    """
    def _unset(value):
        # any(frame) would test the truthiness of the column *labels*, so a
        # frame with a column named 0 looked "missing". Check for None or an
        # empty container instead (for a DataFrame: no columns at all).
        return value is None or len(getattr(value, 'columns', value)) == 0

    # get jobs from text file
    if _unset(jobs):
        jobs = pd.read_csv(PATH + 'jobs.txt')[skiprows:]
        # NOTE: title-casing of job names (jobs.job.str.title()) is disabled.

    # get unique cities from postings
    if _unset(cities):
        cities = db.get_cities_from_db()

    for job, location in itertools.product(jobs.job.values, cities.values):
        city, state = location
        # Same space-separated output as the old `print job, city, state`
        # statement, written so it runs on Python 2 and 3.
        print('%s %s %s' % (job, city, state))
        # The database check runs first, exactly as before; `force` only
        # overrides a negative answer.
        if db.queryNotInDb(job, city, state, table) or force:
            indeed_api(job, city, state, table)