def search():
    region1 = request.args.get("region1")
    region2 = request.args.get("region2")
    items = get_danggn_items(region1=region1, region2=region2)
    save_to_csv(items, filename="danggn.csv")
    return render_template("danggn.html", items=items, filename="danggn.csv")
def save_file():
    try:
        word = request.args.get("word")
        if not word:
            raise Exception()
        word = word.lower()
        jobs = db.get(word)
        if not jobs:
            raise Exception()
        save.save_to_csv(jobs)
        return send_file(
            "jobs.csv",
            mimetype="text/csv",
            attachment_filename=f"{word}.csv",
            as_attachment=True,
        )
    except:
        return redirect("/")
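# The route above (and several of the scraper scripts below) calls a save_to_csv
# helper from a local save module that is not shown here. A minimal sketch of
# what such a helper might look like, assuming it receives a list of dicts and
# writes them out with csv.DictWriter (the default filename and the
# uniform-dict assumption are hypothetical):
import csv


def save_to_csv(rows, filename="jobs.csv"):
    """Write a list of dicts to a CSV file (sketch of the assumed interface)."""
    if not rows:
        return
    with open(filename, "w", newline="", encoding="utf-8") as f:
        # Use the keys of the first row as the header row.
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)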
        '-f', '--filename',
        type=str,
        default=FILE_DEFAULT,
        help=f"File to save to. Default name is {FILE_DEFAULT}",
    )
    return new_parser


if __name__ == '__main__':
    parser = create_argparser()
    args = parser.parse_args()
    file_path = os.path.abspath(args.filename)

    logger.info(f"Starting poller version {__version__}.")
    logger.info(f"Time between polls: {args.time} seconds")
    logger.info(f"Saving to file: {file_path}")

    while True:
        logger.info("Scraping data...")
        scraped_data = scrape.scrape_inara_cgs()
        ongoing_cgs = [
            data for data in scraped_data if data['status'] == 'Ongoing'
        ]
        logger.info("Saving data...")
        save.save_to_csv(file_path, ongoing_cgs)
        logger.info(f"Complete. Next poll in {args.time} seconds.")
        time.sleep(int(args.time))
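# The argparse fragment above begins partway through an add_argument call, so
# the head of create_argparser() is not shown. A minimal sketch of what the
# full function plausibly looks like, assuming a --time option (args.time is
# read in the main loop) and a FILE_DEFAULT constant defined elsewhere; the
# description text and both default values below are hypothetical:
import argparse

FILE_DEFAULT = 'cgs.csv'   # assumed; the real constant lives elsewhere
TIME_DEFAULT = 3600        # hypothetical default poll interval, in seconds


def create_argparser():
    """Build the command-line parser for the poller (sketch)."""
    new_parser = argparse.ArgumentParser(
        description="Poll Inara community goals and save them to CSV.")
    new_parser.add_argument(
        '-t', '--time',
        type=int,
        default=TIME_DEFAULT,
        help=f"Seconds to wait between polls. Default is {TIME_DEFAULT}",
    )
    new_parser.add_argument(
        '-f', '--filename',
        type=str,
        default=FILE_DEFAULT,
        help=f"File to save to. Default name is {FILE_DEFAULT}",
    )
    return new_parser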
from head_hunter import get_all_pages_hh, get_elements_from_hh
from so import get_all_pages_so, get_elements_from_so
from save import save_to_csv

# Collect all vacancies from every page of each site.
hh_jobs = get_elements_from_hh(get_all_pages_hh())
so_jobs = get_elements_from_so(get_all_pages_so())

all_jobs = hh_jobs + so_jobs
save_to_csv(all_jobs)
from base import extract_pages, extract_jobs
from save import save_to_csv

last_page = extract_pages()
jobs = extract_jobs(last_page)
save_to_csv(jobs)
from webdev import get_posts
from save import save_to_csv

posts = get_posts()
save_to_csv(posts)
import gspread

from indeed import search_ideed
from stackoverflow import search_so
from save import save_to_csv

# Search term.
search = 'python'

# Fetch the Indeed results.
result_indeed = search_ideed(search)
# Fetch the Stack Overflow results.
result_so = search_so(search)

# Combine both result sets.
all_results = result_indeed + result_so

# Write the combined results to jobs.csv.
save_to_csv(all_results)

# Upload the CSV data to the Google Sheet.
# Note: import_csv replaces the existing contents of the spreadsheet.
spreadsheetId = '1uRDAuGudRxYx77JH4b1wnYNflaFo0cemk3bG3ocTh_A'
gc = gspread.service_account(filename='credentials.json')
sh = gc.open_by_key(spreadsheetId)
worksheet = sh.sheet1
csvFile = 'jobs.csv'
sheetName = 'CSV'
content = open('jobs.csv', 'r').read()
gc.import_csv(spreadsheetId, content.encode('utf-8'))
links = ranklist.find_all('li')

# Build one dict per ranked posting.
totalRankList = []
for link in links[:-1]:
    rankDict = {}
    rankDict['rank'] = link.find("span", {"class": "num"}).contents[0]
    rankDict['name'] = link.find("a", {"class": "coLink"}).find("b").contents[0]
    rankDict['url'] = 'www.jobkorea.co.kr' + link.find("a", {"class": "link"}).attrs['href']
    rankDict['title'] = link.find("a", {"class": "link"}).find("span").contents[0]

    categoryLists = link.find("div", {"class": "sTit"}).find_all("span")
    categoryContents = []
    for categoryList in categoryLists:
        categoryContents.append(categoryList.contents[0])
    rankDict['category'] = categoryContents

    # rankDict['detail']
    rankDict['endDay'] = link.find("span", {"class": "day"}).contents[0]
    totalRankList.append(rankDict)

save_to_csv(totalRankList)
save_to_json(totalRankList)
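# save_to_json above comes from a local module that is not shown. A minimal
# sketch of such a helper, assuming it dumps the rank list to a JSON file
# (the default filename is hypothetical); ensure_ascii=False keeps the Korean
# text readable in the output:
import json


def save_to_json(rows, filename="rank.json"):
    """Dump a list of dicts to a JSON file (sketch of the assumed interface)."""
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(rows, f, ensure_ascii=False, indent=2)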
def get_jobs(page, city, job_type):
    Chrome_driver = webdriver.Chrome(options=options)
    c_code = city_code[city]
    for i in range(1, page + 1):
        try:
            print("Scraping data from page %s" % i)
            uri = '/%s/?query=%s&page=%s' % (c_code, job_type, i)
            Chrome_driver.get(config.url + uri)
            time.sleep(2)
            # The first results page nests the job list under div[3];
            # every later page uses div[2]. Only the index differs, so one
            # code path handles both cases.
            div_index = 3 if i == 1 else 2
            list_xpath = '//*[@id="main"]/div/div[%i]/ul' % div_index
            jobs = Chrome_driver.find_element_by_xpath(list_xpath)
            jobs_list = jobs.find_elements_by_tag_name('li')
            for job in range(1, len(jobs_list) + 1):
                job_dict = {}
                job_details = Chrome_driver.find_element_by_xpath(
                    '%s/li[%i]/div/div[1]/h3' % (list_xpath, job))
                job_details_uri = job_details.find_element_by_tag_name(
                    'a').get_attribute('href')
                job_details_name = job_details.find_element_by_xpath(
                    '%s/li[%i]/div/div[1]/h3/a/div[1]' % (list_xpath, job)).text
                job_details_salary = job_details.find_element_by_xpath(
                    '%s/li[%i]/div/div[1]/h3/a/span' % (list_xpath, job)).text
                job_company = Chrome_driver.find_element_by_xpath(
                    '%s/li[%i]/div/div[2]/div/h3' % (list_xpath, job)).text
                details = Chrome_driver.find_element_by_xpath(
                    '%s/li[%i]/div/div[1]/p' % (list_xpath, job)).get_attribute('outerHTML')
                job_rege = re.match(rege, details)
                job_dict['company_name'] = job_company
                job_dict['uri'] = job_details_uri
                job_dict['salary'] = job_details_salary
                try:
                    job_dict['site'] = job_rege.group(1)
                    job_dict['year'] = job_rege.group(2)
                    job_dict['edu'] = job_rege.group(3)
                except:
                    continue
                job_dict['job_name'] = job_details_name
                job_dict['city'] = city
                job_dict['job_type'] = job_type
                # Save the scraped record.
                try:
                    save_to_csv(job_dict, city)
                except:
                    raise
                time.sleep(1)
                print(job_dict)
        except:
            raise
    Chrome_driver.close()
    time.sleep(3)
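# The rege pattern, along with config, options, and city_code, is defined
# elsewhere in the project. A hypothetical example of what the pattern might
# look like, assuming the listing's <p> block separates location, years of
# experience, and education with <em class="vline"> markers (the exact markup
# is an assumption, not taken from the source):
import re

rege = re.compile(
    r'<p>(.*?)<em class="vline"></em>(.*?)<em class="vline"></em>(.*?)</p>')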