def get_jobs(self):
    """Scrape every hh.ru result page and return a list of Vacancy objects.

    Calls self.get_max_page() to populate self.max_page, then fetches each
    page via self.request_page() and parses the 'vacancy-serp-item' cards.

    Returns:
        list[Vacancy]: one entry per vacancy card found across all pages.
    """
    jobs = []
    self.get_max_page()
    for page_number in range(self.max_page):
        page_soup = self.request_page(page_number=page_number)
        vacancy_list = page_soup.find_all('div', 'vacancy-serp-item')
        for vacancy in vacancy_list:
            # The first anchor on the card carries both title and URL;
            # look it up once instead of twice.
            anchor = vacancy.find('a')
            title = anchor.text
            link = anchor['href']
            location = vacancy.find(
                'span', 'vacancy-serp-item__meta-info').text.split(',')[0]
            company = vacancy.find(
                'div', 'vacancy-serp-item__meta-info-company').text
            salary = vacancy.find(
                'div', 'vacancy-serp-item__sidebar').find('span')
            salary = salary.text if salary is not None else ''
            # The responsibility snippet is optional on some cards: fall
            # back to an empty description instead of raising IndexError
            # (the original indexed [0] unconditionally).
            snippet = vacancy.select(
                'div[data-qa="vacancy-serp__vacancy_snippet_responsibility"]')
            description = snippet[0].text if snippet else ''
            jobs.append(
                Vacancy(title=title, link=link, location=location,
                        company=company, salary=salary,
                        description=description))
    return jobs
def get_jobs(self):
    """Scrape every Stack Overflow jobs result page and return Vacancy objects.

    Result pages are 1-indexed on this site, hence range(1, max_page + 1).
    The listing markup carries no salary or description, so both are stored
    as empty strings.

    Returns:
        list[Vacancy]: one entry per '-job' card found across all pages.
    """
    jobs = []
    self.get_max_page()
    for page_number in range(1, self.max_page + 1):
        page_soup = self.request_page(page_number=page_number)
        vacancy_list = page_soup.find_all('div', {'class': '-job'})
        for vacancy in vacancy_list:
            # Look the heading up once; the original called find('h2') twice.
            heading = vacancy.find('h2')
            title = heading.text
            href = heading.find('a')['href']
            # hrefs here are site-relative and typically begin with '/';
            # strip it so the joined URL does not contain a double slash
            # (the original f-string produced 'stackoverflow.com//...').
            link = f'https://stackoverflow.com/{href.lstrip("/")}'
            company_data = vacancy.find('h3').find_all('span', recursive=False)
            company = company_data[0].text
            location = company_data[1].text
            jobs.append(
                Vacancy(title=title, link=link, location=location,
                        company=company, salary='', description=''))
    return jobs
} companydb.insert(new_company_info) company_object_id = companydb.find_one( {"name": company}) company_object_id = company_object_id["_id"] print(company_object_id) print("done") else: company_object_id = companydb.find_one( {"name": company}) company_object_id = company_object_id["_id"] print("Company already exists: ", company_object_id) # Vacany User vacancy_returned = Vacancy(vacancy_link, location_id, cookies) if vacancy_returned["Email"] == "": if vacancy_returned["Phone_Number"] == "": user_object_id = 100000000000000000000000 else: check = userdb.find_one({ "phone": vacancy_returned["Phone_Number"] }) if check is None: phone = vacancy_returned["Phone_Number"] add = phone.split(" ", 1) code = add[0] code = code.replace("+", "") number = add[1] number = number.replace(" ", "")
try: logo = Selector(response=page).xpath(f'/html/body/div[{div}]/div/img/@src').get() logo = "https://www.worknet.am" + logo except: logo = "" data = { "company" : company, "c_link" : c_link, "position" : position, "v_link" : v_link, "logo" : logo } returned = Vacancy(v_link) print(data) # COMPANY DATA # Check if company already exists in a collection check = companydb.find_one({"name" : company}) if check is None: new_company_info = { "name" : company, "url" : c_link, "industry" : "1", "logo" : logo, "created_at" : datetime.datetime.utcnow(), "websites" : returned["website"],
"custom_button_title": "Visit", "custom_button_url": c_link }, "country": "GE" } companydb.insert(new_company_info) company_object_id = companydb.find_one({"name": company}) company_object_id = company_object_id["_id"] print(company_object_id) print("done") else: company_object_id = companydb.find_one({"name": company}) company_object_id = company_object_id["_id"] print("Company already exists: ", company_object_id) vacancy_returned = Vacancy(v_link, cookies) # Vacancy User if vacancy_returned["email"] == "": user_object_id = 100000000000000000000000 else: check = userdb.find_one( {"email": vacancy_returned["email"]}) if check is None: new_user_info = { "email": vacancy_returned["email"], "company_id": ObjectId(f"{company_object_id}"), "created_at": datetime.datetime.utcnow() } userdb.insert(new_user_info) user_object_id = userdb.find_one(
attrs={ 'data-qa': 'vacancy-serp__vacancy-employer' }).text requirement = div.find( "div", attrs={ 'data-qa': 'vacancy-serp__vacancy_snippet_requirement' }).text salary = div.find('div', attrs={ 'data-qa': 'vacancy-serp__vacancy-compensation' }).text vac = Vacancy(title=title, href=href, company=company, requirement=requirement, salary=salary) vacancy_list.append(vac) except: pass for i in vacancy_list: print(i.get_title(), i.get_salary(), i.get_requirement()) data2 = { "title": [x.get_title() for x in vacancy_list], "href": [x.get_href() for x in vacancy_list], "company": [x.get_company() for x in vacancy_list], "requirement": [x.get_requirement() for x in vacancy_list],
# Fetch one hh.ru search-results page and persist every vacancy card found.
# Expects `url`, `headers`, `text_request`, `requests`, `bs` (BeautifulSoup)
# and `Vacancy` to be defined earlier in the file.
session = requests.Session()
req = session.get(url, headers=headers)
if req.status_code == 200:
    soup = bs(req.content, "html.parser")
    # Each vacancy card is a div tagged with this data-qa attribute.
    divs = soup.find_all("div", attrs={"data-qa": "vacancy-serp__vacancy"})
    for div in divs:
        try:
            title = div.find(
                "a", attrs={"data-qa": "vacancy-serp__vacancy-title"}).text
            company = div.find(
                "a", attrs={"data-qa": "vacancy-serp__vacancy-employer"}).text
            salary = div.find(
                "div",
                attrs={"data-qa": "vacancy-serp__vacancy-compensation"}).text
        except AttributeError:
            # A card missing one of the fields makes `.text` fail on None;
            # skip that card instead of aborting the page. The original bare
            # `except` also silently swallowed parse/DB errors from the
            # lines below — those now propagate so they are visible.
            continue
        vac = Vacancy(text_request, title, company, salary)
        vac.salaryParse()
        vac.save_to_db()
else:
    print("ERROR!", req.status_code)