def test_days_numeric(self):
    # "<N> päeva tagasi" is Estonian for "<N> days ago". Each phrase must be
    # converted to the date N days before current_date, for both the
    # "Avaldatud" ("Published") and "Uuendatud" ("Updated") prefixes.
    # The expected values imply that self.test_datetime (set up outside this
    # view) falls on 2021-02-14.
    cases = (
        ("Avaldatud 2 päeva tagasi", "2021.02.12"),
        ("Uuendatud 2 päeva tagasi", "2021.02.12"),
        ("Avaldatud 3 päeva tagasi", "2021.02.11"),
        ("Uuendatud 3 päeva tagasi", "2021.02.11"),
        ("Avaldatud 12 päeva tagasi", "2021.02.02"),
        ("Uuendatud 12 päeva tagasi", "2021.02.02"),
        # The remaining cases cross the month boundary into January.
        ("Avaldatud 14 päeva tagasi", "2021.01.31"),
        ("Uuendatud 14 päeva tagasi", "2021.01.31"),
        ("Avaldatud 30 päeva tagasi", "2021.01.15"),
        ("Uuendatud 30 päeva tagasi", "2021.01.15"),
    )
    for phrase, expected in cases:
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_month_numeric(self):
    # "<N> kuud tagasi" is Estonian for "<N> months ago". The expected dates
    # correspond to subtracting 30 days per month from current_date.
    cases = (
        # 2 months -> subtract 60 days
        ("Avaldatud 2 kuud tagasi", "2020.12.16"),
        ("Uuendatud 2 kuud tagasi", "2020.12.16"),
        # 3 months -> subtract 90 days
        ("Avaldatud 3 kuud tagasi", "2020.11.16"),
        ("Uuendatud 3 kuud tagasi", "2020.11.16"),
        # 5 months -> subtract 150 days
        ("Avaldatud 5 kuud tagasi", "2020.09.17"),
        ("Uuendatud 5 kuud tagasi", "2020.09.17"),
    )
    for phrase, expected in cases:
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_month_alphabetic(self):
    # "umbes üks kuu tagasi" is Estonian for "about one month ago". The
    # expected date corresponds to subtracting 45 days from current_date.
    expected = "2020.12.31"
    for phrase in (
        "Avaldatud umbes üks kuu tagasi",
        "Uuendatud umbes üks kuu tagasi",
    ):
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_days_alphabetic(self):
    # "üks päev tagasi" is Estonian for "one day ago": the spelled-out
    # number must resolve to the day before current_date.
    expected = "2021.02.13"
    for phrase in (
        "Avaldatud üks päev tagasi",
        "Uuendatud üks päev tagasi",
    ):
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_hours_alphabetic(self):
    # "umbes üks tund tagasi" is Estonian for "about one hour ago": the
    # result is expected to stay on the same calendar day as current_date.
    expected = "2021.02.14"
    for phrase in (
        "Avaldatud umbes üks tund tagasi",
        "Uuendatud umbes üks tund tagasi",
    ):
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_hours_numeric(self):
    # "umbes <N> tundi tagasi" is Estonian for "about <N> hours ago".
    # 14 hours back is expected to stay on the same day while 20 hours back
    # is expected to land on the previous day, so the time of day held by
    # self.test_datetime matters here (fixture not visible in this view).
    cases = (
        ("Avaldatud umbes 14 tundi tagasi", "2021.02.14"),
        ("Uuendatud umbes 14 tundi tagasi", "2021.02.14"),
        ("Avaldatud umbes 20 tundi tagasi", "2021.02.13"),
        ("Uuendatud umbes 20 tundi tagasi", "2021.02.13"),
    )
    for phrase, expected in cases:
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_minutes_numeric(self):
    # "umbes <N> minutit tagasi" is Estonian for "about <N> minutes ago":
    # both offsets are expected to stay on the same calendar day.
    expected = "2021.02.14"
    for phrase in (
        "Avaldatud umbes 30 minutit tagasi",
        "Uuendatud umbes 30 minutit tagasi",
        "Avaldatud umbes 45 minutit tagasi",
        "Uuendatud umbes 45 minutit tagasi",
    ):
        converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
        self.assertEqual(converted, expected)
def test_seconds_numeric(self):
    # "umbes 30 sekundit tagasi" is Estonian for "about 30 seconds ago":
    # the result is expected to stay on the same calendar day.
    # NOTE: only the "Avaldatud" ("Published") prefix is exercised here;
    # there is no "Uuendatud" ("Updated") counterpart in this test.
    phrase = "Avaldatud umbes 30 sekundit tagasi"
    converted = date_converter.convert_postdate(phrase, current_date=self.test_datetime)
    self.assertEqual(converted, "2021.02.14")
# Extract the fields of a single advert element and record them in
# found_adverts. `advert`, `job_title` and `found_adverts` are defined
# outside this fragment.
# NOTE(review): the enclosing loop/function header and the original
# indentation are not visible here; confirm the nesting of these statements
# (in particular whether the final print belongs after the per-advert loop).

# The logo link's href carries the advert path; only the advert id is kept,
# since the id alone is enough to address the advert (e.g. "cv.ee/12345678/").
# NOTE(review): the original comment gave the href as
# "/12345678/some-name-of-an-advertisement", for which index 2 would be the
# name rather than the id; index 2 is the id only if the path has a leading
# segment such as "/vacancy/". Verify against the live markup.
url = advert.find('a', {'class': 'vacancy-item__logo'})['href']
url = url.split("/")[2]

img_tag = advert.find('img')
if img_tag is not None:
    # src looks like "/api/v1/files-service/1461a4b3-1db3-4ac5-b736-26e67a93e706"
    # (the full address would be "https://cv.ee" + src).
    logo_url = img_tag['src']
    # Keep only the trailing file id ("1461a4b3-1db3-4ac5-b736-26e67a93e706"):
    # it's best to save as little as required to a database.
    logo_url = logo_url.split('/')[4]
else:
    logo_url = ''

info_block = advert.find('div', {'class': 'vacancy-item__info-main'})
company = info_block.find('a').text
# First three characters are dashes, skip them.
location = info_block.find('span', {'class': 'vacancy-item__locations'}).text[3:]

posted_when = advert.find('span', {'class': 'secondary-text'}).text
posted_when = date_converter.convert_postdate(posted_when)

# Short pause before requesting the advert's own page.
time.sleep(0.1)
# Close the HTTP response as soon as the page has been parsed instead of
# leaving the connection open until garbage collection.
with urlopen("https://cv.ee/vacancy/" + url + "/") as response:
    advert_soup = BeautifulSoup(response, 'html.parser')

end_date = advert_soup.find('span', {'class': 'vacancy-info__deadline'}).text.rstrip()
end_date = end_date[9:]  # erase "Tähtaeg: " from the beginning
end_date = date_converter.convert_enddate(end_date)

found_adverts += [(company, job_title, location, url, logo_url, posted_when, end_date)]

print(found_adverts)
#save(found_adverts)