def test_insert(self):
    """Build an Offer from a single Educarriere offer page and save it.

    The offer is assembled the same way the cron does it: content and dates
    are extracted through ``EducarriereCron``, tags and degrees are resolved
    through ``OfferDao``, and the offer is then handed to
    ``OfferDao.create_or_update_offer``. The test passes when the result is
    an ``Offer`` or a tuple.

    NOTE(review): ``extract_content`` is given a live URL, so this test
    presumably needs network access and a reachable database -- confirm.
    """
    with self.app.app_context():
        title = 'Community manager senior'
        url = 'https://emploi.educarriere.ci/offre-68967-community-manager-senior.html'

        cron = EducarriereCron(page_number=1)
        content = cron.extract_content(url, cron.DETAILS_SELECTOR)
        dates = cron.extract_dates(content)
        pub_date, exp_date = dates[0], dates[1]

        offer = Offer(url, title, content, pub_date, exp_date)
        offer.set_type(cron.extract_type(offer.content))
        # `set_satus` (sic): spelling kept exactly as the Offer API is called.
        offer.set_satus(cron.PENDING)

        dao = OfferDao()
        offer.tags = dao.create_or_update_tags(offer)
        offer.degrees = dao.create_or_update_degrees(offer, cron)

        # Save to database
        result = dao.create_or_update_offer(offer)

        # isinstance also accepts subclasses of Offer / tuple, which an exact
        # type() comparison would reject.
        assert isinstance(result, (Offer, tuple))
def scrape_home_page(self, url):
    """
    Comb through url to extract content

    Downloads ``url``, selects the offer nodes matching
    ``self.OFFERS_SELECTOR`` and, for every node that carries a usable link,
    builds an ``Offer``, fills in its content, type, status, tags and
    degrees, and saves it through ``OfferDao``.

    :param url: address of the listing page to download.
    :return: None. Each offer is saved as a side effect.
    """
    # NOTE(review): no timeout is passed to requests.get, so a stalled
    # server blocks the whole scrape -- consider adding one.
    html_doc = requests.get(url).text
    soup = BeautifulSoup(html_doc, 'html.parser')
    dao = OfferDao()

    for node in soup.select(self.OFFERS_SELECTOR):
        # Data mapping
        # Select the title nodes once; they provide both the link and the title.
        title_nodes = node.select(self.TITLES_SELECTOR)

        # A distinct name keeps the `url` parameter intact, and `.get` lets a
        # title node without an href be skipped instead of raising KeyError.
        offer_url = "".join(x.get('href', '') for x in title_nodes)

        # Check whether we have a valid url
        # (anything of 10 characters or fewer is treated as not a real link).
        if len(offer_url) <= 10:
            continue

        title = "".join(x.get_text() for x in title_nodes)
        desc = "".join(
            x.get_text() for x in node.select(self.DESC_SELECTOR))

        dates = self.extract_dates(node.get_text())
        # if empty pub_date is always generated automatically
        pub_date = dates[0]
        exp_date = dates[1] if len(dates) > 1 else None

        # Extract additional details: degree, type of offers, etc.
        offer = Offer(offer_url, title, desc, pub_date, exp_date)
        offer.content = self.extract_content(offer_url, self.DETAILS_SELECTOR)
        offer.set_type(self.extract_type(offer.content))
        # `set_satus` (sic): spelling kept exactly as the Offer API is called.
        offer.set_satus(self.PENDING)
        offer.tags = dao.create_or_update_tags(offer)
        offer.degrees = dao.create_or_update_degrees(offer, self)

        if offer.tags:
            offer.set_image(offer.tags)

        # Save to database
        print('saving {}'.format(offer))
        dao.create_or_update_offer(offer)
def find_by_title(title):
    """Look up offers by title and return them via ``jsonify``.

    The result of ``OfferDao().find_by_title(title)`` is dumped with a
    many-item ``OfferSchema`` before being passed to ``jsonify``.

    :param title: value forwarded unchanged to ``OfferDao.find_by_title``.
    """
    serializer = OfferSchema(many=True)
    matches = OfferDao().find_by_title(title)
    return jsonify(serializer.dump(matches))
def tags():
    """Return every tag from ``OfferDao.get_tags`` via ``jsonify``.

    The tags are dumped with a many-item ``TagsSchema`` first.
    """
    serializer = TagsSchema(many=True)
    all_tags = OfferDao().get_tags()
    return jsonify(serializer.dump(all_tags))
def index():
    """Return the result of ``OfferDao().fetch(30)`` via ``jsonify``.

    The fetched offers are dumped with a many-item ``OfferSchema`` first.
    """
    serializer = OfferSchema(many=True)
    offers = OfferDao().fetch(30)
    return jsonify(serializer.dump(offers))
def test_offer_by_title(self):
    """Check that ``OfferDao.find_by_title`` returns a list for 'Finance'."""
    offers = OfferDao().find_by_title('Finance')
    # isinstance also accepts list subclasses, which an exact type()
    # comparison would reject.
    assert isinstance(offers, list)
def test_find_tag_by_title(self):
    """Check that ``OfferDao.find_tag_by_title`` returns a ``Tag`` or ``None``."""
    tag = OfferDao().find_tag_by_title('Finance')
    # isinstance also accepts Tag subclasses, which an exact type()
    # comparison would reject.
    assert tag is None or isinstance(tag, Tag)