Пример #1
0
    def test_insert(self):
        """Build an offer from a known Educarriere detail URL and save it.

        The offer is assembled the same way the cron does it (content, dates,
        type, status, tags and degrees) and then handed to
        ``OfferDao.create_or_update_offer``. The test passes when that call
        returns an ``Offer`` or a tuple.
        """
        title = 'Community manager senior'
        url = 'https://emploi.educarriere.ci/offre-68967-community-manager-senior.html'

        with self.app.app_context():
            cron = EducarriereCron(page_number=1)
            content = cron.extract_content(url, cron.DETAILS_SELECTOR)

            # The first two extracted dates are used as publication and
            # expiry dates respectively.
            dates = cron.extract_dates(content)
            pub_date, exp_date = dates[0], dates[1]

            offer = Offer(url, title, content, pub_date, exp_date)
            offer.set_type(cron.extract_type(offer.content))
            offer.set_satus(cron.PENDING)

            dao = OfferDao()
            offer.tags = dao.create_or_update_tags(offer)
            offer.degrees = dao.create_or_update_degrees(offer, cron)

            # Save to database
            result = dao.create_or_update_offer(offer)

        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of the expected result types.
        assert isinstance(result, (Offer, tuple))
Пример #2
0
    def scrape_home_page(self, url):
        """
        Comb through url to extract offers and save them.

        The page at ``url`` is downloaded and parsed with BeautifulSoup. Every
        node matching ``self.OFFERS_SELECTOR`` that carries a usable link is
        mapped to an ``Offer``, completed with the content extracted via
        ``self.extract_content`` and passed to
        ``OfferDao.create_or_update_offer``.

        :param url: address of the listing page to scrape.
        """

        # NOTE(review): no timeout is passed, so requests will wait
        # indefinitely on a stalled server - consider adding one.
        html_doc = requests.get(url).text
        soup = BeautifulSoup(html_doc, 'html.parser')
        dao = OfferDao()

        for node in soup.select(self.OFFERS_SELECTOR):

            # Data mapping
            title_nodes = node.select(self.TITLES_SELECTOR)
            offer_url = "".join(x['href'] for x in title_nodes)

            # Check whether we have a valid url; skip the node early so no
            # further extraction is attempted for it.
            if len(offer_url) <= 10:
                continue

            title = "".join(x.get_text() for x in title_nodes)
            desc = "".join(
                x.get_text() for x in node.select(self.DESC_SELECTOR))
            dates = self.extract_dates(node.get_text())

            # if empty pub_date is always generated automatically, so a node
            # without any date must not abort the scrape with an IndexError
            pub_date = dates[0] if dates else None
            exp_date = dates[1] if len(dates) > 1 else None

            # Extract additional details: degree, type of offers, etc.
            offer = Offer(offer_url, title, desc, pub_date, exp_date)
            offer.content = self.extract_content(offer_url,
                                                 self.DETAILS_SELECTOR)

            offer.set_type(self.extract_type(offer.content))
            offer.set_satus(self.PENDING)

            offer.tags = dao.create_or_update_tags(offer)
            offer.degrees = dao.create_or_update_degrees(offer, self)

            if offer.tags:
                offer.set_image(offer.tags)

            # Save to database
            print('saving {}'.format(offer))
            dao.create_or_update_offer(offer)
Пример #3
0
def find_by_title(title):
    """Look up offers by ``title`` and return them through ``jsonify``.

    The result of ``OfferDao.find_by_title`` is serialized with a
    many-item ``OfferSchema`` before being handed to ``jsonify``.
    """
    serializer = OfferSchema(many=True)
    matches = OfferDao().find_by_title(title)
    return jsonify(serializer.dump(matches))
Пример #4
0
def tags():
    """Return the tags from ``OfferDao.get_tags`` through ``jsonify``.

    The tags are serialized with a many-item ``TagsSchema`` first.
    """
    serializer = TagsSchema(many=True)
    all_tags = OfferDao().get_tags()
    return jsonify(serializer.dump(all_tags))
Пример #5
0
def index():
    """Return the offers from ``OfferDao.fetch(30)`` through ``jsonify``.

    The offers are serialized with a many-item ``OfferSchema`` first.
    """
    serializer = OfferSchema(many=True)
    # NOTE(review): 30 is presumably the maximum number of offers to load -
    # confirm against OfferDao.fetch.
    offers = OfferDao().fetch(30)
    return jsonify(serializer.dump(offers))
Пример #6
0
    def test_offer_by_title(self):
        """Check that ``OfferDao.find_by_title`` returns a list."""
        offers = OfferDao().find_by_title('Finance')

        # isinstance also accepts list subclasses, which an exact type
        # comparison would reject.
        assert isinstance(offers, list)
Пример #7
0
    def test_find_tag_by_title(self):
        """Check that ``OfferDao.find_tag_by_title`` returns a ``Tag`` or ``None``."""
        tag = OfferDao().find_tag_by_title('Finance')

        # None is an accepted outcome; otherwise the result must be a Tag
        # (subclasses included).
        assert tag is None or isinstance(tag, Tag)