def scrape(self):
    """Scrape Medium article preview cards from self.url into self.posts.

    Each parsed card becomes a dict with 'label', 'link', 'identifier'
    and 'tags' keys. Cards that fail to parse are logged and skipped.

    Returns:
        list: the accumulated self.posts.
    """
    response = RequestService.get(self.url,
                                  headers={'User-Agent': 'Mozilla/5.0'})
    soup = BeautifulSoup(response.content, 'html.parser')
    # Each Medium post preview card on the page.
    articles = soup.select(".postArticle.postArticle--short")

    for article in articles:
        try:
            post = {
                "label":
                article.find('div', class_='section-content').h3.string,
                "link":
                article.find(
                    'div',
                    class_='postArticle-content').parent.attrs['href'],
                "identifier":
                self.identifier,
                "tags":
                self.tags
            }
            self.posts.append(post)
        except Exception as e:
            # BUG FIX: the extra positional arg was silently dropped by
            # logging (the format string has no %s); log the error lazily
            # and drop the duplicate log call.
            logging.error("Error while parsing data: %s", e)
            continue

    return self.posts
# Exemplo n.º 2
    def scrape(self):
        """Scrape 'article.post' headlines from self.url into self.posts.

        Article hrefs are relative, so links are prefixed with self.url.
        Entries that fail to parse are logged and skipped.

        Returns:
            list: the accumulated self.posts.
        """
        response = RequestService.get(self.url,
                                      headers={'User-Agent': 'Mozilla/5.0'})
        soup = BeautifulSoup(response.content, 'html.parser')
        articles = soup.select("article.post")

        for article in articles:
            try:
                post = {
                    "label":
                    article.header.h1.a.string,
                    "link":
                    "{}{}".format(self.url,
                                  article.header.h1.a.attrs['href']),
                    "identifier":
                    self.identifier,
                    "tags":
                    self.tags
                }
                self.posts.append(post)
            except Exception as e:
                logging.error(str(e))

        # BUG FIX: removed the leftover debug print of self.posts.
        return self.posts
# Exemplo n.º 3
    def scrape(self):
        """Fetch self.url and record every '.loophead h2 a' anchor as a post.

        Returns:
            list: the accumulated self.posts.
        """
        resp = RequestService.get(self.url,headers={'User-Agent': 'Mozilla/5.0'})
        page = BeautifulSoup(resp.content,'html.parser')
        # One post entry per headline anchor, in document order.
        self.posts.extend(
            {
                "label": anchor.string,
                "link": anchor.attrs['href'],
                "identifier": self.identifier,
                "tags": self.tags,
            }
            for anchor in page.select(".loophead h2 a"))
        return self.posts
# Exemplo n.º 4
 def scrape(self):
     """Collect 'h2.entry-title' headline links from self.url into self.posts.

     Returns:
         list: the accumulated self.posts, one dict per headline.
     """
     response = RequestService.get(self.url,
                                   headers={'User-Agent': 'Mozilla/5.0'})
     soup = BeautifulSoup(response.content, 'html.parser')
     headings = soup.select("h2.entry-title")
     for heading in headings:
         anchor = heading.find_all('a')[0]
         post = {
             "label": anchor.string,
             # BUG FIX (idiom): attrs['href'] is already a str; the
             # "{}".format(...) wrapper was redundant.
             "link": anchor.attrs['href'],
             "identifier": self.identifier,
             "tags": self.tags
         }
         self.posts.append(post)
     return self.posts
# Exemplo n.º 5
    def scrape(self):
        """For each user in self.users, scrape their listing page.

        Builds the page URL from self.url, selects every 'td.name' cell and
        records the cell's second anchor as a post entry (link prefixed with
        self.baseUrl).

        Returns:
            list: the accumulated self.posts.
        """
        for user in self.users:

            # creating url for each user
            page_url = self.url.format(user)
            resp = RequestService.get(
                page_url, headers={'User-Agent': 'Mozilla/5.0'})
            doc = BeautifulSoup(resp.content, 'html.parser')
            for cell in doc.select("td.name"):
                # The second anchor in the cell carries the name/link.
                link_tag = cell.find_all('a')[1]
                self.posts.append({
                    "label": link_tag.string,
                    "link": "{}{}".format(self.baseUrl,
                                          link_tag.attrs['href']),
                    "identifier": self.identifier
                })
        return self.posts
# Exemplo n.º 6
    def scrape(self):
        """Collect repositories from a GitHub '.repo-list' page into self.posts.

        Each entry's label is "<name> - <description>" with whitespace
        normalized. Parse failures for individual entries are logged and
        skipped.

        Returns:
            list: the accumulated self.posts.
        """
        page = BeautifulSoup(
            RequestService.get(self.url,
                               headers={'User-Agent': 'Mozilla/5.0'}).content,
            'html.parser')
        for item in page.select(".repo-list li"):
            try:
                raw = "{} - {}".format(item.h3.a.text, item.p.string)
                # Strip newlines, then collapse runs of spaces to one.
                cleaned = re.sub(' +', ' ', raw.replace('\n', ""))
                self.posts.append({
                    "label": cleaned,
                    "link": "https://github.com{}".format(
                        item.h3.a.attrs['href']),
                    "identifier": self.identifier,
                    "tags": self.tags,
                })
            except Exception as e:
                logger.error(str(e))

        return self.posts
    def scrape(self):
        """Collect DigitalOcean tutorial links from self.url into self.posts.

        Tutorial hrefs are relative and get the digitalocean.com prefix.
        Entries that fail to parse are logged and skipped.

        Returns:
            list: the accumulated self.posts.
        """
        resp = RequestService.get(self.url,
                                  headers={'User-Agent': 'Mozilla/5.0'})
        doc = BeautifulSoup(resp.content, 'html.parser')

        for item in doc.select("li.tutorial"):
            try:
                anchor = item.h3.a
                self.posts.append({
                    "label": anchor.string,
                    "link": "https://www.digitalocean.com{}".format(
                        anchor.attrs['href']),
                    "identifier": self.identifier
                })
            except Exception as e:
                logging.error(str(e))

        return self.posts
# Exemplo n.º 8
    def scrape(self):
        """Scrape CareerCup question previews into self.posts.

        Selects '#question_preview li.question' entries; per-entry parse
        failures are logged and skipped, and a request-level failure is
        logged without raising.

        Returns:
            list: self.posts (unchanged if the request itself fails).
        """
        try:
            response = RequestService.get(
                self.url, headers={'User-Agent': 'Mozilla/5.0'})
            soup = BeautifulSoup(response.content, 'html.parser')
            questions = soup.select("#question_preview li.question")

            for question in questions:

                try:
                    entry = question.find('span', class_='entry')
                    post = {
                        "label":
                        # BUG FIX: formatting the <p> Tag directly embedded
                        # raw HTML markup in the label; extract its text.
                        entry.a.p.get_text(),
                        "link":
                        "{}{}".format("https://www.careercup.com",
                                      entry.a.attrs['href']),
                        "identifier":
                        self.identifier,
                        "tags":
                        self.tags
                    }
                    self.posts.append(post)

                except Exception as e:
                    logging.error(
                        "{} === Error occured in processing  {}".format(
                            self.identifier, e))
                    continue
            return self.posts

        except Exception as e:
            logging.error(e)
            # BUG FIX: previously fell through returning None on request
            # failure; keep the return type stable for callers.
            return self.posts
# Exemplo n.º 9
    def scrape(self):
        """Scrape Medium post previews for every configured tag.

        Iterates self.tagData (each item holds 'tagName' and 'tags'), builds
        the per-tag search URL from self.url, and appends one dict per
        article card to self.posts. Cards that fail to parse are logged and
        skipped.

        Returns:
            list: the accumulated self.posts.
        """
        # for each tag we need to call medium website
        for tag_entry in self.tagData:
            tag_to_search = tag_entry['tagName']
            post_tags = tag_entry['tags']

            # making the url to search
            url = self.url.format(tag_to_search)

            response = RequestService.get(
                url, headers={'User-Agent': 'Mozilla/5.0'})
            soup = BeautifulSoup(response.content, 'html.parser')
            articles = soup.select(".postArticle.postArticle--short")

            for article in articles:
                try:
                    post = {
                        "label":
                        article.find('div',
                                     class_='section-content').h3.string,
                        "link":
                        article.find(
                            'div',
                            class_='postArticle-content').parent.attrs['href'],
                        "identifier":
                        self.identifier,
                        "tags":
                        post_tags
                    }
                    self.posts.append(post)
                except Exception as e:
                    # BUG FIX: the extra positional arg was silently dropped
                    # by logging (the format string has no %s); log lazily
                    # and drop the duplicate log call.
                    logging.error("Error while parsing data: %s", e)
                    continue

        return self.posts