Пример #1
0
    def bind_news(self, url: str):
        """Link the news row identified by ``url`` to this search.

        Looks up the news id by URL, verifies that no ``search_has_news`` row
        already joins it to ``self.id``, and then inserts that row.

        NOTE(review): every query here is assembled by string formatting, so
        ``url`` reaches the SQL text unescaped. If ``dbconnection`` supports
        bound parameters, switch to them -- TODO confirm its API.

        Raises:
            exceptions.UnexpectedBehavior: the id could not be read from the
                lookup result (for example, the lookup returned no rows).
            exceptions.InfoAlreadyBinded: the search/news pair already exists.
        """
        # Retrieve the news id using its url
        query = "SELECT id FROM news " \
                "WHERE url = '{url}'" \
            .format(url=url)
        result = dbconnection.select(query)
        try:
            news_id = result[0][0]
        except Exception as err:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of being rewrapped.
            raise exceptions.UnexpectedBehavior() from err

        # Check whether this news item and search are already bound
        query = "SELECT * FROM search_has_news " \
                "WHERE search_id = {search_id} " \
                "AND news_id = {news_id}" \
            .format(search_id=self.id, news_id=news_id)
        result = dbconnection.select(query)
        if result:
            raise exceptions.InfoAlreadyBinded()

        # Record the binding in the database
        query = "INSERT INTO search_has_news " \
                "VALUES ({search_id}, {news_id})" \
            .format(search_id=self.id, news_id=news_id)
        dbconnection.modify(query)
Пример #2
0
    def __add_base_html(self, source: str):
        """Wrap ``source`` in a ``<head>`` section and ``<body>`` tags.

        The charset placed in the ``<meta>`` tag is read from the ``domain``
        table, reached through the ``section`` and ``structure`` joins for the
        structure whose id is ``self.id``.

        Args:
            source: markup to place between ``<body>`` and ``</body>``.

        Returns:
            The head section, followed by the body-wrapped ``source``.

        Raises:
            exceptions.IncorrectQuery: the encoding lookup returned no rows.
        """
        # Query for the correct encoding
        query = "SELECT domain.encoding from domain " \
                "JOIN section ON domain.url = section.domain_url " \
                "JOIN structure ON section.url = structure.section_url " \
                "WHERE structure.id = {id} " \
                "LIMIT 1" \
            .format(id=self.id)
        result = dbconnection.select(query)
        if not result:
            raise exceptions.IncorrectQuery()
        encoding = result[0][0]

        # Build the page head. Doubled braces are literal CSS braces that
        # survive ``str.format``; the section must end with a closing
        # ``</head>`` tag, not a second opening one.
        head = "<head>\n" \
               "<style>\n" \
               ".main-content {{text-align: justify; text-indent: 50px;}}\n" \
               ".caption {{text-align: center;}}\n" \
               "img {{display: block; margin: 0 auto; width: 400px;}}\n" \
               "</style>\n" \
               "<meta charset='{encoding}'>\n" \
               "</head>\n" \
            .format(encoding=encoding)

        # Build the body wrapper for the page
        body_begin = "<body>\n"
        body_end = "</body>\n"

        # Merge head, body and source
        return head + body_begin + source + body_end
Пример #3
0
    def __find_section_url(self, url: str, domain_url: str) -> str:
        """Pick the stored section url that best matches ``url``.

        Up to two candidates are extracted from ``url`` and ``domain_url`` is
        always added as the last one. The ``section`` table is then queried
        for any of them, ordered by ``importance`` descending, and the url of
        the first row is returned.

        Raises:
            exceptions.UnsupportedURL: no candidate is present in ``section``.
        """
        # First candidate: the text after "//" up to the second following "/"
        long_match = re.search("//(.*?/.*?)/", url)
        # Second candidate: the text after "//" up to the next "/"
        # (the pattern also requires one more "/" further on)
        short_match = re.search("//(.*?)/(.*?)/", url)

        # Keep only the candidates that matched and are non-empty
        candidates = []
        for found in (long_match, short_match):
            if found and found.group(1):
                candidates.append(found.group(1))
        candidates.append(domain_url)

        # Assemble the IN (...) list with every candidate double-quoted
        quoted = ["\"" + item + "\"" for item in candidates]
        query = "SELECT url FROM section WHERE url IN (" \
                + ", ".join(quoted) \
                + "\") "[1:] \
                + "ORDER BY importance DESC"

        rows = dbconnection.select(query)
        if not rows:
            raise exceptions.UnsupportedURL()

        return rows[0][0]
Пример #4
0
    def load_domain(self):
        """Instantiate the domain object for this section into ``self.domain``.

        Selects the ``domain`` row joined to the ``section`` whose url equals
        ``self.url`` and builds the matching ``Domain*`` object from the value
        in column index 1 of that row (presumably the domain name -- verify
        against the table layout).

        NOTE(review): ``self.url`` is formatted straight into the SQL text;
        prefer bound parameters if ``dbconnection`` offers them.

        Raises:
            exceptions.UnsupportedDomain: the row names a domain that has no
                dedicated class here.
            exceptions.UnsupportedSection: the query returned no row at all.
        """
        # Retrieve the domain row for this section from the database
        query = "SELECT * FROM domain " \
                "JOIN section " \
                "ON domain.url = section.domain_url " \
                "WHERE section.url = '{url}' " \
                "LIMIT 1;" \
            .format(url=self.url)
        result = dbconnection.select(query)

        # Start from None, not from the Domain class itself: a class object is
        # always truthy, which would make the "no row found" check below
        # impossible to trigger.
        domain = None

        # Load the correct domain object
        for row in result:
            if row[1] == "Elmundo":
                domain = DomainElmundo()
            elif row[1] == "Uol":
                domain = DomainUol()
            elif row[1] == "Globo":
                domain = DomainGlobo()
            else:
                raise exceptions.UnsupportedDomain()

        if domain is None:
            raise exceptions.UnsupportedSection()

        self.domain = domain
Пример #5
0
 def news_in_database(self, url: str) -> bool:
     """Report whether the ``news`` table has a row with the given ``url``.

     NOTE(review): ``url`` is formatted straight into the SQL text; prefer
     bound parameters if ``dbconnection`` offers them.

     Returns:
         True when the select yields a non-empty result, False otherwise.
     """
     query = "SELECT id FROM news " \
             "WHERE url = '{url}'" \
         .format(url=url)
     return bool(dbconnection.select(query))
Пример #6
0
    def load_structures(self):
        """Append one ``Structure`` to ``self.structures`` per matching row.

        Rows come from ``structure`` joined to ``section`` where the section
        url equals ``self.url``.
        """
        # Everything in the statement up to the url value
        select_prefix = (
            "SELECT * FROM structure "
            "JOIN section "
            "ON section.url = structure.section_url "
            "WHERE section.url = '"
        )
        rows = dbconnection.select(select_prefix + self.url + "';")

        # Build each structure from its row and collect it
        for record in rows:
            self.structures.append(Structure(row=record))
Пример #7
0
    def __init__(self):
        """Initialise the instance from the ``domain`` row named 'Elmundo'.

        After the parent initialiser runs, the first seven columns of each
        returned row are copied, in order, onto the attributes listed below.
        If several rows come back, the last one wins.
        """
        super(DomainElmundo, self).__init__()

        # Attribute names, listed in the column order they are read from
        column_attributes = (
            "url",
            "name",
            "connection_timeout",
            "connection_wait",
            "connection_attempts",
            "connection_agent",
            "encoding",
        )

        rows = dbconnection.select("SELECT * FROM domain WHERE name = 'Elmundo'")
        for record in rows:
            for position, attribute in enumerate(column_attributes):
                setattr(self, attribute, record[position])
Пример #8
0
    def __storage(self):
        """Move the temporary files into the next numbered news folder.

        The folder number is the highest ``news.id`` plus one, or 1 when the
        table yields no rows. The destination is created if it is missing and
        cleared otherwise, then everything under ``temp_dir`` is moved into it
        and the relative folder path is stored in ``self.dir_html``.
        """
        # Derive the folder number from the most recent news id
        latest = dbconnection.select("SELECT id FROM news ORDER BY id DESC LIMIT 1")
        num_folder = int(latest[0][0]) + 1 if latest else 1

        # Folder name with its trailing backslash, shared by both paths below
        folder_part = str(num_folder) + "\\"

        # Make sure the destination exists and starts out empty
        destiny_folder = news_dir + folder_part
        print(destiny_folder)
        if os.path.exists(destiny_folder):
            utils.clear_folder(destiny_folder)
        else:
            os.makedirs(destiny_folder)

        # Move temporary files to the new folder
        utils.move_all_folder(temp_dir, destiny_folder)

        self.dir_html = "news\\" + folder_part