Примеры использования Python Utility

Язык программирования: Python

Пространство имен/Пакет: BSpotReader.Utility.utility

Класс/Тип: Utility

Примеров на hotexamples.com: 2

Python Utility — найдено 2 примера. Это лучшие примеры кода на Python для BSpotReader.Utility.utility.Utility, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам повысить качество примеров.

Основные методы

Показать Скрыть

find_and_strip(1)

request_and_save(1)

strip_post(1)

Пример #1

Показать файл

Файл: Keywords.py Проект: bl4ckh0l3z/bspotreader

    def run(self):
        """Check every stripped post for title, keyword and sentence matches.

        The posts come from ``Utility.find_and_strip(self._temp_dir, self._dict)``;
        each one is read through its ``'url'``, ``'text'`` and ``'title'`` keys.

        Returns:
            A ``(result, coherence_max)`` tuple. ``result`` maps a post url to a
            dict that may hold the keys ``'title'``, ``'keywords found'`` and
            ``'sentences'``; posts without any match are left out.
            ``coherence_max`` is the largest coherence score seen (0 if none).
        """
        posts = Utility.find_and_strip(self._temp_dir, self._dict)
        matches_by_url = {}
        best_coherence = 0

        for position in range(len(posts)):
            post = posts[position]
            score = 0
            post_matches = {}

            url = post['url']
            logging.info('- Verifying url: {0} -'.format(str(url)))

            # Text is lower-cased before stripping newlines, the title the
            # other way round; the order is kept as in the original code.
            body = post['text'].lower().strip('\n')
            heading = post['title'].strip('\n').lower()

            if self._find_in_title(heading):
                post_matches['title'] = heading
                score += 1
                logging.debug("Incrementing coherence +1 (find_in_title)")

            keywords = self._find_in_text(body)
            if len(keywords) > 0:
                found = []
                post_matches['keywords found'] = found
                for keyword in keywords:
                    found.append(keyword)
                    logging.debug("Word found: %s" % keyword)
                    # NOTE(review): the neighbourhood score of the whole
                    # keyword list is added once per keyword - confirm that
                    # this multiplication is intended.
                    score += self._neighborhood(body, keywords)

            if score:
                best_coherence = max(best_coherence, score)
                logging.debug('- Coherence: {0}' .format(str(score)))

            phrases = self._find_in_sentence(body)
            if len(phrases) > 0:
                post_matches['sentences'] = list(phrases)

            if len(post_matches) > 0:
                matches_by_url[url] = post_matches

        logging.debug(matches_by_url)
        return matches_by_url, best_coherence

Пример #2

Показать файл

Файл: runner.py Проект: bl4ckh0l3z/bspotreader

    def scrape_and_save(self):
        """Scrape the monthly pages of the configured period and save the result.

        The period runs from ``self._month_start``/``self._year_start`` to
        ``self._month_end``/``self._year_end`` (both inclusive). For every
        month the page is downloaded, parsed and passed to
        ``Utility.strip_post``; non-empty results are collected as
        ``{year: {month: buffer_list}}`` and written with
        ``JsonConverter.save_dictionary_to_file`` to ``self._hash_path``.
        """
        # Local import keeps this method self-contained; ``closing`` is needed
        # because the urllib2 response object is not a context manager.
        from contextlib import closing

        # Logs the body returned by icanhazip.com (labelled by the message as
        # the IP address seen over the tor service).
        with closing(urllib2.urlopen('http://icanhazip.com')) as response:
            logging.info('- IP address over tor service: ' + response.read())

        # scrape the blog and save all html pages per month
        logging.info('- Scraping and saving hash from {0} {1} to {2} {3} -'.format(str(self._month_start),
                                                                                   str(self._year_start),
                                                                                   str(self._month_end),
                                                                                   str(self._year_end)))

        # A single-year period is handled without range() so the year value is
        # used exactly as configured, as the original same-year branch did.
        if self._year_end == self._year_start:
            years = [self._year_start]
        else:
            years = range(self._year_start, self._year_end + 1)

        dict_tot = {}
        for year in years:
            # Only the first year starts at month_start and only the last year
            # stops at month_end; every other boundary is January / December.
            first_month = self._month_start if year == self._year_start else 1
            last_month = self._month_end if year == self._year_end else 12
            dict_hash = self._scrape_months(year, first_month, last_month)
            if dict_hash:
                dict_tot[year] = dict_hash

        JsonConverter.save_dictionary_to_file(self._hash_path, dict_tot)
        logging.info('- Scraped and saved all hash with no errors -')

    def _scrape_months(self, year, first_month, last_month):
        """Return ``{month: buffer_list}`` for the non-empty months of ``year``.

        Months ``first_month`` through ``last_month`` (inclusive) are scraped.
        """
        dict_hash = {}
        for month in range(first_month, last_month + 1):
            buffer_list = self._scrape_month(year, month)
            if buffer_list:
                dict_hash[month] = buffer_list
        return dict_hash

    def _scrape_month(self, year, month):
        """Download, parse and strip the page of one month; return the strip result."""
        # The URL uses a zero-padded month, the local file name does not.
        url = "{0}/{1}/{2:02d}".format(str(self._target), str(year), month)
        page_path = "{0}{1}_{2}.html".format(str(self._temp_dir), str(year), str(month))

        Utility.request_and_save(url, page_path, self._user_agent, self._attempts,
                                 self._pause, self._time_out)

        # BeautifulSoup reads the handle while constructing the soup, so the
        # file can be closed as soon as the constructor returns.
        with open(page_path) as page:
            soup = BeautifulSoup(page)

        return Utility.strip_post(soup, year, month, self._temp_dir, self._user_agent,
                                  self._pause, self._attempts, self._time_out,
                                  self._res_to_skip, self._target_prefix)