Example #1
def get_latest_fetched_articles(db_root):
    providers = get_subdirectories(db_root)

    last_articles = {}
    last_errors = {}

    # todo: fix that shit
    fetched_date = datetime.today().date()

    for p in providers:
        provider_dir = os.path.join(db_root, p)
        all_days = get_subdirectories(provider_dir)
        last_day = get_latest_day(all_days)

        last_day_dir = os.path.join(provider_dir, last_day)
        all_hours = get_subdirectories(last_day_dir)
        last_hour = get_latest_hour(all_hours)

        fetched_date = make_date_from_string(last_day, last_hour)

        filename = os.path.join(last_day_dir, last_hour, 'articles.json')

        dump = get_provider_dump(filename)

        articles, errors = [], []
        for article in dump['articles']:
            articles.append(ArticleData.from_json(article))

        for error in dump['errors']:
            errors.append(error)

        last_articles[p] = articles
        last_errors[p] = errors

    return fetched_date, last_articles, last_errors
Example #2
def get_latest_fetched_articles(db_root):
    providers = get_subdirectories(db_root)

    last_articles = {}
    last_errors = {}

    # todo: fix that shit
    fetched_date = datetime.today().date()

    for p in providers:
        provider_dir = os.path.join(db_root, p)
        all_days = get_subdirectories(provider_dir)
        last_day = get_latest_day(all_days)

        last_day_dir = os.path.join(provider_dir, last_day)
        all_hours = get_subdirectories(last_day_dir)
        last_hour = get_latest_hour(all_hours)

        fetched_date = make_date_from_string(last_day, last_hour)

        filename = os.path.join(last_day_dir, last_hour, 'articles.json')

        dump = get_provider_dump(filename)

        articles, errors = [], []
        for article in dump['articles']:
            articles.append(ArticleData.from_json(article))

        for error in dump['errors']:
            errors.append(error)

        last_articles[p] = articles
        last_errors[p] = errors

    return fetched_date, last_articles, last_errors
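
Examples #1 and #2 are byte-for-byte identical, and the same load-and-parse step (open articles.json, turn each 'articles' entry into an ArticleData, collect the 'errors' entries) reappears in every snippet below. A minimal sketch of a shared helper that could absorb that step follows; load_batch_dump is a hypothetical name, and ArticleData.from_json, get_subdirectories and the articles.json layout are assumed from the snippets themselves, not defined here.

import json
import os

ARTICLES_FILENAME = 'articles.json'  # same literal the snippets hard-code

def load_batch_dump(batch_dir):
    # Hypothetical helper: read one batch directory's articles.json and return
    # the parsed ArticleData instances together with the raw error entries.
    # ArticleData is assumed to be importable from the project being quoted.
    json_filepath = os.path.join(batch_dir, ARTICLES_FILENAME)
    with open(json_filepath, 'r') as f:
        json_content = json.load(f)
    articles = [ArticleData.from_json(item) for item in json_content['articles']]
    errors = list(json_content.get('errors', []))
    return articles, errors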
Example #3
def get_articles_from_batch(db_root, source_name, date_string, batch_time):
    json_file = os.path.join(db_root, source_name, date_string, batch_time,
                             'articles.json')
    with open(json_file, 'r') as f:
        json_content = json.load(f)
        articles = [
            ArticleData.from_json(json_string)
            for json_string in json_content['articles']
        ]
        return articles
Example #4
def get_articles_per_batch(db_root, source_name, date_string):
    path = os.path.join(db_root, source_name, date_string)

    all_batch_times = get_subdirectories(path)
    all_batches = []
    for batch_time in all_batch_times:
        json_file = os.path.join(path, batch_time, 'articles.json')
        with open(json_file, 'r') as f:
            json_content = json.load(f)
            articles = [ArticleData.from_json(json_string) for json_string in json_content['articles']]
            all_batches.append((batch_time, articles))

    all_batches.sort(key=lambda x: x[0])
    return all_batches
Example #5
    def get_batch_content(self, date_string, batch_time_string):
        """
        Returns the data saved for a specific batch
        """
        batch_dir = os.path.join(self.directory, date_string, batch_time_string)
        if os.path.exists(batch_dir):
            json_filepath = os.path.join(batch_dir, ARTICLES_FILENAME)
            with open(json_filepath, 'r') as f:
                json_content = json.load(f)
                articles = [ArticleData.from_json(json_string) for json_string in json_content['articles']]
                articles.sort(key=lambda art: art.url)
                n_errors = len(json_content['errors'])
                return articles, n_errors
        else:
            raise NonExistentBatchError(self.name, date_string, batch_time_string)
Example #6
def get_articles_per_batch(db_root, source_name, date_string):
    path = os.path.join(db_root, source_name, date_string)

    all_batch_times = get_subdirectories(path)
    all_batches = []
    for batch_time in all_batch_times:
        json_file = os.path.join(path, batch_time, 'articles.json')
        with open(json_file, 'r') as f:
            json_content = json.load(f)
            articles = [
                ArticleData.from_json(json_string)
                for json_string in json_content['articles']
            ]
            all_batches.append((batch_time, articles))

    all_batches.sort(key=lambda x: x[0])
    return all_batches
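
Examples #4 and #6 differ only in how the list comprehension is wrapped. Expressed against the hypothetical load_batch_dump sketch above, the per-batch reader could shrink to the following; this is an illustration under the same assumptions, not the project's actual code.

def get_articles_per_batch(db_root, source_name, date_string):
    # Same behaviour as examples #4/#6, assuming every batch subdirectory
    # holds a well-formed articles.json; error entries are ignored here,
    # as in the originals.
    path = os.path.join(db_root, source_name, date_string)
    all_batches = []
    for batch_time in get_subdirectories(path):
        articles, _ = load_batch_dump(os.path.join(path, batch_time))
        all_batches.append((batch_time, articles))
    all_batches.sort(key=lambda batch: batch[0])
    return all_batches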
Example #7
    def get_batch_articles(self, date_string, batch_time_string):
        """
        Returns the articles saved for a specific batch as it was first fetched.
        This function does not return the articles which might have been reprocessed
        after a (manual) error handling session. You should use the
        get_reprocessed_articles() function for that.

        The function returns a sorted list of ArticleData instances.
        The list is sorted using the article url as key.
        """
        batch_dir = os.path.join(self.directory, date_string, batch_time_string)
        if os.path.exists(batch_dir):
            json_filepath = os.path.join(batch_dir, ARTICLES_FILENAME)
            with open(json_filepath, 'r') as f:
                json_content = json.load(f)
                articles = [ArticleData.from_json(json_string) for json_string in json_content['articles']]
                articles.sort(key=lambda art: art.url)
                return articles
        else:
            raise NonExistentBatchError(self.name, date_string, batch_time_string)
Example #8
    def get_reprocessed_batch_articles(self, date_string, batch_time_string):
        """
        Returns articles fetched during an error handling session.

        Each item in the returned list has the form:
        ((date_string, hour_string), articles)
        """
        batch_dir = os.path.join(self.directory, date_string, batch_time_string)
        if os.path.exists(batch_dir):
            reprocessed_articles = list()
            for reprocessed_data_dir in [i for i in utils.get_subdirectories(batch_dir) if i.startswith(REPROCESSED_DIR_PREFIX)]:
                reprocessed_date, reprocessed_time = reprocessed_data_dir.split("_")[1:]

                json_filepath = os.path.join(batch_dir, reprocessed_data_dir, ARTICLES_FILENAME)
                with open(json_filepath, 'r') as f:
                    json_content = json.load(f)
                    articles = [ArticleData.from_json(json_string) for json_string in json_content['articles']]
                    articles.sort(key=lambda art: art.url)
                    reprocessed_articles.append(((reprocessed_date, reprocessed_time), articles))
            return reprocessed_articles
        else:
            raise NonExistentBatchError(self.name, date_string, batch_time_string)
Example #9
def get_articles_from_batch(db_root, source_name, date_string, batch_time):
    json_file = os.path.join(db_root, source_name, date_string, batch_time, 'articles.json')
    with open(json_file, 'r') as f:
        json_content = json.load(f)
        articles = [ArticleData.from_json(json_string) for json_string in json_content['articles']]
        return articles
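
Examples #3 and #9 are the same single-batch reader with different wrapping, and examples #5, #7 and #8 repeat the open/parse/sort block inside their methods. With the same hypothetical helper, the single-batch variant reduces to a few lines; the directory layout (db_root/source_name/date_string/batch_time/articles.json) is taken from the snippets above.

def get_articles_from_batch(db_root, source_name, date_string, batch_time):
    # Single-batch reader from examples #3/#9, expressed via the shared helper sketch.
    batch_dir = os.path.join(db_root, source_name, date_string, batch_time)
    articles, _ = load_batch_dump(batch_dir)
    return articles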