# Example #1
def process_comment_urls(udb, ulimit=100000, number_of_processes=4):
    """Extract URLs from comment bodies using a pool of worker processes.

    Repeatedly selects comments whose ``number_urls`` field is NULL, farms
    the ``(id, body)`` pairs out to ``url_worker`` child processes via a pair
    of queues, then stores the per-comment URL count and the Comment<->Url
    link rows returned by the workers.

    Args:
        udb: peewee database handle used for atomic transactions.
        ulimit: maximum number of comments to process; ``0`` means no limit.
        number_of_processes: number of ``url_worker`` children per batch.
    """
    print('---EXTRACTING COMMENT URLS')
    totalcompleted = 0
    if ulimit == 0:
        ulimit = None  # 0 is the caller's way of saying "process everything"
    total_to_process = Comment.select().where(
        Comment.number_urls.is_null()).count()
    if ulimit is not None and total_to_process > ulimit:
        total_to_process = ulimit
    with tqdm(total=total_to_process) as pbar:
        while totalcompleted < total_to_process:
            # Snapshot the next batch of unprocessed comments.
            with udb.atomic():
                queue_tasks = [(comment.id, comment.body)
                               for comment in Comment.select().where(
                                   Comment.number_urls.is_null()).limit(ulimit)
                               ]
            task_queue = Queue()
            done_queue = Queue()

            # Submit tasks before starting workers so they find work waiting.
            for task in queue_tasks:
                task_queue.put(task)

            # BUGFIX: keep references to the workers so they can be told to
            # stop and be joined even if the result loop raises — the original
            # discarded the Process objects and only sent STOP sentinels after
            # the loop, orphaning live children on KeyboardInterrupt/quit().
            workers = [
                Process(target=url_worker, args=(task_queue, done_queue))
                for _ in range(number_of_processes)
            ]
            for worker in workers:
                worker.start()

            try:
                for _ in range(len(queue_tasks)):
                    comment_id, url_set = done_queue.get()
                    try:
                        with udb.atomic():
                            Comment.update(number_urls=len(url_set)).where(
                                Comment.id == comment_id).execute()
                            for url in url_set:
                                url, urlcreated = Url.get_or_create(link=url)
                                try:
                                    CommentLinks.insert(
                                        comment=comment_id,
                                        url=url.id).on_conflict_ignore().execute()
                                except SQLError:
                                    # Log the offending pair before re-raising.
                                    print(comment_id, url.id)
                                    raise
                    except KeyboardInterrupt:
                        quit()

                    pbar.update(1)
                    totalcompleted += 1
            finally:
                # Always deliver one STOP sentinel per worker and wait for
                # them to exit so no child processes are left behind.
                for _ in range(number_of_processes):
                    task_queue.put('STOP')
                for worker in workers:
                    worker.join()
    """
Пример #2
0
def reddit_comment_update(appcfg, update_length=604800):
    """Refresh recently-retrieved comments with live data from the Reddit API.

    Comments whose ``retrieved_on`` timestamp falls within ``update_length``
    seconds of their ``created_utc`` are re-fetched in batches of 100
    fullnames via ``r.info`` and have their score and deleted flag updated.

    Args:
        appcfg: application config exposing a peewee ``database`` handle.
        update_length: staleness window in seconds; defaults to one week
            (604800 s).
    """
    print('     ---UPDATING COMMENTS WITH DATA FROM THE REDDIT API')
    totalnumber = Comment.select().where(
        (Comment.retrieved_on - Comment.created_utc) < update_length).count()
    needs_update_list = list()
    needs_update = Comment.select().where(
        (Comment.retrieved_on - Comment.created_utc) < update_length)
    print(
        '         ---Building Task List.  This could take a while for large subreddits'
    )

    with tqdm(total=totalnumber, ncols=100, dynamic_ncols=False) as nbar:
        for dbcomment in needs_update:
            # A Reddit comment "fullname" is its id prefixed with t1_.
            fullname = "t1_{}".format(dbcomment.comment_id)
            needs_update_list.append(fullname)
            nbar.update(1)
    # r.info() accepts at most 100 fullnames per request.
    needs_update_list = list(chunks(needs_update_list, 100))
    print(
        '         ---Accessing data from Reddit API and entering into database'
    )
    with tqdm(total=totalnumber, ncols=100, dynamic_ncols=False) as pbar:
        for nlist in needs_update_list:
            try:
                rd_comments = list(r.info(nlist))
            except RequestException:
                print("Connection Error to Reddit API. Exiting...")
                return
            with appcfg.database.atomic():
                for rdcomment in rd_comments:
                    updatedtime = arrow.now().timestamp
                    # BUGFIX: removed an orphaned triple-quoted string that
                    # held dead elif branches; it executed as a no-op
                    # expression statement inside the first branch and
                    # obscured the real two-way control flow below.
                    if rdcomment.author is None and rdcomment.body == '[deleted]':
                        # Author gone AND body scrubbed => truly deleted.
                        Comment.update(
                            score=rdcomment.score,
                            retrieved_on=updatedtime,
                            deleted=True).where(
                                Comment.comment_id == rdcomment.id).execute()
                    else:
                        Comment.update(
                            score=rdcomment.score,
                            retrieved_on=updatedtime,
                            deleted=False).where(
                                Comment.comment_id == rdcomment.id).execute()
                    pbar.update(1)
# Example #3
def process_comments(appcfg):
    """Harvest a subreddit's comments from the pushshift.io API.

    Runs two passes: newest-first from the latest comment already stored,
    then oldest-first below the earliest stored comment (to resume an
    interrupted crawl), and finally normalizes each comment's ``deleted``
    flag against the ``[deleted]`` author record.

    Args:
        appcfg: application config with ``subreddit``, ``newestdate`` and
            ``oldestdate`` attributes.

    Returns:
        The set of reddit.com permalink lines gathered, or ``None`` if the
        very first pushshift request failed at the connection level.
    """
    # Get newest comments with two week overlap
    print('   PROCESSING NEWEST PUSHSHIFT.IO COMMENTS FOR', appcfg.subreddit)

    try:
        newest_utc = int(
            Comment.select(fn.MAX(Comment.created_utc)).scalar().timestamp())
    except (TypeError, AttributeError):
        # Empty table (scalar() is None) => start from the configured date.
        newest_utc = None
    if newest_utc is not None:
        oldestdate = newest_utc  # - 1209600  # two weeks overlap, in seconds
    else:
        oldestdate = appcfg.oldestdate

    try:
        comment_id_set = get_push_comments(appcfg, appcfg.newestdate,
                                           oldestdate)
    except (ConnectionError, SSLError, ChunkedEncodingError):
        comment_id_set = None
        print("     Connection Error for Pushshift API.  Quitting...")
        return comment_id_set

    # Get oldest comments in case progress was interrupted, with two week overlap
    try:
        oldest_utc = int(
            Comment.select(fn.MIN(Comment.created_utc)).scalar().timestamp())
    except (TypeError, AttributeError):
        oldest_utc = None
    if oldest_utc is not None:
        newestdate = oldest_utc  # + 1209600  # two weeks overlap, in seconds
    else:
        newestdate = appcfg.newestdate
    print('   PROCESSING OLDEST PUSHSHIFT.IO COMMENTS FOR', appcfg.subreddit)

    try:
        old_comment_id_set = get_push_comments(appcfg, newestdate,
                                               appcfg.oldestdate)
    except (ConnectionError, SSLError, ChunkedEncodingError):
        old_comment_id_set = None
        print("     Connection Error for Pushshift API.  Quitting...")
        return old_comment_id_set
    comment_id_set |= old_comment_id_set
    filedate = arrow.now().timestamp
    basedir = "/rpa" if os.environ.get('DOCKER', '0') == '1' else '.'
    # Path computed for the (currently disabled) permalink dump below.
    coutput_file_path = "{basedir}/{subreddit}_comments_{timestamp}.txt".format(
        basedir=basedir, subreddit=appcfg.subreddit, timestamp=filedate)

    # with open(coutput_file_path, 'w', encoding='UTF-8') as comment_file:
    #     comment_file.writelines(comment_id_set)
    print("     Total comments submitted to", appcfg.subreddit, "in set:",
          len(comment_id_set))
    deleted = Author.get_or_none(name='[deleted]')
    if deleted is not None:
        # BUGFIX: the original used Python `or` between two peewee expression
        # nodes; expression nodes are always truthy, so `or` short-circuited
        # and `Comment.deleted == 0` never reached the generated SQL. Peewee
        # predicates must be combined with the `|` operator.
        cupdatet = Comment.update(deleted=True).where(
            (Comment.author == deleted.id)
            & (Comment.deleted.is_null() | (Comment.deleted == 0))).execute()
        print(
            '     Updated deleted field in comments.  Set deleted = True for',
            cupdatet, 'records.')
        cupdatef = Comment.update(
            deleted=False).where((Comment.author != deleted.id)
                                 & (Comment.deleted.is_null())).execute()
        print(
            '     Updated deleted field in comments.  Set deleted = False for',
            cupdatef, 'records.')
# Example #4
def get_push_comments(appcfg, newestdate, oldestdate):
    """Page through the pushshift.io comment API for one subreddit window.

    Walks backwards in time from ``newestdate`` towards ``oldestdate`` in
    pages of up to 500 comments, inserting each comment into the database
    (duplicates ignored) and collecting a reddit.com permalink line for it.

    Args:
        appcfg: application config with ``subreddit`` and a peewee
            ``database`` handle.
        newestdate: upper bound (epoch seconds) of the window; mutated
            locally to drive pagination.
        oldestdate: lower bound (epoch seconds) of the window.

    Returns:
        Set of ``https://www.reddit.com/comments/.../.json`` permalink
        strings (newline-terminated), possibly empty on API errors.
    """
    subnumber = 1
    sub, subcreated = Subreddit.get_or_create(name=appcfg.subreddit)
    sub_id = sub.id
    totalsubnumber = 0
    push_comment_id_set = set()
    # aggs=subreddit&size=0 returns only an aggregate document count, which
    # is used to size the progress bar.
    total_available = "https://api.pushshift.io/reddit/search/comment/?subreddit={subreddit}" \
                      "&after={oldestdate}&before={newestdate}&aggs=subreddit&size=0"
    turl = total_available.format(subreddit=appcfg.subreddit,
                                  oldestdate=oldestdate,
                                  newestdate=newestdate)
    # newestdate = appcfg.newestdate
    with requests.get(turl) as tp:
        if tp.status_code != 200:
            print("Connection Error for Pushshift API, quitting...")
            # quit()
            return push_comment_id_set
        tpush = tp.json()
    try:
        total_comments = tpush['aggs']['subreddit'][0]['doc_count']
    except (IndexError, KeyError):
        # No aggregate entry means nothing new in this window.
        print("     No new comments to process from pushshift API for",
              appcfg.subreddit)
        return push_comment_id_set
    linktemplate = "https://api.pushshift.io/reddit/search/comment/?subreddit={subreddit}" \
                   "&after={oldestdate}&before={newestdate}&sort=desc&size=500"
    with tqdm(total=total_comments, ncols=100, dynamic_ncols=False) as pbar:
        # Pagination: each page is sorted descending; `newestdate` is pulled
        # down to the oldest timestamp seen, so the next request fetches the
        # page below it. An empty page (subnumber == 0) ends the loop.
        while subnumber > 0:
            url = linktemplate.format(subreddit=appcfg.subreddit,
                                      oldestdate=oldestdate,
                                      newestdate=newestdate)
            with requests.get(url) as rp:
                try:
                    push = rp.json()
                except JSONDecodeError:
                    # Transient API hiccup: back off and retry the same page.
                    print("     JSON DECODE ERROR on Pushshift API Comments",
                          url)
                    time.sleep(10)
                    continue
                    # return push_comment_id_set
            subnumber = len(push['data'])
            totalsubnumber += subnumber
            commentlinktemplate = 'https://www.reddit.com/comments/{link_id}/_/{comment_id}/.json\n'
            with appcfg.database.atomic():
                for item in push['data']:
                    if 'id' not in item.keys():
                        print('The following item has no primary comment ID:',
                              item)
                        continue
                    else:
                        # Rename pushshift's 'id' to the model's 'comment_id'.
                        item['comment_id'] = item.pop('id')
                    try:
                        # Strip the t3_ fullname prefix to get the bare
                        # submission id for the permalink.
                        link_id = item['link_id']
                        item['link_id'] = link_id.replace('t3_', '')
                        commentlink = commentlinktemplate.format(
                            link_id=item['link_id'],
                            comment_id=item['comment_id'])
                        push_comment_id_set.add(commentlink)
                    except KeyError:
                        print('The following item has no submission link ID:',
                              item)
                        continue
                    # Advance the paging cursor to the oldest timestamp seen.
                    if item['created_utc'] < newestdate:
                        newestdate = item['created_utc']
                    item['subreddit'] = sub_id
                    if 'author_flair_text' in item.keys(
                    ) and item['author_flair_text'] is not None:
                        author_flair, author_flaircreated = AuthorFlair.get_or_create(
                            text=item['author_flair_text'])
                        item['author_flair'] = author_flair.id
                    else:
                        item['author_flair'] = None
                    author, author_created = Author.get_or_create(
                        name=item['author'])
                    item['author'] = author.id
                    # Keep only keys that map to Comment model fields.
                    itemfields = Comment._meta.fields.keys()
                    insertdict = dict()
                    for key in item.keys():
                        if key in itemfields:
                            insertdict[key] = item[key]
                    Comment.insert(insertdict).on_conflict_ignore().execute()
            pbar.update(subnumber)
    return push_comment_id_set