Exemplo n.º 1
0
def process_all(client: Client, db: ExportDb) -> Iterator[Exception]:
    """Fetch all threads and their messages via ``client`` and store them in ``db``.

    This is a generator: no work is done until it is iterated.

    Threads are fetched from the ARCHIVED, INBOX and OTHER locations and
    inserted first. Then, for every thread:

    1. messages older than what the database already holds (or all messages,
       on first import) are fetched and inserted one by one;
    2. if the database already had messages for the thread, the newest
       messages are fetched again inside a single ``with db.db`` block until
       one older than the stored ``newest`` timestamp is seen;
    3. ``db.check_fetched_all(thread)`` is run and its results are yielded.

    Errors do not abort the export: every ``Exception`` instance produced by
    ``iter_thread`` is logged and yielded instead of being raised.

    Args:
        client: API client; used for ``fetchThreads`` and passed to ``iter_thread``.
        db: export database that receives the threads and messages.

    Yields:
        Exceptions produced by ``iter_thread``, followed per thread by
        whatever ``db.check_fetched_all`` yields (presumably exceptions too).
    """
    locs = [
        # not sure what that means.. apparently groups you don't have access to anymore?
        ThreadLocation.ARCHIVED,
        # most of the messages are here.
        ThreadLocation.INBOX,
        # apparently, keeps hidden conversations? Although it doesn't return all of them for me...
        ThreadLocation.OTHER,
        # ThreadLocation.PENDING, # what is it???
    ]
    threads: List[Thread] = []
    for loc in locs:
        logger.debug('fetching threads: %s', loc)
        # fetches all threads by default
        threads.extend(client.fetchThreads(loc))

    for thread in threads:
        db.insert_thread(thread)

    def report(thread: 'Thread', exc: Exception) -> Exception:
        """Log ``exc`` in the context of ``thread`` and hand it back for yielding."""
        logger.error('While processing thread %s', thread)
        # The exception arrives here as a plain value, not inside an `except`
        # block, so logger.exception() would find no active exception and log
        # 'NoneType: None'. Passing exc_info explicitly attaches the real one.
        logger.error('%s', exc, exc_info=exc)
        return exc

    for thread in threads:
        bounds = db.get_oldest_and_newest(thread)
        # None means the database has nothing for this thread yet
        oldest, newest = (None, None) if bounds is None else bounds

        # sadly, the api only allows us to fetch messages from newest to oldest
        # that means that we have no means of keeping a contiguous chunk of messages in the database
        # and 'extending' it both ways
        # we can extend it to the left (i.e. towards the oldest)
        # but all newer messages have to be accumulated and written in a single transaction

        # this handles both the 'first import' (oldest is None) and 'extending' to the left of oldest
        iter_oldest = iter_thread(client=client, thread=thread, before=oldest)
        for r in iter_oldest:
            if isinstance(r, Exception):
                yield report(thread, r)
            else:
                db.insert_message(thread, r)

        if newest is not None:
            # and we want to fetch everything until we encounter newest
            iter_newest = iter_thread(client=client, thread=thread, before=None)
            with db.db:  # transaction. that's *necessary* for new messages to extend fetched data to the right
                for r in iter_newest:
                    if isinstance(r, Exception):
                        yield report(thread, r)
                        continue
                    mts = int(r.timestamp)
                    if newest > mts:
                        logger.info(
                            '%s: fetched all new messages (up to %s)',
                            thread.name, newest)
                        break  # interrupt, thus preventing fetching unnecessary data
                    db.insert_message(thread, r)

        # TODO not sure if this should be defensive? a failure here could be an indication of a serious issue...
        yield from db.check_fetched_all(thread)