示例#1
0
def post_downloader(insta, post, debug: bool, final_dest: str, dump_metadata: bool, dump_comments: bool, dump_likes: bool):
    """Download all media items of a single post, optionally dumping its
    metadata, comments and likes to JSON via `data_dumper`.

    Arguments:
        insta: client object; its logger records unexpected failures.
        post: post structure exposing `shortcode`, `media_count`, `download()`,
            `as_dict()`, `media_items()`, `comments()` and `likes()`.
        debug: when True, hide the progress bar and suppress tqdm error lines.
        final_dest: destination directory for downloaded files and dumps.
        dump_metadata / dump_comments / dump_likes: which extras to dump.

    Returns:
        (downs, existings): counts of newly downloaded vs already-existing items.
    """

    def on_item_start(_, __, item):
        bar.set_description_str("[" + "⤓ Downloading".center(15) + "]")
        bar.set_postfix_str("({}) {}".format(post.shortcode, item.typename))

    def on_item_finish(_, __, ___, file_path):
        nonlocal downs, existings
        if file_path is not None:
            downs += 1
        else:
            # A 'None' file path means the item was already on disk.
            existings += 1
            bar.set_description_str("[" + "♻︎ Existing".center(15) + "]")
            time.sleep(0.1)  # brief pause so the status flash is visible
        bar.update(1)

    def on_item_error(post, i, __, e):
        bar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")
        if not debug:
            tqdm.write(Fore.RED + "✖︎ [{} ({})] {}: {}".format(post.shortcode, i, e.__class__.__name__, str(e)))

    # Initialize the counters and the bar BEFORE the try block so that the
    # 'except' and 'finally' clauses below can never hit an unbound
    # 'bar'/'downs' NameError if progress_bar() itself fails.
    downs = existings = 0
    bar = progress_bar(total=post.media_count, hide=debug)
    try:
        post.download(final_dest, on_item_start=on_item_start, on_item_finish=on_item_finish, on_item_error=on_item_error)
        if any((dump_metadata, dump_comments, dump_likes)):
            bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
            data = {}
            # TODO: show dump progress using 'tqdm.write' with carriage return (\r)
            if dump_metadata:
                data.update(post.as_dict(extra=True))
                data.update({"media_items": [d.as_dict() for d in post.media_items()]})
            if dump_comments:
                # post.comments() returns (total, iterable-of-namedtuples);
                # only the second element is serialized here.
                comments = post.comments()
                comments = [comment._asdict() for comment in comments[1]]
                data.update({"comments": list(comments)})
            if dump_likes:
                likes = post.likes()
                likes = [like.username for like in likes]
                data.update({"likes": list(likes)})
            data_dumper(final_dest, post.shortcode, data)
        bar.set_description_str(Fore.GREEN + "[" + "✔︎ Finished".center(15) + "]")
    except KeyboardInterrupt:
        # Most-specific handler first; re-raise so the caller can abort cleanly.
        bar.set_description_str(Fore.MAGENTA + "[" + "⌧ Interrupted".center(15) + "]")
        raise
    except Exception as e:
        exc_type, exc_value, tb = sys.exc_info()
        insta.logger.error("{}: {}".format(exc_type.__name__, exc_value))
        insta.logger.debug("".join(traceback.format_tb(tb)))
        if not debug:
            tqdm.write(Fore.RED + "✖︎ {}: {}".format(exc_type.__name__, str(e)))
        bar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")
    finally:
        bar.close()
    return downs, existings
示例#2
0
 def on_item_finish(_, __, item, file_path):
     nonlocal downs, existings
     if file_path is not None:
         downs += 1
     else:
         existings += 1
         bar.set_description_str("[" + "♻︎ Existing".center(15) + "]")
         time.sleep(0.1)
     if dump_metadata:
         bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
         data_dumper(final_dest, to_datetime(item.created_time), item.as_dict(extra=True))
     bar.update(1)
示例#3
0
 def on_post_finish(post):
     # TODO: show dump progress using 'tqdm.write' with carriage return (\r)
     if any((dump_metadata, dump_comments, dump_likes)):
         bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
         data = {}
         if dump_metadata:
             data.update(post.as_dict(extra=True))
             data.update({"media_items": [d.as_dict() for d in post.media_items()]})
         if dump_comments:
             comments = post.comments()
             comments = [comment._asdict() for comment in comments[1]]
             data.update({"comments": list(comments)})
         if dump_likes:
             likes = post.likes()
             likes = [like.username for like in likes]
             data.update({"likes": list(likes)})
         data_dumper(final_dest, post.shortcode, data)
     bar.update(1)
     nonlocal subbar
     if subbar is not None:
         subbar.close()
         subbar = None
示例#4
0
def story_downloader(story, debug: bool, final_dest: str, dump_metadata: bool):
    """Download all reel items of a story, optionally dumping metadata.

    Arguments:
        story: story structure exposing `reel_count`, `download()`, `as_dict()`.
        debug: when True, hide the progress bar and suppress tqdm error lines.
        final_dest: destination directory for downloads and dumps.
        dump_metadata: when True, dump per-item and whole-story metadata to JSON.

    Returns:
        (downs, existings): counts of newly downloaded vs already-existing items.

    NOTE(review): unlike `post_downloader`, unexpected exceptions propagate to
    the caller — there is no client logger available in this signature.
    """

    def on_item_start(_, __, item):
        bar.set_description_str("[" + "⤓ Downloading".center(15) + "]")
        bar.set_postfix_str(item.typename)

    def on_item_finish(_, __, item, file_path):
        nonlocal downs, existings
        if file_path is not None:
            downs += 1
        else:
            # A 'None' file path means the item was already on disk.
            existings += 1
            bar.set_description_str("[" + "♻︎ Existing".center(15) + "]")
            time.sleep(0.1)  # brief pause so the status flash is visible
        if dump_metadata:
            bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
            data_dumper(final_dest, to_datetime(item.created_time), item.as_dict(extra=True))
        bar.update(1)

    def on_item_error(_, __, ___, e):
        bar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")
        if not debug:
            tqdm.write(Fore.RED + "✖︎ {}: {}".format(e.__class__.__name__, str(e)))

    # Initialize the counters and the bar BEFORE the try block so the
    # 'finally' clause can never hit an unbound 'bar' NameError if
    # progress_bar() itself fails.
    downs = existings = 0
    bar = progress_bar(story.reel_count, hide=debug)
    try:
        story.download(final_dest, on_item_start=on_item_start, on_item_finish=on_item_finish, on_item_error=on_item_error)
        if dump_metadata:
            data_dumper(final_dest, "story", story.as_dict(extra=True))
        bar.set_description_str(Fore.GREEN + "[" + "✔︎ Finished".center(15) + "]")
    except KeyboardInterrupt:
        bar.set_description_str(Fore.MAGENTA + "[" + "⌧ Interrupted".center(15) + "]")
        raise
    finally:
        bar.close()
    return downs, existings
示例#5
0
def dump_handler(**args):
    """Dump profile/post data (information, followers, followings, comments,
    likes) for each target, either pretty-printed or written to a JSON file.

    Expects in **args: 'parser' (argparse parser, used for error reporting),
    'target' (list of '@name' profile or ':shortcode' post identifiers), plus
    the option and dump-type flags read below. All targets must share the
    same identifier character.
    """
    # Try to load a saved session; fall back to an anonymous (guest) client.
    guest = False
    insta = load_obj()
    if insta is None:
        guest = True
        insta = Instagram()

    # Validate targets: each is '<identifier><name>' where the identifier is
    # '@' (profile) or ':' (post), and all must be of the same type.
    parser = args.get("parser")
    targets = args.get("target")
    names = []
    for target in targets:
        if len(target) <= 1:
            parser.error("invalid target parsed: '{}'".format(target))
        if target[0] not in ("@", ":"):
            parser.error("invalid identifier of target: '{}'".format(target))
        if target[0] != targets[0][0]:
            parser.error("all targets must be the same type")
        names.append(target[1:])
    identifier = targets[0][0]

    # Gather options
    outfile = args.get("outfile")
    limit = args.get("limit")
    profiles_filter_str = args.get("profiles_filter")
    comments_filter_str = args.get("comments_filter")
    preload = args.get("preload")
    # Gather dump types
    followers = args.get("followers")
    followings = args.get("followings")
    comments = args.get("comments")
    likes = args.get("likes")

    # Validate dump types against the target kind.
    if identifier == "@" and any((comments, likes)):
        parser.error(
            "target '@' not allowed with arguments: -comments, -likes")
    elif identifier == ":" and any((followers, followings)):
        parser.error(
            "target ':' not allowed with arguments: -followers, -followings")

    struct_getter = None
    # Build one (name, jobs) entry per target. Each job pairs a label with a
    # callable that retrieves the corresponding data from the structure.
    Job = namedtuple("Job", "name handler")
    entries = []
    for name in names:
        jobs = []

        if identifier == "@":
            if not any((followers, followings)):
                # No specific dump type requested: dump basic information.
                jobs.append(
                    Job("information", lambda profile: profile.as_dict()))
            else:
                if followers:
                    jobs.append(
                        Job("followers", lambda profile: profile.followers()))
                if followings:
                    jobs.append(
                        Job("followings",
                            lambda profile: profile.followings()))
            if struct_getter is None:
                struct_getter = insta.profile

        elif identifier == ":":
            if not any((comments, likes)):
                jobs.append(Job("information", lambda post: post.as_dict()))
            else:
                if comments:
                    jobs.append(Job("comments", lambda post: post.comments()))
                if likes:
                    jobs.append(Job("likes", lambda post: post.likes()))
            if struct_getter is None:
                struct_getter = insta.post

        entries.append((name, jobs))

    # Start dumping entries of jobs
    if guest:
        warn_print("You are not logged in currently (Anonymous/Guest).")
    print(Style.BRIGHT + Fore.GREEN + "❖ [Dump] {} entries ({} jobs)\n".format(
        len(entries), sum([len(jobs) for _, jobs in entries])))
    for i, (name, jobs) in enumerate(entries, start=1):

        struct = None
        print(Style.BRIGHT + "{0}+ (Entry {1}/{2}) {3} {4}".format(
            Fore.BLUE, i, len(entries), struct_getter.__name__.title(),
            Fore.WHITE + name))
        with error_catcher(do_exit=False):
            struct = struct_getter(name)
            struct._obtain_full_data()
        if not bool(struct):
            # Retrieval failed inside error_catcher; skip this entry.
            continue

        for j, job in enumerate(jobs, start=1):
            # Retrieve items for this job; failures are reported, not fatal.
            group = None
            results = None
            print("{0}► (Job {1}/{2}) Retrieving {3}".format(
                Fore.CYAN, j, len(jobs), job.name.title()))
            with error_catcher(do_exit=False):
                results = job.handler(struct)
            if not bool(results):
                warn_print("No results are returned.")
                continue

            try:
                if job.name == "information":
                    if any((limit, profiles_filter_str, comments_filter_str,
                            preload)):
                        warn_print(
                            "Disallow: -l/--limit, -PF/--profiles-filter, -CF/--comments-filter, --preload"
                        )
                elif job.name in ("likes", "followers", "followings"):
                    if comments_filter_str:
                        warn_print("Disallow: -CF/--comments-filter")
                    print(Style.BRIGHT + "~ Total:", results.length)
                    group = results
                    group.limit(limit).preload(preload).ignore_errors(True)
                    if profiles_filter_str:
                        filterfunc = filterstr_to_filterfunc(
                            profiles_filter_str)
                        group.filter(filterfunc)
                    # Usernames for display; full dicts when dumping to a file.
                    results = (result.username
                               for result in group) if not outfile else (
                                   result.as_dict(extra=True)
                                   for result in group)
                elif job.name == "comments":
                    if any((profiles_filter_str, preload)):
                        warn_print(
                            "Disallow: -PF/--profiles-filter, --preload")
                    print(Style.BRIGHT + "~ Total:", results[0])
                    # BUGFIX: default to a pass-through filter so 'filterfunc'
                    # is always bound in the comprehension below; previously it
                    # raised NameError whenever -CF/--comments-filter was absent.
                    filterfunc = lambda _: True
                    if comments_filter_str:
                        filterfunc = filterstr_to_filterfunc(
                            comments_filter_str)
                    with error_catcher(do_exit=False):
                        results = [
                            result._asdict() for result in results[1]
                            if filterfunc(result)
                        ]
                        if limit is not None:
                            results = results[:limit]
                else:
                    raise ValueError("unable to resolve dump type")

                if outfile:
                    outfile = os.path.abspath(outfile)
                    dest, file = os.path.split(outfile)
                    filename, _ = os.path.splitext(file)
                    data_dumper(dest, filename, list(results))
                    print(Style.BRIGHT + Fore.GREEN + "⇟ Data Dumped => " +
                          Fore.WHITE + os.path.join(dest, filename + ".json"))
                else:
                    pretty_print(results)
            finally:
                # Always report retrieval errors collected by the group, even
                # if dumping/printing above raised.
                if group is not None and job.name in ("likes", "followers",
                                                      "followings"):
                    errors = group.collect_errors()
                    if errors:
                        print(Fore.RED +
                              "  [{} Errors Collected During Posts Retrieving]"
                              .format(len(errors)))
                        for error in errors:
                            print(Fore.RED + "> {} -> {}: {}".format(
                                Fore.WHITE + Style.BRIGHT + error.name +
                                Style.RESET_ALL + Fore.RED, Style.BRIGHT +
                                error.exc_type.__name__, error.exc_value))
        print()
示例#6
0
def highlights_downloader(insta, highlights: list, debug: bool, final_dest: str, limit: int, dump_metadata: bool):
    """Download every reel item of each highlight into its own subdirectory
    of `final_dest`, optionally dumping each highlight's metadata to JSON.

    Arguments:
        insta: client object; its logger records unexpected failures.
        highlights: highlight structures exposing `title`, `reel_count`,
            `download()` and `as_dict()`.
        debug: when True, hide progress bars and suppress tqdm error lines.
        final_dest: root destination directory.
        limit: download at most this many highlights (None = all).
        dump_metadata: when True, dump per-item and per-highlight metadata.

    Returns:
        (downs, existings): counts of newly downloaded vs already-existing items.
    """

    def on_item_start(_, __, item):
        if subbar is not None:
            subbar.set_postfix_str(item.typename)

    def on_item_finish(_, __, item, file_path):
        nonlocal downs, existings
        if file_path is not None:
            downs += 1
        else:
            # A 'None' file path means the item was already on disk.
            existings += 1
            bar.set_description_str("[" + "♻︎ Existing".center(15) + "]")
            # Guard the sub-bar like on_item_start/on_item_error do; the
            # original dereferenced it unconditionally here.
            if subbar is not None:
                subbar.set_description_str("[" + "↻ Verifying".center(15) + "]")
            time.sleep(0.1)  # brief pause so the status flash is visible
        if dump_metadata:
            bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
            if subbar is not None:
                subbar.set_description_str("[" + "↻ Verifying".center(15) + "]")
            data_dumper(subdir, to_datetime(item.created_time), item.as_dict(extra=True))
        if subbar is not None:
            subbar.update(1)

    def on_item_error(_, i, ___, e):
        # Ignore the error and move on to the next reel item of the current
        # highlight (re-raising here would abort the whole highlight).
        if subbar is not None:
            subbar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")
        else:
            bar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")

        if not debug:
            tqdm.write(Fore.RED + "✖︎ [{} ({})] {}: {}".format(highlight.title[:12], i, e.__class__.__name__, str(e)))

    # Truncate once up front (the original sliced the already-truncated list a
    # second time in the loop header) and create the bar BEFORE the try block
    # so 'except'/'finally' can never hit an unbound 'bar' NameError.
    highlights = highlights[:len(highlights) if limit is None else limit]
    downs = existings = 0
    bar = progress_bar(total=len(highlights), hide=debug)
    try:
        for highlight in highlights:
            subdir = os.path.join(final_dest, clean_filename(highlight.title))
            subbar = progress_bar(total=highlight.reel_count, sub=True, hide=debug or highlight.reel_count == 1)
            bar.set_description_str("[" + "⤓ Downloading".center(15) + "]")
            bar.set_postfix_str("(" + highlight.title[:12] + ("..." if len(highlight.title) > 12 else "") + ")")
            if not os.path.isdir(subdir):
                os.mkdir(subdir)
            highlight.download(subdir, on_item_start=on_item_start, on_item_finish=on_item_finish, on_item_error=on_item_error)
            if dump_metadata:
                bar.set_description_str("[" + "Ⓓ Dumping".center(15) + "]")
                data_dumper(subdir, "highlight", highlight.as_dict(extra=True))
            bar.update(1)
        bar.set_description_str(Fore.GREEN + "[" + "✔︎ Finished".center(15) + "]")
    except KeyboardInterrupt:
        # Most-specific handler first; re-raise so the caller can abort cleanly.
        bar.set_description_str(Fore.MAGENTA + "[" + "⌧ Interrupted".center(15) + "]")
        raise
    except Exception as e:
        exc_type, exc_value, tb = sys.exc_info()
        insta.logger.error("{}: {}".format(exc_type.__name__, exc_value))
        insta.logger.debug("".join(traceback.format_tb(tb)))
        if not debug:
            tqdm.write(Fore.RED + "✖︎ {}: {}".format(exc_type.__name__, str(e)))
        bar.set_description_str(Fore.RED + "[" + "✘ Failed".center(15) + "]")
    finally:
        bar.close()
    return downs, existings