예제 #1
0
def get_starter_videos(config, handle, api_options, search_type, query):
    """Resolve the user's query into the initial list of videos.

    Depending on ``search_type`` the query is treated as a search term, a
    video ID, a video URL or the path of a file containing video IDs. A
    query of ``"-"`` reads whitespace-separated IDs or URLs from standard
    input instead (only for the ``"id"`` and ``"url"`` search types).

    Args:
        config: Configuration mapping; ``config["number"][0]`` is passed to
            ``video_search`` for term searches.
        handle: API handle forwarded to ``video_search`` / ``video_info``.
        api_options: Extra keyword arguments forwarded to ``video_search``.
        search_type: One of ``"term"``, ``"id"``, ``"url"`` or ``"file"``.
        query: The query string, or ``"-"`` to read from standard input.

    Returns:
        Whatever ``video_search`` / ``video_info`` returns for the request.

    Raises:
        click.BadParameter: If the search type / query combination is not
            supported, or a URL carries no ``v`` query parameter.
    """
    echov(f"Starting search using query {query}.", verbose)

    def url_2_id(url):
        # The video ID is taken from the ``v`` query parameter of the URL.
        params = parse.parse_qs(parse.urlsplit(url).query)
        try:
            return params["v"][0]
        except KeyError:
            # Report a usage error instead of leaking a bare KeyError.
            raise click.BadParameter(
                f"URL '{url}' does not contain a video ID.") from None

    if query == "-":
        if search_type == "id":
            return video_info(handle, ",".join(sys.stdin.read().split()))
        if search_type == "url":
            ids = [url_2_id(url) for url in sys.stdin.read().split()]
            return video_info(handle, ",".join(ids))
    else:
        if search_type == "term":
            return video_search(handle, config["number"][0], query,
                                **api_options)
        if search_type == "id":
            return video_info(handle, query)
        if search_type == "url":
            return video_info(handle, url_2_id(query))
        if search_type == "file":
            with open(query) as f:
                # Split on whitespace exactly like the stdin branch does, so
                # line endings and blank lines never end up inside the
                # comma-separated ID list.
                return video_info(handle, ",".join(f.read().split()))

    raise click.BadParameter("Invalid search parameter.")
예제 #2
0
def set(context, option, value):
    """Sets a default option."""
    # The raw command-line string is parsed as a Python literal where
    # possible, checked against the type of the matching DEFAULT_OPTIONS
    # entry, stored in the configuration and written to the config file.
    config = context.obj["config"]
    verbose = context.obj["verbose"]
    config_path = context.obj["config"]["config_path"]

    # Fail with a usage error rather than a KeyError traceback.
    if option not in DEFAULT_OPTIONS:
        raise click.BadArgumentUsage(f"Unknown option '{option}'.")

    try:
        value = ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal: accept lowercase booleans, otherwise keep
        # the raw string as the value.
        if value == "true":
            value = True
        elif value == "false":
            value = False
    # TODO does not support List or Tuple types
    target_type = type(DEFAULT_OPTIONS[option])
    # bool is a subclass of int, so booleans must be rejected explicitly
    # when an integer is expected.
    is_bool_for_int = target_type is int and isinstance(value, bool)
    if not isinstance(value, target_type) or is_bool_for_int:
        raise click.BadArgumentUsage(
            f"Given value '{value}' is not a valid type for '{option}'. Please provide type '{target_type.__name__}'."
        )
    if target_type is int and value < 0:
        raise click.BadArgumentUsage(
            f"Given integer '{value}' is negative! Please provide a non-negative value.."
        )

    config[option] = value
    echov("The new configurations file is:", verbose)
    if verbose:
        pprint(config)
    write_config(config, config_path)
    echov("Successfully changed!")
예제 #3
0
def get(context, option):
    """Shows a default option."""
    config = context.obj["config"]
    update_config(config)

    # Warn and stop early when the option has no configured value.
    if option not in config:
        echow(f"The value of '{option}' is not set!")
        return

    echov(f"The value of '{option}' is set to '{config[option]}'.")
예제 #4
0
def config(context):
    """Shows and modifies default configurations."""
    verbose = context.obj["verbose"]

    # Both status messages are emitted through echov with the verbose flag.
    for message in ("Starting YTcrawl's config mode.",
                    "Read the following configuration:"):
        echov(message, verbose)

    # The loaded configuration is only dumped in verbose mode.
    if verbose:
        pprint(context.obj["config"])
예제 #5
0
def clear(context):
    """Clears all configurations."""
    config_path = context.obj["config"]["config_path"]

    # Ask for confirmation first; nothing is written if the user declines.
    confirmed = click.confirm(
        "Do you really want to clear the configuration file?")
    if not confirmed:
        echov("Aborted! Nothing changed.")
        return

    # Replace the stored configuration with an empty mapping.
    write_config({}, config_path)
    echov("Configuration file cleared!")
예제 #6
0
def get_handle(keys):
    """Obtain the YouTube resource handle for the given API key(s).

    Reports an error through ``echoe`` when ``keys`` is empty, then asks
    ``get_youtube_handle`` for the handle and returns it.
    """
    echov("Starting YouTube authentication.", verbose)

    # NOTE(review): presumably echoe aborts the program; if it only prints,
    # get_youtube_handle below is still called with empty keys - confirm.
    if not keys:
        echoe("""You need to provide an API key using `--api-key`
        or the configuration file in order to query YouTube's API.
        Please see README on how to obtain such a key.""")

    youtube = get_youtube_handle(keys)
    echov("API access established.", verbose)
    return youtube
예제 #7
0
def get_config(context, options):
    """Return the loaded configuration updated with command-line options.

    The configuration stored in ``context.obj["config"]`` is passed to
    ``update_config`` together with ``options`` and then returned; in
    verbose mode the resulting configuration is pretty-printed.
    """
    merged = context.obj["config"]
    echov("Updating configuration with command line options.", verbose)

    update_config(merged, options)

    echov("Done! Working with the following configuration:", verbose)
    if verbose:
        pprint(merged)
    return merged
예제 #8
0
def unset(context, option):
    """Unsets a default option."""
    settings = context.obj["config"]
    verbose = context.obj["verbose"]
    # Read the target path before the option is removed.
    config_path = context.obj["config"]["config_path"]

    # Removing an option that is not set is a no-op.
    settings.pop(option, None)

    echov("The new configurations file is:", verbose)
    if verbose:
        pprint(settings)
    write_config(settings, config_path)
    echov("Successfully written!")
예제 #9
0
def build_nodes(config, handle, api_options, starter_videos):
    """Crawl related videos breadth-first, starting from ``starter_videos``.

    Every video dict is annotated in place with ``rank`` (its position in
    the result list it came from), ``depth`` (0 for starter videos) and
    ``relatedVideos`` (the IDs of its related videos; empty once
    ``config["max_depth"]`` is reached).

    Args:
        config: Configuration mapping; reads ``max_depth``, ``number``,
            ``unique`` and, after an HTTP error, ``keys``.
        handle: API handle passed to ``related_search``.
        api_options: Extra keyword arguments forwarded to ``related_search``.
        starter_videos: Video dicts (each with a ``videoId``) to start from.

    Returns:
        The list of processed video dicts in processing order.
    """
    for rank, video in enumerate(starter_videos):
        video.update({"rank": rank, "depth": 0})
    queue = deque(starter_videos)
    processed = []
    # IDs that were already processed or are waiting in the queue. Tracking
    # queued IDs as well keeps a video that is related to several videos of
    # the same depth from being enqueued (and processed) more than once when
    # ``config["unique"]`` is set.
    seen_ids = {video["videoId"] for video in starter_videos}

    while queue:
        video = queue.popleft()
        echov(
            f"Processing video {video['videoId']} (Depth: {video['depth']}).",
            verbose,
        )
        processed.append(video)
        if video["depth"] >= config["max_depth"]:
            video["relatedVideos"] = []
            continue

        # Add children
        num_children = _get_branching(config["number"], video["depth"])
        # NOTE(review): the request is retried without an upper bound; a
        # persistent HTTP error only ends if get_youtube_handle raises.
        while True:
            try:
                children = related_search(handle, num_children,
                                          video["videoId"], **api_options)
            except HttpError as e:
                sys.tracebacklimit = 0
                echow("Http error received:")
                echow(e)
                # The API keys live in the configuration; ``api_options``
                # only carries the keyword arguments for the search calls.
                handle = get_youtube_handle(config["keys"])
            else:
                break

        video["relatedVideos"] = [child["videoId"] for child in children]
        for rank, child in enumerate(children):
            child.update({"rank": rank, "depth": video["depth"] + 1})

        if config["unique"]:
            for child in children:
                if child["videoId"] not in seen_ids:
                    seen_ids.add(child["videoId"])
                    queue.append(child)
        else:
            queue.extend(children)

    return processed
예제 #10
0
def run(context, config_path, verbose):
    # Load the configuration file and stash it, together with the verbosity
    # flag, on the context object for the other functions to read.
    echov("Reading configuration file.", verbose)
    context.obj = {}
    context.obj.update({
        "config": load_config(config_path),
        "verbose": verbose,
    })
예제 #11
0
def search(context, search_type, query, **options):
    """Searches YouTube using a specified query."""
    # Steps: merge the command-line options into the configuration, resolve
    # the query into starter videos, crawl the related videos, clean and
    # filter the collected fields, then export and/or print the result.

    # The verbosity flag is published as a module-level global because the
    # helper functions called below read it from there.
    global verbose
    verbose = context.obj["verbose"]

    config = get_config(context, options)
    validate(config)

    # Map configuration names to the API's parameter names, skipping
    # options that are not set.
    rename = {
        "region_code": "regionCode",
        "lang_code": "relevanceLanguage",
        "safe_search": "safeSearch",
    }
    api_options = {
        api_name: config[key]
        for key, api_name in rename.items() if config[key]
    }
    handle = get_handle(config["keys"])

    start_videos = get_starter_videos(config, handle, api_options, search_type,
                                      query)
    nodes = build_nodes(config, handle, api_options, start_videos)

    # Run every string field through filter_text with the configured encoding.
    for node in nodes:
        for key, value in node.items():
            if isinstance(value, str):
                node[key] = filter_text(value, encoding=config["encoding"])

    include = config["include"]
    exclude = config["exclude"]
    always_kept = ("videoId", "relatedVideos")

    def keep_field(key):
        # Without any include/exclude setting there is nothing to filter;
        # keep every field instead of dropping all of them.
        if not include and not exclude:
            return True
        if include and (key in include or key in always_kept):
            return True
        return bool(exclude) and key not in exclude

    nodes = [{
        key: value
        for key, value in node.items() if keep_field(key)
    } for node in nodes]

    # Export
    exporters = {
        "csv": ("CSV", export_to_csv),
        "sql": ("SQL", export_to_sql),
    }
    if config["output_dir"] and config["output_format"] in exporters:
        label, export = exporters[config["output_format"]]
        echov(f"Query finished! Start exporting files to {label}!", verbose)
        export(
            nodes,
            config["output_dir"],
            config["output_name"],
        )
        echov(f"Exported results to: {config['output_dir']}")

    if not config["output_dir"] or verbose:
        echov("Result:")
        for node in nodes:
            # Fields may have been removed by the include/exclude filter, so
            # read them with .get() instead of failing with a KeyError.
            indent = "    " * node.get("depth", 0)
            print(
                indent,
                f"Depth: {node.get('depth')}, Rank: {node.get('rank')}, ID: {node.get('videoId')}",
            )
            print(indent, f"           Title: {node.get('title')}")
            print(
                indent,
                "           Related Videos: {}".format(
                    node.get("relatedVideos")),
            )