def get_starter_videos(config, handle, api_options, search_type, query):
    """Resolve the user's query into the initial set of videos.

    Args:
        config: Configuration mapping; only ``config["number"][0]`` is read
            here (forwarded to ``video_search`` for term searches).
        handle: YouTube API handle forwarded to the lookup helpers.
        api_options: Extra keyword arguments forwarded to ``video_search``.
        search_type: One of ``"term"``, ``"id"``, ``"url"`` or ``"file"``.
        query: Search term, video id, video URL or file path. The value
            ``"-"`` means "read whitespace-separated ids/URLs from stdin"
            and is only handled for the ``"id"`` and ``"url"`` types.

    Returns:
        Whatever ``video_info`` / ``video_search`` return for the query.

    Raises:
        click.BadParameter: If the ``search_type``/``query`` combination is
            not supported, or a URL carries no ``v`` query parameter.
    """
    echov(f"Starting search using query {query}.", verbose)

    def url_2_id(url):
        # The video id is taken from the "v" query parameter of the URL.
        params = parse.parse_qs(parse.urlsplit(url).query)
        try:
            return params["v"][0]
        except KeyError:
            # Report a usage error instead of leaking a bare KeyError.
            raise click.BadParameter(
                f"URL '{url}' does not contain a video id ('v' parameter)."
            ) from None

    if query == "-":
        if search_type == "id":
            return video_info(handle, ",".join(sys.stdin.read().split()))
        if search_type == "url":
            ids = [url_2_id(url) for url in sys.stdin.read().split()]
            return video_info(handle, ",".join(ids))
    else:
        if search_type == "term":
            return video_search(handle, config["number"][0], query,
                                **api_options)
        if search_type == "id":
            return video_info(handle, query)
        if search_type == "url":
            return video_info(handle, url_2_id(query))
        if search_type == "file":
            with open(query) as f:
                # Split on whitespace (as the stdin path does) so that line
                # endings do not end up inside the comma-separated id list.
                ids = f.read().split()
            return video_info(handle, ",".join(ids))

    raise click.BadParameter("Invalid search parameter.")
def set(context, option, value):
    """ Sets a default option."""
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Parses `value` as a Python literal (falling back to the raw string,
    # with "true"/"false" mapped to booleans), checks it against the type of
    # the matching entry in DEFAULT_OPTIONS, stores it in the configuration
    # and writes the configuration to `config_path`.
    #
    # Raises click.BadArgumentUsage for an unknown option, a value of the
    # wrong type, or a negative integer.
    config = context.obj["config"]
    verbose = context.obj["verbose"]
    config_path = context.obj["config"]["config_path"]

    try:
        value = ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal: keep the raw string, but accept the
        # lower-case spellings of the booleans. Only the exceptions
        # literal_eval is documented to raise are caught, so Ctrl-C and
        # SystemExit are no longer swallowed.
        if value == "true":
            value = True
        elif value == "false":
            value = False

    # TODO does not support List or Tuple types
    try:
        target_type = type(DEFAULT_OPTIONS[option])
    except KeyError:
        raise click.BadArgumentUsage(f"Unknown option '{option}'.") from None

    type_ok = isinstance(value, target_type)
    if isinstance(value, bool) and target_type is not bool:
        # bool is a subclass of int, so True/False would otherwise be
        # accepted for integer options.
        type_ok = False

    if not type_ok:
        raise click.BadArgumentUsage(
            f"Given value '{value}' is not a valid type for '{option}'. Please provide type '{target_type.__name__}'."
        )
    if target_type is int and value < 0:
        raise click.BadArgumentUsage(
            f"Given integer '{value}' is negative! Please provide a non-negative value.."
        )

    config[option] = value
    echov("The new configurations file is:", verbose)
    if verbose:
        pprint(config)
    write_config(config, config_path)
    echov("Successfully changed!")
def get(context, option):
    """ Shows a default option."""
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Passes the context's configuration through update_config() and then
    # reports the value stored under `option`, or warns if it is absent.
    settings = context.obj["config"]
    update_config(settings)

    if option not in settings:
        echow(f"The value of '{option}' is not set!")
        return

    echov(f"The value of '{option}' is set to '{settings[option]}'.")
def config(context):
    """ Shows and modifies default configurations. """
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Only emits diagnostics: two verbose-gated messages, followed by a
    # pretty-printed dump of the loaded configuration when verbose is on.
    is_verbose = context.obj["verbose"]

    for message in (
        "Starting YTcrawl's config mode.",
        "Read the following configuration:",
    ):
        echov(message, is_verbose)

    if is_verbose:
        pprint(context.obj["config"])
def clear(context):
    """ Clears all configurations. """
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Asks for confirmation and, if given, writes an empty configuration to
    # the path stored under config["config_path"].
    path = context.obj["config"]["config_path"]

    confirmed = click.confirm(
        "Do you really want to clear the configuration file?")
    if not confirmed:
        echov("Aborted! Nothing changed.")
        return

    # Erase the content of the configuration file.
    write_config({}, path)
    echov("Configuration file cleared!")
def get_handle(keys):
    """Create the YouTube API handle for the given API key(s).

    Args:
        keys: API key(s), passed unchanged to ``get_youtube_handle``.

    Returns:
        The object returned by ``get_youtube_handle(keys)``.

    When ``keys`` is falsy an error is reported through ``echoe`` before
    the handle is requested.
    NOTE(review): whether ``echoe`` aborts the program is not visible here;
    if it returns, ``get_youtube_handle`` is still called with the empty
    ``keys`` -- confirm.
    """
    echov("Starting YouTube authentication.", verbose)

    if not keys:
        # NOTE(review): message reproduced as the single line visible in
        # this view of the file.
        missing_key_message = (
            "You need to provide an API key using `--api-key` or the "
            "configuration file in order to query YouTube's API. "
            "Please see README on how to obtain such a key."
        )
        echoe(missing_key_message)

    youtube = get_youtube_handle(keys)
    echov("API access established.", verbose)
    return youtube
def get_config(context, options):
    """Combine the context's configuration with command-line options.

    Args:
        context: Object whose ``obj["config"]`` holds the configuration.
        options: Command-line options, handed to ``update_config``.

    Returns:
        The same configuration object taken from the context, after
        ``update_config(config, options)`` has been applied to it
        (presumably an in-place merge -- confirm against ``update_config``).
    """
    merged = context.obj["config"]

    echov("Updating configuration with command line options.", verbose)
    update_config(merged, options)

    echov("Done! Working with the following configuration:", verbose)
    if verbose:
        pprint(merged)

    return merged
def unset(context, option):
    """ Unsets a default option."""
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Removes `option` from the configuration if it is present, then writes
    # the configuration back to the path stored under "config_path".
    state = context.obj
    settings = state["config"]
    is_verbose = state["verbose"]
    # Read the target path before removing anything from the mapping.
    path = settings["config_path"]

    # A missing option is not an error; there is simply nothing to remove.
    settings.pop(option, None)

    echov("The new configurations file is:", is_verbose)
    if is_verbose:
        pprint(settings)

    write_config(settings, path)
    echov("Successfully written!")
def build_nodes(config, handle, api_options, starter_videos):
    """Crawl related videos breadth-first, starting from ``starter_videos``.

    Every video dict is annotated in place with ``"rank"`` (its position in
    the result list it came from), ``"depth"`` (0 for starter videos) and
    ``"relatedVideos"`` (ids of its children; empty once
    ``config["max_depth"]`` is reached).

    Args:
        config: Configuration mapping; reads ``"max_depth"``, ``"number"``,
            ``"unique"`` and ``"keys"``.
        handle: YouTube API handle passed to ``related_search``.
        api_options: Extra keyword arguments forwarded to ``related_search``.
        starter_videos: Iterable of video dicts, each with a ``"videoId"``.

    Returns:
        List of all processed video dicts, in processing order.
    """
    # Materialise once: the videos are iterated here and again when seeding
    # the queue, which would silently yield nothing for a one-shot iterator.
    starter_videos = list(starter_videos)
    for rank, video in enumerate(starter_videos):
        video.update({"rank": rank, "depth": 0})

    queue = deque(starter_videos)
    processed = []
    # Ids that were already processed OR are still waiting in the queue.
    # Tracking queued ids too keeps a video from being enqueued (and later
    # processed) a second time while its first occurrence is still pending.
    seen_ids = {video["videoId"] for video in starter_videos}

    while queue:
        video = queue.popleft()
        echov(
            f"Processing video {video['videoId']} (Depth: {video['depth']}).",
            verbose,
        )
        processed.append(video)

        if video["depth"] >= config["max_depth"]:
            video["relatedVideos"] = []
            continue

        # Add children
        num_children = _get_branching(config["number"], video["depth"])
        while True:
            try:
                children = related_search(handle, num_children,
                                          video["videoId"], **api_options)
            except HttpError as e:
                sys.tracebacklimit = 0
                echow("Http error received:")
                echow(e)
                # The API keys live in the configuration; `api_options` only
                # carries search parameters and has no "keys" entry.
                # NOTE(review): this retries indefinitely and relies on
                # get_youtube_handle to raise/exit once no usable key is
                # left -- confirm.
                handle = get_youtube_handle(config["keys"])
            else:
                break

        video["relatedVideos"] = [child["videoId"] for child in children]
        for rank, child in enumerate(children):
            child.update({"rank": rank, "depth": video["depth"] + 1})

        if config["unique"]:
            for child in children:
                child_id = child["videoId"]
                if child_id not in seen_ids:
                    seen_ids.add(child_id)
                    queue.append(child)
        else:
            queue.extend(children)

    return processed
def run(context, config_path, verbose):
    # Seeds the shared context object with the loaded configuration and the
    # verbosity flag; the other functions in this file read both back from
    # context.obj.
    # NOTE(review): deliberately documented with comments rather than a
    # docstring -- a docstring would presumably show up as CLI help text.
    echov("Reading configuration file.", verbose)

    # Install the (initially empty) dict first so that context.obj is set
    # even if loading the configuration fails.
    context.obj = {}
    context.obj.update(config=load_config(config_path), verbose=verbose)
def search(context, search_type, query, **options):
    """Searches YouTube using a specified query."""
    # NOTE(review): docstring kept verbatim -- presumably surfaced as CLI
    # help text; confirm before rewording.
    #
    # Pipeline: merge and validate the configuration, build the API options,
    # obtain a handle, resolve the starter videos, crawl related videos,
    # sanitise and filter the resulting nodes, then export and/or print.

    # The module-level `verbose` is read by the helper functions.
    global verbose
    verbose = context.obj["verbose"]

    config = get_config(context, options)
    validate(config)

    # Configuration key -> parameter name expected by the API helpers.
    rename = {
        "region_code": "regionCode",
        "lang_code": "relevanceLanguage",
        "safe_search": "safeSearch",
    }
    api_options = {
        api_name: config[key]
        for key, api_name in rename.items()
        if config[key]
    }

    handle = get_handle(config["keys"])
    start_videos = get_starter_videos(config, handle, api_options,
                                      search_type, query)
    nodes = build_nodes(config, handle, api_options, start_videos)

    # Sanitise every string field (only existing keys are reassigned, so
    # iterating the dict while doing so is safe).
    for node in nodes:
        for key, value in node.items():
            if isinstance(value, str):
                node[key] = filter_text(value, encoding=config["encoding"])

    include = config["include"]
    exclude = config["exclude"]
    always_kept = ("videoId", "relatedVideos")

    def keep(key):
        # With no include/exclude filter configured, keep every field
        # instead of dropping all of them.
        if not include and not exclude:
            return True
        return bool(
            include and (key in include or key in always_kept)
            or exclude and key not in exclude
        )

    nodes = [
        {key: value for key, value in node.items() if keep(key)}
        for node in nodes
    ]

    # Export
    output_dir = config["output_dir"]
    exporters = {
        "csv": ("CSV", export_to_csv),
        "sql": ("SQL", export_to_sql),
    }
    if output_dir and config["output_format"] in exporters:
        label, export = exporters[config["output_format"]]
        echov(f"Query finished! Start exporting files to {label}!", verbose)
        export(
            nodes,
            output_dir,
            config["output_name"],
        )
        echov("Exported results to: " + output_dir)

    if not output_dir or verbose:
        echov("Result:")
        for node in nodes:
            # Fields may have been removed by the include/exclude filter, so
            # read them defensively rather than failing with a KeyError.
            depth = node.get("depth")
            indent = " " * (depth or 0)
            print(
                indent,
                f"Depth: {depth}, Rank: {node.get('rank')}, ID: {node.get('videoId')}",
            )
            print(indent, f" Title: {node.get('title')}")
            print(
                indent,
                " Related Videos: {}".format(node.get("relatedVideos")),
            )