Example #1
# Module-level imports and logger assumed by these examples (not shown on the
# original page):
import fnmatch
import json
import logging
import os
import time
import traceback
from xml.dom import minidom

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

def export_ids(dir, idtype="imdb", recursive=True, pattern="*.imdb", output="./kodi.csv", interactive=False):
    """
    Exports the IDs from ID files or .nfo files.

    :param dir: the directory to look for ID/.nfo files
    :type dir: str
    :param idtype: what type of IDs to extract from .nfo files (choices: 'imdb')
    :type idtype: str
    :param recursive: whether to locate files recursively
    :type recursive: bool
    :param pattern: the pattern for the ID files (glob)
    :type pattern: str
    :param output: the output CSV file to generate
    :type output: str
    :param interactive: whether to use interactive mode
    :type interactive: bool
    """

    dirs = []
    determine_dirs(dir, recursive, dirs)
    dirs.sort()
    logger.info("Dirs: %s" % ",".join(dirs))

    with open(output, "w") as csv_file:
        csv_file.write("Directory,File,ID\n")
        for d in dirs:
            logger.info("Current dir: %s" % d)
            if interactive and skip():
                if proceed():
                    continue
                else:
                    break
            processed = set()

            # ID file
            id_filenames = fnmatch.filter(os.listdir(d), pattern)
            for id_filename in id_filenames:
                id_path = os.path.join(d, id_filename)
                id = read_id(id_path)
                logger.info("ID: %s" % id)
                name = os.path.splitext(id_filename)[0]
                processed.add(name)
                write_to_csv(csv_file, d, name, id)

            # .nfo file (if not already processed)
            nfo_filenames = fnmatch.filter(os.listdir(d), "*.nfo")
            for nfo_filename in nfo_filenames:
                name = os.path.splitext(nfo_filename)[0]
                if name not in processed:
                    nfo_path = os.path.join(d, nfo_filename)
                    id = read_id_from_nfo(nfo_path, idtype)
                    logger.info("ID: %s" % id)
                    write_to_csv(csv_file, d, name, id)

            if interactive and not proceed():
                break
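
A minimal usage sketch for export_ids, assuming it lives in the same module as the helpers it calls (determine_dirs, read_id, read_id_from_nfo, write_to_csv, skip, proceed); the library path below is a placeholder:

# Hypothetical call: recursively scan a movie library for *.imdb ID files,
# fall back to *.nfo files where no ID file exists, and collect everything
# into kodi.csv.
logging.basicConfig(level=logging.INFO)
export_ids("/path/to/movies",
           idtype="imdb",
           recursive=True,
           pattern="*.imdb",
           output="./kodi.csv")
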
Example #2
def generate(dir,
             idtype="imdb",
             recursive=True,
             pattern="*.imdb",
             delay=1,
             dry_run=False,
             overwrite=False,
             language="en",
             fanart="none",
             fanart_file="folder.jpg",
             interactive=False):
    """
    Traverses the directory and generates the .nfo files.

    :param dir: the directory to traverse
    :type dir: str
    :param idtype: how to interpret the ID files (choices: 'imdb')
    :type idtype: str
    :param recursive: whether to search recursively
    :type recursive: bool
    :param pattern: the file pattern (glob) to use for identifying the files with the IDs
    :type pattern: str
    :param delay: the delay in seconds between web queries
    :type delay: int
    :param dry_run: whether to perform a 'dry run', i.e., generate the .nfo content but do not save it (only print it to stdout)
    :type dry_run: bool
    :param overwrite: whether to overwrite existing .nfo files (i.e., recreate them)
    :type overwrite: bool
    :param language: the preferred language for the titles
    :type language: str
    :param fanart: how to deal with fanart (choices: 'none', 'download', 'use-existing')
    :type fanart: str
    :param fanart_file: the fanart filename to use (when downloading or re-using existing)
    :type fanart_file: str
    :param interactive: whether to use interactive mode
    :type interactive: bool
    """

    dirs = []
    determine_dirs(dir, recursive, dirs)
    dirs.sort()
    logger.info("Dirs: %s" % ",".join(dirs))
    if interactive:
        delay = 0

    for d in dirs:
        logger.info("Current dir: %s" % d)
        id_filenames = fnmatch.filter(os.listdir(d), pattern)

        for id_filename in id_filenames:
            id_path = os.path.join(d, id_filename)
            xml_path = os.path.join(d,
                                    os.path.splitext(id_filename)[0] + ".nfo")
            logger.info("ID file: %s" % id_path)

            if not overwrite and os.path.exists(xml_path):
                logger.info(".nfo file already exists, skipping")
                continue

            id = read_id(id_path)
            logger.info("ID: %s" % id)

            if interactive and skip():
                if proceed():
                    continue
                else:
                    break

            try:
                if idtype == "imdb":
                    doc = generate_imdb(id,
                                        language=language,
                                        fanart=fanart,
                                        fanart_file=fanart_file,
                                        xml_path=xml_path)
                else:
                    logger.critical("Unhandled ID type: %s" % idtype)
                    return
                xml_str = doc.toprettyxml(indent="  ")
                if dry_run:
                    print(xml_str)
                else:
                    logger.info("Writing .nfo file: %s" % xml_path)
                    with open(xml_path, "w") as xml_file:
                        xml_file.write(xml_str)
            except Exception:
                logger.error(traceback.format_exc())

            if interactive and not proceed():
                break
            if delay > 0:
                time.sleep(delay)
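
A hedged dry-run example for generate; the directory is a placeholder, and with dry_run=True the generated .nfo XML is printed to stdout rather than written next to the ID files:

# Hypothetical dry run: fetch IMDB metadata for every *.imdb file under the
# library and print the resulting .nfo content instead of saving it.
generate("/path/to/movies",
         idtype="imdb",
         pattern="*.imdb",
         delay=1,        # one-second pause between web queries
         dry_run=True,   # print .nfo XML, do not write files
         language="en",
         fanart="none")
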
Example #3
def generate_imdb(id,
                  language="en",
                  fanart="none",
                  fanart_file="folder.jpg",
                  xml_path=None,
                  episodes=False,
                  path=None,
                  overwrite=False,
                  dry_run=False):
    """
    Generates the XML for the specified IMDB ID.

    :param id: the IMDB ID to use
    :type id: str
    :param language: the preferred language for the titles
    :type language: str
    :param fanart: how to deal with fanart (choices: 'none', 'download', 'use-existing')
    :type fanart: str
    :param fanart_file: the fanart filename to use (when downloading or re-using existing)
    :type fanart_file: str
    :param xml_path: the current nfo full file path
    :type xml_path: str
    :param episodes: whether to generate episode information as well
    :type episodes: bool
    :param path: the current directory (used for determining episode files)
    :type path: str
    :param overwrite: whether to overwrite existing .nfo files (i.e., recreate them)
    :type overwrite: bool
    :param dry_run: whether to perform a 'dry run', i.e., generate the .nfo content but do not save it (only print it to stdout)
    :type dry_run: bool
    :return: the generated XML DOM
    :rtype: minidom.Document
    """

    id = id.strip()

    # generate URL
    if id.startswith("http"):
        url = id
    else:
        url = "https://www.imdb.com/title/%s/" % id
    logger.info("IMDB URL: " + url)

    # retrieve html
    r = requests.get(url, headers={"Accept-Language": language})
    if r.status_code != 200:
        logging.critical("Failed to retrieve URL (status code %d): %s" %
                         (r.status_code, url))

    # parse html
    soup = BeautifulSoup(r.content, "html.parser")

    doc = minidom.Document()

    widget = soup.find("div", id="star-rating-widget")
    if widget is None:
        preflang_title = None
    else:
        preflang_title = widget["data-title"]

    for script in soup.find_all("script", type="application/ld+json"):
        j = json.loads(script.text)
        logger.debug(j)

        root = add_node(doc, doc, "movie")
        add_node(doc, root, "title",
                 j['name'] if preflang_title is None else preflang_title)
        add_node(doc, root, "originaltitle", j["name"])
        uniqueid = add_node(doc, root, "uniqueid",
                            j["url"].replace("/title/", "").replace("/", ""))
        uniqueid.setAttribute("type", "imdb")
        uniqueid.setAttribute("default", "true")
        if "description" in j:
            add_node(doc, root, "plot", j["description"])
            add_node(doc, root, "outline", j["description"])
        if "datePublished" in j:
            add_node(doc, root, "premiered", j["datePublished"])
        if "director" in j and "name" in j["director"]:
            add_node(doc, root, "director", j["director"]["name"])
        if "genre" in j:
            if isinstance(j["genre"], list):
                for genre in j["genre"]:
                    add_node(doc, root, "genre", genre)
            else:
                add_node(doc, root, "genre", j["genre"])
        for actor in j["actor"]:
            xactor = add_node(doc, root, "actor")
            add_node(doc, xactor, "name", actor["name"])
        if "trailer" in j and "embedUrl" in j["trailer"]:
            add_node(doc, root, "trailer",
                     "https://www.imdb.com" + j["trailer"]["embedUrl"])
        if "aggregateRating" in j and "ratingValue" in j["aggregateRating"]:
            xratings = add_node(doc, root, "ratings")
            xrating = add_node(doc, xratings, "rating")
            xrating.setAttribute("name", "imdb")
            xrating.setAttribute("max", "10")
            add_node(doc, xrating, "value",
                     str(j["aggregateRating"]["ratingValue"]))

        if fanart == "download":
            if "image" in j:
                logger.info("Downloading fanart: %s" % j["image"])
                r = requests.get(j["image"], stream=True)
                if r.status_code == 200:
                    fanart_path = os.path.join(os.path.dirname(xml_path),
                                               fanart_file)
                    with open(fanart_path, 'wb') as f:
                        for chunk in r:
                            f.write(chunk)
                    xthumb = add_node(doc, root, "thumb", fanart_file)
                    xthumb.setAttribute("aspect", "poster")
                else:
                    logger.critical(
                        "Failed to download fanart, status code: " %
                        r.status_code)
            else:
                logger.warning("No image associated, cannot download!")
        elif fanart == "use-existing":
            xthumb = add_node(doc, root, "thumb", fanart_file)
            xthumb.setAttribute("aspect", "poster")
        elif fanart == "none":
            pass
        else:
            logger.critical("Ignoring unhandled fanart type: %s" % fanart)

        if episodes:
            if has_episodes(soup):
                logger.info("Has episode data")

                # determine seasons
                url = create_episodes_url(id)
                logger.info("Default episodes URL: %s" % url)
                r = requests.get(url, headers={"Accept-Language": language})
                if r.status_code != 200:
                    logger.critical(
                        "Failed to retrieve URL (status code %d): %s" %
                        (r.status_code, url))
                    continue
                soup_ep = BeautifulSoup(r.content, "html.parser")
                seasons = extract_seasons(soup_ep)
                logger.info("Seasons: %s" % ", ".join(seasons))

                # extract episodes
                season_data = {}
                for season in seasons:
                    season_data[season] = {}
                    url = create_episodes_url(id, season=season)
                    logger.info("Season %s URL: %s" % (season, url))
                    r = requests.get(url,
                                     headers={"Accept-Language": language})
                    if r.status_code != 200:
                        logger.critical(
                            "Failed to retrieve URL (status code %d): %s" %
                            (r.status_code, url))
                        continue
                    soup_ep = BeautifulSoup(r.content, "html.parser")
                    episodes_data = extract_episodes(soup_ep, season)
                    for k in episodes_data:
                        xml = episode_to_xml(episodes_data[k])
                        season_data[season][k] = xml

                # locate files and output XML
                dirs = []
                determine_dirs(path, True, dirs)
                for d in dirs:
                    files = fnmatch.filter(os.listdir(d), "*S??E??*.*")
                    for f in files:
                        if f.endswith(".nfo"):
                            continue
                        parts = extract_season_episode(f)
                        if parts is None:
                            continue
                        s, e = parts
                        if (s in season_data) and (e in season_data[s]):
                            xml_path = os.path.join(
                                d,
                                os.path.splitext(f)[0] + ".nfo")
                            xml_str = season_data[s][e].toprettyxml(
                                indent="  ")
                            if dry_run:
                                print(xml_str)
                            elif os.path.exists(xml_path) and not overwrite:
                                continue
                            else:
                                with open(xml_path, "w") as xml_file:
                                    xml_file.write(xml_str)

            else:
                logger.info("Has no episode data")

    return doc
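
generate_imdb can also be called directly with a bare IMDB title ID; it returns the minidom document, which the caller can serialize. A minimal sketch (tt0133093 is the IMDB ID of The Matrix, used purely as an example; IMDB may additionally require a User-Agent header in practice):

# Hypothetical direct call: build the .nfo DOM for one title and print it.
doc = generate_imdb("tt0133093", language="en", fanart="none")
print(doc.toprettyxml(indent="  "))

The add_node helper used throughout is not shown on this page. A minimal sketch consistent with how it is called above (create an element, optionally attach a text child, append it to the parent, and return it); the real helper may differ:

def add_node(doc, parent, name, text=None):
    # Create the element and optionally attach a text node (sketch only).
    node = doc.createElement(name)
    if text is not None:
        node.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(node)
    return node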