Пример #1
0
def main():
    """Export the timeline of a single album to a timestamped JSON file.

    The album is looked up by ``args.album_name`` when that option is set,
    otherwise by ``args.url``. The result is written as UTF-8 JSON into the
    ``Exports`` directory, which is created when missing.

    Raises:
        SystemExit: with status 1 when neither an album name nor a URL was
            supplied on the command line.
    """
    args = parse_args()

    # arguments parsing
    if not (args.url or args.album_name):
        logger.error(
            "Not enough arguments. Use -h to see available arguments.")
        # Non-zero status so that calling scripts can detect the failure.
        raise SystemExit(1)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting album timeline.")
        if args.album_name:
            album_timeline = network.get_album_timeline(name=args.album_name)
        else:
            album_timeline = network.get_album_timeline(url=args.url)
    finally:
        # Release the browser even when the extraction fails; quit() still
        # runs if close() itself raises.
        try:
            network.browser.close()
        finally:
            network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    if args.album_name:
        export_name = args.album_name
    else:
        # Join the two URL segments that precede the final one.
        export_name = " - ".join(args.url.split("/")[-3:-1])
    # An album name may contain path separators (e.g. "AC/DC"), which would
    # otherwise point the export at a directory that does not exist.
    export_name = export_name.replace("/", "_").replace("\\", "_")

    export_filename = f"{export_directory}/{int(time.time())}_export_album_timeline_{export_name}.json"
    logger.info("Exporting results to %s.", export_filename)
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be explicit instead of depending on the platform locale.
    with open(export_filename, "w", encoding="utf-8") as f:
        json.dump(album_timeline, f, indent=4, ensure_ascii=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
Пример #2
0
def _read_list_file(path):
    """Return the stripped, non-empty, non-comment lines of the file at *path*.

    A line is a comment when its first non-blank character is ``#``.
    """
    with open(path) as f:
        stripped_lines = (line.strip() for line in f)
        return [
            line for line in stripped_lines
            if line and not line.startswith("#")
        ]


def main():
    """Export artist information to a timestamped tab-separated CSV file.

    Artists are selected either by name (``args.artist``, comma separated, or
    one per line in ``args.file_artist``) or by URL (``args.url`` /
    ``args.file_url``). A file option overrides its inline counterpart, and
    names take precedence over URLs when both are given.

    Raises:
        SystemExit: with status 1 when no selection option was given, when an
            input file cannot be read, or when the options contain no usable
            entry.
    """
    args = parse_args()

    # arguments parsing
    if not any([args.url, args.artist, args.file_url, args.file_artist]):
        logger.error("Not enough arguments. Use -h to see available arguments.")
        raise SystemExit(1)
    list_urls = None
    list_artists = None
    if args.url:
        list_urls = [x.strip() for x in args.url.split(",") if x.strip()]
        logger.debug("Option url found, list_urls : %s.", list_urls)
    if args.file_url:
        try:
            list_urls = _read_list_file(args.file_url)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_url found, list_urls : %s.", list_urls)
    if args.artist:
        list_artists = [x.strip() for x in args.artist.split(",") if x.strip()]
        logger.debug("Option artist found, list_artists : %s.", list_artists)
    if args.file_artist:
        try:
            list_artists = _read_list_file(args.file_artist)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_artist found, list_artists : %s.", list_artists)

    # An option such as "," or a file holding only comments parses to an empty
    # list; stop here rather than starting a browser with nothing to extract.
    if not (list_artists or list_urls):
        logger.error("No artist name or url left after parsing the arguments.")
        raise SystemExit(1)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting artist infos.")
        if list_artists:
            list_artists_infos = network.get_artists_infos(names=list_artists)
        else:
            logger.debug(list_urls)
            list_artists_infos = network.get_artists_infos(urls=list_urls)
    finally:
        # Release the browser even when the extraction fails; quit() still
        # runs if close() itself raises.
        try:
            network.browser.close()
        finally:
            network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    export_filename = f"{export_directory}/{int(time.time())}_export_artist.csv"

    logger.info("Exporting results to %s.", export_filename)
    df = pd.DataFrame(list_artists_infos)
    df.to_csv(export_filename, sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
Пример #3
0
def main():
    """Export the rows of a chart to a timestamped tab-separated CSV file.

    When ``args.url`` is not given, a ``RymUrl`` is built from the
    ``everything``, ``year``, ``genre`` and ``country`` options, and the same
    options are appended to the export file name. Otherwise ``args.url`` is
    used as is. At most ``args.page`` is passed on as ``max_page``.
    """
    args = parse_args()
    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    if not args.url:
        url = RymUrl.RymUrl()
        export_filename = f"{export_directory}/{int(time.time())}_export_chart"
        logger.debug("rym_url : %s.", url)

        if args.everything:
            export_filename += "_everything"
            url.url_part_type = "/release"
        else:
            export_filename += "_album"
        if args.year:
            export_filename += f"_{args.year}"
            url.url_part_year = f"/{args.year}"
        if args.genre:
            export_filename += f"_{args.genre}"
            url.url_part_genres = f"/g:{args.genre}"
        if args.country:
            export_filename += f"_{args.country}"
            url.url_part_origin_countries = f"/loc:{args.country}"
    else:
        url = args.url
        export_filename = f"{export_directory}/{int(time.time())}_export_url"
    export_filename += ".csv"

    logger.debug("completed rym_url : %s.", url)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting infos from the chart.")
        list_rows = network.get_chart_infos(url, max_page=args.page)
    finally:
        # Release the browser even when the extraction fails; quit() still
        # runs if close() itself raises.
        try:
            network.browser.close()
        finally:
            network.browser.quit()

    columns = [
        "Rank",
        "Artist",
        "Album",
        "Date",
        "Genres",
        "RYM Rating",
        "Ratings",
        "Reviews",
    ]

    df = pd.DataFrame(list_rows)
    missing_columns = [name for name in columns if name not in df.columns]
    if missing_columns:
        # Happens notably when the chart returned no row at all; plain
        # df[columns] would raise KeyError in that case.
        logger.warning(
            "Columns missing from the chart data, exported empty : %s.",
            missing_columns,
        )
    # Select and order the exported columns; absent ones are filled with NaN.
    df = df.reindex(columns=columns)
    logger.info("Exporting results to %s.", export_filename)
    df.to_csv(export_filename, sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
Пример #4
0
def _read_list_file(path):
    """Return the stripped, non-empty, non-comment lines of the file at *path*.

    A line is a comment when its first non-blank character is ``#``.
    """
    with open(path) as f:
        stripped_lines = (line.strip() for line in f)
        return [
            line for line in stripped_lines
            if line and not line.startswith("#")
        ]


def main():
    """Export artist discographies to a timestamped tab-separated CSV file.

    Artists are selected either by name (``args.artist``, comma separated, or
    one per line in ``args.file_artist``) or by URL (``args.url`` /
    ``args.file_url``). A file option overrides its inline counterpart, and
    names take precedence over URLs when both are given.
    ``args.complementary_infos`` is forwarded unchanged to the scraper.

    Raises:
        SystemExit: with status 1 when no selection option was given, when an
            input file cannot be read, or when the options contain no usable
            entry.
    """
    args = parse_args()

    # arguments parsing
    if not any([args.url, args.artist, args.file_url, args.file_artist]):
        logger.error("Not enough arguments. Use -h to see available arguments.")
        raise SystemExit(1)
    list_artists = None
    list_urls = None
    if args.url:
        list_urls = [x.strip() for x in args.url.split(",") if x.strip()]
        logger.debug("Option url found, list_urls : %s.", list_urls)
    if args.file_url:
        try:
            list_urls = _read_list_file(args.file_url)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_url found, list_urls : %s.", list_urls)
    if args.artist:
        list_artists = [x.strip() for x in args.artist.split(",") if x.strip()]
        logger.debug("Option artist found, list_artists : %s.", list_artists)
    if args.file_artist:
        try:
            list_artists = _read_list_file(args.file_artist)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_artist found, list_artists : %s.", list_artists)

    # An option such as "," or a file holding only comments parses to an empty
    # list; stop here rather than starting a browser with nothing to extract.
    if not (list_artists or list_urls):
        logger.error("No artist name or url left after parsing the arguments.")
        raise SystemExit(1)

    # starting selenium browser
    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting artist infos.")
        if list_artists:
            list_artists_disco = network.get_discographies_infos(
                names=list_artists, complementary_infos=args.complementary_infos
            )
        else:
            logger.debug(list_urls)
            list_artists_disco = network.get_discographies_infos(
                urls=list_urls, complementary_infos=args.complementary_infos
            )
    finally:
        # Release the browser even when the extraction fails; quit() still
        # runs if close() itself raises.
        try:
            network.browser.close()
        finally:
            network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    export_filename = f"{export_directory}/{int(time.time())}_export_discography.csv"

    # Columns are exported in the order produced by the scraper. An explicit
    # ordering was drafted but left disabled: Artist, Name, URL, Category,
    # Type, Year, Date, Average Rating, Ratings, Reviews, Genres, Language,
    # Descriptors, Recorded.
    df = pd.DataFrame.from_records(list_artists_disco)
    logger.info("Exporting results to %s.", export_filename)
    df.to_csv(export_filename, sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
Пример #5
0
def network():
    """Yield a headless ``rymscraper.RymNetwork`` and shut its browser down after use.

    NOTE(review): no decorator is visible on this generator; its
    setup / yield / teardown shape suggests it is meant to be wrapped as a
    fixture or context manager -- confirm against the original file.

    Yields:
        The ``RymNetwork`` instance created with ``headless=True``.
    """
    rym_network = rymscraper.RymNetwork(headless=True)
    try:
        yield rym_network
    finally:
        # Teardown must also run when an exception is thrown into the
        # generator or it is closed early; quit() still runs if close() raises.
        try:
            rym_network.browser.close()
        finally:
            rym_network.browser.quit()