def main():
    """Scrape the timeline of one album and export it as a JSON file.

    The album is selected by ``args.album_name`` when given, otherwise by
    ``args.url``. The result is written to
    ``Exports/<unix time>_export_album_timeline_<name>.json``.

    Raises:
        SystemExit: With status 1 when neither an album name nor a URL
            was supplied on the command line.
    """
    args = parse_args()

    if not (args.url or args.album_name):
        logger.error("Not enough arguments. Use -h to see available arguments.")
        # Signal the failure to the calling shell instead of exiting with 0.
        raise SystemExit(1)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting album timeline.")
        if args.album_name:
            album_timeline = network.get_album_timeline(name=args.album_name)
        else:
            album_timeline = network.get_album_timeline(url=args.url)
    finally:
        # Always release the browser, even when the scraping step fails.
        network.browser.close()
        network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    if args.album_name:
        export_name = args.album_name
    else:
        export_name = " - ".join(args.url.split("/")[-3:-1])
    # A path separator in the name (e.g. "AC/DC") would point the export
    # into a directory that does not exist.
    export_name = export_name.replace("/", "_").replace("\\", "_")

    export_filename = (
        f"{export_directory}/{int(time.time())}"
        f"_export_album_timeline_{export_name}.json"
    )

    logger.info("Exporting results to %s.", export_filename)
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned rather than left to the platform default.
    with open(export_filename, "w", encoding="utf-8") as f:
        json.dump(album_timeline, f, indent=4, ensure_ascii=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
def main():
    """Scrape artist information and export it as a tab-separated CSV file.

    Artists are selected by name (``args.artist`` / ``args.file_artist``) or
    by URL (``args.url`` / ``args.file_url``). A file option replaces the
    list given by the matching command-line option, and names take
    precedence over URLs when both are present. The result is written to
    ``Exports/<unix time>_export_artist.csv``.

    Raises:
        SystemExit: With status 1 when no selection option was given, when
            a list file cannot be read, or when the options yield no entry.
    """
    args = parse_args()

    if not any([args.url, args.artist, args.file_url, args.file_artist]):
        logger.error("Not enough arguments. Use -h to see available arguments.")
        raise SystemExit(1)

    list_urls = None
    list_artists = None

    if args.url:
        list_urls = _split_csv_arg(args.url)
        logger.debug("Option url found, list_urls : %s.", list_urls)

    if args.file_url:
        try:
            list_urls = _read_list_file(args.file_url)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_url found, list_urls : %s.", list_urls)

    if args.artist:
        list_artists = _split_csv_arg(args.artist)
        logger.debug("Option artist found, list_artists : %s.", list_artists)

    if args.file_artist:
        try:
            list_artists = _read_list_file(args.file_artist)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_artist found, list_artists : %s.", list_artists)

    # An option can be present yet contain no usable entry (e.g. "," or a
    # file holding only comments); stop before starting the browser.
    if not (list_artists or list_urls):
        logger.error("No artist name or url to process.")
        raise SystemExit(1)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting artist infos.")
        if list_artists:
            list_artists_infos = network.get_artists_infos(names=list_artists)
        else:
            logger.debug(list_urls)
            list_artists_infos = network.get_artists_infos(urls=list_urls)
    finally:
        # Always release the browser, even when the scraping step fails.
        network.browser.close()
        network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)
    export_filename = f"{export_directory}/{int(time.time())}_export_artist"

    logger.info("Exporting results to %s.", export_filename + ".csv")
    df = pd.DataFrame(list_artists_infos)
    df.to_csv(export_filename + ".csv", sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)


def _split_csv_arg(value):
    """Split a comma-separated option value into stripped, non-empty items."""
    return [item.strip() for item in value.split(",") if item.strip()]


def _read_list_file(path):
    """Return the stripped lines of *path*, skipping blanks and ``#`` comments."""
    with open(path) as f:
        stripped_lines = (line.strip() for line in f)
        # Test the stripped line so that indented comments are skipped too.
        return [
            line for line in stripped_lines if line and not line.startswith("#")
        ]
def main():
    """Scrape a chart and export its rows as a tab-separated CSV file.

    When ``args.url`` is given it is passed to the scraper as is. Otherwise
    a ``RymUrl`` is built from the ``everything``, ``year``, ``genre`` and
    ``country`` options, each of which also contributes a suffix to the
    export file name. At most ``args.page`` is forwarded as ``max_page``.
    The result is written under the ``Exports`` directory.
    """
    args = parse_args()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)

    if args.url:
        url = args.url
        export_filename = f"{export_directory}/{int(time.time())}_export_url"
    else:
        url = RymUrl.RymUrl()
        export_filename = f"{export_directory}/{int(time.time())}_export_chart"
        logger.debug("rym_url : %s.", url)

        if args.everything:
            export_filename += "_everything"
            url.url_part_type = "/release"
        else:
            export_filename += "_album"
        if args.year:
            export_filename += f"_{args.year}"
            url.url_part_year = f"/{args.year}"
        if args.genre:
            export_filename += f"_{args.genre}"
            url.url_part_genres = f"/g:{args.genre}"
        if args.country:
            export_filename += f"_{args.country}"
            url.url_part_origin_countries = f"/loc:{args.country}"

    logger.debug("completed rym_url : %s.", url)

    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting infos from the chart.")
        list_rows = network.get_chart_infos(url, max_page=args.page)
    finally:
        # Always release the browser, even when the scraping step fails.
        network.browser.close()
        network.browser.quit()

    columns = [
        "Rank",
        "Artist",
        "Album",
        "Date",
        "Genres",
        "RYM Rating",
        "Ratings",
        "Reviews",
    ]
    df = pd.DataFrame(list_rows)
    if df.empty:
        # Selecting columns on a frame built from no rows raises KeyError;
        # export a header-only file instead.
        logger.warning("No row extracted from the chart.")
        df = pd.DataFrame(columns=columns)
    else:
        # Keep only the expected columns, in this order.
        df = df[columns]

    logger.info("Exporting results to %s.", export_filename + ".csv")
    df.to_csv(export_filename + ".csv", sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)
def main():
    """Scrape artist discographies and export them as a tab-separated CSV file.

    Artists are selected by name (``args.artist`` / ``args.file_artist``) or
    by URL (``args.url`` / ``args.file_url``). A file option replaces the
    list given by the matching command-line option, and names take
    precedence over URLs when both are present.
    ``args.complementary_infos`` is forwarded to the scraper unchanged. The
    result is written to ``Exports/<unix time>_export_discography.csv``.

    Raises:
        SystemExit: With status 1 when no selection option was given, when
            a list file cannot be read, or when the options yield no entry.
    """
    args = parse_args()

    if not any([args.url, args.artist, args.file_url, args.file_artist]):
        logger.error("Not enough arguments. Use -h to see available arguments.")
        raise SystemExit(1)

    list_artists = None
    list_urls = None

    if args.url:
        list_urls = _split_csv_arg(args.url)
        logger.debug("Option url found, list_urls : %s.", list_urls)

    if args.file_url:
        try:
            list_urls = _read_list_file(args.file_url)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_url found, list_urls : %s.", list_urls)

    if args.artist:
        list_artists = _split_csv_arg(args.artist)
        logger.debug("Option artist found, list_artists : %s.", list_artists)

    if args.file_artist:
        try:
            list_artists = _read_list_file(args.file_artist)
        except (OSError, UnicodeError) as e:
            logger.error(e)
            raise SystemExit(1)
        logger.debug("Option file_artist found, list_artists : %s.", list_artists)

    # An option can be present yet contain no usable entry (e.g. "," or a
    # file holding only comments); stop before starting the browser.
    if not (list_artists or list_urls):
        logger.error("No artist name or url to process.")
        raise SystemExit(1)

    # Start the selenium browser.
    network = rymscraper.RymNetwork(headless=args.no_headless)
    try:
        logger.info("Extracting artist infos.")
        if list_artists:
            list_artists_disco = network.get_discographies_infos(
                names=list_artists,
                complementary_infos=args.complementary_infos,
            )
        else:
            logger.debug(list_urls)
            list_artists_disco = network.get_discographies_infos(
                urls=list_urls,
                complementary_infos=args.complementary_infos,
            )
    finally:
        # Always release the browser, even when the scraping step fails.
        network.browser.close()
        network.browser.quit()

    export_directory = "Exports"
    Path(export_directory).mkdir(parents=True, exist_ok=True)
    export_filename = f"{export_directory}/{int(time.time())}_export_discography"

    # Columns are exported in the order produced by from_records; no explicit
    # reordering is applied.
    df = pd.DataFrame.from_records(list_artists_disco)

    logger.info("Exporting results to %s.", export_filename + ".csv")
    df.to_csv(export_filename + ".csv", sep="\t", index=False)

    logger.debug("Runtime : %.2f seconds.", time.time() - temps_debut)


def _split_csv_arg(value):
    """Split a comma-separated option value into stripped, non-empty items."""
    return [item.strip() for item in value.split(",") if item.strip()]


def _read_list_file(path):
    """Return the stripped lines of *path*, skipping blanks and ``#`` comments."""
    with open(path) as f:
        stripped_lines = (line.strip() for line in f)
        # Test the stripped line so that indented comments are skipped too.
        return [
            line for line in stripped_lines if line and not line.startswith("#")
        ]
def network():
    """Yield a headless ``RymNetwork`` and shut its browser down afterwards.

    NOTE(review): presumably consumed as a test fixture; no decorator is
    visible on this definition, so confirm how it is registered.

    Yields:
        The ``rymscraper.RymNetwork`` instance created with ``headless=True``.
    """
    rym_network = rymscraper.RymNetwork(headless=True)
    try:
        yield rym_network
    finally:
        # Runs on normal resumption and also when the generator is closed or
        # an exception is thrown in at the yield, so the browser is released.
        rym_network.browser.close()
        rym_network.browser.quit()