Пример #1
0
def main():
    """Drive one scrape run: build search criteria from CLI args, then
    log in and perform the download/search inside a single HTTP session.
    """
    cli_args = sys.argv[1:]
    search_criteria = input.criteria()
    input.run(cli_args, search_criteria)

    # Separate criteria object pinned to a specific department.
    dept_criteria = input.criteria()
    dept_criteria.data["department"] = "asd-CHEM"

    with requests.Session() as session:
        scraper.login(session)
        scraper.download(session.get(GOLD_SEARCH_URL),
                         DEFAULT_GOLD_FILE_PATH, "search")
        scraper.post_search(search_criteria, session, "chem3")
        html_extraction.parse_to_file("chem3", pretty=True)
Пример #2
0
def new_user_registration():
    """Two-step registration endpoint for the chat bot.

    GET ?key=<pk>: if a pending-registration marker ("W" + key) exists in
    the Mongo collection, render the registration form pre-filled with
    the key; otherwise answer '404'.

    POST: verify the submitted credentials via scraper.login(); on
    success store the user document (password encrypted with ``f`` —
    presumably a Fernet instance, TODO confirm) and delete the pending
    marker.

    NOTE(review): fb_id comes straight from the form and is interpolated
    into the returned HTML unescaped — potential XSS; consider escaping.
    """

    if request.method == 'GET':
        pk = request.args.get('key')

        # "W"-prefixed ids mark users who started but have not finished
        # registration.
        if collection.count_documents({"_id": "W" + str(pk)}) > 0:
            form = RegisterForm(fb_id=pk)
            return render_template('register.html', form=form)
        else:
            return '404'

    else:
        fb_id = request.form.get('fb_id')
        gla_id = request.form.get('gla_id')
        gla_pass = request.form.get('gla_pass')
        # scraper.login() result codes (per the branches below):
        # 2 = wrong credentials, 3 = other failure; anything else is
        # treated as success.
        loginResult = scraper.login(gla_id, gla_pass)

        if loginResult == 2:
            return '<h1> Wrong credentials. <a href="{}/register?key={}">Try again.</a></h1>'.format(
                app_url, fb_id)
        elif loginResult == 3:
            return '<h1> Something went wrong. <a href="{}/register?key={}">Try again.</a></h1>'.format(
                app_url, fb_id)

        # Persist the verified user and clear the pending marker.
        collection.insert_one({
            "_id": fb_id,
            "guid": gla_id,
            "thing": f.encrypt(gla_pass.encode()),
            "loggedIn": 1
        })
        collection.delete_one({"_id": "W" + fb_id})
        return '<h1> Login successful! You can now close this page and chat to the bot. </h1>'
Пример #3
0
def register():
    """Registration endpoint backed by redis.

    GET ?key=<uid>: render the registration form if an 'IN_REG:<uid>'
    marker exists in redis, else answer '404'.

    POST: validate the submitted credentials with scraper.login(); on
    success remove the in-progress marker and persist the credentials
    under the uid.

    NOTE(review): the password is stored in redis in plain text (see the
    json.dumps below) — consider encrypting it at rest.
    """
    if request.method == 'GET':
        key = request.args.get('key')
        app.logger.info('uid:{} requested registration'.format(key))
        if r.exists('IN_REG:' + key):
            app.logger.info('uid:{} is undergoing registration'.format(key))
            form = RegisterForm(uid=key)
            return render_template('register.html', form=form)
        else:
            app.logger.info(
                'uid:{} expired/invalid registration key'.format(key))
            return '404'
    else:
        regno = request.form.get('regno')
        password = request.form.get('password')
        uid = request.form.get('uid')
        # scraper.login() returns None on bad credentials.
        if scraper.login(regno, password) is None:
            app.logger.info('uid:{} provided wrong credentials'.format(uid))
            return '<h1> Wrong credentials </h1>'

        app.logger.info('uid:{} has registered'.format(uid))
        r.delete('IN_REG:' + uid)
        r.set(uid, json.dumps({'regno': regno, 'password': password}))
        return '<h1> Registration complete </h1>'
Пример #4
0
import urlparse
import xbmcplugin
import xbmcgui
import xbmc

# Kodi plugin boilerplate: the runtime invokes this script with
# argv[0] = plugin base URL, argv[1] = handle, argv[2] = "?query".
addon_url = sys.argv[0]
addon_handle = int(sys.argv[1])
# NOTE(review): `sys` is used here and `urllib` below, but only
# urlparse/xbmc* are imported in this view — presumably imported
# elsewhere, otherwise this raises NameError. TODO confirm.
args = urlparse.parse_qs(sys.argv[2][1:])
page = args.get("page", [None])[0]


def build_url(query):
    """Return this add-on's plugin URL with *query* encoded as GET
    parameters.

    Bugfix: ``urllib`` is never imported at module level (only
    ``urlparse`` is), so calling this raised NameError — import it
    locally here (Python 2 stdlib, provides urlencode).
    """
    import urllib
    return addon_url + '?' + urllib.urlencode(query)


def get_videos():
    """Populate the Kodi directory listing from the scraper's videos."""
    for entry in scraper.list_videos():
        item_url = build_url({"page": "resolve", "url": entry["url"]})
        item = xbmcgui.ListItem(entry["label"], iconImage="DefaultVideo.png")
        xbmcplugin.addDirectoryItem(handle=addon_handle, url=item_url,
                                    listitem=item)
    xbmcplugin.endOfDirectory(addon_handle)


# Dispatch on the "page" query parameter: no page -> log in and list
# videos; "resolve" -> resolve the selected video URL and play it.
if page is None:
    scraper.login(xbmcplugin.getSetting(addon_handle, 'username'), xbmcplugin.getSetting(addon_handle, 'password'))
    get_videos()
elif page == "resolve":
    xbmc.Player().play(scraper.resolve_url(args.get("url")[0]))
Пример #5
0
def main():
    """CLI entry point: scrape blinkist.com and emit HTML/EPUB/PDF (and
    optionally audio) output for each scraped book.

    Flow: parse arguments; with --no-scrape just regenerate outputs from
    existing JSON dumps; otherwise start a Selenium driver, log in, and
    scrape a single book, a list of books, or all categories.
    """
    parser = argparse.ArgumentParser(
        description="Scrape blinkist.com and generate pretty output")

    parser.add_argument(
        "--language",
        choices={"en", "de"},
        default="en",
        help=
        "The language to scrape books in - either 'en' for english or 'de' for german",
    )
    parser.add_argument(
        "--match-language",
        action="store_true",
        default=False,
        help=
        "Skip scraping books if not in the requested language (not all book are avaible in german)",
    )

    # Validator for --cooldown: argparse calls this with the raw string.
    def check_cooldown(value):
        if int(value) < 1:
            raise argparse.ArgumentTypeError("Can't be smaller than 1")
        return int(value)

    parser.add_argument(
        "--cooldown",
        type=check_cooldown,
        default=1,
        help=
        "Seconds to wait between scraping books, and downloading audio files. Can't be smaller than 1",
    )
    parser.add_argument(
        "--headless",
        action="store_true",
        default=False,
        help=
        "Start the automated web browser in headless mode. Works only if you already logged in once",
    )
    parser.add_argument(
        "--audio",
        action="store_true",
        default=False,
        help="Download the audio blinks for each book",
    )
    parser.add_argument(
        "--concat-audio",
        action="store_true",
        default=False,
        help=
        "Concatenate the audio blinks into a single file and tag it. Requires ffmpeg",
    )
    parser.add_argument(
        "--keep-noncat",
        action="store_true",
        default=False,
        help=
        "Keep the individual blink audio files, instead of deleting them (works with '--concat-audio' only)",
    )
    parser.add_argument(
        "--no-scrape",
        action="store_true",
        default=False,
        help=
        "Don't scrape the website, only process existing json files in the dump folder. Do not provide email or password with this option.",
    )
    parser.add_argument(
        "--book",
        default=False,
        help="Scrapes this book only, takes the blinkist url for the book"
        "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)",
    )
    parser.add_argument(
        "--daily-book",
        action="store_true",
        default=False,
        help="Scrapes the free daily book only.",
    )
    parser.add_argument(
        "--books",
        default=False,
        help=
        "Scrapes the list of books, takes a txt file with the list of blinkist urls for the books"
        "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)",
    )
    parser.add_argument(
        "--book-category",
        default="Uncategorized",
        help=
        "When scraping a single book, categorize it under this category (works with '--book' only)",
    )
    parser.add_argument(
        "--categories",
        type=str,
        nargs="+",
        default="",
        help=
        ("Only the categories whose label contains at least one string here will be scraped."
         "Case-insensitive; use spaces to separate categories. "
         "(e.g. '--categories entrep market' will only scrape books under 'Entrepreneurship' and 'Marketing & Sales')"
         ),
    )
    parser.add_argument(
        "--ignore-categories",
        type=str,
        nargs="+",
        default="",
        help=
        ("If a category label contains anything in ignored_categories, books under that category will not be scraped. "
         "Case-insensitive; use spaces to separate categories. "
         "(e.g. '--ignored-categories entrep market' will skip scraping of 'Entrepreneurship' and 'Marketing & Sales')"
         ),
    )
    # NOTE(review): store_true with default=True makes --create-html and
    # --create-epub no-ops — they are always on and cannot be disabled
    # from the CLI. TODO confirm whether opt-out flags were intended.
    parser.add_argument(
        "--create-html",
        action="store_true",
        default=True,
        help="Generate a formatted html document for the book",
    )
    parser.add_argument(
        "--create-epub",
        action="store_true",
        default=True,
        help="Generate a formatted epub document for the book",
    )
    parser.add_argument(
        "--create-pdf",
        action="store_true",
        default=False,
        help=
        "Generate a formatted pdf document for the book. Requires wkhtmltopdf",
    )
    parser.add_argument(
        "--save-cover",
        action="store_true",
        default=False,
        help="Save a copy of the Blink cover artwork in the folder",
    )
    parser.add_argument(
        "--embed-cover-art",
        action="store_true",
        default=False,
        help=
        "Embed the Blink cover artwork into the concatenated audio file (works with '--concat-audio' only)",
    )
    parser.add_argument(
        "--chromedriver",
        help=
        "Path to a specific chromedriver executable instead of the built-in one",
    )
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="Increases logging verbosity")

    # email/password are positional and only meaningful when scraping;
    # they are omitted entirely when --no-scrape is passed.
    if "--no-scrape" not in sys.argv:
        parser.add_argument(
            "email",
            help="The email to log into your premium Blinkist account")
        parser.add_argument(
            "password",
            help="The password to log into your premium Blinkist account")

    args = parser.parse_args()

    # set up logger verbosity
    logger.set_verbose(log, args.verbose)

    def generate_book_outputs(book_json, cover_img=False):
        """Emit the enabled output formats (html/epub/pdf) for one book."""
        if args.create_html:
            generator.generate_book_html(book_json, cover_img)
        if args.create_epub:
            generator.generate_book_epub(book_json)
        if args.create_pdf:
            generator.generate_book_pdf(book_json, cover_img)

    def scrape_book(driver, processed_books, book_url, category,
                    match_language):
        """Scrape one book (data, optional audio/cover), generate its
        outputs, and append its url to processed_books on success.

        Returns dump_exists: True when the book was served from an
        existing dump (callers skip the cooldown in that case).
        """
        book_json, dump_exists = scraper.scrape_book_data(
            driver, book_url, category=category, match_language=match_language)
        if book_json:
            cover_img_file = False
            cover_tmp_file = False
            if args.audio:
                audio_files = scraped_audio_exists(book_json)
                if not audio_files:
                    audio_files = scraper.scrape_book_audio(
                        driver, book_json, args.language)
                if audio_files and args.concat_audio:
                    if type(audio_files) == list:
                        if args.embed_cover_art:
                            cover_tmp_file = scraper.download_book_cover_image(
                                book_json,
                                filename="_cover.jpg",
                                alt_file="cover.jpg")
                        generator.combine_audio(book_json, audio_files,
                                                args.keep_noncat,
                                                cover_tmp_file)
            if args.save_cover:
                cover_img_file = scraper.download_book_cover_image(
                    book_json, filename="cover.jpg", alt_file="_cover.jpg")
                generate_book_outputs(book_json, cover_img=cover_img_file)
            else:
                generate_book_outputs(book_json)
            # The temporary cover (downloaded only for audio embedding)
            # is deleted once the outputs are generated.
            if cover_tmp_file:
                if os.path.exists(cover_tmp_file):
                    log.debug(f"Deleting {cover_tmp_file}")
                    os.remove(cover_tmp_file)
                else:
                    log.debug(f'Could not find "{cover_tmp_file}"')
            processed_books.append(book_url)
        return dump_exists

    def finish(start_time, processed_books, driver=None):
        """Close the driver (if any) and log a summary of the run."""
        if driver:
            driver.close()
        elapsed_time = time.time() - start_time
        formatted_time = "{:02d}:{:02d}:{:02d}".format(
            int(elapsed_time // 3600),
            int(elapsed_time % 3600 // 60),
            int(elapsed_time % 60),
        )
        total_books = len(processed_books)
        log.info(
            f"Processed {total_books} book{'s' if total_books != 1 else ''} in {formatted_time}"
        )

    # start scraping
    log.info("Starting scrape run...")
    processed_books = []
    start_time = time.time()

    if args.no_scrape:
        # if the --no-scrape argument is passed, just process the
        # existing json dump files
        for file in glob.glob(os.path.join(os.getcwd(), "dump", "*.json")):
            # NOTE(review): a file *path* is passed here where scrape_book
            # passes a parsed book dict — presumably the generators accept
            # both; verify against generator.*.
            generate_book_outputs(file)
            processed_books.append(file)
        finish(start_time, processed_books)
    else:
        match_language = args.language if args.match_language else ""
        # if no login cookies were found, don't start a headless browser
        # so that the user can solve recaptcha and log in
        start_headless = args.headless
        if not scraper.has_login_cookies():
            start_headless = False
        # add uBlock (if the conditions are right)
        use_ublock = not (args.book or args.headless)
        driver = scraper.initialize_driver(
            headless=start_headless,
            with_ublock=use_ublock,
            chromedriver_path=args.chromedriver,
        )

        is_logged_in = scraper.login(driver, args.language, args.email,
                                     args.password)
        if is_logged_in:
            if args.book or args.daily_book:
                # scrape single book
                book_url = (args.book if not args.daily_book else
                            scraper.get_daily_book_url(driver, args.language))
                scrape_book(
                    driver,
                    processed_books,
                    book_url,
                    category={"label": args.book_category},
                    match_language=match_language,
                )
            elif args.books:
                # scrape list of books
                with open(args.books, "r") as books_urls:
                    for book_url in books_urls.readlines():
                        dump_exists = scrape_book(
                            driver,
                            processed_books,
                            book_url.strip(),
                            category={"label": args.book_category},
                            match_language=match_language,
                        )
                        if not dump_exists:
                            time.sleep(args.cooldown)
            else:
                # scrape all books / categories
                all_books = scraper.get_all_books(driver, args.language)
                categories = scraper.get_categories(
                    driver,
                    args.language,
                    specified_categories=args.categories,
                    ignored_categories=args.ignore_categories,
                )
                for category in categories:
                    books_urls = scraper.get_all_books_for_categories(
                        driver, category)
                    for book_url in books_urls:
                        dump_exists = scrape_book(
                            driver,
                            processed_books,
                            book_url,
                            category=category,
                            match_language=match_language,
                        )
                        # if we processed the book from an existing dump
                        # no scraping was involved, no need to cooldown
                        if not dump_exists:
                            time.sleep(args.cooldown)
                uncategorized_books = [
                    x for x in all_books if x not in processed_books
                ]
                log.info(
                    f"Scraping {len(uncategorized_books)} remaining uncategorized books..."
                )
                for book_url in uncategorized_books:
                    dump_exists = scrape_book(
                        driver,
                        processed_books,
                        book_url,
                        category={"label": "Uncategorized"},
                        match_language=match_language,
                    )
                    if not dump_exists:
                        time.sleep(args.cooldown)
        else:
            log.error("Unable to login into Blinkist")
        finish(start_time, processed_books, driver)
Пример #6
0
 start_time = time.time()
 try:
     if (args.no_scrape):
         # if the --no-scrape argument is passed, just process the existing json dump files
         for file in glob.glob(os.path.join("dump", "*.json")):
             process_book_json(file, processed_books)
         finish(None, start_time, processed_books)
     else:
         match_language = args.language if args.match_language else ""
         # if no login cookies were found, don't start a headless browser
         # so that the user can solve recaptcha and log in
         start_headless = args.headless
         if not scraper.has_login_cookies():
             start_headless = False
         driver = scraper.initialize_driver(headless=start_headless)
         is_logged_in = scraper.login(driver, args.language, args.email,
                                      args.password)
         if (is_logged_in):
             if (args.book):
                 scrape_book(driver,
                             processed_books,
                             args.book,
                             category={"label": args.category},
                             match_language=match_language)
             else:
                 categories = scraper.get_categories(driver, args.language)
                 for category in categories:
                     books_urls = scraper.get_all_books_for_categories(
                         driver, category)
                     for book_url in books_urls:
                         dump_exists = scrape_book(
                             driver,
Пример #7
0
# Route print() output through the logging system; --quiet raises the
# threshold to WARNING, which suppresses these informational messages.
print = logging.info
logging.basicConfig(level=logging.WARNING if args.quiet else logging.INFO,
                    format="%(message)s")


def read_login(filename: str):
    """Read a username/password pair from *filename*.

    The file's first line is the username, the second the password.

    Bugfix: ``readline()`` keeps the trailing newline, so the original
    returned credentials polluted with '\\n'; strip it here.

    Returns:
        (username, password) tuple of str.
    """
    with open(filename, 'r') as f:
        username = f.readline().rstrip('\n')
        password = f.readline().rstrip('\n')
    return (username, password)


# Start a (possibly headless) Firefox session; --quiet hides the browser.
print('Inicialising scraper...')
options = webdriver.FirefoxOptions()
options.headless = args.quiet
driver = webdriver.Firefox(options=options)

# Credentials come from a two-line file named on the command line.
loginData = read_login(args.filename)
login(loginData[0], loginData[1], driver)
# Tag -> occurrence count, defaulting to 0 for unseen tags.
tagDict = defaultdict(int)
print('Scraping data...')
for i in range(args.passes):
    print('Pass no. %d' % (i + 1))
    scrape_once(driver, args.collect_dict, tagDict)

# Report tags sorted by descending count, then shut the browser down.
for tag in sorted(tagDict.items(), key=operator.itemgetter(1), reverse=True):
    print(tag[0] + ' ' + str(tag[1]))
driver.quit()
import urlparse
import xbmcplugin
import xbmcgui
import xbmc

# Kodi plugin boilerplate: the runtime invokes this script with
# argv[0] = plugin base URL, argv[1] = handle, argv[2] = "?query".
addon_url = sys.argv[0]
addon_handle = int(sys.argv[1])
# NOTE(review): `sys` is used here and `urllib` below, but only
# urlparse/xbmc* are imported in this view — presumably imported
# elsewhere, otherwise this raises NameError. TODO confirm.
args = urlparse.parse_qs(sys.argv[2][1:])
page = args.get("page", [None])[0]


def build_url(query):
    """Return this add-on's plugin URL with *query* encoded as GET
    parameters.

    Bugfix: ``urllib`` is never imported at module level (only
    ``urlparse`` is), so calling this raised NameError — import it
    locally here (Python 2 stdlib, provides urlencode).
    """
    import urllib
    return addon_url + '?' + urllib.urlencode(query)


def get_videos():
    """Populate the Kodi directory listing from the scraper's videos."""
    for entry in scraper.list_videos():
        item_url = build_url({"page": "resolve", "url": entry["url"]})
        item = xbmcgui.ListItem(entry["label"], iconImage="DefaultVideo.png")
        xbmcplugin.addDirectoryItem(handle=addon_handle, url=item_url,
                                    listitem=item)
    xbmcplugin.endOfDirectory(addon_handle)


# Dispatch on the "page" query parameter: no page -> log in and list
# videos; "resolve" -> resolve the selected video URL and play it.
if page is None:
    scraper.login(xbmcplugin.getSetting(addon_handle, 'username'),
                  xbmcplugin.getSetting(addon_handle, 'password'))
    get_videos()
elif page == "resolve":
    xbmc.Player().play(scraper.resolve_url(args.get("url")[0]))
Пример #9
0
def parse_message(message, id):
    """Handle one incoming chat message for user *id* and return the
    reply text.

    If the user's Mongo record says they are logged out, log them back
    in first (decrypting the stored password with ``f``); otherwise
    route the message through the wit.ai intent parser and dispatch on
    the detected entities.

    Fix: the two bare ``except:`` clauses also swallowed SystemExit and
    KeyboardInterrupt — narrowed to ``except Exception``.

    NOTE: the ``id`` parameter shadows the builtin; kept unchanged for
    interface compatibility with existing callers.
    """
    r = collection.find_one({"_id": id})

    if r['loggedIn'] == 0:
        bot.send_text_message(id, "Logging in..")
        bot.send_action(id, "typing_on")
        loginResult = scraper.login(r['guid'],
                                    (f.decrypt(r['thing'])).decode())

        # loginResult == 1 signals a successful login.
        if loginResult == 1:
            collection.update_one({"_id": id}, {'$set': {'loggedIn': 1}})
            bot.send_text_message(id, "Logged in!")

            try:
                parse = witClient.message(message)
                bot.send_action(id, "typing_on")

                if 'datetime' in parse['entities']:
                    return scraper.specific_day(
                        parse['entities']['datetime'][0]['value'][:10],
                        r['guid'])

                elif 'read_next' in parse['entities']:
                    return scraper.read_now(r['guid'])

                else:
                    return "What's up?"

            except Exception:
                return "So, what's up?"

        else:
            # Login failed: drop the account and re-create the pending
            # registration marker ("W" + id) so the user can re-register.
            collection.delete_one({"_id": id})
            collection.insert_one({"_id": "W" + id})
            return "Something went wrong.\nRegister here: {}/register?key={}".format(
                app_url, id)

    else:
        if scraper.check_browser(r['guid']):

            try:
                parse = witClient.message(message)
                bot.send_action(id, "typing_on")

                if 'logout' in parse['entities']:
                    scraper.close(r['guid'])
                    collection.update_one({"_id": id},
                                          {'$set': {
                                              'loggedIn': 0
                                          }})
                    return "Logged out! Goodbye. :)"

                elif 'delete_data' in parse['entities']:
                    scraper.close(r['guid'])
                    collection.delete_one({"_id": id})
                    return "Deleted! :) "

                elif 'datetime' in parse['entities']:
                    return scraper.specific_day(
                        parse['entities']['datetime'][0]['value'][:10],
                        r['guid'])

                elif 'read_next' in parse['entities']:
                    return scraper.read_now(r['guid'])

                else:
                    return "Not sure how to answer that."

            except Exception:
                return "Something went wrong with parsing that."

        else:
            # Browser session is gone: mark the user logged out.
            collection.update_one({"_id": id}, {'$set': {'loggedIn': 0}})
            return "You have been logged out due to some error or being idle for too long. Say hello to log in again. :) "
Пример #10
0
def main():
  """CLI entry point: scrape blinkist.com and emit HTML/EPUB/PDF (and
  optionally audio) output for each scraped book.

  Fixes in this revision:
  - processed_books was an int incremented inside the nested
    scrape_book(); rebinding an int parameter never reaches the caller,
    so the final count was always wrong. It is now a list (appended to
    in place), and finish() reports its length.
  - when --save-cover is set, generate_book_outputs() now receives the
    actual downloaded cover path instead of the literal 'cover.jpg'.
  """
  parser = argparse.ArgumentParser(description="Scrape blinkist.com and generate pretty output")

  parser.add_argument("--language", choices={"en", "de"}, default="en",
                      help="The language to scrape books in - either 'en' for english or 'de' for german")
  parser.add_argument("--match-language", action="store_true", default=False,
                      help="Skip scraping books if not in the requested language (not all book are avaible in german)")

  # Validator for --cooldown: argparse calls this with the raw string.
  def check_cooldown(value):
    if int(value) < 1:
      raise argparse.ArgumentTypeError("Can't be smaller than 1")
    return int(value)

  parser.add_argument("--cooldown", type=check_cooldown, default=1,
                      help="Seconds to wait between scraping books, and downloading audio files. Can't be smaller than 1")
  parser.add_argument("--headless", action="store_true", default=False,
                      help="Start the automated web browser in headless mode. Works only if you already logged in once")
  # NOTE(review): store_true with default=True makes --audio (and the
  # --create-html / --create-epub flags below) always-on no-ops; kept
  # as-is to preserve CLI behavior.
  parser.add_argument("--audio", action="store_true", default=True,
                      help="Download the audio blinks for each book")
  parser.add_argument("--concat-audio", action="store_true", default=False,
                      help="Concatenate the audio blinks into a single file and tag it. Requires ffmpeg")
  parser.add_argument("--keep-noncat", action="store_true", default=False,
                      help="Keep the individual blink audio files, instead of deleting them (works with '--concat-audio' only)")
  parser.add_argument("--no-scrape", action="store_true", default=False,
                      help="Don't scrape the website, only process existing json files in the dump folder. Do not provide email or password with this option.")
  parser.add_argument("--book", default=False,
                      help="Scrapes this book only, takes the blinkist url for the book"
                      "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)")
  parser.add_argument("--books", default=False,
                      help="Scrapes the list of books, takes a txt file with the list of blinkist urls for the books"
                      "(e.g. https://www.blinkist.com/en/books/... or https://www.blinkist.com/en/nc/reader/...)")
  parser.add_argument("--book-category", default="Uncategorized",
                      help="When scraping a single book, categorize it under this category (works with '--book' only)")
  parser.add_argument("--categories", type=str, nargs="+", default="",
                      help=("Only the categories whose label contains at least one string here will be scraped."
                      "Case-insensitive; use spaces to separate categories. "
                      "(e.g. '--categories entrep market' will only scrape books under 'Entrepreneurship' and 'Marketing & Sales')"))
  parser.add_argument("--ignore-categories", type=str, nargs="+", default="",
                      help=("If a category label contains anything in ignored_categories, books under that category will not be scraped. "
                            "Case-insensitive; use spaces to separate categories. "
                            "(e.g. '--ignored-categories entrep market' will skip scraping of 'Entrepreneurship' and 'Marketing & Sales')"))
  parser.add_argument("--create-html", action="store_true", default=True,
                      help="Generate a formatted html document for the book")
  parser.add_argument("--create-epub", action="store_true", default=True,
                      help="Generate a formatted epub document for the book")
  parser.add_argument("--create-pdf", action="store_true", default=False,
                      help="Generate a formatted pdf document for the book. Requires wkhtmltopdf")
  parser.add_argument("--save-cover", action="store_true", default=False,
                      help="Save a copy of the Blink cover artwork in the folder")
  parser.add_argument("--embed-cover-art", action="store_true", default=False,
                      help="Embed the Blink cover artwork into the concatenated audio file (works with '--concat-audio' only)")
  parser.add_argument("--chromedriver", help='Path to a specific chromedriver executable instead of the built-in one')
  parser.add_argument("-v", "--verbose", action="store_true", help="Increases logging verbosity")

  # email/password are positional and only meaningful when scraping;
  # they are omitted entirely when --no-scrape is passed.
  if '--no-scrape' not in sys.argv:
    parser.add_argument("email", help="The email to log into your premium Blinkist account")
    parser.add_argument("password", help="The password to log into your premium Blinkist account")

  args = parser.parse_args()

  # set up logger
  log.setLevel(logging.INFO if not args.verbose else logging.DEBUG)
  log_screen_handler = logging.StreamHandler(stream=sys.stdout)
  log.addHandler(log_screen_handler)
  log.propagate = False
  # Colorized log output is best-effort: colorama is optional.
  try:
    import colorama, copy

    LOG_COLORS = {
      logging.DEBUG: colorama.Fore.GREEN,
      logging.INFO: colorama.Fore.BLUE,
      logging.WARNING: colorama.Fore.YELLOW,
      logging.ERROR: colorama.Fore.RED,
      logging.CRITICAL: colorama.Back.RED
    }

    class ColorFormatter(logging.Formatter):
      def format(self, record, *args, **kwargs):
        # if the corresponding logger has children, they may receive modified
        # record, so we want to keep it intact
        new_record = copy.copy(record)
        if new_record.levelno in LOG_COLORS:
          new_record.levelname = "{color_begin}{level}{color_end}".format(
              level=new_record.levelname,
              color_begin=LOG_COLORS[new_record.levelno],
              color_end=colorama.Style.RESET_ALL,
          )
        return super(ColorFormatter, self).format(new_record, *args, **kwargs)

    log_screen_handler.setFormatter(ColorFormatter(fmt='%(asctime)s %(levelname)-8s %(message)s',
      datefmt="{color_begin}[%H:%M:%S]{color_end}".format(
        color_begin=colorama.Style.DIM,
        color_end=colorama.Style.RESET_ALL
      )))
  except ModuleNotFoundError:
    pass

  def generate_book_outputs(book_json, cover_img=False):
    """Emit the enabled output formats (html/epub/pdf) for one book."""
    if (args.create_html):
      generator.generate_book_html(book_json, cover_img)
    if (args.create_epub):
      generator.generate_book_epub(book_json)
    if (args.create_pdf):
      generator.generate_book_pdf(book_json, cover_img)

  def scrape_book(driver, processed_books, book_url, category, match_language):
    """Scrape one book (data, optional audio/cover), generate outputs,
    and append its url to the processed_books list on success.

    Returns dump_exists: True when the book came from an existing dump
    (callers skip the cooldown in that case).
    """
    book_json, dump_exists = scraper.scrape_book_data(driver, book_url, category=category, match_language=match_language)
    if (book_json):
      cover_img_file = False
      cover_tmp_file = False
      if (args.audio):
        audio_files = scraped_audio_exists(book_json)
        if (not audio_files):
          audio_files = scraper.scrape_book_audio(driver, book_json, args.language)
        if (audio_files and args.concat_audio):
          if (type(audio_files) == list):
            if (args.embed_cover_art):
              cover_tmp_file = scraper.download_book_cover_image(book_json, filename='_cover.jpg',  alt_file='cover.jpg')
            generator.combine_audio(book_json, audio_files, args.keep_noncat, cover_tmp_file)
      if (args.save_cover):
        cover_img_file = scraper.download_book_cover_image(book_json, filename='cover.jpg',  alt_file='_cover.jpg')
        # Fix: pass the actual downloaded path, not the literal 'cover.jpg'.
        generate_book_outputs(book_json, cover_img=cover_img_file)
      else:
        generate_book_outputs(book_json)
      # The temporary cover (downloaded only for audio embedding) is
      # deleted once the outputs are generated.
      if cover_tmp_file:
        if (os.path.exists(cover_tmp_file)):
          log.debug(f'Deleting {cover_tmp_file}')
          os.remove(cover_tmp_file)
        else:
          log.debug(f'Could not find "{cover_tmp_file}"')
      # Fix: append to the shared list; the old `processed_books += 1`
      # rebound a local int and never updated the caller's counter.
      processed_books.append(book_url)
    return dump_exists

  def finish(start_time, processed_books, driver=None):
    """Close the driver (if any) and log a summary of the run."""
    if (driver):
      driver.close()
    elapsed_time = time.time() - start_time
    formatted_time = '{:02d}:{:02d}:{:02d}'.format(int(elapsed_time // 3600), int(elapsed_time % 3600 // 60), int(elapsed_time % 60))
    log.info(f"Processed {len(processed_books)} books in {formatted_time}")

  # start scraping
  log.info('Starting scrape run...')
  processed_books = []
  start_time = time.time()

  if (args.no_scrape):
    # if the --no-scrape argument is passed, just process the existing json dump files
    for file in glob.glob(os.path.join(os.getcwd(), "dump", "*.json")):
      generate_book_outputs(file)
      processed_books.append(file)
    finish(start_time, processed_books)
  else:
    match_language = args.language if args.match_language else ""
    # if no login cookies were found, don't start a headless browser
    # so that the user can solve recaptcha and log in
    start_headless = args.headless
    if not scraper.has_login_cookies():
      start_headless = False
    # add uBlock (if the conditions are right)
    use_ublock = not (args.book or args.headless)
    driver = scraper.initialize_driver(
      headless=start_headless,
      with_ublock=use_ublock,
      chromedriver_path=args.chromedriver)

    is_logged_in = scraper.login(driver, args.language, args.email, args.password)
    if (is_logged_in):
      if (args.book):
        # scrape single book
        scrape_book(
          driver, processed_books, args.book, category={ "label" : args.book_category}, match_language=match_language)
      elif (args.books):
        # scrape list of books
        with open(args.books, 'r') as books_urls:
          for book_url in books_urls.readlines():
            dump_exists = scrape_book(
              driver, processed_books, book_url.strip(), category={ "label" : args.book_category}, match_language=match_language)
            if not dump_exists:
              time.sleep(args.cooldown)
      else:
        # scrape all books
        categories = scraper.get_categories(
          driver, args.language,
          specified_categories=args.categories,
          ignored_categories=args.ignore_categories)
        for category in categories:
          books_urls = scraper.get_all_books_for_categories(driver, category)
          for book_url in books_urls:
            dump_exists = scrape_book(driver, processed_books, book_url, category=category, match_language=match_language)
            # if we processed the book from an existing dump
            # no scraping was involved, no need to cooldown
            if not dump_exists:
              time.sleep(args.cooldown)
    else:
      log.error("Unable to login into Blinkist")
    finish(start_time, processed_books, driver)