示例#1
0
def check(url, base_url, author):
    """Resolve ``url`` for the current attempt and open a streaming request.

    Args:
        url: Media link; reformatted by ``url_format`` using the value
            returned by ``cfg.read_attempts()``.
        base_url: Passed to ``test_link`` instead of ``url`` when it is
            truthy and ``url_format`` fails.
        author: Forwarded to ``test_link``.

    Returns:
        ``(filename, request, resolution)`` on success; ``False`` when no
        file name can be derived from the link or the connection fails; or
        the result of ``test_link`` when ``url_format`` raises
        ``IndexError``.
    """
    resolution = cfg.read_attempts()
    try:
        url = url_format(url, resolution)
    except IndexError as error:
        return test_link(base_url if base_url else url,
                         author=author,
                         resolution=resolution,
                         error=error)

    try:
        filename = media_files.rename(
            url.split("?name=")[1].split("&token=ip=")[0] + ".crdownload")
    except IndexError:
        filename = False
    if not filename:
        # Stop before touching the network: previously the streaming request
        # was still opened (and never closed) for a link that was already
        # known to be unusable, and a falsy rename() result reached
        # print(error) with ``error`` unbound.
        error = "This link does not go to a supported site!"
        print(error)
        log(error)
        return False

    try:
        request = req.get(url,
                          headers=headers,
                          stream=True,
                          timeout=(cfg.timeout / 2, cfg.timeout))
    except (req.exceptions.ConnectionError, req.exceptions.InvalidURL,
            req.exceptions.ReadTimeout):
        print("DEBUG: error")
        error = f"Connection error with {media.format_title(filename)}."
        print(error)
        log(error)
        return False
    return filename, request, resolution
示例#2
0
    def search(self, url, media_type=0):
        """Open ``url`` and gather search results with per-title metadata.

        ``media_type`` selects the result class to look for: 0 = movie (HD),
        1 = movie (CAM), 2 or more = TV show. When a movie search finds
        nothing, the search is repeated with the next media type; a TV-show
        search that finds nothing raises ``NoResults``.

        Returns:
            ``(results, metadata)`` where ``metadata`` maps each description's
            text (colons removed) to a dict of its ``data-*`` attributes plus
            the poster URL under ``"img"``.
        """
        if media_type == 0:  # Movie (HD)
            element_class, description_class = "item_hd", "_smQamBQsETb"
        elif media_type == 1:  # Movie (CAM)
            element_class, description_class = "item_cam", "_smQamBQsETb"
        elif media_type >= 2:  # TV Show
            element_class, description_class = "item_series", "_skQummZWZxE"

        self.open_link(url)
        results, descriptions = self.get_results_from_search(
            element_class=element_class, decription_class=description_class)

        if not results:
            if media_type >= 2:  # TV Show: nothing left to fall back to
                raise NoResults
            return self.search(url, media_type=media_type + 1)

        if media_type == 1:
            log("**INFO:** Film is in CAM quality.", silent=False)

        if not descriptions:
            # Results exist but no descriptions were found: retry with the
            # movie description class.
            results, descriptions = self.get_results_from_search(
                element_class=element_class,
                decription_class="_smQamBQsETb")

        media_type_index = 1
        metadata = {}
        # The result index is advanced before each entry is examined, so the
        # first description is paired with index 2.
        for result_index, entry in enumerate(descriptions, start=2):
            if entry.get_attribute("data-filmname") != entry.text:
                continue

            poster_url = self.get_movie_poster_url(result_index,
                                                   media_type_index)
            details = {
                "data-filmname":
                entry.get_attribute("data-filmname").replace(":", ""),
                "data-year": entry.get_attribute("data-year"),
                "data-imdb": entry.get_attribute("data-imdb").split(": ")[1],
                "data-duration": entry.get_attribute("data-duration"),
                "data-country": entry.get_attribute("data-country"),
                "data-genre": entry.get_attribute("data-genre"),
                "data-descript": entry.get_attribute("data-descript"),
                "img": poster_url,
            }
            metadata[entry.text.replace(":", "")] = details

        return results, metadata
示例#3
0
def file_size(filename, count, start_time=None, target_size=None):
    """Return the current size of ``filename`` and periodically log progress.

    A progress line is logged on the 4th call (``count == 3``) and on every
    25th chunk, but only when both ``start_time`` and ``target_size`` are
    truthy.

    Args:
        filename: Path handed to ``media.size`` / ``media.format_title``.
        count: Zero-based chunk counter supplied by the caller.
        start_time: ``time.time()`` value taken when the download started.
        target_size: Expected final size in bytes.

    Returns:
        The size reported by ``media.size(filename)``.
    """
    size = media.size(filename)
    report_due = (count + 1) % 25 == 0 or count == 3
    if not (report_due and start_time and target_size):
        return size

    elapsed = time.time() - start_time
    if size <= 0 or elapsed <= 0:
        # No throughput can be measured yet (nothing on disk, or no time has
        # passed); dividing here used to raise ZeroDivisionError.
        return size

    speed = size / elapsed  # bytes per second
    speed_mbps = round(speed * 8 / (1024 * 1024), 2)
    # Clamp so an oversized file never feeds a negative value to gmtime().
    remaining_size = max(target_size - size, 0)
    eta = time.strftime("%Hh %Mm %Ss", time.gmtime(remaining_size / speed))
    size_mb = int(round(size / 1024 / 1024, 2))
    target_mb = int(target_size / 1024 / 1024)
    title = media.format_title(filename)
    msg = f"Downloading {title} at ~{speed_mbps} Mbps, ETA: {eta} ({size_mb}/{target_mb} MB)."
    log(msg, silent=False)
    return size
示例#4
0
def check_for_captcha_solve(timeout=100):
    """Poll for ``captcha.txt`` and return its first entry once it exists.

    When the module is run as a script, the solution is first read from
    standard input and written to ``captcha.txt``.

    Args:
        timeout: Maximum time to wait, in seconds (polled every 0.5 s).
            Fractional values are accepted.

    Returns:
        The first item returned by ``media.read_file`` for the captcha file,
        or ``False`` if the file did not appear before the timeout.
    """
    filename = "captcha.txt"
    if __name__ == "__main__":
        media.write_file(filename, input("Solve the captcha:\n> "))

    # int() keeps range() working when a float timeout is supplied.
    for _ in range(int(timeout * 2)):
        time.sleep(0.5)
        if os.path.isfile(filename):
            solved_captcha = media.read_file(filename)[0]
            media.remove_file(filename)
            return solved_captcha
    log(f"Captcha was not solved within {timeout} seconds.\nAborting download.",
        silent=False)
    return False
示例#5
0
    def run(self, resolution_override=None):
        """Download ``self.url`` and record the finished download.

        The target path is derived from ``self.metadata``: titles containing
        both "Season" and "Episode" go under ``../TV SHOWS``, everything else
        under ``../MOVIES``.

        Args:
            resolution_override: When truthy, passed to ``Stream`` in place
                of the resolution chosen by ``best_quality``. The credit and
                the final log line still use the chosen resolution.

        Returns:
            ``False`` when ``best_quality`` reports no usable URL, otherwise
            ``True`` once ``Stream.stream()`` has returned.
        """
        # Function should return True when the download is complete and False if it permanently failed
        self.url, request, resolution = self.best_quality(self.url)
        if self.url is False:
            return False

        filmname = self.metadata["data-filmname"]
        year = self.metadata["data-year"]
        if "Season" in filmname and "Episode" in filmname:
            print("Media is detected as TV Show.")
            show_title = filmname.split(" - ")[0]
            season = filmname.split(" - Season ")[1].split(
                " Episode")[0].split(" [")[0]
            season = season if len(season) >= 2 else "0" + season
            episode = filmname.split(" Episode ")[1].split(": ")[0]
            # The episode title is not part of the file name, so it is no
            # longer parsed here (the old parse result was never used).
            filename = f"{show_title} - s{season}ep{episode}"
            absolute_path = os.path.abspath(
                f"../TV SHOWS/{show_title}/Season {season}/{filename}.crdownload"
            )
        else:
            print("Media is detected as Movie/Film.")
            # Titles already ending in ")" are used as-is; endswith() also
            # copes with an empty title, where indexing [-1] would raise.
            filename = (filmname
                        if filmname.endswith(")") else f"{filmname} ({year})")
            absolute_path = os.path.abspath(
                f"../MOVIES/{filename}/{filename}.crdownload")
        stream = Stream(
            request,
            absolute_path,
            (resolution_override if resolution_override else resolution),
        )
        stream.stream()
        filename = filename.replace(".crdownload", ".mp4")
        file_size = round(
            int(request.headers.get("content-length", 0)) / 1024 / 1024, 2)
        media.credit(self.author,
                     filename=filename,
                     resolution=resolution,
                     file_size=file_size)
        log(f"Finished download of {filename} in {resolution}p ({file_size} MB).",
            silent=False)

        return True
示例#6
0
 def search(self, query):
     """Search the site for ``query`` and scrape the first result.

     Returns:
         ``False`` (after quitting the driver) when clicking the first
         result raises ``NoSuchElementException``; otherwise the result of
         ``self.run()``.
     """
     from urllib.parse import quote

     # Collapse whitespace, then percent-encode the whole term so reserved
     # characters in a title ("/", "?", "#", "&", ...) cannot alter the URL.
     # Spaces still become "%20" as before.
     search_arg = quote(" ".join(query.split()), safe="")
     self.driver.get(f"https://gomovies-online.cam/search/{search_arg}")
     try:
         self.click(
             "/html/body/main/div/div/section/div[1]/div/movies[1]/div/div/div/div/a"
         )
     except NoSuchElementException:
         # no results
         error = f"Search for {query} yielded no results."
         print(error)
         log(error)
         self.driver.quit()
         return False
     url = self.driver.current_url + "-online-for-free.html"
     self.driver.get(url)
     return self.run()
示例#7
0
    def check_captcha(self):
        """Locate the captcha widgets and announce the captcha if one is shown.

        Returns:
            ``(image, input, submit)`` elements, or ``(None, None, None)``
            when a ``TimeoutException`` is raised while locating them.
        """
        image_xpath = '//*[@id="checkcapchamodelyii-captcha-image"]'
        input_xpath = '//*[@id="checkcapchamodelyii-captcha"]'
        submit_xpath = '//*[@id="player-captcha"]/div[3]/div/div'

        try:
            image = self.wait_until_element(By.XPATH,
                                            image_xpath,
                                            timeout=1.5)
            field = self.driver.find_element(By.XPATH, input_xpath)
            button = self.driver.find_element(By.XPATH, submit_xpath)
        except TimeoutException:
            return None, None, None

        if image:
            print("DEBUG: Captcha!")
            log("Captcha! Solve using the command:\n```beta solve <captcha_solution>```"
                )
        return image, field, button
示例#8
0
 def run(self, xpath="//*[@id=\"_skqeqEJBSrS\"]/div[2]/video", attr="src"):
     """Wait for the element at ``xpath`` and return its ``attr`` value.

     The page is refreshed and the attempt repeated for as long as the
     attribute is missing or shorter than 100 characters.

     Returns:
         The attribute value (the driver is quit first), or ``False`` when
         waiting for the element times out.
     """
     # Retrying in a loop instead of recursing: the recursive form called
     # len() on the ``False`` returned by a timed-out inner attempt and
     # quit the driver twice on success.
     while True:
         print("WEB SCRAPING")
         log("Waiting on web scraper (up to 35 seconds).")
         self.check_captcha()
         print("DEBUG: Finished check_captcha function")
         try:
             # until() returns the located element, so no second lookup is
             # needed.
             element = WebDriverWait(self.driver, 30).until(
                 EC.visibility_of_element_located((By.XPATH, xpath)))
             data = element.get_attribute(attr)
             # A missing attribute (None) counts as "no data" rather than
             # crashing in len().
             if data and len(data) >= 100:
                 self.driver.quit()
                 return data
             print("DEBUG: No data!")
             self.driver.refresh()
         except TimeoutException:
             print("DEBUG: Link invalid, scraping failed.")
             return False
示例#9
0
def test_link(url,
              author,
              start_time=0,
              resolution=0,
              filename=False,
              error=False):
    """React to a failed download by retrying or giving up.

    A failure counts as a bad link when fewer than 10 seconds have passed
    since ``start_time`` or when ``error`` is truthy. In that case the
    attempt counter is incremented and the download restarted, unless
    ``resolution`` has already reached the last index of ``quality``, in
    which case the attempts are reset and nothing is retried. Any other
    failure simply restarts the download.

    Returns:
        Always ``False``.
    """
    if filename:
        filename = media.format_title(filename)
    # Name used in the messages: the formatted file name when there is one,
    # otherwise the URL.
    target = filename if filename else url

    failed_quickly = (time() - start_time) < 10
    if not (failed_quickly or error):
        error = f"Failed download of {target}.\nRestarting download..."
        print(error)
        log(error)
        download(url, author=author)
        return False

    if int(resolution) >= len(quality) - 1:
        error = f"Failed download of {target}, link is invalid."
        print(error)
        log(error)
        cfg.reset_attempts()
        return False

    cfg.increment_attempts()
    download(url, author=author)
    return False
示例#10
0
    def check_captcha(self,
                      xpath="//*[@id=\"checkcapchamodelyii-captcha-image\"]",
                      attr="src"):
        """Detect a captcha, obtain its solution and submit it.

        Waits up to 5 seconds for the element at ``xpath``. When it appears,
        a screenshot is taken and the solution is obtained either from
        ``solved_captcha.txt`` (polled once a second while less than 60
        seconds have passed since ``self.start_time``) or, when run as a
        script, from standard input.

        Returns:
            The element's ``attr`` value, or ``False`` when waiting for the
            captcha element times out.
        """
        try:
            # until() already returns the located element; the extra lookup
            # through find_element_by_xpath was redundant and that helper no
            # longer exists in current Selenium releases.
            captcha_element = WebDriverWait(self.driver, 5).until(
                EC.visibility_of_element_located((By.XPATH, xpath)))
            captcha = captcha_element.get_attribute(attr)
            screenshot = self.screenshot_captcha(captcha_element)
            print(f"DEBUG: Captcha, {captcha}")
        except TimeoutException:
            print("DEBUG: No captcha")
            return False
        if not captcha:
            return captcha

        if __name__ != "__main__":
            log("Captcha! Please solve using the command: ```!solve <captcha_solution>```\nREMIND IAN TO FIX THIS --> please don't mess up or the download will fail."
                )
            log(f"--file={screenshot}")
            filename = "solved_captcha.txt"
            solved_captcha = False
            while not solved_captcha and (time() - self.start_time) < 60:
                sleep(1)
                if os.path.isfile(filename):
                    solved_captcha = media.read_file(filename)[0]
                    media.remove_file(filename)
                    print(
                        f"DEBUG: Solved captcha, {solved_captcha}, {not solved_captcha}/{(time() - self.start_time) < 60}"
                    )
        else:
            solved_captcha = input("Enter the solved captcha:\n> ")
        if solved_captcha:
            self.solve_captcha(solved_captcha)
            self.submit_captcha()

        return captcha
示例#11
0
 def write(self):
     """Stream ``self.request`` into ``self.filename``, retrying on errors.

     Progress is reported through ``progress.file_size`` after every chunk.
     Any exception raised while streaming is logged and the whole write is
     started again.
     """
     import os

     self.verify_path()
     # Base name without its final extension; os.path copes with dots
     # elsewhere in the path, which split(".")[0] truncated.
     title = os.path.splitext(os.path.basename(self.filename))[0]
     size_MB = round(self.target_size / 1024 / 1024, 2)
     failed = False
     with open(self.filename, "wb") as file:
         start_time = time.time()
         msg = f"Downloading {title} in {self.resolution}p ({size_MB} MB)..."
         log(msg, silent=False)
         try:
             for count, chunk in enumerate(
                     self.request.iter_content(chunk_size=self.chunk_size)):
                 file.write(chunk)
                 progress.file_size(self.filename,
                                    count,
                                    start_time,
                                    target_size=self.target_size)
         except Exception as e:
             log(f"ERROR with {title}: Connection Reset!\nRetrying download..."
                 )
             log(str(e))
             failed = True
     # Retry only after the file is closed: retrying inside the ``with``
     # left the outer handle open, so its buffered bytes could be flushed
     # into the file after the retry had rewritten it.
     if failed:
         self.write()
示例#12
0
    def best_quality(self, url):
        """Pick the first entry of ``resolution_list`` the link responds 200 for.

        ``url`` may be a string or an object exposing ``get_attribute``; in
        the latter case its ``src`` attribute is used.

        Returns:
            ``(formatted_url, request, resolution)`` on success, otherwise
            ``(False, None, None)`` (no URL, a 403 response, or no 200
            response for any resolution).
        """
        if not url:
            log("ERROR: No URL! Maybe there were no search results?",
                silent=False)
            return False, None, None
        if not isinstance(url, str):
            url = url.get_attribute("src")

        status_codes = []
        # TODO: The process of checking every resolution's status code takes too long (fix me)
        for target_res in resolution_list:
            status, request = validate_url(url, target_res)
            status_codes.append(status)
            if status == 200:
                return url_format(url, target_res), request, target_res
            if status == 403:
                filmname = self.metadata["data-filmname"]
                log(f"ERROR: Link expired while scraping \"{filmname}\".")
                return False, None, None

        # No resolution answered 200; report the last status seen.
        log(f"ERROR: Status code {status_codes[-1]}.")
        return False, None, None
示例#13
0
                     filename=filename,
                     resolution=resolution,
                     file_size=file_size)
        log(f"Finished download of {filename} in {resolution}p ({file_size} MB).",
            silent=False)

        return True


if __name__ == "__main__":

    def run_download(url, metadata, author):
        """Run ``Download(url, metadata, author).run`` on a new thread."""
        download_function = Download(url, metadata, author)
        threaded_download = Thread(target=download_function.run)
        threaded_download.start()

    scraper = Scraper(minimize=False)
    search = input("Enter a Title to search for:\n> ")

    # An empty answer ends the session.
    while search:
        download_queue = scraper.download_first_from_search(search)
        if download_queue:
            for data in download_queue:
                if None in data:
                    # Incomplete entry: report it and skip it instead of
                    # passing None on to the downloader.
                    log("No results!", silent=False)
                    continue

                run_download(data[0], data[1][list(data[1])[0]], data[2])
        else:
            log("No results!", silent=False)
        # Prompt once per search, on every path. Previously an empty result
        # never re-prompted, so the same search repeated forever.
        search = input("Enter a Title to search for:\n> ")
示例#14
0
def download(url, base_url, author):
    """Download the media behind ``url`` and finalize the file.

    Args:
        url: Link handed to ``check``; a falsy value aborts immediately.
        base_url: Link preferred over ``url`` for retries and for
            ``test_link`` when it is truthy.
        author: Forwarded to ``check``, ``test_link`` and ``media.credit``.

    Returns:
        The final status message on completion (also when the rename
        fails), ``False`` when scraping fails or a retry was started, or
        the result of ``test_link`` for HTTP errors, empty files and
        "403 Forbidden" responses.

    Side effects:
        Sets the module-level ``start_time``.
    """
    global start_time

    data = check(url, base_url, author=author) if url else url
    print(data)
    if not data:
        error = "Scraping failed. Link is invalid or captcha was not solved."
        print(error)
        log(error)
        return False
    filename, request, resolution = data
    target_size = request.headers.get("content-length", 0)
    rounded_target_size = round(int(target_size) / 1024 / 1024, 2)
    absolute_path = f"{media_files.path}/{filename}"
    make_directory()
    # Link used whenever the download has to be retried or re-tested.
    retry_url = base_url if base_url else url

    start_time = time()
    try:
        stream.download_file(request,
                             absolute_path,
                             resolution,
                             start_time=start_time)
    except (ConnectionError, ConnectionResetError, ChunkedEncodingError,
            SSLError):
        log(f"Connection error while downloading {media.format_title(filename)}.\nRestarting download..."
            )
        # base_url is a required parameter; the retry calls used to omit it
        # and raised TypeError instead of restarting.
        download(retry_url, base_url, author=author)
        return False
    except req.exceptions.HTTPError as error:
        return test_link(retry_url, author=author, error=error)

    file_size = round(size(absolute_path) / 1024 / 1024, 2)
    if file_size == 0:
        # Keyword arguments: the positional form put start_time into
        # test_link's ``author`` slot and resolution into ``start_time``.
        return test_link(retry_url,
                         author=author,
                         start_time=start_time,
                         resolution=resolution)

    # An error page saved in place of the media shows up as text in the
    # first lines of the file; real media data is not valid text and ends
    # the scan with UnicodeDecodeError.
    with open(absolute_path, "r") as file:
        try:
            for count, line in enumerate(file):
                if count > 20:
                    break
                if "403 Forbidden" in line:
                    return test_link(retry_url,
                                     author=author,
                                     start_time=start_time,
                                     resolution=resolution)
        except UnicodeDecodeError:
            pass

    cfg.reset_attempts()
    filename = media.format_title(filename)
    resolution = quality[int(resolution)]
    if file_size != rounded_target_size:
        msg = f"{file_size}/{rounded_target_size} MB"
        msg = f"Error while downloading {filename}, incomplete file ({msg}).\nRestarting download..."
        print(msg)
        log(msg)
        download(retry_url, base_url, author=author)
        return False

    final_msg = f"Finished download of {filename} in {resolution}p ({file_size} MB)."
    media.credit(author,
                 filename=filename,
                 resolution=resolution,
                 file_size=file_size)
    final_path = absolute_path.replace(".crdownload", ".mp4")
    complete = media.rename(absolute_path, final_path)
    if not complete:
        final_msg = f"Error while finishing {filename}, that file already exists.\nCould not complete."
    print(final_msg)
    log(final_msg)

    return final_msg