Exemplo n.º 1
0
    def download(self, media, api):
        """Download one media item to ``media.directory``/``media.filename``.

        Up to 11 attempts are made. Each attempt sends a HEAD request to read
        the ``content-length`` header, skips the transfer when overwriting is
        disabled and ``main_helper.check_for_dupe_file`` reports a match, and
        otherwise streams the file through ``main_helper.downloader``.

        Args:
            media: Object providing ``link``, ``directory``, ``filename``,
                ``created_at`` and ``downloaded``. ``size`` and
                ``downloaded`` are updated in place.
            api: Object providing ``sessions`` (each exposing ``links``) and
                ``json_request``.

        Returns:
            ``None`` when the media is skipped up front (already downloaded
            while overwriting is disabled, or no session lists its link).
            Otherwise ``False`` if a duplicate file was found or any GET
            attempt did not yield a ``requests.Response``, else ``True``.
        """
        if not overwrite_files and media.downloaded:
            return
        # Use the first session whose link collection contains this media.
        session = next(
            (s for s in api.sessions if media.link in s.links), None)
        if session is None:
            return

        def choose_link(session, links):
            """Return ``[link, size]`` for the first link that answers a HEAD
            request with a non-empty content-length, or ``None``."""
            for link in links:
                r = api.json_request(link, session, "HEAD",
                                     stream=True, json_format=False)
                if not isinstance(r, requests.Response):
                    continue
                content_length = r.headers.get('content-length')
                if not content_length:
                    continue
                return [link, int(content_length)]
            return None

        max_attempts = 11
        links = [media.link]
        # NOTE(review): the result stays True when every attempt fails at the
        # HEAD or downloader step - confirm callers expect that.
        return_bool = True
        for attempt in range(max_attempts):
            result = choose_link(session, links)
            if not result:
                continue
            link, content_length = result
            media.size = content_length
            date_object = media.created_at
            download_path = os.path.join(
                media.directory, media.filename)
            timestamp = date_object.timestamp()
            if not overwrite_files:
                if main_helper.check_for_dupe_file(download_path,
                                                   content_length):
                    # A matching file already exists: no transfer needed.
                    main_helper.format_image(download_path, timestamp)
                    return_bool = False
                    media.downloaded = True
                    break
            r = api.json_request(
                link, session, stream=True, json_format=False)
            if not isinstance(r, requests.Response):
                return_bool = False
                continue
            # The attempt index is forwarded exactly as the retry counter was.
            if not main_helper.downloader(r, download_path, attempt):
                continue
            main_helper.format_image(download_path, timestamp)
            print(f"Link: {link}")
            # The destination was previously printed under a "Link:" label.
            print(f"Path: {download_path}")
            media.downloaded = True
            break
        return return_bool
Exemplo n.º 2
0
def profile_scraper(api, directory, username):
    """Download a subscription's avatar and header images.

    Each image is saved to ``<directory>/<username>/Profile/<Avatars|Headers>``
    under a ``.jpg`` name taken from the second-to-last ``/``-separated
    segment of its link. When overwriting is disabled, files that already
    exist are skipped.

    Args:
        api: Object providing ``get_subscription``, ``sessions`` and
            ``json_request``.
        directory: Base directory under which the profile folder is created.
        username: Identifier passed to ``api.get_subscription`` and used as a
            path component.

    Returns:
        None.
    """
    # Same attempt limit the download routines use; the loop used to retry
    # forever when the downloader kept failing.
    max_attempts = 11
    subscription = api.get_subscription(username)
    avatar = subscription.avatar
    header = subscription.header
    profile_media = []
    if avatar:
        profile_media.append(("Avatars", avatar))
    if header:
        profile_media.append(("Headers", header))
    for media_type, media_link in profile_media:
        directory2 = os.path.join(directory, username, "Profile", media_type)
        os.makedirs(directory2, exist_ok=True)
        download_path = os.path.join(
            directory2, media_link.split("/")[-2] + ".jpg")
        if not overwrite_files and os.path.isfile(download_path):
            continue
        session = api.sessions[0]
        for _ in range(max_attempts):
            # Request a fresh response per attempt; presumably a streamed
            # response cannot be reused after a failed download - TODO confirm.
            r = api.json_request(media_link, session, stream=True,
                                 json_format=False, sleep=False)
            if not isinstance(r, requests.Response):
                # No usable response: give up on this image.
                break
            if main_helper.downloader(r, download_path):
                break
Exemplo n.º 3
0
    def download(medias):
        """Download every entry of ``medias`` and report the overall outcome.

        Each entry is a mapping with the keys ``session``, ``links``,
        ``postedAt`` (formatted ``%d-%m-%Y %H:%M:%S``), ``directory`` and
        ``filename``. Up to 11 attempts are made per entry.

        Returns:
            ``False`` if any entry hit an existing duplicate file or any GET
            attempt did not yield a ``requests.Response``; otherwise ``True``.
        """
        def probe_links(session, candidates):
            # First candidate whose HEAD response carries a content-length.
            for candidate in candidates:
                head = main_helper.json_request(session,
                                                candidate,
                                                "HEAD",
                                                stream=True,
                                                json_format=False)
                if not isinstance(head, requests.Response):
                    continue
                raw_length = head.headers.get('content-length')
                if raw_length:
                    return [candidate, int(raw_length)]
            return None

        outcome = True
        for entry in medias:
            session = entry["session"]
            for attempt in range(11):
                picked = probe_links(session, entry["links"])
                if not picked:
                    continue
                link, size = picked
                posted = datetime.strptime(entry["postedAt"],
                                           "%d-%m-%Y %H:%M:%S")
                target = os.path.join(entry["directory"], entry["filename"])
                stamp = posted.timestamp()
                if not overwrite_files and main_helper.check_for_dupe_file(
                        target, size):
                    # Duplicate already present: no transfer for this entry.
                    main_helper.format_image(target, stamp)
                    outcome = False
                    break
                response = main_helper.json_request(session,
                                                    link,
                                                    stream=True,
                                                    json_format=False)
                if not isinstance(response, requests.Response):
                    outcome = False
                    continue
                if not main_helper.downloader(response, target, attempt):
                    continue
                main_helper.format_image(target, stamp)
                log_download.info("Link: {}".format(link))
                log_download.info("Path: {}".format(target))
                break
        return outcome
Exemplo n.º 4
0
    def download(self, post: format_content.post_item, api):
        """Download the media attached to ``post`` that is still pending.

        Media already marked as downloaded is skipped while overwriting is
        disabled, as is media without a session. Up to 11 attempts are made
        per media item; ``size`` and ``downloaded`` are updated on the item.

        Args:
            post: Post exposing ``medias`` and ``postedAt`` (formatted
                ``%d-%m-%Y %H:%M:%S``).
            api: Object providing ``json_request``.

        Returns:
            ``False`` if any item hit an existing duplicate file or any GET
            attempt did not yield a ``requests.Response``; otherwise ``True``.
        """
        def probe_links(session, candidates):
            # First candidate whose HEAD response carries a content-length.
            for candidate in candidates:
                head = api.json_request(candidate, session, "HEAD",
                                        stream=True, json_format=False)
                if not isinstance(head, requests.Response):
                    continue
                raw_length = head.headers.get('content-length')
                if raw_length:
                    return [candidate, int(raw_length)]
            return None

        outcome = True
        for item in post.medias:
            if not overwrite_files and item.downloaded:
                continue
            session = item.session
            if not session:
                continue
            for attempt in range(11):
                picked = probe_links(session, item.links)
                if not picked:
                    continue
                link, size = picked
                item.size = size
                posted = datetime.strptime(
                    post.postedAt, "%d-%m-%Y %H:%M:%S")
                target = os.path.join(item.directory, item.filename)
                stamp = posted.timestamp()
                if not overwrite_files and main_helper.check_for_dupe_file(
                        target, size):
                    # Duplicate already present: mark done without a transfer.
                    main_helper.format_image(target, stamp)
                    outcome = False
                    item.downloaded = True
                    break
                response = api.json_request(
                    link, session, stream=True, json_format=False)
                if not isinstance(response, requests.Response):
                    outcome = False
                    continue
                if not main_helper.downloader(response, target, attempt):
                    continue
                main_helper.format_image(target, stamp)
                log_download.info("Link: {}".format(link))
                log_download.info("Path: {}".format(target))
                item.downloaded = True
                break
        return outcome
Exemplo n.º 5
0
def profile_scraper(api: start, site_name, api_type, username, text_length,
                    base_directory):
    """Download a subscription's avatar and header images.

    The target directory comes from ``prepare_reformat(...).reformat(...)``,
    which yields three values; only the second is used here, as the parent of
    the ``Avatars`` and ``Headers`` folders. Each image gets a ``.jpg`` name
    taken from the second-to-last ``/``-separated segment of its link. When
    overwriting is disabled, files that already exist are skipped.

    Args:
        api: Object providing ``get_subscription``, ``sessions`` and
            ``json_request``.
        site_name: Stored as ``site_name`` in the reformat options.
        api_type: Stored as ``api_type`` in the reformat options.
        username: Subscription identifier; also a reformat option.
        text_length: Stored as ``maximum_length`` in the reformat options.
        base_directory: Stored as ``directory`` in the reformat options.

    Returns:
        None.
    """
    # Same attempt limit the download routines use; the loop used to retry
    # forever when the downloader kept failing.
    max_attempts = 11
    reformats = {
        "metadata_directory_format":
            json_settings["metadata_directory_format"],
        # The "{value}" placeholder is stripped from the directory template.
        "file_directory_format":
            json_settings["file_directory_format"].replace("{value}", ""),
        "filename_format": json_settings["filename_format"],
    }
    option = {
        "site_name": site_name,
        "api_type": api_type,
        "username": username,
        "date_format": date_format,
        "maximum_length": text_length,
        "directory": base_directory,
    }
    _, profile_directory, _ = prepare_reformat(
        option, keep_vars=True).reformat(reformats)
    subscription = api.get_subscription(identifier=username)
    avatar = subscription.avatar
    header = subscription.header
    override_media_types = []
    if avatar:
        override_media_types.append(("Avatars", avatar))
    if header:
        override_media_types.append(("Headers", header))
    for media_type, media_link in override_media_types:
        directory2 = os.path.join(profile_directory, media_type)
        os.makedirs(directory2, exist_ok=True)
        download_path = os.path.join(directory2,
                                     media_link.split("/")[-2] + ".jpg")
        if not overwrite_files and os.path.isfile(download_path):
            continue
        session = api.sessions[0]
        for _ in range(max_attempts):
            # Request a fresh response per attempt; presumably a streamed
            # response cannot be reused after a failed download - TODO confirm.
            r = api.json_request(media_link,
                                 session,
                                 stream=True,
                                 json_format=False,
                                 sleep=False)
            if not isinstance(r, requests.Response):
                # No usable response: give up on this image.
                break
            if main_helper.downloader(r, download_path):
                break