Пример #1
0
    def download_ncodes_image_net(self, path_to_save=None):
        """
        Download the image-net ncodes listing and store it as a csv file.

        The page at ``self.NCODES_DATA_URL`` is parsed for anchor tags. For every anchor, the text
        after "wnid=" in its href becomes the "code" column and its first child becomes the "name"
        column. The bookkeeping columns "to_download" (False) and "how_many" (-1) are added before
        saving. Nothing is downloaded when the csv file already exists.
        Args:
            path_to_save {str}: folder where "ncodes.csv" is saved; defaults to the folder
                                containing this source file.
        Returns {str}: ``path_to_save`` joined with "ncodes.csv".
        """
        # parameter initialization
        if path_to_save is None:
            path_to_save = os.path.realpath(os.path.dirname(__file__))
        ncodes_path = os.path.join(path_to_save, "ncodes.csv")

        # nothing to do when the csv is already on disk
        if os.path.exists(ncodes_path):
            print("\nncodes data already present in the  mentioned folder")
            return ncodes_path

        print("\nDownloading imagenet ncodes...")
        # download ncodes data
        with anchor(self.NCODES_DATA_URL) as response:
            soup = BeautifulSoup(response.read(), features="lxml")

        # collect the rows first and build the frame once; growing a DataFrame
        # cell by cell through .loc re-allocates on every insertion
        rows = []
        for link in tqdm(soup.findAll('a')):
            href = link.get('href')
            # anchors without an href or without any child cannot provide a code/name pair
            if href is None or not link.contents:
                continue
            rows.append({
                "code": href.split("wnid=")[-1],
                "name": str(link.contents[0]),
            })
        ncodes = pd.DataFrame(rows, columns=["code", "name"])

        # add relevant columns to ncode data
        ncodes["to_download"] = False
        ncodes["how_many"] = -1
        ncodes.to_csv(ncodes_path, index=False)  # save ncode data
        print("Download complete !\n")
        return ncodes_path
Пример #2
0
    def get_an_image(self, url_data, folder_path, queue_, max_time=7):
        """
        Download one image from its url and push a report of the attempt onto a queue.

        The image is written to ``<folder_path>/Images/<image_code><ext>``, where ``ext`` is the
        url's extension when it is listed in ``self.IMAGE_FORMATS`` and ".jpg" otherwise.
        Exactly one report, built by ``self.create_report_json``, is put on ``queue_`` with status:
            "done"      - the image was fetched and written,
            "SaveError" - reading the response body or writing it to disk failed,
            "Error"     - opening the url or the target file failed.
        Args:
            url_data {sequence}: ``url_data[0]`` is the image url, ``url_data[1]`` is the image code.
            folder_path {str}: folder whose "Images" sub-folder receives the downloaded image.
            queue_ {multiprocessing queue}: queue to push the result after asynchronous download
                                            (only its ``put`` method is used here).
            max_time {int}: timeout passed to the url opener.

        Returns: None
        """
        # initializing variables
        url = url_data[0]
        image_code = url_data[1]
        _, ext = os.path.splitext(url)
        ext = ".jpg" if ext.lower() not in self.IMAGE_FORMATS else ext
        image_path = os.path.join(folder_path, "Images", image_code + ext)

        status = "done"
        error_description = "Success"

        # starting the image download
        download_start_time = time.time()
        try:
            # opening from the url
            with anchor(url, timeout=max_time) as request:
                with open(image_path, 'wb') as f:
                    try:
                        # writing the image to the disk
                        f.write(request.read())
                    except Exception as e:
                        # saving error
                        status = "SaveError"
                        error_description = str(e)
        except Exception as e:
            # error during download process (url or target file could not be opened)
            status = "Error"
            error_description = str(e)
        req_time = round(time.time() - download_start_time, 1)

        if status == "SaveError":
            # the target was already opened (and truncated) by this call, so what is
            # left on disk is an empty/partial image; remove it once it is closed
            try:
                os.remove(image_path)
            except OSError:
                pass  # best-effort cleanup, the failure itself is reported below

        download_json = self.create_report_json(
            url=url,
            image_code=image_code,
            status=status,
            spent_time=req_time,
            error_description=error_description
        )
        queue_.put(download_json)
Пример #3
0
    def download_speed(self):
        """
        Estimate the network download speed by timing the download of a fixed test file.

        Returns {int|bool}: ``FILE_SIZE`` divided by the elapsed seconds, rounded to the nearest
                            integer; False when the measurement fails for any reason
                            (e.g. url unreachable, timeout after 7 seconds).
        """
        # parameter initialization
        URL = "http://speedtest.ftp.otenet.gr/files/test1Mb.db"
        # NOTE(review): presumably the size of the 1 Mb test file in kilobytes,
        # which would make the result kilobytes per second - confirm
        FILE_SIZE = 1048.576

        try:
            # download a file to test download speed
            start = time.time()
            # the context manager closes the connection even when read() fails
            with anchor(URL, timeout=7) as response:
                response.read()
            time_difference = time.time() - start
            return round(FILE_SIZE / time_difference)
        except Exception:
            # keep the best-effort contract (False on failure) without swallowing
            # KeyboardInterrupt / SystemExit as a bare except would
            return False
Пример #4
0
    def ncode_image_download_sequentially(self,
                                          ncode_data,
                                          folder_path,
                                          verbose=True,
                                          max_time=7):
        """
        Sequentially download the images of one ncode (object category).

        Every image is written to ``<folder_path>/Images/<img_code><ext>``, where ``ext`` is the
        url's extension when it is listed in ``self.IMAGE_FORMATS`` and ".jpg" otherwise.
        Images whose target file already exists are not downloaded again. The number of
        processed urls is appended to ``self.report['total']``; when any occurred, the counts of
        "Error" and "SaveError" results are appended to ``self.report['error']`` and
        ``self.report['save_error']``.
        Args:
            ncode_data {pandas-dataframe}: image-net image urls data for the particular ncode;
                                           the columns "img_url" and "img_code" are read.
            folder_path {str}: path whose "Images" sub-folder receives the downloaded images.
            verbose {bool}: whether to print the stats for this ncode (object category) download.
            max_time {int}: timeout passed to the url opener for every image url.

        Returns:
            ncode_report {list}: one report per url, as built by ``self.create_report_json``.
        """
        # variable initialization
        stats_report = list()
        ncode_report = list()
        # read the codes positionally so that a dataframe with a non-default
        # index (e.g. a filtered one) still pairs every url with its own code
        image_codes = ncode_data["img_code"].tolist()

        # initiating sequential image download
        for image_num, url in enumerate(
                tqdm(ncode_data["img_url"], file=sys.stdout)):
            image_code = image_codes[image_num]

            _, ext = os.path.splitext(url)
            ext = ".jpg" if ext.lower() not in self.IMAGE_FORMATS else ext
            image_path = os.path.join(folder_path, "Images", image_code + ext)

            # check if image already present
            if os.path.exists(image_path):
                download_json = self.create_report_json(
                    url=url,
                    image_code=image_code + ext,
                    status="done",
                    spent_time="-",
                    error_description="Already present")
                stats_report.append("done")
                ncode_report.append(download_json)
                continue

            status = "done"
            error_description = "Success"

            # starting the image download
            download_start_time = time.time()
            try:
                # opening from the url
                with anchor(url, timeout=max_time) as request:
                    with open(image_path, 'wb') as f:
                        try:
                            # writing the image to the disk
                            f.write(request.read())
                        except Exception as e:
                            # saving error
                            status = "SaveError"
                            error_description = str(e)
            except Exception as e:
                # error during download process (url or target file could not be opened)
                status = "Error"
                error_description = str(e)
            req_time = round(time.time() - download_start_time, 1)

            if status == "SaveError":
                # the truncated file would otherwise be reported as
                # "Already present" on the next run; remove it once closed
                try:
                    os.remove(image_path)
                except OSError:
                    pass  # best-effort cleanup, the failure itself is reported below

            download_json = self.create_report_json(
                url=url,
                image_code=image_code + ext,
                status=status,
                spent_time=req_time,
                error_description=error_description)
            stats_report.append(download_json["status"])
            ncode_report.append(download_json)

        # stats frequency distribution
        total = len(stats_report)
        done_count = stats_report.count("done")
        http_error = stats_report.count("Error")
        save_error = stats_report.count("SaveError")

        # stats reporting
        print()
        if verbose and done_count:
            print("Success/Present : {}/{}".format(done_count, total))
        self.report['total'].append(total)

        # error counters are only recorded when that kind of error occurred
        if http_error:
            if verbose:
                print("HTTP/URL/No-image errors : {}/{}".format(
                    http_error, total))
            self.report['error'].append(http_error)

        if save_error:
            if verbose:
                print("Writing errors : {}/{}\n".format(save_error, total))
            self.report['save_error'].append(save_error)

        return ncode_report