Example No. 1
import os
import sys
import tempfile
from urllib.request import URLopener


def downloadZipFromLink(dowLink, albumName):
    zipName = albumName.replace(" ", "") + ".zip"
    tempDir = tempfile.gettempdir()
    zipPath = os.path.join(tempDir, zipName)

    if os.path.exists(zipPath):
        print("Deleting zip which already exists at location '{}'".format(zipPath))
        os.remove(zipPath)

    print("Downloading '{}' from '{}' to file '{}'".format(albumName, dowLink, zipPath))
    urlopener = URLopener()

    def reporthook(blocknum, blocksize, totalsize):
        # progress callback: called once per retrieved block
        readsofar = blocknum * blocksize
        if totalsize > 0:
            percent = readsofar * 1e2 / totalsize
            s = "\r%5.1f%% %*d / %d" % (
                percent, len(str(totalsize)), readsofar, totalsize)
            sys.stderr.write(s)
            if readsofar >= totalsize:  # near the end
                sys.stderr.write("\n")
        else:  # total size is unknown
            sys.stderr.write("read %d\n" % (readsofar,))

    try:
        urlopener.retrieve(dowLink, zipPath, reporthook)
    except:
        # remove any partial download, then re-raise
        if os.path.exists(zipPath):
            print("\nDeleting archive file '{}'".format(zipPath))
            os.remove(zipPath)
        raise

    print("Download of '{}' has completed.".format(albumName))
    return zipPath
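
Note: URLopener has been deprecated since Python 3.3. A minimal sketch of the same download-with-progress pattern using urllib.request.urlretrieve, which accepts the same (blocknum, blocksize, totalsize) hook; the URL and file name below are placeholders:

import sys
from urllib.request import urlretrieve

def reporthook(blocknum, blocksize, totalsize):
    readsofar = blocknum * blocksize
    if totalsize > 0:
        sys.stderr.write("\r%5.1f%%" % (readsofar * 1e2 / totalsize))
    else:
        sys.stderr.write("read %d\n" % (readsofar,))

path, headers = urlretrieve("https://example.com/album.zip", "album.zip", reporthook)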
Example No. 2
    def _verify_and_download(self):
        """check if file is where it should and download if not"""
        if path.isfile(self._path):
            return
        # File does not exist, so we have to download it.
        epic_id = int(self.epic_id)
        d1 = epic_id - epic_id % 100000  # first directory level: id rounded down to a multiple of 100000
        d2 = epic_id % 100000 - epic_id % 1000  # second level: the middle digits of the id
        url_template = 'https://archive.stsci.edu/missions/k2/target_pixel_files/c{0:d}/{1:d}/{2:05d}/{3}'
        url_to_load = url_template.format(self.campaign, d1, d2,
                                          self.file_name)

        fmt = "Downloading {:} ..... "
        print(fmt.format(self.file_name), end='', file=sys.stderr, flush=True)
        url_retriever = URLopener()
        try:
            url_retriever.retrieve(url_to_load, self._path)
        except exceptions:  # 'exceptions' is presumably a tuple of exception classes defined elsewhere in the module
            print("", file=sys.stderr, flush=True)
            raise IOError(
                "\n\nFailed to download file {:}\n\n".format(url_to_load))
        if not path.isfile(self._path):
            print("", file=sys.stderr, flush=True)
            raise IOError('Download of\n' + url_to_load + '\nto\n' +
                          self._path + '\nsomehow failed')
        print(" done", file=sys.stderr, flush=True)
Example No. 3
    def download_file(self):
        """
        Visit the url for a specific location, enter the date,
        and save the file to a specified directory, e.g.
        http://penteli.meteo.gr/meteosearch/data/aghiosnikolaos/2009-11.txt
        """
        for station in self.stations['stations'][:]:
            try:
                os.mkdir(os.path.join(os.getcwd(), data_folder, station))
            except FileExistsError:
                print('directory: {0} already exists!'.format(station))
            testfile = URLopener()
            os.chdir(os.path.join(data_folder, station))
            for i, date in enumerate(self.dates_to_download):
                name_to_save_file = os.path.join(os.getcwd(), station + '-' + date + '.txt')
                print(os.getcwd())
                try:
                    # the complete url to visit and download the contents from
                    url = url_seed + station + '/' + date + '.txt'
                    testfile.retrieve(url, name_to_save_file)
                except OSError:  # urllib raises OSError subclasses on download failure
                    pass
            os.chdir(os.pardir)
            os.chdir(os.pardir)
Example No. 4
def download_data(admin_level, plz_regex_string, filename):
    bbox = '48.07303233901773,11.348190307617188,48.25028349849019,11.73614501953125'
    query = 'rel(' + bbox + ')[boundary=administrative][admin_level={}]; out geom;'.format(admin_level) + \
            'rel(' + bbox + ')[boundary=postal_code][postal_code~"{}"]; out geom;'.format(plz_regex_string)

    file = URLopener()
    file.retrieve('http://overpass-api.de/api/interpreter?data=' + quote_plus(query), filename)
Example No. 5
def readTLEfile(source):
    ''' Read a TLE file (unzip if necessary) '''
    sourceName = source['name']
    sourceUrl = source['url']
    sourceFile = source['file']
    if os.path.isfile(sourceFile):
        print('Using saved TLE data {} ({})'.format(sourceFile,
              time.ctime(os.path.getmtime(sourceFile))))
    else:
        print('Retrieving TLE data from {}'.format(sourceUrl))
        file = URLopener()
        try:
            file.retrieve(sourceUrl, sourceFile)
        except OSError:  # urllib raises OSError subclasses on download failure
            print("Error: Failed to get TLE data")
            return None
        else:
            print('{} updated'.format(sourceFile))

    if sourceFile.lower().endswith('.zip'):
        print('Unzipping {}...'.format(sourceFile))
        zipFile = zipfile.ZipFile(sourceFile)
        zipFile.extractall('.')
        sourceFile = zipFile.namelist()[0]
        print('Extracted {}'.format(zipFile.namelist()))

    tempContent = []
    with open(sourceFile) as f:
        for aline in f:
            tempContent.append(aline.replace('\n', ''))
        print(len(tempContent) // 3,
              'TLEs loaded from {}'.format(sourceFile))

    return tempContent
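
Each TLE entry in the returned list spans three consecutive lines (a satellite name line followed by the two element lines), which is why the count divides by 3. A small sketch of regrouping the flat list into entries, where tleLines stands for the list the function returns:

entries = [tuple(tleLines[i:i + 3]) for i in range(0, len(tleLines), 3)]
for name, line1, line2 in entries:
    print(name.strip())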
Example No. 7
def downloadAsset(uri, dirname):
    tUrl = uri
    o = urlparse(tUrl)
    contentType = ""
    # targetDir = os.path.join(CURRENT_DIRECTORY, dirname, '/'.join(o.path.split('/')[1:-1]))
    targetDir = CURRENT_DIRECTORY + '/' + dirname + '/' + '/'.join(
        o.path.split('/')[1:-1])

    # javascript and fragment-only URIs do not need to be downloaded
    if o.scheme == "javascript" or (o.netloc == '' and o.path == ''):
        return

    if o.scheme == "":
        if uri.startswith("//"):
            tUrl = f"https:{uri}"
        else:
            tUrl = f"https://{uri}"

    try:
        contentType = getContentType(tUrl)
    except Exception:
        try:
            if uri.startswith('//'):
                tUrl = f"http:{uri}"
            else:
                tUrl = f"http://{uri}"
            contentType = getContentType(tUrl)
        except Exception:
            pass
            # raise Exception("Error during connection")
    else:
        # ignore text/html
        if contentType in mimeTypes[1:]:
            if not os.path.exists(targetDir):
                path = Path(targetDir)
                path.mkdir(parents=True)

            targetFile = targetDir + '/' + o.path.split('/')[-1]
            if not os.path.exists(targetFile):
                try:
                    urlretrieve(tUrl, targetFile)
                    print(f"[Retrieved] {targetFile}")
                except Exception:
                    try:
                        opener = URLopener()
                        opener.addheader('User-Agent', 'Mozilla/5.0')
                        filename, headers = opener.retrieve(tUrl, targetFile)
                    except Exception:
                        try:
                            tUrl = tUrl.replace('www.', '')
                            tUrl = tUrl.replace('http:', 'https:')
                            filename, headers = opener.retrieve(
                                tUrl, targetFile)
                        except Exception as e:
                            print(str(e))
                            raise Exception

        else:
            pass
Example No. 8
    def download(self, entity_id: int, destination: str = None, sort: List[Sort] = None) -> str:
        """
        Download sequences from a single entity.
        """

        sort = [Sort('id', 'asc')] if sort is None else sort
        sort = list(sort_item.to_json() for sort_item in sort) if sort else []
        body = {'filter': [], 'selection': [], 'sort': sort}
        file_path = Sequences.get_filepath_for_entity_id(entity_id)
        url = '{}/entities/{}/_extract'.format(self.url, entity_id)
        print('Downloading shards from "{}" to "{}".'.format(url, file_path))

        paths = []
        with self.session.post(url, stream=True, timeout=10 * 60, json=body) as response:
            try:
                links = response.json()
                print('links', links)
                if 'statusCode' in links and links['statusCode'] != 200:
                    raise Exception(links['message'])
                elif len(links) == 0:
                    raise Exception(
                        'Sequences:download - Error; no download links for {}. Does the table exist?'.format(entity_id))

                index = 0
                for link in links:
                    testfile = URLopener()
                    path = '{}-{}.gz'.format(file_path, index)
                    paths.append(path)
                    testfile.retrieve(link, path)
                    index = index + 1

            except Exception as e:
                print('Sequences:download - error:', e)
                raise e

        sorted_paths = self.get_sorted_file_shard_list(entity_id, paths, [])

        print(f'Unzipping: entity_id={entity_id} to destination={destination}')

        skip_first = False

        with open(destination, 'wb+') as target_file:
            for file_shard in sorted_paths:
                with gzip.open(file_shard, 'rb') as g_zip_file:
                    first_line = True
                    for line in g_zip_file:
                        # We skip the first line of every file, except for the very first.
                        if not (first_line and skip_first):
                            line = Sequences.sanitize(line.decode("utf-8"))
                            target_file.write(line.encode("utf-8"))
                        first_line = False
                # We skip the first line of every file, except for the very first.
                skip_first = True

        return destination
Example No. 9
def save_downloaded_file(context):
    """
    Saves POEditor terms to a file in output dir

    :param context: behave context
    :return: N/A
    """
    file_path = get_poeditor_file_path(context)
    saved_file = URLopener()
    saved_file.retrieve(context.poeditor_download_url, file_path)
    context.logger.info('POEditor terms have been saved in "%s" file' %
                        file_path)
Example No. 10
def download_text_file(url, file_name):
    opener = URLopener()
    file_name = file_name.split("/")[-1]
    file_name = file_name.replace("%20", " ")
    if _is_absolute_link(file_name):
        url = file_name
        if not url.startswith("http://"):
            url = "http://" + url
        out_name = file_name.split("/")[-1]
    else:
        url = "{}{}".format(url, file_name)
        out_name = file_name
    # download to the same name that is returned to the caller
    opener.retrieve(url, out_name)
    return out_name
Example No. 11
def _download_file(url, destination):
    logger.info('Downloading %s to %s...', url, destination)

    response = _open_url(url)

    if not response.code == 200:
        raise WagonError("Failed to download file. Request to {0} "
                         "failed with HTTP Error: {1}".format(
                             url, response.code))
    final_url = response.geturl()
    if final_url != url and is_verbose():
        logger.debug('Redirected to %s', final_url)
    f = URLopener()
    f.retrieve(final_url, destination)
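
Since _open_url has already opened the redirect-resolved response, the second request issued by URLopener.retrieve could be avoided by streaming the open response straight to disk. A minimal sketch of that variant, using plain urlopen in place of the module's _open_url helper:

import shutil
from urllib.request import urlopen

def download_file(url, destination):
    # urlopen follows redirects and raises HTTPError on non-2xx responses
    with urlopen(url) as response, open(destination, 'wb') as f:
        shutil.copyfileobj(response, f)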
Example No. 12
def download_data():
    """This function downloads the data, extract them and remove the archive."""
    if not os.path.exists(DATA_HOME):
        print("Data are missing. Downloading them now...", end="", flush=True)
        datafile = URLopener()
        datafile.retrieve(DOWNLOAD_URL, ARCHIVE_FNAME)
        print("Ok.")
        print("Extracting now...", end="", flush=True)
        tf = tarfile.open(ARCHIVE_FNAME)
        tf.extractall()
        print("Ok.")
        print("Removing the archive...", end="", flush=True)
        os.remove(ARCHIVE_FNAME)
        print("Ok.")
Example No. 13
def downloadFile(linkStore):
    for imgUrl in linkStore:
        try:
            # removing the double slash from the start of the url
            imgUrl = urlEdit(imgUrl[2:])
            fileName = imgUrl.split("/")[-1]
            imgUrl = 'https://' + imgUrl
            print('Downloading file: ' + fileName + '\tURL: ' + imgUrl + '\n')
            image = URLopener()
            image.retrieve(imgUrl, fileName)
            # the line above may fail with a 403 Forbidden response
        except OSError:
            print("Error occurred while downloading file: " + imgUrl + '\n')
Example No. 14
def main():
    # (reconstructed: the next two lines were masked with asterisks in the source)
    username = input("username: ")
    password = input("password: ")
    r = Request("http://www.loxa.edu.tw/index.php")
    with urlopen(r) as response:
        phpsessid = response.getheader("set-cookie").split("; ")[0].split("=")[1]

    cookie = "PHPSESSID={0}; Cookie_Allow=1".format(phpsessid)
    data = {"loginname": username, "loginpswd": password}
    r = Request(
        "http://www.loxa.edu.tw/check.php",
        data=urlencode(data).encode("utf8"),
        headers={"cookie": cookie},
        method="POST",
    )
    try:
        response = urlopen(r)
    except HTTPError:
        sys.exit("Invalid username or password.")

    r = Request("http://www.loxa.edu.tw/index.php?login=1&show_msg=Y", headers={"cookie": cookie})
    response = urlopen(r)

    r = Request("http://www.loxa.edu.tw/jewelbox/foldertree.php", headers={"cookie": cookie})
    with urlopen(r) as response:
        html = response.read().decode("big5")

    folder_tree_pattern = re.compile(r'insFld\(.+?, gFld\(".+?", "file_list.php\?dir_id=(\d+?)", "\w"\)\);')
    file_url_pattern = re.compile(r'<td colspan=3 nowrap>\s+?<a href="(http.+?)"')
    for i in folder_tree_pattern.finditer(html):
        dir_id = i.group(1)
        r = Request(
            "http://www.loxa.edu.tw/jewelbox/file_list.php?dir_id={0}".format(dir_id), headers={"cookie": cookie}
        )
        with urlopen(r) as response:
            html = response.read().decode("big5")

            for i in file_url_pattern.finditer(html):
                url = i.group(1)
                url_data = urlparse(url)
                file_path = url_data.path.lstrip("/")
                dir_name, base_name = os.path.split(file_path)
                if not os.path.exists(dir_name):
                    os.makedirs(dir_name)
                url_opener = URLopener()
                url_opener.addheader("cookie", cookie)
                print("Download: {0} -> {1}".format(url, file_path))
                url_opener.retrieve(url, file_path)
Example No. 15
def scrape_pokemon_image(url):
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    page = urlopen(req).read()
    soup = BeautifulSoup(page, 'html.parser')

    images = soup.find_all('img')
    image_link = images[0].get('src')

    print("[INFO] downloading {}".format(image_link))
    name = str(image_link.split('/')[-1])
    opener = URLopener()
    opener.addheader('User-Agent', 'Mozilla/5.0')
    opener.retrieve(image_link, os.path.join('data/images/', name))

    print(image_link)
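
Several of these examples set a User-Agent header because some servers answer the default Python client string with 403 Forbidden. A deprecation-free sketch of the same trick using urllib.request.Request; the URL and file name are placeholders:

import shutil
from urllib.request import Request, urlopen

req = Request("https://example.com/pokemon.png",
              headers={"User-Agent": "Mozilla/5.0"})
with urlopen(req) as response, open("pokemon.png", "wb") as f:
    shutil.copyfileobj(response, f)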
Example No. 16
def install_mpt(install_path, url=DEFAULT_MPT_URL):
    """
    Install MyPyTutor to the given directory.

    Args:
      install_path (str): The directory to install MyPyTutor in.
      url (str, optional): The URL of the MyPyTutor file to use.

    """
    # create our install path if it doesn't already exist
    if not os.path.exists(install_path):
        os.makedirs(install_path)

    print('Installing MyPyTutor...', end='', flush=True)

    # grab the latest zip file
    # we use an explicit filename here because we don't yet have access
    # to the tutorlib module for abstracting away temporary file creation
    try:
        urlobj = URLopener()
        filename, _ = urlobj.retrieve(url, 'MyPyTutor.zip')
    except Exception:
        print('failed')
        sys.exit(1)

    # extract the file
    with ZipFile(filename) as zf:
        zf.extractall(install_path)

    print('done')
Example No. 17
    def on_update_button_click(self):
        try:
            opener = URLopener()
            opener.retrieve(self.REMOTE_UPDATE_URL,
                            "resources/parameters.json")

            # Read the new settings.
            self.data = read_settings()
            messagebox.showinfo(
                "Settings Update",
                "Settings successfully updated from the server.")
        except Exception as e:
            logging.critical(
                "Couldn't open the remote settings file: {0}".format(str(e)))
            messagebox.showerror("Couldn't Update Settings",
                                 "Couldn't open the remote settings file.")
Example No. 18
def Download_File(name):
    """ Download a UCAC4 file and store it bz2-compressed. """

    url_name = prefix + name
    ucac_file = URLopener()
    ucac_file.retrieve(url_name, name)

    # recompress the downloaded file as <name>.bz2, then remove the original
    with open(name, 'rb') as inp, \
            bz2.BZ2File(name + '.bz2', 'wb', compresslevel=1) as bz2_file:
        copyfileobj(inp, bz2_file)

    os.remove(name)

    return 0
Example No. 20
    def export(self,
               entity_id: int,
               format: ExportFormat,
               destination_folder: str = None):
        entity = self.entities.get(entity_id)
        entity_name = entity['name']
        user = self.authentication.user

        path_parts = entity['path'].split('.')
        # Last path part is always the current document.
        # Any before that are ancestor folders, the first being the parent.
        parent_folder_id = int(path_parts[-2]) if len(path_parts) > 1 else None

        job_id = self.jobs.create(owner_id=user['orgs'][0]['id'],
                                  shareable_id=entity['ownerId'],
                                  job_type=JobType.ExportJob,
                                  name='Export from python client',
                                  input_entity_ids=[entity_id],
                                  params={
                                      "filter": [],
                                      "format": format,
                                      "fileName": entity_name,
                                      "selection": [],
                                      "targetFolderId": parent_folder_id,
                                  })

        # Wait for the file to be converted to Genbank.
        job = self.jobs.poll_job(job_id)

        links = job['outputLinks']

        outputs = []

        for link in links:
            testfile = URLopener()

            destination = os.path.join(destination_folder, entity_name)
            testfile.retrieve(link['url'], destination)

            outputs.append(destination)

        return outputs
Example No. 21
def online_install():
	#Set location for file download by changing working directory
	#Variable that stores the file name of the ninite file, the temp folder path, and the current directory
	dl = 'ninite.exe'
	dl_path = "c:\\Install_Wizard_Temp"
	currentDir = os.getcwd()
	##This should allow the download location to be changed so that the program can be run off a locked flash drive
	#Test to see if directory exists for program already, if not, create one
	if not os.path.exists(dl_path):
		os.makedirs(dl_path)
	#Change working directory to one on customers computer
	os.chdir(dl_path)
	#Check if there is a previous ninite installer
	if os.path.isfile(dl):
		os.remove(dl)
		print('file removed')
	#Create url
	url = urlCreate()
	#Create object to open url
	ninite = URLopener()
	#Download file from url and save it as ninite.exe
	
	try:
		ninite.retrieve(url, dl)
	except: #Error in retrieving website
		text1.set('Ninite website could\nnot be accessed')
	#Run the file
	try:
		check_call(dl, shell=True)
	except: #Error in running file
		text1.set('Error running ninite file')
		
	#Test to see if dl file exists, if so, delete
	if os.path.isfile(dl):
		os.remove(dl)	
	#Change directory back to original working directory
	os.chdir(currentDir)
	#Check if directory that was created earlier still exists, if so remove it
	if os.path.exists(dl_path):
		rmtree(dl_path)
Example No. 22
def choose_crawl_and_download_paths():
    crawls = get_list_of_crawls()
    print("Select a crawl [0-{}]:".format(len(crawls)))
    print_crawls(crawls)
    try:
        crawl_no = int(input("Crawl number [0-{}]:".format(len(crawls))))
    except:
        print('Error: Enter a valid crawl number')
        sys.exit(1)

    file_type = input("File Type [wat/wet/warc]:").lower()

    if file_type not in ['warc', 'wat', 'wet']:
        print("Error: Enter a valid file type")
        sys.exit(1)

    url_to_fetch = "https://commoncrawl.s3.amazonaws.com/crawl-data/{}/{}.paths.gz".format(
        crawls[crawl_no][1], file_type)
    path_file_opener = URLopener()
    path_file_opener.retrieve(url_to_fetch, "paths.gz")

    subprocess.check_output(['gunzip', '--force', 'paths.gz'])

    return crawls[crawl_no][0]
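
The shell-out to gunzip could also be done in-process with Python's own gzip module; a minimal sketch using the same file names as above:

import gzip
import shutil

with gzip.open("paths.gz", "rb") as src, open("paths", "wb") as dst:
    shutil.copyfileobj(src, dst)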
Example No. 23
def download_pic(pic_key, name):
	#Format of url: http://i.imgur.com/KTqYYKVh.jpg
	
	#Create full URL for download of picture
	url = "http://i.imgur.com/" + pic_key
	#Check if there is a picture with this name already
	#Either rename or skip
	if os.path.isfile(name):
		return True
		#return False
		'''
		#Add "_1" to the end of the picture name if name is taken
		name = "1_" + name
		'''
	
	#Create object to open url
	picture = URLopener()	
	#Try to download picture and save as name
	try:
		picture.retrieve(url, name)
	except: #Error in downloading picture
		return False
	#Return True if process completes, meaning that picture downloaded
	return True
Example No. 24
def open_url(url, **kwds):
    """Opens a url or file and returns an appropriate key-value reader."""
    reader_cls = fileformat(url)

    parsed_url = urlparse(url, 'file')
    if parsed_url.scheme == 'file':
        f = open(parsed_url.path, 'rb')
    else:
        if parsed_url.scheme == 'hdfs':
            server, username, path = hdfs.urlsplit(url)
            url = hdfs.datanode_url(server, username, path)

        if reader_cls is ZipReader and sys.version_info < (3, 2):
            # In Python <3.2, the gzip module is broken because it depends on
            # the underlying file being seekable (not true for url objects).
            opener = URLopener()
            filename, _ = opener.retrieve(url)
            f = open(filename, 'rb')
            os.unlink(filename)
        else:
            f = urlopen(url)

    return reader_cls(f, **kwds)
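
On Python 3.2 and later the temporary-file workaround is unnecessary: reading the response into an in-memory buffer gives the zip/gzip readers the seekable file object they need. A minimal sketch with a placeholder URL:

import io
import zipfile
from urllib.request import urlopen

with urlopen("https://example.com/data.zip") as response:
    buffer = io.BytesIO(response.read())  # BytesIO is seekable
with zipfile.ZipFile(buffer) as zf:
    print(zf.namelist())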
Example No. 25
if sys.version_info[0] == 3:  # (reconstructed branch: the snippet begins at the elif)
    from urllib.request import URLopener
elif sys.version_info[0] == 2:
    from urllib import URLopener

dataset_url = 'https://vision.in.tum.de/rgbd/dataset/freiburg2/rgbd_dataset_freiburg2_pioneer_slam.tgz'
filename_zip = 'rgbd_dataset_freiburg2_pioneer_slam.tgz'
filename = 'rgbd_dataset_freiburg2_pioneer_slam'

# go to the benchmark directory
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)

if not os.path.exists(filename_zip):
    print('Downloading dataset file ', filename_zip)
    testfile = URLopener()
    testfile.retrieve(dataset_url, filename_zip)

if not os.path.exists(filename):
    print('Extracting dataset ', filename)
    tar = tarfile.open(filename_zip, "r:gz")
    tar.extractall()
    tar.close()

if not os.path.exists(filename + '/depth_gt.txt'):
    first_list = associate.read_file_list(filename + '/depth.txt')
    second_list = associate.read_file_list(filename + '/groundtruth.txt')

    matches = associate.associate(first_list, second_list, 0.0, 0.02)

    f = open(filename + '/depth_gt.txt', 'w')
    for a, b in matches:
        # (assumed completion: the original snippet is truncated here; write the
        # matched depth/ground-truth timestamp pairs)
        f.write("%f %s %f %s\n" % (a, " ".join(first_list[a]), b, " ".join(second_list[b])))
    f.close()
Example No. 26
def test2_ok():
    od = URLopener()
    # ok: insecure-urlopener-retrieve-ftp
    url = "ftps://example.com"
    od.retrieve(url)
Example No. 27
for hdri in hdris:  # (reconstructed loop header: the snippet begins mid-loop; cf. Example No. 35)
    thumbnail = hdri.select('.thumbnail')[0]['data-src']
    href = urlparse(hdri['href'])
    filename = href.query[2:] + '_' + resolution
    new_filename = filename.replace(category + '&h=', '')
    tonemapped = thumbnail.replace('/files/hdri_images/thumbnails/', '')

    dl_url = 'https://hdrihaven.com/files/hdris/' + new_filename
    thumbnail_url = 'https://hdrihaven.com/' + thumbnail
    tonemapped_url = 'https://hdrihaven.com/files/hdri_images/tonemapped/8192/' + tonemapped

    print(f"\n{new_filename} - {dl_url}")

    try:
        print(f"{new_filename}.hdr downloading...")
        ext = '.hdr'
        opener.retrieve(dl_url + ext, new_filename + ext)
        filesnum += 1
    except Exception as e:
        print(f"{new_filename}.hdr download failed, trying .exr...")
        try:
            ext = '.exr'
            opener.retrieve(dl_url + ext, new_filename + ext)
            filesnum += 1
        except Exception as e:
            print(f"{new_filename} download failed. Continuing...\n")
            continue

    if (tonemappedjpg == 'Y' or tonemappedjpg == 'y' or tonemappedjpg == 'Yes'
            or tonemappedjpg == 'yes'):
        print(f"8K Tonemapped {tonemapped} downloading...")
        opener.retrieve(tonemapped_url, os.path.basename(tonemapped_url))
Example No. 28
def downloadAsset(uri, dirname, contentType):
    # if contentType == 'text/javascript':
    #     return
    down = time.time()
    tUrl = uri
    o = urlparse(tUrl)
    targetDir = CURRENT_DIRECTORY + '/' + dirname + '/' + '/'.join(o.path.split('/')[1:-1])

    # javascript and fragment-only URIs do not need to be downloaded
    if o.scheme == "javascript" or (o.netloc == '' and o.path == ''):
        return
    global ret_time
    global ret
    ret += 1

    if o.scheme == "":
        if uri.startswith("//"):
            tUrl = f"https:{uri}"
        else:
            tUrl = f"https://{uri}"

    if not uri.startswith('http'):
        if uri.startswith('//'):
            tUrl = f"http:{uri}"
        else:
            tUrl = f"http://{uri}"

    # ignore text/html
    if contentType in mimeTypes[1:]:
        if not os.path.exists(targetDir):
            path = Path(targetDir)
            path.mkdir(parents=True)

        targetFile = targetDir + '/' + o.path.split('/')[-1]
        if not os.path.exists(targetFile):
            try:
                urlretrieve(tUrl, targetFile)
                print(f"[Retrieved] {tUrl}", time.time() - down)
                # print(f"[Retrieved] {targetFile}", time.time() - down)
                ret_time += time.time() - down
            except Exception as e:
                try:
                    print(type(e).__name__, tUrl)
                    opener = URLopener()
                    opener.addheader('User-Agent', 'Mozilla/5.0')
                    filename, headers = opener.retrieve(tUrl, targetFile)
                    print(f"[Retrieved2] {targetFile}", time.time() - down)
                    ret_time += time.time() - down
                except Exception as e:
                    try:
                        print(type(e).__name__, 'still failed with a User-Agent header', tUrl)
                        tUrl = tUrl.replace('www.', '')
                        tUrl = tUrl.replace('http:', 'https:')
                        opener.retrieve(tUrl, targetFile)
                        print(f"[Retrieved3] {targetFile}", time.time() - down)
                        ret_time += time.time() - down
                    except Exception as e:
                        print(type(e).__name__, 'https:// with www stripped', tUrl)
                        if 'bobae' in tUrl:  # ad-hoc fallback just for bobaedream images; a temporary workaround for now
                            try:
                                tUrl = tUrl.replace('//', '//image.')
                                opener.retrieve(tUrl, targetFile)
                                print(f"[Retrieved4] bobaedream image {targetFile}", time.time() - down)
                            except Exception:
                                print(type(e).__name__, 'image fallback also failed', tUrl)
                                pass
                        return
            finally:
                if contentType == 'text/css':
                    global args
                    parseCSSURLs(targetFile, args.url, dirname)
    else:
        pass
Example No. 29
def downloader(url, ext, file):
    # (reconstructed header and first branch: the original snippet begins at an
    # elif; 'i' presumably holds the requested resolution, and 'files',
    # 'owerwrite' [sic] and 'urlopener' are defined elsewhere in the script)
    if file + ext in files and owerwrite == False:
        print("Already exists (skipped): " + file + ext)
    elif file + ext not in files and owerwrite == False:
        urlopener.retrieve(url + i + ext, file + ext)
        print("Download complete: " + file + ext)
    elif owerwrite == True:
        urlopener.retrieve(url + i + ext, file + ext)
        print("Already exist (overwrite): " + file + ext)


for i in items:
    item = i["href"].replace("/hdri/?h=", "")

    # preview image files
    hdr_file = "https://hdrihaven.com/files/hdris/" + item + "_"
    thumb_file = "https://hdrihaven.com/files/hdri_images/thumbnails/" + item + ".jpg"
    preview_file = (
        "https://hdrihaven.com/files/hdri_images/tonemapped/1500/" + item +
        ".jpg")
    spheres_file = "https://hdrihaven.com/files/hdri_images/spheres/" + item + ".jpg"
    if down_thumbnail == True and item + "_thumbnail.jpg" not in files:
        urlopener.retrieve(thumb_file, item + "_thumbnail.jpg")
    if down_preview == True and item + "_preview.jpg" not in files:
        urlopener.retrieve(preview_file, item + "_preview.jpg")
    if down_spheres == True and item + "_spheres.jpg" not in files:
        urlopener.retrieve(spheres_file, item + "_spheres.jpg")

    # hdr file
    try:
        downloader(hdr_file, ".hdr", item)
    except:
        downloader(hdr_file, ".exr", item)
Example No. 30
 def handle(self, *args, **options):
     print("Updating catalogs..")
     update_conf_list = AutomaticProductUpdate.objects.filter(order_number=1)
     for conf in update_conf_list:
         shop_shop = conf.shop
         print("Updating catalog for shop '%s'.." % shop_shop)
         print("-------------------------------------------------------- ")
         try:
             print("Dowloading catalog file for shop '%s', from url:%s" % (shop_shop, conf.catalog_url))
             file = URLopener()
             if not os.path.exists(CATALOGS_ROOT):
                 os.makedirs(CATALOGS_ROOT)
             catalog_filename = CATALOGS_ROOT+'/%s_catalog' % shop_shop
             if conf.is_compressed:
                 extension = '.%s' % conf.compress_format
             else:
                 extension = '.csv'
             catalog_filename += extension
             file.retrieve(conf.catalog_url, catalog_filename)
             print("Catalog file retrieved for shop '%s', local path:%s" % (shop_shop, catalog_filename))
             if conf.is_compressed:
                 print("Decompressing file ...")
                 # Get a new clean tmp dir
                 tmp_dir = CATALOGS_ROOT + '/%s_tmp' % shop_shop
                 if os.path.exists(tmp_dir):
                     shutil.rmtree(tmp_dir)
                 os.makedirs(tmp_dir)
                 # Extract catalog (should be a single file inside compressed file)
                 if not decompress_file(input_file=catalog_filename,
                                        output_dir=tmp_dir,
                                        compression_format=conf.compress_format):
                     print("Decompressing file ... ERROR")
                     return -1
                 # Copy and rename the extracted catalog file
                 extracted_catalog = os.listdir(tmp_dir)[0]
                 catalog_filename = catalog_filename[:-4] + ".csv"
                 extracted_catalog_path = os.path.abspath(os.path.join(tmp_dir, extracted_catalog))
                 shutil.copyfile(extracted_catalog_path, catalog_filename)
                 print("Decompressing file ... DONE")
                 print("Cleaning and preparing CSV FILE ...")
                 output_file = CATALOGS_ROOT + '/%s' % shop_shop + ".csv"
                 csv_file = open(catalog_filename, 'r', errors='ignore')
                 with open(output_file, 'w') as fh:
                     reader = csv.reader(csv_file, delimiter=';')
                     next(reader, None)  # skip the header row
                     writer = csv.writer(fh, delimiter=';')
                     writer.writerow(("aw_deep_link", "product_name", "search_price", "merchant_name", "delivery_cost", "brand_name", "product_model", "delivery_time", "product_GTIN"))
                     for r in reader:
                         # keep only rows whose fields contain no stray ';'
                         count = 0
                         for i in r:
                             count += i.count(';')
                         if count == 0:
                             writer.writerow((r[0], r[1], r[2], r[3].replace(" ", ""), r[4], r[5], r[6], r[7], r[8]))
                 csv_file.close()
             conf.last_update = datetime.now()
             conf.local_file = catalog_filename
             conf.save()
         except Exception as e:
             print("ERROR processing catalog %s [SKIPPED]\n%s" %(shop_shop, e))
             continue
         print("------------------------------------------------------ ")
     print("All catalogs processed.")
Example No. 31
def save_file_on_disc_from(url, directory):
    try:
        testfile = URLopener()
        testfile.retrieve(url, directory + '-' + str(url).split("/")[-1])
    except Exception as exception:
        log.error('Unexpected exception: ' + str(exception))
Example No. 32
def getWeatherData(year, month, day):
    apiKey = getApiKey()
    url = 'http://api.wunderground.com/api/{0}/history_20{1}{2}{3}/q/WA/seattle.json'.format(apiKey, year, month, day)
    testfile = URLopener()
    testfile.retrieve(url, dataLocation + "{0}{1}{2}.{3}".format(year, month, day, dataExtension))
def test6(url="ftp://example.com"):
    od = URLopener()
    # ruleid: insecure-urlopener-retrieve-ftp
    od.retrieve(url)
Example No. 34
def test1_ok():
    od = URLopener()
    # ok: insecure-urlopener-retrieve-ftp
    od.retrieve("ftps://example.com")
Example No. 35
for hdri in hdris:
    thumbnail = hdri.select('.thumbnail')[0]['data-src']
    href = urlparse(hdri['href'])
    filename = href.query[2:] + '_' + resolution

    # DL link example
    # https://hdrihaven.com/files/hdris/small_harbor_02_2k.hdr
    dl_url = (
        'https://hdrihaven.com/files/hdris/' + filename
    )
    thumbnail_url = 'https://hdrihaven.com' + thumbnail
    print(dl_url)
    print(thumbnail_url)

    try:
        print('downloading hdr...')
        ext = '.hdr'
        opener.retrieve(dl_url + ext, filename + ext)
    except Exception as e:
        print('hdr download failed, trying exr...')
        try:
            ext = '.exr'
            opener.retrieve(dl_url + ext, filename + ext)
        except Exception as e:
            print('download failed. Continuing...\n')
            continue
    print('')
    opener.retrieve(thumbnail_url, os.path.basename(thumbnail_url))

print('Done')
Example No. 36
r = requests.get(url_category, allow_redirects=True, headers={'User-Agent': ua.chrome})
soup = BeautifulSoup(r.text, 'html.parser')

save_to = category + ' Texture ' + resolution

try:
    os.mkdir(save_to)
except FileExistsError:
    pass
os.chdir(save_to)

texs = soup.select('#item-grid a')

for tex in texs:
    href = urlparse(tex['href'])
    filename = href.query[2:]
    new_filename = filename.replace(category + '&t=', '')
    dl_url = (f"https://texturehaven.com/files/textures/zip/{resolution}/{new_filename}/{new_filename}_{resolution}_{fileformat}.zip")
    
    print(f"\n{dl_url}")

    try:
        print(f"{new_filename} downloading...")
        opener.retrieve(dl_url, os.path.basename(dl_url))
        filesnum += 1
    except Exception as e:
        print(f"{new_filename} download failed, Continuing...")
        continue

print(f"\nDownload completed. {filesnum} files downloaded.")
Example No. 37
def download_page(pic_url, output):
    image = URLopener()
    image.retrieve(pic_url, output)