def _read_zip(self, filepath):
    """
    Read an IPS developer tools zipfile and return its matching version.

    The version ID is taken from the first archive entry when that entry is
    an all-digit directory name; otherwise it is taken from a filename of the
    form ``IPS_Developer_Tools_v<id>.zip``. The ID is then looked up in
    ``self.ips_versions``.

    @type   filepath:   str
    @rtype: Version
    @raise  BadZipfile: if no version ID can be determined, or the ID is not
                        present in ``self.ips_versions``
    """
    with ZipFile(filepath) as archive:
        namelist = archive.namelist()

    # An empty archive has no first entry. Treat it like a non-matching entry
    # so the filename fallback still applies instead of raising IndexError.
    first_entry = namelist[0] if namelist else ''

    if re.match(r'^\d+/?$', first_entry):
        self.log.debug('Developer Tools directory matched: %s', first_entry)
        version_id = first_entry.strip('/')
    else:
        basename = os.path.basename(filepath)
        # Raw string with an escaped dot: only a literal ".zip" suffix counts.
        match = re.match(r'^IPS_Developer_Tools_v(\d+)\.zip$', basename)
        if not match:
            self.log.error(
                'No developer tools directory matched, unable to continue'
            )
            raise BadZipfile('Unrecognized dev tools file format, aborting')

        self.log.info(
            'Could not parse dev_tools archive, pulling version id from filename instead'
        )
        version_id = match.group(1)

    if version_id not in self.ips_versions:
        raise BadZipfile(
            'Unrecognized version ID (is the dev tools package newer than our latest IPS release?)'
        )

    version = self.ips_versions[version_id]
    self.log.debug('Version matched: %s', version)
    return Version(version, version_id)
def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    ``resource_path`` is an NLTK resource name; a trailing separator is
    optional. The last path component is used as the corpus name to download.

    Returns True if the corpus needed to be downloaded.
    Raises BadZipfile if the lookup hits a corrupt or non-zip corpus file.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Drop any trailing separator before splitting. Otherwise the last
    # component (the corpus name handed to download()) would be empty.
    trimmed = resource_path.rstrip('/' + sep)
    _, corpus_name = split(trimmed)

    # NLTK resource names use forward slashes on every platform, and a
    # directory inside a zipfile is only found when the name ends with one.
    resource_path = trimmed + '/'

    # Download the NLTK data only if it is not already downloaded
    downloaded = False
    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.')
    return downloaded
def _load_zip(path):
    """
    Open the zip archive at ``path`` for reading and return the ZipFile.

    BadZipfile and LargeZipFile errors raised while opening are re-raised as
    BadZipfile with the path prepended to the original message.
    """
    try:
        return ZipFile(path, 'r')
    except (BadZipfile, LargeZipFile) as err:
        raise BadZipfile('%s: %s' % (path, err))
def unvault(self, name, target=None, download=True):
    """
    Decrypt the vault called ``name`` and extract it.

    The encrypted file is optionally fetched via ``self.download`` first,
    then decrypted with ``AESCipher(self.key)`` into a zip file, which is
    extracted into ``target`` (or ``self.location`` when no target is given).
    The extracted paths are appended to ``self.extracted_files`` and
    ``self.cleanup(name)`` is called afterwards.

    Returns the list of extracted member paths.
    Raises BadZipfile if the decrypted data is not a valid zip archive.
    """
    assert self.key, "you have to give a key or set in $VAULT_KEY"
    assert name, "give a vault name"
    _vault_tmp, vault_zip, vault_crypt = self.directories(name)

    if download:
        assert self.s3_path, "No s3_path specified"
        assert self.s3_bucket, "No s3_bucket specified"
        assert self.s3_useragent, "you need to provide $S3_VAULT_USERAGENT"
        self.download(self.s3_bucket, self.s3_file(name), vault_crypt)
        assert os.path.exists(vault_crypt), "Download failed for %s" % self.s3_file(name)

    # NOTE(review): both files are opened in text mode. Whether that is right
    # depends on what AESCipher.decrypt accepts and returns -- confirm.
    with open(vault_zip, 'w') as vz, open(vault_crypt) as vc:
        aes = AESCipher(self.key)
        vz.write(aes.decrypt(vc.read()))

    destination = target or self.location
    try:
        # The context manager closes the archive handle even when
        # extraction fails part-way through.
        with ZipFile(vault_zip) as zipf:
            zipf.extractall(destination)
            member_names = zipf.namelist()
    except BadZipfile:
        raise BadZipfile('Could not extract %s. Did you set the key?' % vault_crypt)

    members = [os.path.join(destination, member) for member in member_names]
    self.extracted_files.extend(members)
    self.cleanup(name)
    return members
def temp_shapefile_from_zip(zip_path):
    """Unpack a ZIP file into a fresh temp dir and locate its shapefile.

    Every file in the archive is copied flat into the temp dir (directory
    components are dropped, base names are kept).

    Returns a ``(tempdir, shape_path)`` tuple, where ``shape_path`` is the
    extracted ``.shp`` file, or None if the archive contained none.

    Doesn't clean up after itself unless there was an error; the caller is
    responsible for removing ``tempdir`` later.

    Raises BadZipfile (with the path appended to the message) if ``zip_path``
    is not a valid zip archive.
    """
    import shutil

    try:
        zf = ZipFile(zip_path)
    except BadZipfile as e:
        raise BadZipfile(str(e) + ": " + zip_path)

    tempdir = mkdtemp()
    shape_path = None

    try:
        # Copy the zipped files to a temporary directory, preserving names.
        for name in zf.namelist():
            if name.endswith("/"):
                continue  # directory entry, nothing to write
            outfile = os.path.join(tempdir, os.path.basename(name))
            if name.endswith('.shp'):
                shape_path = outfile
            # Archive members are raw bytes, so the copy must be binary.
            with open(outfile, 'wb') as f:
                f.write(zf.read(name))
    except Exception:
        # Honour the documented contract: no temp dir is left behind on error.
        shutil.rmtree(tempdir, ignore_errors=True)
        raise
    finally:
        zf.close()

    return tempdir, shape_path
def _read_zip(self, filepath):
    """
    Read an IPS installation zipfile and return the core version number

    The first archive entry must be a setup directory named ``ips_`` followed
    by five word characters. The version is the last entry of that
    directory's ``applications/core/data/versions.json``.

    @type   filepath:   str
    @rtype: Version
    @raise  BadZipfile: if the archive is empty, the setup directory is not
                        recognised, or versions.json is missing
    """
    # Zip member names always use forward slashes, so join them with
    # posixpath rather than the platform-dependent os.path.
    import posixpath

    with ZipFile(filepath) as archive:
        namelist = archive.namelist()

        # An empty archive has no first entry to inspect; report it as an
        # unrecognised setup file instead of failing with IndexError.
        if not namelist:
            self.log.error('No setup directory matched')
            raise BadZipfile('Unrecognized setup file format')

        setup_dir = namelist[0]
        if not re.match(r'^ips_\w{5}/?$', setup_dir):
            self.log.error('No setup directory matched')
            raise BadZipfile('Unrecognized setup file format')
        self.log.debug('Setup directory matched: %s', setup_dir)

        versions_path = posixpath.join(setup_dir, 'applications/core/data/versions.json')
        if versions_path not in namelist:
            raise BadZipfile('Missing versions.json file')

        # OrderedDict keeps the file's key order so the last key can be read.
        versions = json.loads(archive.read(versions_path), object_pairs_hook=OrderedDict)

    vid = next(reversed(versions))
    version = versions[vid]

    self.log.debug('Version matched: %s', version)
    return Version(version, vid)
def zip_files(files, zip_name):
    """
    Creates a zip file named zip_name + '.zip' from files list argument.

    Files that cannot be read are skipped and counted. When the list holds
    more than three entries and any entry contains 'shp', shapefile
    components (.shp, .dbf, .shx, .prj) are stored under a 'shp/' directory
    inside the archive; every other file is stored at the archive root.

    Returns None when at least one file was added. When no file could be
    added the archive is deleted and BadZipfile is raised.

    files -- files list argument
    zip_name -- name of created zip file, without the '.zip' extension
    """
    # Text type differs between Python 2 and 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # The original matched 'prj' without a dot; the dotted form agrees with
    # the other three suffixes.
    shape_suffixes = ('.shp', '.dbf', '.shx', '.prj')

    zip_name = zip_name + '.zip'
    files_len = len(files)
    # counter for not found files in list arg
    file_not_found_count = 0

    # make dir if there are a shape file plus other formats files in files list
    if files_len > 3 and any('shp' in substr for substr in files):
        dir_shp = 'shp/'
    else:
        dir_shp = ''

    # create zip; the context managers close both handles even on error
    with ZipFile(zip_name, 'w') as zip_file:
        for n in files:
            try:
                name = text_type(n)
                # Read as bytes so binary members are stored unmodified.
                with open(name, 'rb') as f:
                    base = os.path.basename(name)
                    if name.endswith(shape_suffixes):
                        zip_file.writestr(dir_shp + base, f.read())
                    else:
                        zip_file.writestr(base, f.read())
            except IOError:
                # file not found
                file_not_found_count += 1

    if files_len > file_not_found_count:
        return None

    # Nothing was added: remove the useless archive and report it.
    delete_file(zip_name)
    raise BadZipfile('Zip file not valid!')
def _populate_ips_versions(self):
    """
    Populate IPS version data for mapping

    Loads ``applications/core/data/versions.json`` from the archive of the
    IpsManager's ``dev_version`` (or ``latest`` when that is falsy) into
    ``self.ips_versions`` as an OrderedDict.

    @return: None
    @raise  BadZipfile: if the archive is empty or has no versions.json
    """
    # Zip member names always use forward slashes, so join them with
    # posixpath rather than the platform-dependent os.path.
    import posixpath

    # Get a map of version ID's from our most recent IPS version
    manager = IpsManager(self.ctx)
    release = manager.dev_version or manager.latest

    with ZipFile(release.filepath) as archive:
        namelist = archive.namelist()

        # An empty archive cannot contain versions.json; report that instead
        # of failing with IndexError on namelist[0].
        if not namelist:
            raise BadZipfile('Missing versions.json file')

        ips_versions_path = posixpath.join(
            namelist[0], 'applications/core/data/versions.json')
        if ips_versions_path not in namelist:
            raise BadZipfile('Missing versions.json file')

        self.ips_versions = json.loads(archive.read(ips_versions_path),
                                       object_pairs_hook=OrderedDict)

    self.log.debug("%d version ID's loaded from latest IPS release",
                   len(self.ips_versions))
def nltk_download_corpus(
    resource_path,
    local_data=LOCAL_DATA,
    nltk_dir=NLTK_DIR,
):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    When ``local_data`` is truthy the download goes through a ``Downloader``
    built with ``local_data`` and ``nltk_dir``; otherwise ``nltk.download``
    is used.

    Returns True if the corpus needed to be downloaded.
    Raises BadZipfile if the lookup hits a corrupt or non-zip corpus file.
    """
    # NOTE(review): looks like leftover debug output; kept because callers
    # may currently see it on stdout -- confirm before removing.
    print(os.getcwd())

    from nltk.data import find
    if not local_data:
        from nltk import download
    else:
        from nltk.downloader import Downloader
        _downloader = Downloader(local_data=local_data, nltk_dir=nltk_dir)
        #print(os.listdir(_downloader._download_dir)) DEBUG, don't throw away
        download = _downloader.download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Drop any trailing separator before splitting. Otherwise the last
    # component (the corpus name handed to download()) would be empty.
    trimmed = resource_path.rstrip('/' + sep)
    _, corpus_name = split(trimmed)

    # NLTK resource names use forward slashes on every platform, and a
    # directory inside a zipfile is only found when the name ends with one.
    resource_path = trimmed + '/'

    # Download the NLTK data only if it is not already downloaded
    downloaded = False
    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.')
    return downloaded
def extract_and_save_project(self, file, directory, title, started, user):
    """
    Extract an uploaded project archive and import the extracted project.

    Progress is reported through ``update_started`` / ``update_progress``.
    The archive ``file`` is removed whether or not extraction succeeds.

    Returns the result of ``task_finished`` on success.
    Raises BadZipfile carrying the extractor's response when the response is
    anything other than ``'ok'``.
    """
    task = extract_and_save_project
    update_started(task, title, started, 'Extracting files...', {})
    progress_args = (task, title, started)

    resp, _stat = self.archive_project.extract(
        file, directory, user, update_progress, progress_args)

    # Both outcomes delete the uploaded archive before doing anything else.
    self.file_utility.remove_file(file)

    if resp != 'ok':
        logger.info("File extraction failed, so removed.")
        raise BadZipfile(resp)

    logger.info("File extracted and removed.")
    details = self.file_utility.import_project(
        directory, user, update_progress, progress_args)
    return task_finished(task, title, started, 'Upload complete!', details)
def nltk_download_corpus(resource_path):
    """
    Make sure the NLTK corpus named by ``resource_path`` is available.

    The resource is looked up with ``nltk.data.find``; if the lookup fails,
    the last path component is downloaded with ``nltk.download``.

    Returns True if a download was needed, False otherwise.
    Raises BadZipfile if the lookup hits a corrupt or non-zip corpus file.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Strip trailing separators first, so a path that already ends with one
    # still yields a non-empty corpus name for download().
    trimmed = resource_path.rstrip('/' + sep)
    _, corpus_name = split(trimmed)

    # NLTK resource names use forward slashes on every platform, and a
    # directory inside a zipfile is only found when the name ends with one.
    resource_path = trimmed + '/'

    downloaded = False
    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.')
    return downloaded
def create_zip_file(self):
    """
    Create ``<file_name>.zip`` and verify that it can be read back.

    The archive holds the source file, its minified counterpart and every
    ``(name, absolute_name)`` pair in ``self.extra_zip_files``, all stored
    under a top-level ``<file_name>/`` directory.

    Raises BadZipfile if the integrity check finds a corrupt member.
    """
    archive_name = self.file_name + '.zip'
    prefix = self.file_name + '/'

    file_name = self.file_name + self.extension
    compressed_file_name = self.file_name + '.' + self.minify_posfix + self.extension

    # Context managers close the archive even if a write or the check fails.
    with ZipFile(archive_name, 'w') as zip_file:
        zip_file.write(file_name, prefix + file_name)
        zip_file.write(compressed_file_name, prefix + compressed_file_name)
        for name, absolute_name in self.extra_zip_files:
            zip_file.write(absolute_name, prefix + name)

    # tests the zipfile: testzip() returns the first bad member, or None
    with ZipFile(archive_name, 'r') as zip_file:
        if zip_file.testzip() is not None:
            raise BadZipfile()
        # Parenthesised form prints the same text on Python 2 and 3.
        print('** Succesfully created zipped file. **')
def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    ``resource_path`` is an NLTK resource name such as
    ``'sentiment/vader_lexicon'``; a trailing separator is optional.

    Returns True if the corpus needed to be downloaded.
    Raises BadZipfile if the lookup hits a corrupt or non-zip corpus file.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Drop any trailing separator before splitting. Otherwise the last
    # component (the corpus name handed to download()) would be empty.
    trimmed = resource_path.rstrip('/' + sep)

    # Download the NLTK data only if it is not already downloaded
    _, corpus_name = split(trimmed)

    # From http://www.nltk.org/api/nltk.html
    # When using find() to locate a directory contained in a zipfile,
    # the resource name must end with the forward slash character.
    # Otherwise, find() will not locate the directory.
    #
    # Helps when resource_path=='sentiment/vader_lexicon'
    #
    # A literal '/' is appended (not os.sep) so this holds on every platform.
    resource_path = trimmed + '/'

    downloaded = False
    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.'
        )
    return downloaded
def test_load_csvz_data_from_memory():
    # On Python 2 the BadZipfile is raised directly instead of calling
    # get_data at all.
    if PY2:
        raise BadZipfile("pass it")

    # On Python 3 an empty in-memory text stream is handed to get_data as
    # "csvz" content.
    # NOTE(review): presumably get_data is expected to raise BadZipfile
    # here -- confirm against the test's decorator or runner.
    stream = StringIO()
    get_data(stream, file_type="csvz")