Пример #1
0
 def test_create_zipfile(self):
     # create_zipfile() must hand back an object exposing ``read`` and leave
     # an archive at ``_zippath`` that contains both expected members.
     subject = Archive('test.zip')
     subject._runtime = RUNTIME_NODE_JS
     produced = subject.create_zipfile()
     ok_(hasattr(produced, 'read'))
     with PyZipFile(subject._zippath, 'r', compression=ZIP_DEFLATED) as packed:
         for member in ('lambda_function.pyc', '.lamvery_secret.json'):
             ok_(member in packed.namelist())
Пример #2
0
 def test_build(self):
     # build() must hand back an object exposing ``read`` and leave an
     # archive at ``_zippath`` that contains both expected members.
     subject = Builder('test.zip')
     subject._runtime = RUNTIME_NODE_JS
     produced = subject.build()
     ok_(hasattr(produced, 'read'))
     with PyZipFile(subject._zippath, 'r', compression=ZIP_DEFLATED) as packed:
         for member in ('lambda_function.pyc', '.lamvery_secret.json'):
             ok_(member in packed.namelist())
Пример #3
0
    def add_interesting_events(self, node_dictionary, zipfile, zipfilename):
        """Feed every archived log that matches an event key to process_log().

        A member matches a key of ``self.events`` when its name ends with
        ``'/' + key``.  The sizes of all matching members are added to
        ``self.total_size`` and reported through
        ``mortimer.update_progress_so_far`` before processing starts; a
        second report (with progress equal to the total) is sent when all
        members have been processed.

        :param node_dictionary: passed through unchanged to ``process_log``.
        :param zipfile: an open archive offering ``namelist``, ``getinfo``
            and ``read``.
        :param zipfilename: stored in ``self.filename`` and used in the
            final timing message.
        """
        t = time.clock()
        self.filename = zipfilename

        # Resolve the (key, member) pairs once instead of re-scanning the
        # whole name list for every key in two separate passes.  The pair
        # order is the same as the original nested loops produced.
        names = zipfile.namelist()
        matches = [(key, name)
                   for key in self.events.keys()
                   for name in names
                   if name.endswith('/' + key)]

        # Figure out sizes for progress reporting
        for _, name in matches:
            self.total_size += zipfile.getinfo(name).file_size

        mortimer.update_progress_so_far(self.progress_queue, self.total_size, self.progress_size)

        epoch = datetime(1970, 1, 1)
        for key, name in matches:
            tf = tempfile.TemporaryFile()
            try:
                # ZipFile.read() opens and closes the member itself, so no
                # member handle is left dangling.
                tf.write(zipfile.read(name))
                tf.seek(0)

                # some logs don't have a year in the timestamp, assume log file year is the one
                # (stored as seconds between 1970-01-01 and Jan 1st of the
                # member's modification year)
                self.year = int((datetime(zipfile.getinfo(name).date_time[0], 1, 1) - epoch).total_seconds())
                self.process_log(node_dictionary, key, tf)
            finally:
                # Release the temporary file even when process_log() raises.
                tf.close()

        self.process_time = time.clock() - t

        mortimer.update_progress_so_far(self.progress_queue, self.total_size, self.total_size)

        print("{}: Processing of node events took {} seconds".format(self.filename, self.process_time))
Пример #4
0
def addItem(zipfile, item):
    """Add a file, or every file below a directory, to a writable archive.

    Files are stored under their path as given (``zipfile.write(path, path)``).
    A file is skipped when the archive already holds an entry with the name
    that path would be stored under, so calling this repeatedly with the
    same item does not create duplicate entries.

    :param zipfile: an archive opened for writing.
    :param item: path of a file or of a directory to walk recursively.
    """
    def archive_name(path):
        # Mirror the normalisation ZipFile.write() applies to an arcname:
        # drop the drive, normalise, strip leading separators and use "/".
        name = os.path.normpath(os.path.splitdrive(path)[1])
        name = name.lstrip(os.sep + (os.altsep or ''))
        return name.replace(os.sep, '/')

    # Build the lookup once; namelist() creates a new list on every call.
    present = set(zipfile.namelist())

    def add(path):
        name = archive_name(path)
        if name not in present:
            zipfile.write(path, path)
            present.add(name)

    if os.path.isdir(item):
        for archiveDirPath, dirNames, fileNames in os.walk(item):
            for fileName in fileNames:
                add(os.path.join(archiveDirPath, fileName))
    else:
        add(item)
Пример #5
0
def __unpackZip(verzip, rodir, verbose):
    """Extract the archive *verzip* into the directory *rodir*.

    :param verzip: path (or file object) accepted by ``ZipFile``.
    :param rodir: target directory; created, including missing parents,
        when it does not exist yet.
    :param verbose: when true, print the destination path of every member
        before extracting.
    :return: always 0.
    """
    archive = ZipFile(verzip)
    try:
        names = archive.namelist()
        if verbose:
            for l in names:
                print(os.path.join(rodir, l))

        if not os.path.isdir(rodir):
            # makedirs also copes with a nested target whose parents are
            # missing; it still raises if rodir exists as a regular file.
            os.makedirs(rodir)
        archive.extractall(rodir)
    finally:
        # Close the archive even when extraction fails.
        archive.close()

    print("%d files checked out" % len(names))
    return 0
Пример #6
0
    def __init__(self, zipfile, entry=""):
        """
        Create a new path pointer pointing at the specified entry
        in the given zipfile.

        :param zipfile: either a zipfile object or a path string; a string
            is made absolute and wrapped in ``OpenOnDemandZipFile``.
        :param entry: name of the member to point at; repeated slashes are
            collapsed and a leading slash is removed.  An empty entry is
            accepted without any check.
        :raise IOError: If the given zipfile does not exist, or if it
        does not contain the specified entry.
        """
        if isinstance(zipfile, basestring):
            zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile))

        # Normalize the entry string:
        entry = re.sub("(^|/)/+", r"\1", entry)

        # Check that the entry exists:
        if entry:
            try:
                zipfile.getinfo(entry)
            except Exception:
                # (``Exception`` rather than a bare ``except`` so that
                # KeyboardInterrupt/SystemExit are not swallowed.)
                # Sometimes directories aren't explicitly listed in
                # the zip file.  So if `entry` is a directory name,
                # then check if the zipfile contains any files that
                # are under the given directory.
                is_implied_dir = entry.endswith("/") and any(
                    n.startswith(entry) for n in zipfile.namelist())
                if not is_implied_dir:
                    # Otherwise, complain.
                    raise IOError("Zipfile %r does not contain %r" % (zipfile.filename, entry))
        self._zipfile = zipfile
        self._entry = entry
Пример #7
0
 def ziptodict(self, zipfile):
     """Read every file member of *zipfile* into a dict.

     Members whose name ends with '/' (directory entries) are skipped.
     The name of each member that is read is printed.

     :param zipfile: an open archive offering ``namelist`` and ``read``.
     :return: dict mapping member name to the member's raw content.
     """
     contentdict = {}
     for name in zipfile.namelist():
         if name.endswith('/'):
             continue
         contentdict[name] = zipfile.read(name)
         print(name)
     return contentdict
Пример #8
0
def get_file_infos_from_zip(zipfile):
    """Describe every non-directory member of *zipfile*.

    Each description is a dict with the member's internal path ('file'),
    its digest as computed by ``md5_from_zipped_file`` ('md5') and its
    uncompressed size ('size').
    """
    infos = []
    for member in zipfile.namelist():
        # Directory entries end with "/": the separator inside a zip archive
        # is always "/" even when the archive was produced on Windows.
        if member.endswith("/"):
            continue
        infos.append({
            'file': get_internal_zip_path(member),
            'md5': md5_from_zipped_file(zipfile, member),
            'size': zipfile.getinfo(member).file_size,
        })
    return infos
Пример #9
0
def add_unique_postfix(zipfile, new_filepath):
    """Return a member name based on *new_filepath* that is not yet in use.

    When *new_filepath* is free it is returned unchanged.  Otherwise
    ``name(1).ext``, ``name(2).ext`` ... ``name(999).ext`` are tried in the
    same directory and the first free one is returned.

    :param zipfile: an open archive offering ``namelist``.
    :param new_filepath: the desired member name.
    :return: a free member name, or None when all 999 candidates are taken.
    """
    # Snapshot the names once; namelist() builds a fresh list on every call
    # and the probing loop may run up to 999 times.
    taken = set(zipfile.namelist())
    if new_filepath not in taken:
        return new_filepath

    path, name = os.path.split(new_filepath)
    name, ext = os.path.splitext(name)

    for i in range(1, 1000):
        candidate = os.path.join(path, '%s(%d)%s' % (name, i, ext))
        if candidate not in taken:
            return candidate

    return None
Пример #10
0
def extract_zip(zipfile, output_dir):
    """Extracts a zipfile without the uppermost folder.

    Every member name has its first path component removed before it is
    written below *output_dir*.  Members that have nothing left after that
    (the top folder entry itself, or files stored at the archive root) are
    skipped.  Nested folders are created as needed.  Nothing is extracted
    when ``zipfile.testzip()`` reports a corrupt member.

    :param zipfile: an open archive offering ``testzip``, ``namelist`` and
        ``read``.
    :param output_dir: directory (str or Path) to extract into.
    """
    output_dir = Path(output_dir)
    if zipfile.testzip() is not None:
        return
    for m in zipfile.namelist():
        # partition() never fails on names without a "/", unlike unpacking
        # the result of a split; such names simply yield an empty remainder.
        _, _, name = m.partition('/')
        if not name:
            continue
        target = output_dir / name
        if name.endswith('/'):
            # Explicit directory entry below the top folder.
            if not target.is_dir():
                target.mkdir(parents=True)
            continue
        if not target.parent.is_dir():
            target.parent.mkdir(parents=True)
        with open(str(target), 'wb') as out:
            # read() opens and closes the member itself.
            out.write(zipfile.read(m))
Пример #11
0
def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.

    Every missing ancestor of every member is added exactly once (as an
    empty entry whose name ends with "/"), parents before their children.

    :param zipfile: an archive opened for writing.
    :return: the same zipfile object.
    """
    names = zipfile.namelist()
    known = set(names)
    for name in names:
        # Walk up from the member collecting ancestors that have no entry
        # yet; stop at the first one that is already known.
        missing = []
        parent = posixpath.dirname(name.rstrip("/"))
        while parent and parent + "/" not in known:
            missing.append(parent + "/")
            known.add(parent + "/")
            parent = posixpath.dirname(parent)
        for directory in reversed(missing):
            zipfile.writestr(directory, b"")
    return zipfile
Пример #12
0
def unzip(url, output_file):
    """
    Get a zip file content from a link and unzip

    Downloads *url* with ``requests.get``, opens the response body as a zip
    archive in memory and writes the content of the archive's first member
    to *output_file*.

    :param url: address of the zip file.
    :param output_file: path of the file to write; it is overwritten.
    :raise IndexError: if the archive has no members.
    """
    response = requests.get(url)
    with ZipFile(BytesIO(response.content)) as archive:
        payload = archive.read(archive.namelist()[0])
    # The member content is bytes: write it in binary mode so it is stored
    # unmodified (and so that writing works on Python 3 at all).
    with open(output_file, "wb") as output:
        output.write(payload)
Пример #13
0
def validate_metadata(metadata_handle, zipfile, zip_name):
    """Parse the metadata CSV of an upload and validate each row.

    When the module-level ``csv_title_row`` flag is true the first row is
    skipped as a header.  Every remaining row is turned into a dict; the
    files listed in column 0 (separated by ';') must all be members of
    *zipfile*, the role / first-name / last-name columns (8-10) must have
    the same number of ';'-separated items, and the model (column 19) must
    be 'audio', 'document' or 'image'.

    :param metadata_handle: iterable of CSV lines (e.g. an open file).
    :param zipfile: open archive offering ``namelist``.
    :param zip_name: archive name, used only in error messages.
    :return: list of the per-row dicts, in file order.
    :raise WatcherException: on the first row that fails validation.
    """
    # this is specific to the client
    metadata = csv.reader(metadata_handle)
    if csv_title_row:
        next(metadata)

    # Build the member lookup once instead of once per listed file.
    archive_names = set(zipfile.namelist())

    objects = []
    for row in metadata:
        record = {}
        # grab a row and validate it appropriately
        record['files'] = [name.strip() for name in row[0].split(';')]
        record['line_num'] = metadata.line_num
        for filename in record['files']:
            if filename not in archive_names:
                raise WatcherException('Metadata validation failure. File %s not found in zipfile %s. metadata.csv:%d' % (filename, zip_name, metadata.line_num))
        record['title'] = row[1]
        if row[2].strip():
            record['relation'] = row[2].split(' ')
        else:
            record['relation'] = ''
        record['subjects'] = row[3].split(';')
        record['keywords'] = row[4].split(';')
        record['date'] = row[5]
        record['spacial'] = row[6]
        record['temporal'] = row[7]
        roles = row[8].split(';')
        first_names = row[9].split(';')
        last_names = row[10].split(';')
        if not (len(roles) == len(first_names) == len(last_names)):
            raise WatcherException('Metadata validation failure. Length of Roles(%d), FirstNames(%d) and LastNames(%d)'
                'is not consistant. metadata.csv:%d' % (len(roles), len(first_names), len(last_names), record['line_num']))
        record['people'] = [
            {'first': first, 'last': last, 'role': role}
            for role, first, last in zip(roles, first_names, last_names)
        ]
        record['publisher'] = row[11]
        record['language'] = row[12]
        record['rights'] = row[13]
        record['abstract'] = row[14]
        record['significant'] = row[15]
        record['sensitive'] = row[16]
        record['notes'] = row[17]
        record['collection'] = row[18]
        record['model'] = row[19]
        if record['model'] not in ['audio', 'document', 'image']:
            # Report the offending model value itself (the message used to
            # print the unrelated "relation" column, row[2]).
            raise WatcherException('Metadata validation failure. Model %s is not valid (Valid values are audio, document, image). metadata.csv:%d' % (record['model'], record['line_num']))
        logger.debug(record)
        objects.append(record)
    return objects
Пример #14
0
def _analyze_zipfile_for_import(zipfile, project, schema):
    """Map directories inside *zipfile* to jobs and yield copy executors.

    For each directory in the archive (in sorted order) a state point is
    looked up with the schema function; a directory that yields one is
    mapped to ``project.open_job(state_point)`` and every directory whose
    name starts with it is then skipped.

    Yields ``(path, executor)`` pairs where ``executor`` is a
    ``_CopyFromZipFileExecutor`` built from the members whose names start
    with ``path``.

    Raises TypeError for an unsupported ``schema`` type,
    DestinationExistsError when a job's workspace already exists and
    RuntimeError when two directories map to the same job.
    """
    names = zipfile.namelist()

    def read_sp_manifest_file(path):
        # Must use forward slashes, not os.path.sep.
        fn_manifest = path + '/' + project.Job.FN_MANIFEST
        if fn_manifest not in names:
            return None
        return json.loads(zipfile.read(fn_manifest).decode())

    if schema is None:
        schema_function = read_sp_manifest_file
    else:
        if callable(schema):
            path_schema = schema
        elif isinstance(schema, str):
            path_schema = _make_path_based_schema_function(schema)
        else:
            raise TypeError(
                "The schema variable must be None, callable, or a string.")
        schema_function = _with_consistency_check(path_schema,
                                                  read_sp_manifest_file)

    mappings = dict()
    skip_subdirs = set()

    for name in sorted({os.path.dirname(member) for member in names}):
        # Anything below an already-mapped directory belongs to that job.
        if any(name.startswith(skip) for skip in skip_subdirs):
            continue

        sp = schema_function(name)
        if sp is None:
            continue

        job = project.open_job(sp)
        if os.path.exists(job.workspace()):
            raise DestinationExistsError(job)
        mappings[name] = job
        skip_subdirs.add(name)

    # Check uniqueness
    if len(mappings) != len(set(mappings.values())):
        raise RuntimeError(
            "The jobs identified with the given schema function are not unique!"
        )

    for path, job in mappings.items():
        members = [name for name in names if name.startswith(path)]
        yield path, _CopyFromZipFileExecutor(zipfile, path, job, members)
Пример #15
0
 def unzip(self, *path_parts, out=None):
     """Extracts the contents into memory or to a file

     The archive is opened as soon as this method is called.

     :param *path_parts: any number of parts to be joined for the path
     :param out: The output path for extracting the contents. If set to None
         it will extract in memory
     :return: an iterator over the raw content of each member when ``out``
         is not given; an empty iterator after extracting to ``out``.
     """
     read_path = os.path.join(self.base_path, *path_parts)
     zip_file = self._get_zipfile(read_path)
     if out:
         # Extract right away: as a generator function this call used to be
         # deferred until (and unless) the caller iterated the result.
         zip_file.extractall(out)
         return iter(())
     # Iterate the opened archive object, not the ``zipfile`` module.
     return (zip_file.read(name) for name in zip_file.namelist())
Пример #16
0
def get_original_crash_test_case_of_zipfile(crash_test_case,
                                            original_test_case):
    """Pick the archive member that is most similar to a crash test case.

    *original_test_case* holds the bytes of a zip archive.  Each member is
    compared with *crash_test_case* via ``SequenceMatcher.ratio()`` on the
    base64 encodings of both, and the member with the highest ratio wins
    (the first one on ties).

    :param crash_test_case: bytes of the crashing input.
    :param original_test_case: bytes of the zip archive to search.
    :return: the content of the best matching member, or
        *original_test_case* unchanged when no member scores above 0.
    """
    import zipfile

    # SequenceMatcher caches its analysis of the second sequence, so set the
    # constant side once and only swap the first sequence per member.
    matcher = SequenceMatcher(None)
    matcher.set_seq2(base64.b64encode(crash_test_case))

    max_similarity = 0
    with zipfile.ZipFile(io.BytesIO(original_test_case)) as archive:
        for name in archive.namelist():
            possible_original_test_case = archive.read(name)
            matcher.set_seq1(base64.b64encode(possible_original_test_case))
            similarity = matcher.ratio()
            if similarity > max_similarity:
                max_similarity = similarity
                original_test_case = possible_original_test_case
    return original_test_case
Пример #17
0
    def __init__(self, filename):
        self.filename = filename
        self.delete_zip = False

        try:
            if zipfile.is_zipfile(filename) and 'doc.kml' in zipfile.namelist():
                try:
                    zipf = zipfile.ZipFile(filename)
                    zipf.extract('doc.kml', '~/volatile')
                    self.filename = "~/volatile/doc.kml"
                    self.delete_zip = True
                except Exception, err:
                    raise Exception("Error unzipping: %s" % err)
            else:
Пример #18
0
def EpubToTxt(file):
    """Return the text of an EPUB file as indented plain text.

    The ``.opf`` member with the shortest name is read, the items of its
    ``<manifest>`` are mapped from id to href, and the documents referenced
    by the ``<itemref>`` elements of its ``<spine>`` are parsed with
    BeautifulSoup (lxml) in that order.  Blank lines are dropped and every
    remaining line is stripped and indented by four spaces.

    :param file: path of the EPUB (zip) file.
    :raise ValueError: if the archive contains no ``.opf`` member.
    """
    import os
    import re
    import zipfile

    from bs4 import BeautifulSoup

    def PrettifyTxt(text):
        # Keep only lines with visible content, normalised to a 4-space indent.
        return ''.join('    ' + line.strip() + '\n'
                       for line in text.split('\n') if line.split())

    chapters = []
    # The context manager closes the archive even when parsing fails.
    with zipfile.ZipFile(file) as epub:
        opflist = [name for name in epub.namelist() if name.endswith('.opf')]
        if not opflist:
            raise ValueError('%s does not contain an .opf file' % file)
        opffile = min(opflist, key=len)
        # Everything in front of the base name, including the trailing "/".
        folder = opffile[:len(opffile) - len(os.path.basename(opffile))]
        opf = epub.read(opffile).decode('utf-8')

        ncx = re.search('(?s)<spine.*toc.*=.*"ncx".*>(.*?)</spine>', opf,
                        re.M).group()
        manifest = re.search('(?s)<manifest.*>(.*?)</manifest>', opf,
                             re.M).group()

        ids = re.findall(' id="(.*?)"', manifest)
        hrefs = re.findall('href="(.*?)"', manifest)
        idrefs = re.findall('<itemref.*idref="(.*?)"', ncx)

        key = dict(zip(ids, hrefs))

        for idref in idrefs:
            with epub.open(folder + key[idref]) as page:
                chapters.append(BeautifulSoup(page, 'lxml').get_text())

    return PrettifyTxt(''.join(chapters))
Пример #19
0
def zip_extractall(zipfile, rootdir):
    """Python 2.4 compatibility instead of ZipFile.extractall.

    Recreates every member of *zipfile* below *rootdir*: names ending with
    '/' become directories, all other members are written as files whose
    parent directories are created on demand.
    """
    for name in zipfile.namelist():
        destfile = os.path.join(rootdir, name)
        if name.endswith('/'):
            if not os.path.exists(destfile):
                os.makedirs(destfile)
            continue

        destdir = os.path.dirname(destfile)
        if not os.path.isdir(destdir):
            os.makedirs(destdir)
        data = zipfile.read(name)
        # Member content is raw bytes: binary mode keeps it unmodified.
        f = open(destfile, 'wb')
        try:
            f.write(data)
        finally:
            # try/finally rather than ``with`` to stay Python 2.4 compatible.
            f.close()
Пример #20
0
def zip_extractall(zipfile, rootdir):
    """Python 2.4 compatibility instead of ZipFile.extractall.

    Recreates every member of *zipfile* below *rootdir*: names ending with
    '/' become directories, all other members are written as files whose
    parent directories are created on demand.
    """
    for name in zipfile.namelist():
        destfile = os.path.join(rootdir, name)
        if name.endswith('/'):
            if not os.path.exists(destfile):
                os.makedirs(destfile)
            continue

        destdir = os.path.dirname(destfile)
        if not os.path.isdir(destdir):
            os.makedirs(destdir)
        data = zipfile.read(name)
        # Member content is raw bytes: binary mode keeps it unmodified.
        f = open(destfile, 'wb')
        try:
            f.write(data)
        finally:
            # try/finally rather than ``with`` to stay Python 2.4 compatible.
            f.close()
    def _load_zip(self, zipfile):
        """Load every CSV and JSON member of *zipfile*.

        Members whose name starts with '__' are ignored, as are members
        whose name ends with neither 'csv' nor 'json'.  Only members that
        are actually loaded are read and decoded as UTF-8.

        :param zipfile: open archive offering ``namelist`` and ``read``.
        :return: list with the items returned by ``self._load_csv`` /
            ``self._load_json`` for each loaded member, in archive order.
        """
        content = []
        for libitem in zipfile.namelist():
            if libitem.startswith('__'):
                continue

            if libitem.endswith('csv'):
                loader = self._load_csv
            elif libitem.endswith('json'):
                loader = self._load_json
            else:
                continue

            # Decode only after the member is known to be wanted, so skipped
            # binary members cannot raise UnicodeDecodeError.
            data = zipfile.read(libitem).decode('utf-8')
            content.extend(loader(libitem, data))

        return content
def _unzip(zipfile, path):
    """
        Python 2.5 doesn't have extractall()

        Writes every file member of *zipfile* below *path*, creating the
        needed directories.  Members whose name ends with '/' are skipped.
    """
    for each in zipfile.namelist():
        if each.endswith('/'):
            continue
        root, name = os.path.split(each)
        directory = os.path.normpath(os.path.join(path, root))
        if not os.path.isdir(directory):
            os.makedirs(directory)
        # ``open`` instead of the Python 2-only ``file`` builtin, closed
        # explicitly (try/finally keeps this valid on Python 2.5).
        target = open(os.path.join(directory, name), 'wb')
        try:
            target.write(zipfile.read(each))
        finally:
            target.close()
Пример #23
0
def loadAll(c, zfn, zipfile):
    """Load every member of *zipfile* inside one database transaction.

    The base name of *zfn* is recorded in ``loaded_files`` and ``load`` is
    called for each member.  The transaction is committed when everything
    succeeded; on any error it is rolled back and the error propagates.

    :param c: database cursor offering ``execute``.
    :param zfn: path of the zip file; only its base name is stored.
    :param zipfile: open archive offering ``namelist``.
    """
    successful = False
    try:
        c.execute('begin transaction')
        # Double embedded single quotes so a file name cannot terminate the
        # SQL string literal.
        # NOTE(review): driver-side parameter binding would be preferable,
        # but the cursor's paramstyle is not visible from here.
        loaded_name = os.path.basename(zfn).replace("'", "''")
        c.execute("insert into loaded_files values('" + loaded_name + "')")
        for f in zipfile.namelist():
            print("Loading " + f)
            load(c, zipfile, f)
            print(os.times())
        c.execute("commit")
        successful = True
    finally:
        if not successful:
            print("Rolling back.")
            c.execute('rollback')
Пример #24
0
def loadAll(c, zfn, zipfile):
    """Load every member of *zipfile* inside one database transaction.

    The base name of *zfn* is recorded in ``loaded_files`` and ``load`` is
    called for each member.  The transaction is committed when everything
    succeeded; on any error it is rolled back and the error propagates.

    :param c: database cursor offering ``execute``.
    :param zfn: path of the zip file; only its base name is stored.
    :param zipfile: open archive offering ``namelist``.
    """
    successful = False
    try:
        c.execute('begin transaction')
        # Double embedded single quotes so a file name cannot terminate the
        # SQL string literal.
        # NOTE(review): driver-side parameter binding would be preferable,
        # but the cursor's paramstyle is not visible from here.
        loaded_name = os.path.basename(zfn).replace("'", "''")
        c.execute("insert into loaded_files values('" + loaded_name + "')")
        for f in zipfile.namelist():
            print("Loading " + f)
            load(c, zipfile, f)
            print(os.times())
        c.execute("commit")
        successful = True
    finally:
        if not successful:
            print("Rolling back.")
            c.execute('rollback')
Пример #25
0
def uncompress(srcfile, destdir):
    """Unpack *srcfile* into *destdir* according to its file extension.

    ``.tgz`` / ``.tar`` files are extracted with ``tarfile``, ``.zip`` files
    with ``zipfile``; a ``.gz`` file is decompressed into a file in
    *destdir* that keeps the source's base name.  A message is printed when
    *srcfile* is not an existing file or has any other extension.

    :param srcfile: path of the archive.
    :param destdir: directory to extract into.
    :return: None, except when decompressing a ``.gz`` file fails: then the
        tuple ``(False, exception, 'gz')`` is returned.  Failures of the tar
        and zip branches are only printed.
    """
    import gzip
    import tarfile
    import zipfile

    file = os.path.basename(srcfile)
    # Test the path that was actually passed in; the bare base name would be
    # resolved against the current working directory.
    if not os.path.isfile(srcfile):
        print('文件格式不支持或者不是压缩文件')
        return None

    shortname, fmt = os.path.splitext(file)
    fmt = fmt[1:]
    if fmt in ('tgz', 'tar'):
        try:
            with tarfile.open(srcfile) as tar:
                tar.extractall(destdir)
        except Exception as e:
            print("Can't uncompress {} for {}".format(file, e))
    elif fmt == 'zip':
        try:
            with zipfile.ZipFile(srcfile) as archive:
                archive.extractall(destdir)
        except Exception as e:
            print("Can't uncompress {} for {}".format(file, e))
    elif fmt == 'gz':
        try:
            # NOTE(review): the output keeps the ".gz" name although it holds
            # decompressed data; kept as is because callers may rely on it.
            fname = os.path.join(destdir, os.path.basename(srcfile))
            # Read everything from the gzip object first, then write it into
            # the file created by open(); reading first means the source is
            # not truncated when destdir is the source's own directory.
            with gzip.GzipFile(srcfile) as gfile:
                data = gfile.read()
            with open(fname, "wb") as out:
                out.write(data)
        except Exception as e:
            return False, e, fmt
    else:
        # Unsupported extension (RAR, for instance, is not handled).
        print('文件格式不支持或者不是压缩文件')
    return None
Пример #26
0
    def read(self):
        """ A generator that reads the in memory zip files, opens them
            and returns the content, one file at a time.

            The member named 'serverID.txt' is skipped.  Before each item is
            yielded, ``self.file_name`` is set to the member name with
            '.json.txt' removed.  ``self.zipfile`` is used as a context
            manager and is therefore closed once iteration finishes.
        """
        with self.zipfile as zipfile:
            for files in zipfile.namelist():
                if files == 'serverID.txt':
                    continue
                self.file_name = files.replace('.json.txt', '')
                # Could be a more useful class if it didn't assume
                # that content of txt files are JSON.
                # ZipFile.read() opens and closes the member itself, so no
                # member handle is left open between yields.
                yield json.loads(zipfile.read(files).decode('utf-8'))
Пример #27
0
def extract_zip(zipfile, output_dir):
    """
    Extracts a zipfile without the uppermost folder.

    Every member name has its first path component removed before it is
    written below ``output_dir``. Members that have nothing left after that
    (the top folder entry itself, or files stored at the archive root) are
    skipped. Nested folders are created as needed. Nothing is extracted when
    ``zipfile.testzip()`` reports a corrupt member.

    Parameters
    ----------
    zipfile: zipfile object
        Zipfile object to extract.
    output_dir: str | Path
        Directory to extract files to.
    """
    output_dir = Path(output_dir)
    if zipfile.testzip() is not None:
        return
    for m in zipfile.namelist():
        # partition() never fails on names without a "/", unlike unpacking
        # the result of a split; such names simply yield an empty remainder.
        _, _, name = m.partition("/")
        if not name:
            continue
        target = output_dir / name
        if name.endswith("/"):
            # Explicit directory entry below the top folder.
            if not target.is_dir():
                target.mkdir(parents=True)
            continue
        if not target.parent.is_dir():
            target.parent.mkdir(parents=True)
        with open(str(target), "wb") as out:
            # read() opens and closes the member itself.
            out.write(zipfile.read(m))
Пример #28
0
def zipped_mesh_loader(zipfile):
    """Build a lazy mapping from model key to mesh for a zipped synset.

    The synset id is the first path component of the archive's first member.
    The keys are the second path components of all members ending in
    ".obj".  Looking a key up loads ``<synset_id>/<key>/model.obj`` from the
    archive with ``trimesh.load``.

    :param zipfile: open archive offering ``namelist`` and ``open``.
    :return: the result of ``Mapping.mapped(keys, load_fn)``.
    :raise ValueError: if the archive has no members.
    """
    namelist = zipfile.namelist()
    if not namelist:
        raise ValueError("No entries in namelist")
    synset_id = namelist[0].split("/")[0]
    keys = {n.split("/")[1] for n in namelist if n.endswith(".obj")}

    def load_fn(key):
        import posixpath

        import trimesh

        from shape_tfds.core.resolver import ZipSubdirResolver

        # Zip member names always use "/", so join with posixpath:
        # os.path.join would produce backslashes on Windows.
        subdir = posixpath.join(synset_id, key)
        resolver = ZipSubdirResolver(zipfile, subdir)
        obj = posixpath.join(subdir, "model.obj")
        with zipfile.open(obj) as fp:
            return trimesh.load(fp, file_type="obj", resolver=resolver)

    return Mapping.mapped(keys, load_fn)
Пример #29
0
def filelist(zipfile):
    """Map ``table(<last path component>)`` to the full name of each member.

    When several members produce the same key, the last one listed wins.
    """
    return {table(member.split('/')[-1]): member
            for member in zipfile.namelist()}
Пример #30
0
def EpubToTxt(file):
    """Return the text of an EPUB file as indented plain text.

    For every ``.opf`` member, the ids of its ``<manifest>`` are mapped to
    hrefs and the documents referenced by the ``<itemref>`` elements of its
    ``<spine toc="ncx">`` are parsed with BeautifulSoup (lxml) in that
    order.  Only members ending in ``.html``, ``.htm`` or ``.xhtml`` are
    read, and each of them at most once.  Blank lines are dropped and every
    remaining line is stripped and indented by four spaces.

    :param file: path of the EPUB (zip) file.
    """
    import os
    import re
    import zipfile

    from bs4 import BeautifulSoup

    def PrettifyTxt(text):
        # Keep only lines with visible content, normalised to a 4-space indent.
        return ''.join('    ' + line.strip() + '\n'
                       for line in text.split('\n') if line.split())

    chapters = []
    # The context manager closes the archive even when parsing fails.
    with zipfile.ZipFile(file) as epub:
        namelist = epub.namelist()
        # HTML documents that have not been read yet.
        unread = {name for name in namelist
                  if name.endswith(('.html', '.htm', '.xhtml'))}

        for subfile in namelist:
            if not subfile.endswith('.opf'):
                continue
            # Everything in front of the base name, including the "/".
            folder = subfile[:len(subfile) - len(os.path.basename(subfile))]
            opf = epub.read(subfile).decode('utf-8')

            left1 = re.search('<spine toc="ncx">', opf).span()[0]
            left2 = re.search('<manifest>', opf).span()[0]
            right1 = re.search('</spine>', opf).span()[1]
            right2 = re.search('</manifest>', opf).span()[1]
            ncx = opf[left1:right1]
            manifest = opf[left2:right2]

            ids = re.findall('id="(.*?)"', manifest)
            hrefs = re.findall('href="(.*?)"', manifest)
            idrefs = re.findall('<itemref idref="(.*?)"', ncx)

            # zip() pairs ids with hrefs without indexing past the shorter
            # list when the two counts differ.
            key = dict(zip(ids, hrefs))

            for idref in idrefs:
                htmpath = folder + key[idref]
                if htmpath not in unread:
                    continue
                with epub.open(htmpath) as page:
                    chapters.append(BeautifulSoup(page, 'lxml').get_text())
                unread.discard(htmpath)

    return PrettifyTxt(''.join(chapters))
Пример #31
0
def filelist(zipfile):
    """Index the members of *zipfile* by ``table(<base name>)``.

    The base name is the part of the member name after the last '/'.  Later
    members replace earlier ones that produce the same key.
    """
    index = {}
    for member in zipfile.namelist():
        base_name = member.rsplit('/', 1)[-1]
        index[table(base_name)] = member
    return index
Пример #32
0
def _analyze_zipfile_for_import(zipfile, project, schema):
    """Map directories inside a zipfile to jobs and yield copy executors.

    Directories are visited in sorted order. A directory for which the
    schema function returns a state point is mapped to the corresponding
    job, and every directory whose name starts with it is skipped.

    Parameters
    ----------
    zipfile : zipfile.ZipFile
        An instance of ZipFile.
    project : :class:`~signac.Project`
        The signac project.
    schema : None, str or callable
        None to read state points from the manifest files stored in the
        archive, a string that is turned into a path-based schema function,
        or a function that accepts a path as its first and only argument
        and returns the corresponding state point as dict.

    Yields
    ------
    src : str
        Source path.
    copy_executor : callable
        A callable that uses a provided function to copy to a destination.

    Raises
    ------
    TypeError
        If the schema provided is not None, callable, or a string.
    :class:`~signac.errors.DestinationExistsError`
        If a job is already initialized.
    :class:`~signac.errors.StatepointParsingError`
        If the jobs identified with the given schema function are not unique.

    """
    names = zipfile.namelist()

    def read_sp_manifest_file(path):
        """Read the state point manifest stored below a directory.

        Parameters
        ----------
        path : str
            Directory inside the archive that may hold a manifest file.

        Returns
        -------
        dict or None
            Parsed manifest contents, or None if the archive has no
            manifest file for ``path``.

        """
        # Must use forward slashes, not os.path.sep.
        fn_manifest = path + "/" + project.Job.FN_MANIFEST
        if fn_manifest not in names:
            return None
        return json.loads(zipfile.read(fn_manifest).decode())

    if schema is None:
        schema_function = read_sp_manifest_file
    else:
        if callable(schema):
            path_schema = schema
        elif isinstance(schema, str):
            path_schema = _make_path_based_schema_function(schema)
        else:
            raise TypeError("The schema variable must be None, callable, or a string.")
        schema_function = _with_consistency_check(path_schema, read_sp_manifest_file)

    mappings = {}
    skip_subdirs = set()

    for name in sorted({os.path.dirname(member) for member in names}):
        # Anything below an already-mapped directory belongs to that job.
        if any(name.startswith(skip) for skip in skip_subdirs):
            continue

        sp = schema_function(name)
        if sp is None:
            continue

        job = project.open_job(sp)
        if os.path.exists(job.workspace()):
            raise DestinationExistsError(job)
        mappings[name] = job
        skip_subdirs.add(name)

    # Check uniqueness
    if len(mappings) != len(set(mappings.values())):
        raise StatepointParsingError(
            "The jobs identified with the given schema function are not unique!"
        )

    for src, job in mappings.items():
        members = [name for name in names if name.startswith(src)]
        yield src, _CopyFromZipFileExecutor(zipfile, src, job, members)
Пример #33
0
# Session number, taken from the second command-line argument.
# NOTE(review): `l` is used below but is not assigned in the visible lines;
# presumably it comes from sys.argv[1] -- confirm.
s = sys.argv[2]
url = 'http://www.congreso.es/votaciones/OpenData?sesion=%s&completa=1&legislatura=%s' % (
    s, l)
zipname = 'l%ss%s.zip' % (l, s)
# Download the archive with wget.
# NOTE(review): the command line is built from unvalidated argv values and
# run through a shell.
os.system('wget -c "%s" -O %s' % (url, zipname))

# Only legislature '10' is supported; anything else aborts the script.
legislatura = u''
if l == '10':
    legislatura = u'X Legislatura'
else:
    print 'Error legislatura'
    sys.exit()

votacionesids = []
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(zipname)
for zipp in zipfile.namelist():
    # Every member is decoded as ISO-8859-1 text and scanned with regexes.
    xmlraw = unicode(zipfile.read(zipp), 'ISO-8859-1')
    #print xmlraw

    # The session number inside the member must match the requested one.
    sesion = re.findall(ur"(?im)<sesion>(\d+)</sesion>", xmlraw)[0]
    if sesion != s:
        print 'Error, no coinciden los numeros de sesion'
        sys.exit()

    numerovotacion = re.findall(
        ur"(?im)<numerovotacion>(\d+)</numerovotacion>", xmlraw)[0]
    votacionesids.append(int(numerovotacion))
    fecha = re.findall(ur"(?im)<fecha>([^<]+)</fecha>", xmlraw)[0]
    # Reorder the three '/'-separated fields as third-second-first, joined
    # with '-', zero-padding the last two to two digits (presumably
    # day/month/year turned into an ISO-style date -- confirm).
    fecha = u'%s-%s-%s' % (fecha.split('/')[2], '%02d' %
                           (int(fecha.split('/')[1])), '%02d' %
                           (int(fecha.split('/')[0])))
Пример #34
0
}

# Application creation
app = QApplication([])
app.setStyle('Fusion')

# Ask for file path
file_dialog = QFileDialog(None, 'megascan zip file', 'C:/')
# NOTE(review): `filename` is only assigned when exec_() returns a true
# value; otherwise the pathlib.Path(filename) call below raises NameError
# unless `filename` was defined earlier.
if file_dialog.exec_():
    filenames = file_dialog.selectedFiles()
    filename = filenames[0]

# Open ZIP file and get the list of files in the folder
zip_path_input = pathlib.Path(filename)
# From here on the name `zf` refers to the opened archive instead of whatever
# it was bound to before (presumably the zipfile module alias -- confirm).
zf = zf.ZipFile(zip_path_input)
files_name = zf.namelist()

# if in png mode
# (disabled: the statement is wrapped in a string literal)
'''
folder_path_output = zip_path_input.parent / zip_path_input.parts[-1][:-4]
'''

# Create empty folder to receive the png, if png mode
# (disabled: the statement is wrapped in a string literal)
'''
try:
    os.mkdir(folder_path_output)
except:
    pass
'''

# Output name: the last path component without its final four characters
# (presumably the ".zip" suffix), with ".psd" appended.
psd_name = zip_path_input.parts[-1][:-4] + '.psd'
Пример #35
0
# Connect to the FTP server and change into the order's directory.
url = "ftp.datashop.livevol.com"
ftp = ftplib.FTP(url)
ftp.login(user="******", passwd="")
ftp.cwd("subscriptions/order_000003016/item_000004427/")

# Returns iterator of files in directory
files = ftp.mlsd()
files = [x for x in files]
# files is list of tuples
# Pick the name (first tuple element) of the last entry in the listing.
filename = files[-1][0]
# temp file to store zip file
with tempfile.TemporaryFile() as temp:
    ftp.retrbinary("RETR {0}".format(filename), temp.write)
    # Inside (and after) this block the name `zipfile` refers to the opened
    # archive, not the module.
    with zipfile.ZipFile(temp) as zipfile:
        # Only the first member of the archive is read.
        file = zipfile.namelist()[0]
        # open csv from zip and read into dataframe
        with zipfile.open(file) as csv:
            dataframe = pd.read_csv(csv)
ftp.quit()

# Copy a subset of the downloaded columns into a new frame under new names.
newframe = pd.DataFrame()
newframe['date'] = dataframe['quote_date']
newframe['high'] = dataframe['high']
newframe['low'] = dataframe['low']
newframe['open'] = dataframe['open']
newframe['close'] = dataframe['close']
newframe['volume'] = dataframe['trade_volume']
newframe['wap'] = dataframe['vwap']
newframe['symbol'] = dataframe['root']
newframe['expiry'] = dataframe['expiration']
Пример #36
0
"""

import re
import urllib
import zipfile

pcurl = "http://www.pythonchallenge.com/pc/def/"
url = "http://www.pythonchallenge.com/pc/def/channel.html"
zip_url = "http://www.pythonchallenge.com/pc/def/channel.zip"
# Fetch the page body (Python 2 urllib API).
page = urllib.urlopen(url).read()

# urlretrieve() downloads to a local file and returns its path first.
zip_file = urllib.urlretrieve(zip_url)[0]
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(zip_file)

# zipfile.printdir()
total_files = len(zipfile.namelist())

# print zipfile.read('readme.txt')

# content of readme.txt
# welcome to my zipped list.
#
# hint1: start from 90052
# hint2: answer is inside the zip

# print zipfile.read('90052.txt')

# content of 90052.txt
# Next nothing is 94191

# Captures the digits that follow "nothing is ".
# NOTE(review): "\d" sits in a non-raw string literal; a raw string would
# avoid the invalid-escape warning on newer Python versions.
regex = re.compile("nothing is (\d*)")
Пример #37
0
"""

import re
import urllib
import zipfile

pcurl = 'http://www.pythonchallenge.com/pc/def/'
url = 'http://www.pythonchallenge.com/pc/def/channel.html'
zip_url = 'http://www.pythonchallenge.com/pc/def/channel.zip'
# Fetch the page body (Python 2 urllib API).
page = urllib.urlopen(url).read()

# urlretrieve() downloads to a local file and returns its path first.
zip_file = urllib.urlretrieve(zip_url)[0]
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(zip_file)

# zipfile.printdir()
total_files = len(zipfile.namelist())

# print zipfile.read('readme.txt')

# content of readme.txt
# welcome to my zipped list.
#
# hint1: start from 90052
# hint2: answer is inside the zip

# print zipfile.read('90052.txt')

# content of 90052.txt
# Next nothing is 94191

# Captures the digits that follow "nothing is ".
# NOTE(review): "\d" sits in a non-raw string literal; a raw string would
# avoid the invalid-escape warning on newer Python versions.
regex = re.compile('nothing is (\d*)')
Пример #38
0
def get_zip_files_internalpaths_list(zipfile):
    """Return the internal path of every non-directory member of *zipfile*.

    Members whose name ends with "/" are directory entries and are left out.
    """
    internal_paths = []
    for member in zipfile.namelist():
        if member.endswith("/"):
            continue
        internal_paths.append(get_internal_zip_path(member))
    return internal_paths
Пример #39
0
import urllib2
from cStringIO import StringIO
import zipfile

# Location of the archive that is downloaded and unpacked by this script.
MASTER_ZIP = 'https://github.com/matpow2/anaconda/archive/master.zip'

print 'Downloading update.'
# The whole archive is held in memory.
data = urllib2.urlopen(MASTER_ZIP).read()
print 'Done, extracting.'

fp = StringIO(data)
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(fp, 'r')
# Collect every member except those below "anaconda-master/tools".
names = []
for name in zipfile.namelist():
    splitted = name.split('/')
    if len(splitted) >= 2 and (splitted[0] == 'anaconda-master' and
                               splitted[1] == 'tools'):
        continue
    names.append(name)

# Extract the selected members into the current working directory.
zipfile.extractall('.', names)
Пример #40
0
                           "C:/Users/Administrator/Desktop/123")
print(kk)
# ZIP archives: the module is called zipfile.  A ZipFile object represents
# one zip file; its `file` argument is a path or a file-like object.
'''
ZIP压缩包
模块(方法)名称叫 zipfile
创建一个zipfile对象表示一个zip文件 参数file表示文件的路径或类文件对象
'''
zf = zipfile.ZipFile("C:/Users/Administrator/Desktop/liutan2.zip")
print(zf)
# ZipFile.getinfo(name): look up one member of the archive; returns a
# zipfile.ZipInfo object holding the member's details.
'''
ZipFile.getinfo(name)
获取zip文档内指定文件信息,返回一个zipfile.Zipinfo对象他包括文件的详细信息
'''
ll = zf.getinfo("11.txt")
print(ll)
# namelist(): get the list of the names of all members of the archive.
'''
zipfile.namelist()
获取zip文档内所有文件的名称列表
'''
nl = zf.namelist()
print(nl)
# extractall(): extract all members; `members` defaults to every name in the
# archive.  The call below passes an explicit target directory.
'''
zipfile.extractall()
解压zip文档中的所有文件到当前目录,参数members的默认值为zip文档内的所有文件名称
'''
# extractall() returns None, so the print below shows None.
iu = zf.extractall("C:/Users/Administrator/Desktop/")
print(iu)

############################random模块###################################################################################################################
'''
random
# Legislature and session numbers, taken from the command line.
l = sys.argv[1]
s = sys.argv[2]
url = 'http://www.congreso.es/votaciones/OpenData?sesion=%s&completa=1&legislatura=%s' % (s, l)
zipname = 'l%ss%s.zip' % (l, s)
# Download the archive with wget.
# NOTE(review): the command line is built from unvalidated argv values and
# run through a shell.
os.system('wget -c "%s" -O %s' % (url, zipname))

# Only legislature '10' is supported; anything else aborts the script.
legislatura = u''
if l == '10':
    legislatura = u'X Legislatura'
else:
    print 'Error legislatura'
    sys.exit()

votacionesids = []
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(zipname)
for zipp in zipfile.namelist():
    # Every member is decoded as ISO-8859-1 text and scanned with regexes.
    xmlraw = unicode(zipfile.read(zipp), 'ISO-8859-1')
    #print xmlraw
   
    # The session number inside the member must match the requested one.
    sesion = re.findall(ur"(?im)<sesion>(\d+)</sesion>", xmlraw)[0]
    if sesion != s:
        print 'Error, no coinciden los numeros de sesion'
        sys.exit()
    
    numerovotacion = re.findall(ur"(?im)<numerovotacion>(\d+)</numerovotacion>", xmlraw)[0]
    votacionesids.append(int(numerovotacion))
    fecha = re.findall(ur"(?im)<fecha>([^<]+)</fecha>", xmlraw)[0]
    # Reorder the three '/'-separated fields as third-second-first, joined
    # with '-', zero-padding the last two to two digits (presumably
    # day/month/year turned into an ISO-style date -- confirm).
    fecha = u'%s-%s-%s' % (fecha.split('/')[2], '%02d' % (int(fecha.split('/')[1])), '%02d' % (int(fecha.split('/')[0])))
    # Optional elements: each falls back to an empty unicode string when the
    # opening tag is absent (the `and ... or` form also falls back when the
    # captured text is empty).
    titulo = re.search(ur"(?im)<titulo>", xmlraw) and re.findall(ur"(?im)<titulo>([^<]+)</titulo>", xmlraw)[0] or u''
    textoexp = re.search(ur"(?im)<textoexpediente>", xmlraw) and re.findall(ur"(?im)<textoexpediente>([^<]+)</textoexpediente>", xmlraw)[0] or u''
    titulosub = re.search(ur"(?im)<titulosubgrupo>", xmlraw) and re.findall(ur"(?im)<titulosubgrupo>([^<]+)</titulosubgrupo>", xmlraw)[0] or u''
Пример #42
0
 def test_build_with_single_file(self):
     # A single-file build must contain the function file itself and must
     # not contain the secret file.
     subject = Builder('test.zip', function_filename='lambda_function.py', single_file=True)
     subject.build()
     with PyZipFile(subject._zippath, 'r', compression=ZIP_DEFLATED) as packed:
         members = packed.namelist()
         ok_('lambda_function.py' in members)
         ok_('.lamvery_secret.json' not in members)
        sys.stderr.write(
            "Error: MD5 digest of the catkeys file does not match the md5 on the server"
        )
        sys.exit(-1)

    # Rewind to the beginning of the file
    archive.seek(0)

    try:
        zipfile = zipfile.ZipFile(archive, mode="r")
    except:
        sys.stderr.write(
            "There was an error processing the zip file from haiku-files.org")
        sys.exit(-1)
    template_list = []
    for f in zipfile.namelist():
        if "en.catkeys" in f:
            template_list.append(f)

    # Compare list of templates with data on disk by comparing fingerprints. If the fingerprint changed, write the
    # updated file to disk
    updated_list = []
    for template in template_list:
        data = StringIO.StringIO(zipfile.read(template))
        if not compare_template_to_disk(template, data):
            strip_and_save(template, data)
            updated_list.append(template)
            print("Updated template %s" % template)

    # Now instruct merging with the translated files
    commands = []
Пример #44
0
 def test_create_zipfile_with_single_file(self):
     # A single-file archive must contain the function file itself and must
     # not contain the secret file.
     subject = Archive('test.zip', function_filename='lambda_function.py', single_file=True)
     subject.create_zipfile()
     with PyZipFile(subject._zippath, 'r', compression=ZIP_DEFLATED) as packed:
         members = packed.namelist()
         ok_('lambda_function.py' in members)
         ok_('.lamvery_secret.json' not in members)
Пример #45
0
import urllib2
from cStringIO import StringIO
import zipfile

# Location of the archive that is downloaded and unpacked by this script.
MASTER_ZIP = 'https://github.com/matpow2/anaconda/archive/master.zip'

print 'Downloading update.'
# The whole archive is held in memory.
data = urllib2.urlopen(MASTER_ZIP).read()
print 'Done, extracting.'

fp = StringIO(data)
# From here on the name `zipfile` refers to the opened archive, not the module.
zipfile = zipfile.ZipFile(fp, 'r')
# Collect every member except those below "anaconda-master/tools".
names = []
for name in zipfile.namelist():
    splitted = name.split('/')
    if len(splitted) >= 2 and (splitted[0] == 'anaconda-master'
                               and splitted[1] == 'tools'):
        continue
    names.append(name)

# Extract the selected members into the current working directory.
zipfile.extractall('.', names)