Example #1
    def _initialize(self, reset=False):

        if not reset:

            try:

                print("Initializing VariantHDF5 ...")

                print("\tReading {} ...".format(self._variant_hdf5_file_path))

                self._variant_hdf5 = open_file(self._variant_hdf5_file_path,
                                               mode="r")

                print("\tReading {} ...".format(
                    self._id_chrom_pickle_gz_file_path))

                with gzip_open(self._id_chrom_pickle_gz_file_path
                               ) as id_chrom_pickle_gz_file:

                    self._id_chrom = load(id_chrom_pickle_gz_file)

                print("\tReading {} ...".format(
                    self._gene_chrom_pickle_gz_file_path))

                with gzip_open(self._gene_chrom_pickle_gz_file_path
                               ) as gene_chrom_pickle_gz_file:

                    self._gene_chrom = load(gene_chrom_pickle_gz_file)

            except (OSError, FileNotFoundError, HDF5ExtError) as exception:

                warn("\tFailed: {}.".format(exception))

                reset = True

        if reset:

            print("Resetting ...")

            if self._variant_hdf5:

                self._variant_hdf5.close()

                print("\tClosed {} ...".format(self._variant_hdf5_file_path))

            print("\tMaking {} ...".format(self._variant_hdf5_file_path))

            self._make_variant_hdf5()

            print("\tReading {} ...".format(self._variant_hdf5_file_path))

            self._variant_hdf5 = open_file(self._variant_hdf5_file_path,
                                           mode="r")
Example #2
def corpus_reader(
        path: str,
        from_memory: bool = False
) -> Union[Tuple[str, None], Tuple[None, str]]:
    """Function to read corpus text file.
    
    Args:
      path: Path to the file.
      from_memory: To "read" from memory
    
    Returns:
      Corpus text and error string in case of any.
    
    Raises:
      IOError: Occurred on reading/unpacking error.
    """
    try:
        if from_memory:
            return path.read(), None
        else:
            if path.endswith(".gz"):
                with gzip_open(path, 'rb') as f:
                    return f.read(), None
            else:
                with open(path, 'r', encoding='utf-8') as f:
                    return f.read(), None
    except IOError as ex:
        return None, ex
Example #3
def gunzip(source_filepath,
           dest_filepath=None,
           block_size=65536,
           remove_source=False,
           stdout_file=None):
    if not dest_filepath:
        # str.strip('.gz') strips any of the characters 'g', 'z', '.' from
        # both ends (e.g. 'log.gz' -> 'lo'); slice the suffix off instead
        dest_filepath = source_filepath[:-3] if source_filepath.endswith('.gz') else source_filepath
    if os.path.isdir(dest_filepath):
        file_name = source_filepath.split(SPLITTER)[-1].replace('.gz', '')
        dest_filepath = add_slash(dest_filepath) + file_name
    print('Gunzipping',
          source_filepath,
          'to',
          dest_filepath,
          flush=True,
          file=stdout_file)
    with gzip_open(source_filepath, 'rb') as s_file, \
            open(dest_filepath, 'wb') as d_file:
        while True:
            block = s_file.read(block_size)
            if not block:
                break
            d_file.write(block)
    if remove_source:
        os.remove(source_filepath)
Example #4
 def walk(self, file_name):
     if file_name.endswith('.gz'):
         with gzip_open(file_name) as current_file:
             yield self._get_submit_from_xml(current_file)
     else:
         with open(file_name, encoding='utf-8') as current_file:
             yield self._get_submit_from_xml(current_file)
Example #5
def read_file(input_file_path):
    if input_file_path.endswith('.gz'):
        infile = gzip_open(input_file_path, 'rt')
    else:
        infile = open(input_file_path, 'r')

    count = 0
    total_records = 0
    chunk = StringIO()
    for line in infile:
        parsed = None
        if len(UNIPROT_RECORD_TERMINATOR.findall(line)) == 0:
            chunk.write(line)
        else:
            count += 1
            if count >= PRINT_LIMIT:
                total_records += count
                count = 0
                logging.info("Processed " + str(total_records))

            try:
                parsed = parse_chunk(chunk)
            except Exception as e:
                # log and skip records that fail to parse
                logging.warning("Failed to parse chunk: %s", e)

            chunk = StringIO()
            if parsed is None:
                continue

            yield parsed

    infile.close()
Example #6
def fetch_and_archive(service, email, archive_path, mid_list):

    logger.info(
        'fetch_and_archive started. email: %s, archive_path: %s, mid_list: %d message(s)' %
        (email, archive_path, len(mid_list))
    )

    if path_isabs(archive_path):
        output_dir = realpath(archive_path)
    else:
        output_dir = realpath(expanduser(path_join(getcwd(), archive_path)))

    count = 0
    error = 0

    for mid in mid_list:

        file_name = path_join(output_dir, ('%x.gz' % mid))
        message = fetch_mail(service, email, mid)

        if not message:
            error += 1
            continue

        with gzip_open(file_name, 'wb') as f:
            f.write(urlsafe_b64decode(message['raw']))
            logger.debug('Message id %x gzipped to %s.' % (mid, file_name))

        count += 1

    logger.info('fetch_and_archive completed. Total %d item(s) saved. Error %d item(s).' % (count, error))
Example #7
def load_from_pickle_gz(file):
    # stream successive pickled objects, closing the gzip handle on exit
    with gzip_open(file, "rb") as fh:
        while True:
            try:
                yield pickle.load(fh)
            except (EOFError, pickle.UnpicklingError):
                return
Example #8
def read_file(input_file_path):
    if input_file_path.endswith('.gz'):
        infile = gzip_open(input_file_path, 'rt')
    else:
        infile = open(input_file_path, 'r')

    count = 0
    total_records = 0
    chunk = StringIO()
    for line in infile:
        parsed = None
        if len(UNIPROT_RECORD_TERMINATOR.findall(line)) == 0:
            chunk.write(line)
        else:
            count += 1
            if count >= PRINT_LIMIT:
                total_records += count
                count = 0
                logging.info("Processed " + str(total_records))

            try:
                parsed = parse_chunk(chunk)
            except Exception as e:
                # log and skip records that fail to parse
                logging.warning("Failed to parse chunk: %s", e)

            chunk = StringIO()
            if parsed is None:
                continue

            yield parsed

    infile.close()
Example #9
 def _load_index_json(self, app_cache):
     index_json_gz_filename = os.path.join(app_cache.get_cache_dir(),
                                           '.index.json.gz')
     self._verify_file(index_json_gz_filename)
     with gzip_open(index_json_gz_filename, 'rb') as fgzip:
         content = fgzip.read()
         return loads(content)
Example #10
def read_file(input_file_path):
    if input_file_path.endswith('.gz'):
        infile = gzip_open(input_file_path, 'rt')
    else:
        infile = open(input_file_path, 'r')

    count = 0
    total_records = 0

    csv_reader = reader(infile, delimiter='\t')

    for line in csv_reader:
        count += 1
        if count >= PRINT_LIMIT:
            total_records += count
            count = 0
            logging.info("Processed " + str(total_records))

        item = {}
        for index, field in enumerate(FIELDNAMES):
            item[field] = line[index]

        item['start'] = int(item['start'])
        item['end'] = int(item['end'])
        yield item

    infile.close()
Example #11
def merge_contents(filelist):
    """
    Merges a list of Contents files and returns a dict of the merged files
    """
    pkgs = {}
    for i in filelist:
        if i and isfile(i):
            with gzip_open(i) as gz:
                cfile = gz.read().decode('utf-8')
            contents = cfile.split('\n')

            header = False
            for line in contents:
                if line.startswith('This file maps each file'):
                    header = True
                if line.startswith('FILE'):
                    header = False
                    continue

                if line != '' and not header:
                    sin = line.split()
                    if sin[-1] not in pkgs:
                        pkgs[sin[-1]] = []
                    pkgs[sin[-1]].append(' '.join(sin[:-1]))
    return pkgs
Example #12
    def _extract_memory_info(self, dump_pathname, processor_notes):
        """Extract and return the JSON data from the .json.gz memory report.
        file"""
        def error_out(error_message):
            processor_notes.append(error_message)
            return {"ERROR": error_message}

        try:
            fd = gzip_open(dump_pathname, "rb")
        except IOError as x:
            error_message = "error in gzip for %s: %r" % (dump_pathname, x)
            return error_out(error_message)

        try:
            memory_info_as_string = fd.read()
            if len(memory_info_as_string) > self.config.max_size_uncompressed:
                error_message = (
                    "Uncompressed memory info too large %d (max: %d)" % (
                        len(memory_info_as_string),
                        self.config.max_size_uncompressed,
                    )
                )
                return error_out(error_message)

            memory_info = json_loads(memory_info_as_string)
        except IOError as x:
            error_message = "error in gzip for %s: %r" % (dump_pathname, x)
            return error_out(error_message)
        except ValueError as x:
            error_message = "error in json for %s: %r" % (dump_pathname, x)
            return error_out(error_message)
        finally:
            fd.close()

        return memory_info
Example #13
    def __init__(self, db, filename, compressed = False, index_offset_bits = 32):
        """
        
        Arguments:
        - `filename`: the filename of .idx file of stardict.
        - `compressed`: indicate whether the .idx file is compressed.
        - `index_offset_bits`: the offset field length in bits.
        """
        self.db = db
        self._offset = 0

        s = self.db["_word_idx"].select().limit(1)
        rs = s.execute()
        result = rs.fetchone()

        if result is None :
            self._size = getsize(filename)
            if compressed:
                self.fh = gzip_open(filename, "rb")
            else:
                self.fh = open(filename, "rb")

            self._index = 0
            self._index_offset_bits = index_offset_bits
            #self.db["_word_idx"] = OOBTree()
            #self.db["_index_idx"] = OOBTree()
            trans = self.db["conn"].begin()
            for word_str, word_data_offset, word_data_size, index in self:
                #self.db["_index_idx"][self._index - 1] = (word_str, word_data_offset, word_data_size)

                i = self.db["_index_idx"].insert().values(idx = self._index - 1,
                          word_str = word_str.decode("utf-8"),
                          word_data_offset = word_data_offset,
                          word_data_size = word_data_size)

                self.db["conn"].execute(i)
                #if word_str not in self.db["_word_idx"]:
                #    self.db["_word_idx"][word_str] = []
                #self.db["_word_idx"][word_str].append(self._index - 1)
                s = self.db["_word_idx"].select().where(self.db["_word_idx"].c.word_str == word_str.decode("utf-8"))
                rs = s.execute()
                result = rs.fetchone()
                if result is None :
                    i = self.db["_word_idx"].insert().values(word_str = word_str.decode("utf-8"), idx = str([]))
                    self.db["conn"].execute(i)
                    rs = s.execute()
                    result = rs.fetchone()

                newlist = eval(result[1])
                newlist.append(self._index - 1)
                j = self.db["_word_idx"].update().values(idx = str(newlist)).where(self.db["_word_idx"].c.word_str == word_str.decode("utf-8"))

                self.db["conn"].execute(j)

            trans.commit()
            del self._index_offset_bits

            mdebug("There were " + str(self._offset) + " total words.")
Example #14
def full_io(url, path, remove=True):
    path = UnsyncFetch.fetch_file(semaphore, 'get', dict(url=url), path,
                                  1).result()
    with gzip_open(path, 'rt') as handle:
        mmcif_dict = MMCIF2DictPlus(handle, ('_pdbe_chain_remapping.', ))
    if remove:
        path.unlink()
    return mmcif_dict
Example #15
def main():

    # parse the command line

    requireEof = True
    markEndOfFile = False
    filenames = []

    for arg in argv[1:]:
        if arg in ["--noendmark", "--noeof", "--nomark"]:  # (unadvertised)
            requireEof = False
        elif arg in ["--markend", "--markeof"]:
            requireEof = False
            markEndOfFile = True
        else:
            filenames += [arg]

    if filenames == []:
        usage("you have to give me at least one file")

    # copy the files;  note that we don't bother (or care) to verify that they
    # are really output from ncrf

    for (ix, filename) in enumerate(filenames):
        if ix > 0:
            print()

        eofMarkerSeen = False

        if filename.endswith(".gz") or filename.endswith(".gzip"):
            f = gzip_open(filename, "rt")
        else:
            f = open(filename, "rt")

        for line in f:
            line = line.rstrip("\n")
            if eofMarkerSeen and line != "":
                exit(
                    "%s: \"%s\" contains additional stuff after end marker (starting with \"%s\")"
                    % (os_path.basename(argv[0]), filename, line[:10]))
            if line == "# ncrf end-of-file":
                eofMarkerSeen = True
                markEndOfFile = True
                continue
            if not eofMarkerSeen:
                try:
                    print(line)
                except IOError as ex:
                    # "Broken pipe" can happen when downstream tools reject
                    # our output as their input
                    if ex.errno == EPIPE:
                        exit("%s: [Errno %d] Broken pipe" %
                             (os_path.basename(argv[0]), ex.errno))

        f.close()

        if requireEof and not eofMarkerSeen:
            exit("%s: \"%s\" may have been truncated (end marker is absent)" %
                 (os_path.basename(argv[0]), filename))
Example #16
def _from_dbs_and_cache(fn, ds):
    print('hitting DBS %s for %s' % (ds, fn))
    from JMTucker.Tools.DBS import files_in_dataset
    files = files_in_dataset(ds, instance='phys03' if ds.endswith('/USER') else 'global')
    with gzip_open(fn, 'wt') as f:
        for file in files:
            f.write(file)
            f.write('\n')
    return files
Example #17
def _read_gzip_file(fn):
    print('getting minbias file list from cache', fn)
    files = []
    with gzip_open(fn, 'rt') as f:
        for line in f:
            line = line.strip()
            if line:
                files.append(line)
    return files
Example #18
def _read_gzip_file(fn):
    print('getting minbias file list from cache', fn)
    files = []
    with gzip_open(fn, 'rt') as f:
        for line in f:
            line = line.strip()
            if line:
                files.append(line)
    return files
Example #19
def _from_dbs_and_cache(fn, ds):
    print('hitting DBS %s for %s' % (ds, fn))
    from JMTucker.Tools.DBS import files_in_dataset
    files = files_in_dataset(ds, instance='phys03' if ds.endswith('/USER') else 'global')
    with gzip_open(fn, 'wt') as f:
        for file in files:
            f.write(file)
            f.write('\n')
    return files
Example #20
def trans_gz(gz_file: str, out_dir: str):
    if gz_file.split('.')[-1] == 'gz':
        with gzip_open(gz_file) as fr:
            with open(os.path.join(out_dir, 'prs_data'), 'w') as fw:
                for line in fr:
                    fw.write(line.decode())
        return os.path.join(out_dir, 'prs_data')
    else:
        return gz_file
Example #21
    def __init__(self, filename=JSONL_FILENAME):
        """Setup file for reading.

        Parameters
        ----------
        filename : str
            Filename for JSONL file with CVR data.

        """
        self.filename = filename
        if filename.endswith('.gz'):
            self.fid = gzip_open(filename, mode='rt')
        else:
            try:
                self.fid = open(filename)
            except IOError:
                self.fid = gzip_open(filename + '.gz', mode='rt')
        self.line_number = 0
Example #22
 def _extract_memory_info(self, dump_pathname, processor_notes):
     """Extract and return the JSON data from the .json.gz memory report.
     file"""
     try:
         fd = gzip_open(dump_pathname, "rb")
     except IOError, x:
         error_message = "error in gzip for %s: %r" % (dump_pathname, x)
         processor_notes.append(error_message)
         return {"ERROR": error_message}
Example #23
 def _extract_memory_info(self, dump_pathname, processor_notes):
     """Extract and return the JSON data from the .json.gz memory report.
     file"""
     try:
         fd = gzip_open(dump_pathname, "rb")
     except IOError, x:
         error_message = "error in gzip for %s: %r" % (dump_pathname, x)
         processor_notes.append(error_message)
         return {"ERROR": error_message}
Example #24
 def __init__(self, filename):
     """
     Parameters
     ----------
     filename: str
          full path input file name
     """
     super().__init__(filename)
     self.file_object = gzip_open(filename, 'rb')
Example #25
def dump_gps_map(gps_map, pickle_gz_file_path):

    if not pickle_gz_file_path.endswith(".pickle.gz"):

        pickle_gz_file_path += ".pickle.gz"

    with gzip_open(pickle_gz_file_path, mode="wb") as pickle_gz_file:

        dump(gps_map, pickle_gz_file)
Example #26
 def __init__(self, filename):
     """
     Parameters
     ----------
     filename: str
          full path input file name
     """
     super().__init__(filename)
     self.file_object = gzip_open(filename, 'rb')
Example #27
    def __init__(self, file):
        tmp = load(gzip_open(file, 'rb'))
        self.__models = tmp['models']
        self.__conf = tmp['config']
        self.__workpath = '.'

        self.Nlim = self.__conf.get('nlim', 1)
        self.TOL = self.__conf.get('tol', 1e10)
        self.__units = self.__conf.get('report_units')
        self.__show_structures = self.__conf.get('show_structures')
Example #28
    def __save(self):
        stashdump = self.__calc_stashdump()
        if stashdump is not None:
            with gzip_open(self.__fname, 'wb') as f:
                f.write(stashdump)

        if len(self.__properties) and self.__properties_changed:
            with self.__stash_lock:
                with open(self.__pname, 'w') as f:
                    json.dump(self.__properties, f)
Example #29
    def cache_data(self, hash, file, data, url=True, gzip=True):
        if gzip and self.gzip:
            with gzip_open(self.get_path(hash, file + '.gz'), 'wb') as f:
                f.write(data)

        with open(self.get_path(hash, file), 'wb') as f:
            f.write(data)

        if url:
            return self.get_url(hash, file)
Example #30
def save_message(mid, message, store_path):
    """
    :type  mid:        int   message id
    :type  message:    str   raw MIME message
    :param store_path: str   path to store
    :return:
    """
    path = path_join(store_path, '%x.gz' % mid)
    with gzip_open(path, 'wb') as f:
        f.write(message)
Example #31
def load_message(mid, store_path):
    """
    :type mid:         int    message id
    :type store_path:  str    path to load
    :return:
    """
    path = path_join(store_path, '%x.gz' % mid)
    with gzip_open(path, 'rb') as f:
        message = f.read()
    return message
Example #32
 def store_cache_to_file(self, _attr_name, data, gzip=False):
     filename = self.cache_dir + '/' + _attr_name + '.pickle'
     pickle_obj = pickle_dumps(data)
     if gzip:
         filename += '.gz'
         with gzip_open(filename, "wb") as f:
             f.write(pickle_obj)
     else:
         with open(filename, "wb") as f:
             f.write(pickle_obj)
Example #33
def step04( datadir ):

  uvalues_by_dim = {};


  with gzip_open( datadir+"/train_trn.tsv.gz", "rt" ) as f:

    firstline = f.readline();
    if firstline and firstline[-1] == '\n':
      firstline = firstline[:-1];
    firstline = firstline.split( '\t' );

    assert \
         firstline \
      == (   [ '"id"', '"y"', '"cId"' ]
           + [ '"x{}"'.format(i) for i in range(1,101) ] );

    for line in f:

      if line and line[-1] == '\n':
        line = line[:-1];
      line = line.split( '\t' );

      id_ = line[0];
      y = line[1];
      cid = line[2];
      x = [ None ] + line[3:];

      for dim in range(1,101):

        if not dim in uvalues_by_dim:
          uvalues_by_dim[ dim ] = set();
        if len( uvalues_by_dim[dim] ) < 500:
          uvalues_by_dim[ dim ].add( x[dim] );

  dim_by_uvalues = [];
  binary = [];
  for ( dim, uvalues ) in uvalues_by_dim.items():
    if len( uvalues ) < 5:
      uvalues_ = uvalues;
    else:
      uvalues_ = None;
    if len( uvalues ) == 2:
      binary.append( dim );
    dim_by_uvalues.append( ( len(uvalues), dim, uvalues_ ) );


  with open( datadir+"/step04.txt", "wt" ) as out:

    for ( uvalues, dim, uvalues_ ) in sorted( dim_by_uvalues ):
      print( "{:3d} {:7d} {:s}".format( dim, uvalues, repr(uvalues_) ) );
      print( "{:3d} {:7d} {:s}".format( dim, uvalues, repr(uvalues_) ), file=out );

    print( "-->", repr(binary) );
    print( "-->", repr(binary), file=out );
Example #34
def gzip_file(source_path, archive_path):
    """
    Create a gzip compressed archive of ``source_path`` at ``archive_path``.
    An empty archive file will be created if the source file does not exist.
    This gives the diagnostic archive a consistent set of files which can
    easily be tested.
    """
    with gzip_open(archive_path, 'wb') as archive:
        if os.path.isfile(source_path):
            with open(source_path, 'rb') as source:
                copyfileobj(source, archive)
Example #35
 def test_downsample_16bit_image(self):
     with pushd_popd(tempdir=True) as tempdir:
         with gzip_open(join(dirname(__file__), 'data/OCR-D-IMG_APBB_Mitteilungen_62.0002.tif.gz'), 'rb') as gzip_in:
             with open('16bit.tif', 'wb') as tif_out:
                 tif_out.write(gzip_in.read())
         ws = self.resolver.workspace_from_nothing(directory=tempdir)
         ws.add_file('IMG', ID='foo', url='16bit.tif', mimetype='image/tiff', pageId=None)
         pil_before = Image.open('16bit.tif')
         assert pil_before.mode == 'I;16'
         pil_after = ws._resolve_image_as_pil('16bit.tif')
         assert pil_after.mode == 'L'
Example #36
def load_packages_file(filename):
    """
    Load a gzip'd packages file.
    Returns a dictionary of package name and package key-values.
    """
    # TODO: should we skip files like this if they don't exist?
    if filename is not None and isfile(filename):
        with gzip_open(filename) as f:
            packages_contents = f.read().decode('utf-8')
        return parse_packages(packages_contents)

    return None
Example #37
    def open(self, filename):
        """
        Open specified file for writing.  File will be compressed
        if the gzip flag of the constructor was set to True.

        Args:
            filename (str): path to file to open for writing
        """
        if self.gzip:
            self.file = gzip_open(filename, 'wb')
        else:
            self.file = open(filename, 'wb')
Example #38
	def _uncompress_archive(self, app_cache, local_archive):
		try:
			with gzip_open(local_archive) as zipped_file:
				archive_content = zipped_file.read()
				with open(os.path.join(app_cache.get_cache_dir(), '.all.tar'), 'wb') as extracted_file:
					extracted_file.write(archive_content)
		except (zlib.error, EnvironmentError) as exc:
			self.warn('Error while reading %s: %s' % (local_archive, exc))
			return False
		else:
			self._extract_archive(app_cache)
			return True
Example #39
	def _load_index_json(self, app_cache):
		index_json_gz_filename = os.path.join(app_cache.get_cache_dir(), '.index.json.gz')
		if not ucr_is_false('appcenter/index/verify'):
			detached_sig_path = index_json_gz_filename + '.gpg'
			(rc, gpg_error) = gpg_verify(index_json_gz_filename, detached_sig_path)
			if rc:
				if gpg_error:
					self.fatal(gpg_error)
				raise Abort('Signature verification for %s failed' % index_json_gz_filename)
		with gzip_open(index_json_gz_filename, 'rb') as fgzip:
			content = fgzip.read()
			return loads(content)
Example #40
    def open(self, filename):
        """
        Open specified file for writing.  File will be compressed
        if the gzip flag of the constructor was set to True.

        Args:
            filename (str): path to file to open for writing
        """
        if self.gzip:
            self.file = gzip_open(filename, 'wb')
        else:
            self.file = open(filename, 'wb')
Example #41
def get_archive(mid, archive_path):

    if path_isabs(archive_path):
        archive_dir = realpath(archive_path)
    else:
        archive_dir = realpath(expanduser(path_join(getcwd(), archive_path)))

    path = path_join(archive_dir, '%x.gz' % mid)

    with gzip_open(path, 'rb') as f:
        mime = f.read()

    logger.debug('Archive \'%s\' extracted successfully. %d bytes' % (path, len(mime)))

    return mime
Example #42
    def __init__(self, filename, dict_ifo, dict_index, compressed = False):
        """Constructor.
        
        Arguments:
        - `filename`: filename of .dict file.
        - `dict_ifo`: IfoFileReader object.
        - `dict_index`: IdxFileReader object.
        """
        self._dict_ifo = dict_ifo
        self._dict_index = dict_index
        self._compressed = compressed
        self._offset = 0

        if self._compressed:
            self.fh = gzip_open(filename, "rb")
        else:
            self.fh = open(filename, "rb")
Example #43
def biom_open(fp, permission='U'):
    """Wrapper to allow opening of gzipped or non-compressed files
    
    Read or write the contents of a file

    file_fp : file path
    permission : either 'r','w','a'

    If the file is binary, be sure to pass in a binary mode (append 'b' to
    the mode); opening a binary file in text mode (e.g., in default mode 'U')
    will have unpredictable results.
    
    This code was copied from QIIME (www.qiime.org).
    """
    if is_gzip(fp):
        return gzip_open(fp,'rb')
    else:
        return open(fp, permission)
Example #44
def biom_open(fp, permission='U'):
    """Wrapper to allow opening of gzipped or non-compressed files

    Read or write the contents of a file

    file_fp : file path
    permission : either 'r','w','a'

    If the file is binary, be sure to pass in a binary mode (append 'b' to
    the mode); opening a binary file in text mode (e.g., in default mode 'U')
    will have unpredictable results.

    This function is ported from QIIME (http://www.qiime.org), previously named
    qiime_open. QIIME is a GPL project, but we obtained permission from the
    authors of this function to port it to the BIOM Format project (and keep it
    under BIOM's BSD license).
    """
    if is_gzip(fp):
        return gzip_open(fp, 'rb')
    else:
        return open(fp, permission)
Example #45
 def __init__(self, outfile, mode='x'):
     """
     Parameters
     ----------
     outfile:  str
         full path output file name
     mode: str
         'w'	open for writing, truncating the file first
         'x'	open for exclusive creation, failing if the file already exists
         'a'	open for writing, appending to the end of the file if it exists
     Raises
     ------
     FileNotFoundError: When the file cannot be opened
     FileExistsError: when infile exist and mode is x
     """
     super().__init__(outfile)
     mode += 'b'
     try:
         self.file_object = gzip_open(outfile, mode)
     except FileExistsError:
         raise FileExistsError('file exists: {} and mode is {}'.
                               format(outfile, mode))
Example #46
def da_read( fn ):

  assert isfile( fn );

  with gzip_open( fn, "rt" ) as f:

    firstline = f.readline();

    if firstline and firstline[-1] == '\n':
      firstline = firstline[:-1];
    firstline = firstline.split( '\t' );

    has_y = None;

    if firstline[:3] == [ '"id"', '"y"', '"cId"' ]:
      has_y = True;
    elif firstline[:3] == [ '"id"', '"cId"', '"x1"' ]:    
      has_y = False;
    else:
      assert False;

    if has_y:
      assert \
           firstline \
        == (   [ '"id"', '"y"', '"cId"' ]
             + [ '"x{}"'.format(i) for i in range(1,101) ] );
    else:
      assert \
           firstline \
        == (   [ '"id"', '"cId"' ]
             + [ '"x{}"'.format(i) for i in range(1,101) ] );

    x_check = {};

    for line in f:

      if line and line[-1] == '\n':
        line = line[:-1];
      line = line.split( '\t' );

      id_ = line[0];
      id_ = int( id_ );

      if has_y:
        y = line[1];
        assert y in [ "0", "1" ];
        y = int( y );
        rest = line[2:];
      else:
        rest = line[1:];

      c = rest[0];

      assert c[0] == '"';
      assert c[-1] == '"';
      c = int( c[1:-1] );

      b = [];
      x = [];

      for i in range( 1, len(rest) ):

        try:

          val = rest[i];

          if i in BINARY_FEATs:

            assert val in [ "0", "1" ];
            val = int(val)
            b.append( val );
            continue;

          if not '.' in val:
            val = val+'.';
          val = val.split( '.' );

          assert \
            ( ( val[0][0] == '-' ) and ( len(val[0]) == 2 ) ) \
                  or ( ( val[0][0] != '-' ) and ( len(val[0]) == 1 ) );
          assert \
            len( val[1] ) <= 3;

          while len( val[1] ) < 3:
            val[1] = val[1] + '0';

          assert \
            len( val[1] ) == 3;

          if val[0][0] == '-':
            val = - int( val[0][1:] ) * 1000 - int( val[1] );
          else:
            val = int( val[0] ) * 1000 + int( val[1] );          

          assert ( float(val) / 1000.0 ) == float(rest[i]);

          x_check_ = x_check.get( i, set() );
          if len( x_check_ ) < 3:
            x_check_.add( val );
            x_check[ i ] = x_check_;

          x.append( val );

        except:

          print( repr(val), rest[i] );
          raise;

      if has_y:
        yield ( id_, y, [c], b, x );
      else:
        yield ( id_, None, [c], b, x );

    for v in x_check.values():
      assert len( v ) > 2;
Example #47
def step13( datadir ):

  with gzip_open( datadir+"/train.tsv.gz", "rt" ) as f:

    firstline = f.readline();

    if firstline and firstline[-1] == '\n':
      firstline = firstline[:-1];
    firstline = firstline.split( '\t' );

    assert \
         firstline \
      == (   [ '"id"', '"y"', '"cId"' ]
           + [ '"x{}"'.format(i) for i in range(1,101) ] );

    pos_valsx = [];
    neg_valsx = [];
    pos_rndx = [];
    neg_rndx = [];

    pos_valsx_ = [];
    neg_valsx_ = [];
    pos_rndx_ = [];
    neg_rndx_ = [];

    i = 1;
    for line in f:

      i += 1;
      if i > 10000:
        break;

      line_ = line;

      if line and line[-1] == '\n':
        line = line[:-1];
      line = line.split( '\t' );

      id_ = line[0];
      y = line[1];
      cid = line[2];
      x = [ None ];
      assert cid.startswith( '"' );
      assert cid.endswith( '"' );
      cid = int( cid[1:-1] );

      for x_ in line[3:]:
        x.append( float(x_) )

      relevant_x = x[cid];
      random_x = choice( x[1:] );

      if y == '0':

        neg_valsx.append(relevant_x);
        if relevant_x not in [ 0.0, 1.0 ]:
          neg_valsx_.append(relevant_x);

        neg_rndx.append(random_x);
        if random_x not in [ 0.0, 1.0 ]:
          neg_rndx_.append(random_x);

      elif y == '1':

        pos_valsx.append(relevant_x);
        if relevant_x not in [ 0.0, 1.0 ]:
          pos_valsx_.append(relevant_x);

        pos_rndx.append(random_x);
        if random_x not in [ 0.0, 1.0 ]:
          pos_rndx_.append(random_x);

  ( fig, ax ) = plt.subplots( nrows=2, ncols=2, figsize=(6,6) );
  ax[0,0].hist( [ neg_valsx, pos_valsx ], 100, histtype='step', color='br', linewidth=3 );  
  ax[0,1].hist( [ neg_rndx, pos_rndx ], 100, histtype='step', color='br', linewidth=3 );  
  ax[1,0].hist( [ neg_valsx_, pos_valsx_ ], 100, histtype='step', color='br', linewidth=3 );  
  ax[1,1].hist( [ neg_rndx_, pos_rndx_ ], 100, histtype='step', color='br', linewidth=3 );  
  fig.savefig( datadir+'/step13.png' );
Example #48
    def __init__(self, thrift_type, filename,
                 postprocess=None, filetype=FileType.AUTO):
        """
        Args:
            thrift_type: Class for Thrift type, e.g. Communication, TokenLattice
            filename (str):
            postprocess (function): A post-processing function that is called
                with the Thrift object as argument each time a Thrift object
                is read from the file
            filetype (FileType): Expected type of file.  Default value is
                `FileType.AUTO`, where function will try to automatically
                determine file type.

        Raises:
            ValueError: if filetype is not a known filetype name or id
        """
        filetype = FileType.lookup(filetype)

        self._seek_supported = True

        self._thrift_type = thrift_type
        if postprocess is None:
            def _noop(obj):
                return
            self._postprocess = _noop
        else:
            self._postprocess = postprocess
        self._source_filename = filename

        if filetype == FileType.TAR:
            self.filetype = 'tar'
            self.tar = tarfile.open(filename, 'r|')

        elif filetype == FileType.TAR_GZ:
            self.filetype = 'tar'
            self.tar = tarfile.open(filename, 'r|gz')

        elif filetype == FileType.TAR_BZ2:
            self.filetype = 'tar'
            self.tar = tarfile.open(filename, 'r|bz2')

        elif filetype == FileType.ZIP:
            self.filetype = 'zip'
            self.zip = zipfile.ZipFile(filename, 'r')
            self.zip_infolist = self.zip.infolist()
            self.zip_infolist_index = 0

        elif filetype == FileType.STREAM:
            self.filetype = 'stream'
            f = open(filename, 'rb')

        elif filetype == FileType.STREAM_GZ:
            self.filetype = 'stream'
            f = gzip_open(filename, 'rb')

        elif filetype == FileType.STREAM_BZ2:
            self.filetype = 'stream'
            f = bz2.BZ2File(filename, 'r')

        elif filetype == FileType.AUTO:
            if tarfile.is_tarfile(filename):
                self.filetype = 'tar'
                self.tar = tarfile.open(filename, 'r|*')

            elif zipfile.is_zipfile(filename):
                self.filetype = 'zip'
                self.zip = zipfile.ZipFile(filename, 'r')
                self.zip_infolist = self.zip.infolist()
                self.zip_infolist_index = 0

            elif mimetypes.guess_type(filename)[1] == 'gzip':
                # this is not a true stream---is_tarfile will have
                # successfully seeked backwards on the file if we have
                # reached this point
                self.filetype = 'stream'
                f = gzip_open(filename, 'rb')

            elif mimetypes.guess_type(filename)[1] == 'bzip2':
                # this is not a true stream
                self.filetype = 'stream'
                f = bz2.BZ2File(filename, 'r')

            else:
                # this is not a true stream
                self.filetype = 'stream'
                f = open(filename, 'rb')

        else:
            raise ValueError('unknown filetype %d' % filetype)

        if self.filetype == 'stream':
            self.transport = TTransport.TFileObjectTransport(f)
            self.protocol = factory.createProtocol(self.transport)
            self.transport.open()
Example #49
            n = getattr(new, name)
            print('runs in old not in new:')
            print(sorted(o - n))
            l = sorted(x for x in n - o if x < 157460)
            print('runs in new not in old before 2011: #', len(l), 'min', l[0] if len(l) else None, 'max', l[-1] if len(l) else None)
            l = sorted(x for x in n - o if x >= 157460)
            print('runs in new not in old during 2011: #', len(l), 'min', l[0] if len(l) else None, 'max', l[-1] if len(l) else None)
            print()
        sys.exit(0)
    elif 'make_lists' in sys.argv:
        from datetime import datetime
        from gzip import open as gzip_open
        from MuonAnalysis.Cosmics.runregistry import RunRegistryHelper

        epoch = min_time = datetime(2010, 2, 1)
        rrh = RunRegistryHelper(gzip_open('download.xml.gz')) # Get this from the run registry Table->Get Data->Generate... then Table->Get Data->Export->XML (all).

        kinds = [
            ('cosmics', ['Cosmic10', 'Cosmics10', 'Cosmics11']),
            ('commissioning', ['BeamCommissioning10', 'BeamCommisioning10', 'Commissioning', 'Commissioining10', 'Commisioning10', 'Commissioning10', 'Commissioning11']), # "commissioning" is hard to spell
            ('collisions', ['Collisions10', 'PostCollisions10', 'Collisions11']),
            ]

        for kind_label, kind_groups in kinds:
            for det in ['dt', 'csc', 'strip', 'pix', 'rpc']:
                runs = rrh.get_good_runs([det.upper()], min_time, kind_groups)
                print('%s_runs_%s = set(%s)' % (kind_label, det, repr(runs).replace(' ', '')))
            print()
    elif 'dump_trigger_menus' in sys.argv:
        from gzip import open as gzip_open
        from MuonAnalysis.Cosmics.runregistry import RunRegistryHelper
Example #50
def main(args=None):
    if args is None:
        args = sys.argv[1:]

    np.seterr(all='raise')

    parser, ns, args = init_args(description='Predict label for unlabeled sequences', args=args)

    parser = hmmer_args(parser)

    parser.add_argument('MODEL', type=PathType)
    parser.add_argument('SEQUENCES', type=PathType)

    ARGS = parse_args(parser, args, namespace=ns)

    with gzip_open(ARGS.MODEL, 'rb') as fh:
        try:
            model = pickle_load(fh)
            if model[0] != MODEL_VERSION:
                raise ImportError('incompatible model version')
            ARGS.ENCODER, ARGS.LABEL, hmm, extractor, clf = model[1:]
        except ImportError:
            msg = 'your model is not of the appropriate version, please re-learn your model'
            raise RuntimeError(msg)

    # create a temporary file wherein space characters have been removed
    with open(ARGS.SEQUENCES) as seq_fh:

        def seqrecords():
            is_dna = ARGS.ENCODER == DNAEncoder
            seq_fmt = seqfile_format(ARGS.SEQUENCES)
            source = Verifier(SeqIO.parse(seq_fh, seq_fmt), DNAAlphabet)
            try:
                for record in source:
                    yield record if is_dna else translate(record)
            except VerifyError:
                if is_dna:
                    msg = (
                        "your model specifies a DNA encoding "
                        "which is incompatible with protein sequences"
                        )
                    raise RuntimeError(msg)
                source.set_alphabet(AminoAlphabet)
                for record in source:
                    yield record

        fd, tmphmm = mkstemp()
        close(fd)
        tmpaln = None
        try:
            with open(tmphmm, 'wb') as hmm_fh:
                hmm_fh.write(hmm)
                # explicitly gc hmm
                hmm = None
            tmpaln = generate_alignment_(seqrecords(), tmphmm, ARGS)
            alignment = load_stockholm(tmpaln, trim=True)
        finally:
            if exists(tmphmm):
                remove(tmphmm)
            if tmpaln is not None and exists(tmpaln):
                remove(tmpaln)

    X = extractor.transform(alignment)
    y = clf.predict(X)

    feature_names = extractor.get_feature_names()
    support = clf.named_steps['mrmr'].support_
    labels = ['"{0:s}"'.format(feature_names[i]) for i, s in enumerate(support) if s]
    emptys = [' ' * (len(label) + 2) for label in labels]
    idlen = max(len(r.id) for r in alignment) + 3

    print('{{\n  "label": "{0:s}",\n  "predictions": ['.format(ARGS.LABEL), file=ARGS.OUTPUT)
    for i, r in enumerate(alignment):
        if i > 0:
            print(',', file=ARGS.OUTPUT)
        features = ['[ ']
        for j, x in enumerate(X[i, support]):
            if x:
                features.append(labels[j])
                features.append(', ')
            else:
                features.append(emptys[j])
        features.append(' ]')
        # replace the last comma with a space
        idx = None
        for k, f in enumerate(features):
            if f == ', ':
                idx = k
        if idx is None:
            features[0] = features[0].rstrip()
            features[-1] = features[-1].lstrip()
        else:
            features[idx] = ''
        features_ = ''.join(features)
        print(
            '    {{{{ "id": {{0:<{0:d}s}} "value": {{1: d}}, "features": {{2:s}} }}}}'.format(
                idlen).format('"{0:s}",'.format(r.id), y[i], features_),
            file=ARGS.OUTPUT, end='')
    print('\n  ]\n}', file=ARGS.OUTPUT)

    finalize_args(ARGS)

    return 0
Example #51
 def opener(fp, mode):
     return codecs.getwriter('utf-8')(gzip_open(fp, mode))
Example #52
def step05( datadir ):

  stats_by_b = {};
  stats_by_cid_b = {};

  with gzip_open( datadir+"/train_trn.tsv.gz", "rt" ) as f:

    firstline = f.readline();
    if firstline and firstline[-1] == '\n':
      firstline = firstline[:-1];
    firstline = firstline.split( '\t' );

    assert \
         firstline \
      == (   [ '"id"', '"y"', '"cId"' ]
           + [ '"x{}"'.format(i) for i in range(1,101) ] );

    for line in f:

      if line and line[-1] == '\n':
        line = line[:-1];
      line = line.split( '\t' );

      id_ = line[0];
      y = line[1];
      cid = line[2];

      assert cid[0] == '"';
      assert cid[-1] == '"';
      cid = int( cid[1:-1] );

      x = [ None ];
      b = [];

      for i in range( 3, len(line) ):
        if (i-2) in BINARY_FEATs:
          b.append( line[i] );
        else:
          x.append( line[i] );

      b_ = 0;
      for i in range( 0, len(b) ):
        if b[i] == '0':
          b_i = 0;
        elif b[i] == '1':
          b_i = 1;
        else:
          assert False;
        b_ |= b_i << i;

      (total,pos) = stats_by_b.get( b_, (0,0) );
      total += 1;
      if y == '1':
        pos += 1;        
      stats_by_b[ b_ ] = ( total, pos );

      (total,pos) = stats_by_cid_b.get( (cid,b_), (0,0) );
      total += 1;
      if y == '1':
        pos += 1;        
      stats_by_cid_b[ (cid,b_) ] = ( total, pos );


  with open( datadir+"/step05.txt", "wt" ) as out:

    for b in sorted( stats_by_b ):

      ( total, pos ) = stats_by_b[ b ];
      p = float(pos) / float(total);

      # print( "{:20s};{:7d};{:7d};{:1.4f}".format( hex(b), pos, total, p ) );
      print( "{:20s};{:7d};{:7d};{:1.4f}".format( hex(b), pos, total, p ), file=out );

    print( "-->", len(stats_by_b) );

    for (cid,b) in sorted( stats_by_cid_b ):

      ( total, pos ) = stats_by_cid_b[ (cid,b) ];
      p = float(pos) / float(total);

      # print( "{:20s};{:7d};{:7d};{:1.4f}".format( hex(cid)+'.'+hex(b), pos, total, p ) );
      print( "{:20s};{:7d};{:7d};{:1.4f}".format( hex(cid)+'.'+hex(b), pos, total, p ), file=out );

    print( "-->", len(stats_by_cid_b) );
Example #53
 def GzipType(string):
     try:
         return gzip_open(string, 'wb')
     except OSError:
         # raise (not return) so argparse reports it as a usage error
         raise ArgumentTypeError("cannot open '{0:s}' for writing".format(string))
Example #54
#!/usr/bin/env python

from sys import argv
from gzip import open as gzip_open
from biom.parse import parse_biom_table

if __name__ == '__main__':
    table = parse_biom_table(gzip_open(argv[1]))
    foo = table.transformSamples(lambda x, y, z: x)
Example #55
            # use the ratings in the newest created dataset
            datasets.sort(key=lambda ds: self.parse_timestamp(ds.find('CREATE_TIME').text))
            ds = datasets[-1]
            cmps = ds.find('CMPS').findall('CMP')

            if all(self.is_good(cmps, subdet) for subdet in subdets):
                good.append(run_number)

        good.sort()
        return good

if __name__ == '__main__':
    from gzip import open as gzip_open
    min_time = datetime(2010, 2, 1)
    rrh = RunRegistryHelper(gzip_open('download.xml.gz'))
    dt_st = rrh.get_good_runs(['DT', 'STRIP'], min_time)
    dt_px_st = rrh.get_good_runs(['DT', 'PIX', 'STRIP'], min_time)

    # "Histogram" of GROUP_NAMEs used.
    from collections import defaultdict
    from pprint import pprint
    d = defaultdict(list)
    for run in rrh.runs:
        d[rrh.group_name(run)].append((rrh.run_number(run), rrh.start_time(run)))
    to_show = 8
    for k in sorted(d.keys()):
        v = d[k]
        v.sort()
        print(k)
        if len(v) > to_show: