Пример #1
0
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Open ``path`` and return a file handle, decompressing if requested.

    Parameters
    ----------
    path : str
        Location of the file to open.
    mode : str
        Mode handed to the underlying opener (ignored for ZIP archives,
        which are always opened for reading).
    encoding : str, optional
        Text encoding. Combined with ``compression`` it is rejected on
        Python 2.
    compression : {None, 'gzip', 'bz2', 'zip', 'xz'}
        Compression scheme of the file.
    memory_map : bool
        When true, try to wrap the handle in ``MMapWrapper``. Only the
        uncompressed branch reaches this step; compressed handles are
        returned before it.

    Returns
    -------
    A file-like object.

    Raises
    ------
    ValueError
        For an unknown compression type, for encoding + compression on
        Python 2, or when a ZIP archive does not hold exactly one member.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) == 1:
                # The archive must stay open: the member handle reads
                # through it.
                file_name = zip_names.pop()
                f = zip_file.open(file_name)
            else:
                # Nothing will be read from the archive, so release it
                # before reporting the problem instead of leaking it.
                zip_file.close()
                if len(zip_names) == 0:
                    raise ValueError(
                        'Zero files found in ZIP file {}'.format(path))
                raise ValueError(
                    'Multiple files found in ZIP file.'
                    ' Only one file per ZIP :{}'.format(zip_names))
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' % compression)
        if compat.PY3:
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Пример #2
0
class Snakefile:
    """Token stream over a Snakefile read from a local path or a URL.

    Instances are iterators over the tokens produced by
    ``tokenize.generate_tokens`` and context managers that close the
    underlying text stream on exit.
    """

    def __init__(self, path, rulecount=0):
        self.path = path
        self.file = self._open_source(path)
        self.tokens = tokenize.generate_tokens(self.file.readline)
        self.rulecount = rulecount
        self.lines = 0

    @staticmethod
    def _open_source(path):
        """Return a UTF-8 text stream for ``path``.

        The local file system is tried first; when the file is not found
        there, ``path`` is treated as a URL. A failed download is reported
        as ``WorkflowError``.
        """
        try:
            return open(path, encoding="utf-8")
        except FileNotFoundError:
            try:
                remote = urllib.request.urlopen(path)
                return TextIOWrapper(remote, encoding="utf-8")
            except (HTTPError, URLError, ContentTooShortError, ValueError):
                raise WorkflowError("Failed to open {}.".format(path))

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.tokens)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.file.close()
Пример #3
0
def _write_table(profile_dir,
                 table_name,
                 rows,
                 fields,
                 append=False,
                 gzip=False):
    # don't gzip if empty
    rows = iter(rows)
    try:
        first_row = next(rows)
    except StopIteration:
        gzip = False
    else:
        rows = chain([first_row], rows)
    if gzip and append:
        logging.warning('Appending to a gzip file may result in '
                        'inefficient compression.')

    if not os.path.exists(profile_dir):
        raise ItsdbError(
            'Profile directory does not exist: {}'.format(profile_dir))

    tbl_filename = os.path.join(profile_dir, table_name)
    mode = 'a' if append else 'w'
    if gzip:
        # text mode only from py3.3; until then use TextIOWrapper
        #mode += 't'  # text mode for gzip
        f = TextIOWrapper(gzopen(tbl_filename + '.gz', mode=mode))
    else:
        f = open(tbl_filename, mode=mode)

    for row in rows:
        f.write(make_row(row, fields) + '\n')

    f.close()
Пример #4
0
    def _generate(self):
        """ Generate the Python code.

        The output goes to the file named by ``self._opts.output``, or to
        standard output when that is ``'-'``. Whatever was opened here is
        released again even if ``compileUi`` raises.
        """

        needs_close = False
        # True when sys.stdout's binary buffer was wrapped below.
        wraps_stdout = False

        if sys.hexversion >= 0x03000000:
            if self._opts.output == '-':
                from io import TextIOWrapper

                pyfile = TextIOWrapper(sys.stdout.buffer, encoding='utf8')
                wraps_stdout = True
            else:
                pyfile = open(self._opts.output, 'wt', encoding='utf8')
                needs_close = True
        else:
            if self._opts.output == '-':
                pyfile = sys.stdout
            else:
                pyfile = open(self._opts.output, 'wt')
                needs_close = True

        import_from = self._opts.import_from

        if import_from:
            from_imports = True
        elif self._opts.from_imports:
            from_imports = True
            import_from = '.'
        else:
            from_imports = False

        try:
            compileUi(self._ui_file, pyfile, self._opts.execute,
                      self._opts.indent, from_imports,
                      self._opts.resource_suffix, import_from)
        finally:
            if needs_close:
                pyfile.close()
            elif wraps_stdout:
                # Push the generated text out, then detach so that the
                # wrapper's finalizer does not close sys.stdout.buffer.
                pyfile.flush()
                pyfile.detach()
Пример #5
0
def readline(config: LanguageIndexingConfiguration, default_config: TextIOWrapper) -> None:
    """Populate ``config`` from the sectioned text in ``default_config``.

    Lines starting with ``#`` and empty lines are skipped. A line starting
    with ``[`` opens a new section; every following ``key : value`` line
    becomes an attribute of a fresh ``Title`` object (a key without a value
    is stored as ``None``). Each finished section is attached to ``config``
    under the section name.

    ``default_config`` is always closed, also when parsing fails, and
    ``setDefault(config)`` is called once parsing succeeded.
    """
    title = Title()
    name = None
    try:
        for line in default_config:
            # Do nothing if it is a comment or empty line
            if line.startswith(("#", "\n")):
                continue
            if line.startswith("["):
                # When reaching a new section, save everything before it
                if name is not None:
                    setattr(config, name, title)
                # Strip surrounding whitespace first so that a header on
                # the last line (no trailing newline) keeps its full name.
                name = line.strip()[1:-1]
                title = Title()
                continue
            # Save each individual entry under the current section
            word = line.split(" : ")
            if len(word) == 1:
                # key without a value
                setattr(title, word[0].rstrip(), None)
            else:
                setattr(title, word[0], word[1].rstrip())
        # Input without any section header has nothing to attach;
        # setattr() with a None name would raise TypeError.
        if name is not None:
            setattr(config, name, title)
    finally:
        default_config.close()
    setDefault(config)
Пример #6
0
    def get_curlbomb(self, args, script=None, override_defaults=None):
        """Prepare curlbomb to run in a thread

        Assumes args has a '{script}' formatter in it to replace a temporary path with
        If no '{script}' formatter is found, stdin is mocked through settings['stdin']

        ``override_defaults`` is passed on to ``curlbomb.get_settings``. A
        dict supplied by the caller is updated in place with the ``'stdin'``
        entry when stdin is mocked; when omitted, a fresh dict is used for
        every call so nothing carries over between calls.

        Returns tuple(curlbomb_thread, client_command)
        """
        if override_defaults is None:
            override_defaults = {}
        if isinstance(script, str):
            script = bytes(script, "utf-8")
        stdin = "{script}" not in args and script is not None
        # Bound up front so the finally clause never hits an unbound name.
        s = None
        try:
            log.info("Using stdin: {}".format(stdin))
            if stdin:
                s = TextIOWrapper(BytesIO(script))
                override_defaults['stdin'] = s
            else:
                s = NamedTemporaryFile()
                if script is not None:
                    s.write(script)
                    s.flush()
                    args = args.format(script=s.name)
            args = shlex.split(args)
            log.warning("starting curlbomb: {}".format(args))
            settings = curlbomb.get_settings(args, override_defaults)
            client_cmd = settings['get_curlbomb_command'](settings)
            curlbomb_thread = CurlbombThread(settings)
            curlbomb_thread.start()
            return (curlbomb_thread,
                    client_cmd)
        finally:
            if s is not None:
                s.close()
Пример #7
0
def _write_table(profile_dir, table_name, rows, fields,
                 append=False, gzip=False):
    # don't gzip if empty
    rows = iter(rows)
    try:
        first_row = next(rows)
    except StopIteration:
        gzip = False
    else:
        rows = chain([first_row], rows)
    if gzip and append:
        logging.warning('Appending to a gzip file may result in '
                        'inefficient compression.')

    if not os.path.exists(profile_dir):
        raise ItsdbError('Profile directory does not exist: {}'
                         .format(profile_dir))

    tbl_filename = os.path.join(profile_dir, table_name)
    mode = 'a' if append else 'w'
    if gzip:
        # text mode only from py3.3; until then use TextIOWrapper
        #mode += 't'  # text mode for gzip
        f = TextIOWrapper(gzopen(tbl_filename + '.gz', mode=mode))
    else:
        f = open(tbl_filename, mode=mode)

    for row in rows:
        f.write(make_row(row, fields) + '\n')

    f.close()
Пример #8
0
class S3GZipCSVReader:
    """Reads a Gzipped CSV file from S3
        Downloads and decompresses on-the-fly, so the entire file doesn't have
        to be loaded into memory

    The object is a context manager and an iterator over the rows produced
    by ``get_csv_reader``.
    """
    def __init__(self, s3_config, s3_path, **kwargs):
        self.s3file = s3_config.fs_open(_strip_schema(s3_path))
        self.gzfile = TextIOWrapper(
            GzipFile(fileobj=self.s3file, mode='rb'),
            encoding='utf-8',
            newline='',
        )
        self.reader = get_csv_reader(self.gzfile, **kwargs)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __iter__(self):
        return iter(self.reader)

    def __next__(self):
        # The next() builtin works for both the Python 3 (__next__) and the
        # Python 2 (.next) iterator protocol of the wrapped reader.
        return next(self.reader)

    # Python 2 spelling, kept so existing callers of ``.next()`` still work.
    next = __next__

    def close(self):
        """Close the decompressing text stream and the S3 handle."""
        try:
            self.gzfile.close()
        finally:
            # GzipFile does not close a fileobj it was handed, so the S3
            # handle is closed here, even when the gzip close failed.
            self.s3file.close()
Пример #9
0
    def cpplint_tests(self, filename):
        """Run cpplint on ``filename`` and record each reported problem.

        cpplint's output is captured by temporarily replacing ``sys.stderr``
        with an in-memory stream. Every captured line of the form
        ``<filename>:<line>:  <message>`` is passed to ``self.add_error``
        with the label ``CPPLINT_ERROR``.
        """
        # Run checks directly from cpplint
        from io import TextIOWrapper, BytesIO
        output_string = ''
        real_stderr = sys.stderr
        fake_out = TextIOWrapper(BytesIO(), sys.stdout.encoding)
        try:
            sys.stderr = fake_out
            cpplint.ProcessFile(filename, 0)
        except TypeError:
            print(
                'TODO: Fix TypeError: must be unicode, not str in Python 2.x')
        finally:
            # restore stderr first so it is back in place even if reading
            # the captured text fails
            sys.stderr = real_stderr

            # get output
            fake_out.seek(0)  # jump to the start
            output_string = fake_out.read()  # read output
            fake_out.close()

        # Parse output from cpplint. The file name is escaped because paths
        # routinely contain regex metacharacters such as '.' or '+'.
        pattern = re.compile(re.escape(filename) + r'\:(\d+)\:\s+(.*)')
        for line in output_string.splitlines():
            result = pattern.search(line)
            if result:
                line_num = int(result.group(1))
                self.add_error(label="CPPLINT_ERROR",
                               line=line_num,
                               data={'message': result.group(2)})
Пример #10
0
def query_to_csv(**kwargs):
    """Write the chunks of an Oracle query result as one CSV object on S3.

    Keyword arguments read: ``query``, ``parameters`` (optional, defaults
    to ``{}``), ``dest_file``, ``ti``, ``task_instance_key_str`` and ``ds``.

    Returns the ``s3://`` URL of the written object, or ``None`` when
    ``dest_file`` is falsy (the hook is still asked for the data in that
    case, nothing is uploaded).
    """
    sql = kwargs["query"]
    bind_params = kwargs.get("parameters", {})
    dest_file = kwargs["dest_file"]
    rows_per_chunk = 5000

    chunks = OracleSqlAlchemyHook().get_pandas_df(
        sql, parameters=bind_params, chunksize=rows_per_chunk)

    access_key = s3_conn.login
    secret = s3_conn.password
    bucket_name = s3_conn.extra_dejson["bucket_name"]
    s3 = s3fs.S3FileSystem(anon=False, key=access_key, secret=secret)

    logger = kwargs["ti"].log
    logger.info(
        f"Running {kwargs['task_instance_key_str']} for {kwargs['ds']}")

    if not dest_file:
        return None

    path = f"{bucket_name}/{kwargs['ds']}/{kwargs['task_instance_key_str']}/{dest_file}"
    with s3.open(path, "wb") as csvfile:
        text_stream = TextIOWrapper(csvfile)
        for idx, chunk in enumerate(chunks):
            logger.info(f"Processing chunk {idx} of {dest_file}")
            # only the very first chunk carries the column header
            chunk.to_csv(text_stream,
                         index=False,
                         chunksize=rows_per_chunk,
                         header=(idx == 0))
        text_stream.close()
    return f"s3://{path}"
Пример #11
0
def generate_domain_list(pattern_str: str, wordlist_file: TextIOWrapper,
                         skip_files: list, chunk: int, chunks: int, l: Logger):
    """Build the list of domains to test and return it with its length.

    Candidates come from ``pattern_str`` when it is given, otherwise from
    the lines of ``wordlist_file`` (which is closed afterwards). Domains
    listed in any of ``skip_files`` are removed, which also sorts and
    de-duplicates the result; those files are closed as well. When both
    ``chunk`` and ``chunks`` are given, only that chunk of the list is kept.
    Progress is reported through ``l.log``.
    """
    if pattern_str is None:
        domains = [entry.rstrip('\n') for entry in wordlist_file.readlines()]
        wordlist_file.close()
    else:
        domains = pattern.generate_candiates(pattern_str)

    l.log('{} domains to test (initially)'.format(len(domains)))

    if skip_files is not None:
        excluded = set()
        for skip_file in skip_files:
            excluded.update(
                entry.rstrip('\n') for entry in skip_file.readlines())
            skip_file.close()
        domains = sorted(set(domains) - excluded)

    if chunk is not None and chunks is not None:
        # find the chunk of the list
        l.log("Take chunk {} of {}".format(chunk, chunks))
        domains = chunking.get(domains, chunk, chunks)

    total = len(domains)
    l.log('{} domains to test (after skipping and chunking)'.format(total))
    return domains, total
Пример #12
0
    def load(self):
        """Load this table's data into Athena.

        For every district (in sorted order) the raw file is converted into
        a gzip-compressed temporary file and uploaded. Afterwards the table
        DDL is executed and, when no district key is ``None``, the table
        partitions are repaired.
        """
        file_names_by_district = self._get_file_names()
        districts = sorted(file_names_by_district.keys())

        for district in districts:
            source_name = file_names_by_district[district]
            with NamedTemporaryFile('w+b') as raw_file:
                with gzip.open(raw_file, 'wb') as gzip_file:
                    writer = TextIOWrapper(gzip_file, encoding='utf-8')
                    self._convert_raw_file(source_name, writer)
                    writer.close()
                # upload only after the gzip stream has been finalised
                self._athena.upload_data(
                    self.name, raw_file, district=district)

        is_partitioned = None not in districts

        self._athena.execute_query(self._generate_ddl(is_partitioned))
        self.logger.debug('Ensured table exists for {0}'.format(self.name))

        if is_partitioned:
            repair_query = 'MSCK REPAIR TABLE {0};'.format(self.name)
            self._athena.execute_query(repair_query)
            self.logger.debug('Repaired table for {0}'.format(self.name))

        self.logger.info('Loaded normal table {0}'.format(self.name))
Пример #13
0
 def load(self, config_file: TextIOWrapper):
     """Load the YAML configuration into ``self._config`` and return ``self``.

     When ``config_file`` is falsy, ``pysguard.conf.yaml`` next to this
     module is opened instead. The file is closed afterwards in either
     case, also when parsing fails.
     """
     if not config_file:
         loc = os.path.join(os.path.dirname(__file__), 'pysguard.conf.yaml')
         config_file = open(loc, 'r')
     try:
         self._config = yaml.safe_load(config_file)
     finally:
         config_file.close()
     return self
Пример #14
0
    def download(self, outfile: io.TextIOWrapper) -> Result:
        """Stream the response body into ``outfile`` and report the outcome.

        A ``Range`` header is added when ``outfile`` is opened in a read or
        append mode. Events emitted: ``'download'`` (at start, or with a
        ``FILE_CLOSED`` result when ``outfile`` is already closed),
        ``'data'`` for every chunk and ``'end'`` with the final result.

        Returns a ``Result`` with ``Status.OK`` when at least
        ``Content-Length`` bytes were written (or the header is absent and
        completeness cannot be checked), ``Status.DISCONNECTED`` otherwise.
        When connecting fails, ``outfile`` is closed and the failed
        connection result is returned.
        """
        if outfile.closed:
            result = Result(Status.FILE_CLOSED, outfile)
            self.emit('download', result)
            return result
        if outfile.mode.startswith('r'):
            self._req.add_header('Range',
                                 'bytes=%i-' % length(outfile, restore=False))
        elif outfile.mode.startswith('a'):
            self._req.add_header('Range', 'bytes=%i-' % outfile.tell())

        # NOTE(review): ``self._conn`` and ``self.__conn`` are different
        # attributes (the latter is name-mangled). This looks like a typo
        # for a single attribute; confirm against the class definition.
        if self._conn is None or self.__conn.closed:
            st = self.connect()
            if not st:
                outfile.close()
                return st
            else:
                self.__conn = st.value
        bytes_written = 0
        self.emit("download", Result(Status.OK, outfile))
        for chunk in self:
            self.emit('data', chunk)
            outfile.write(chunk)
            bytes_written += len(chunk)

        # A response without Content-Length gives nothing to compare
        # against; int(None) would raise instead of producing a result.
        try:
            expected = self.__conn.headers['Content-Length']
        except KeyError:
            expected = None

        if expected is None or bytes_written >= int(expected):
            result = Result(Status.OK, outfile)
        else:
            result = Result(
                Status.DISCONNECTED, outfile, "The connection was aborted, " +
                "check your internet connection")
        self.emit('end', result)
        return result
Пример #15
0
def load_gff(gff):
    """Parses a single GFF file and returns a chromosome-indexed dict for
       that file.

    Files whose name ends in ``.gz`` are decompressed on the fly. The file
    is closed again even when parsing fails.

    Arguments
    ---------
    gff: str
        Filepath to GFF

    Returns
    -------
    dict: A dictionary representation of the GFF entries, indexed by
            chromosome ID
    """
    annotations = {}

    if gff.endswith('.gz'):
        import gzip
        from io import TextIOWrapper
        fp = TextIOWrapper(gzip.open(gff))
    else:
        fp = open(gff)

    with fp:
        for entry in GFF.parse(fp):
            # keep only entries whose first feature is a chromosome
            if len(entry.features) > 0 and \
                    entry.features[0].type == 'chromosome':
                annotations[entry.id] = entry

    return annotations
Пример #16
0
    class Context(IteratorContext[Target], metaclass=ABCMeta):
        """Context manager that parses targets from a subprocess' stdout.

        Entering spawns the subprocess (see ``_spawn_subprocess``) and
        returns the parse pipeline built over its UTF-8 decoded output.
        Leaving closes that output, waits for the subprocess and raises
        ``CalledProcessError`` when it exited with a non-zero code.
        """

        @staticmethod
        def _setup(instance: 'TargetReader.Context',
                   makefile: Makefile) -> None:
            """Attach ``makefile`` to ``instance`` and reset its process."""
            instance._makefile, instance._process = makefile, None

        def __init__(self) -> None:
            super().__init__()
            self._makefile = None  # type: Makefile
            self._process = None  # type: Popen
            self._stdout = None  # type: TextIOBase

        def __enter__(self) -> Iterator[Target]:
            self._process = self._spawn_subprocess()
            self._stdout = TextIOWrapper(self._process.stdout,
                                         encoding='utf-8')
            factory = DefaultParsePipelineFactory.make()
            return factory.build_parse_pipeline(self._stdout, self._makefile)

        def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
            # Nothing to clean up when no subprocess was ever started.
            if self._process is None:
                return False
            self._stdout.close()
            self._process.wait()
            self._check_returncode()
            return False

        def _check_returncode(self) -> None:
            """Raise ``CalledProcessError`` for a non-zero exit code."""
            code = self._process.returncode
            if code == 0:
                return
            command = ' '.join(self._process.args)  # type: ignore
            raise CalledProcessError(code, command)

        @abstractmethod
        def _spawn_subprocess(self) -> Popen:
            pass
Пример #17
0
    def _generate(self):
        """ Generate the Python code.

        Output is written to the file named by ``self._opts.output``, or to
        standard output when that is ``'-'``. The stream opened here is
        released again even if ``compileUi`` raises.
        """

        needs_close = False
        # True when sys.stdout's binary buffer was wrapped below.
        wraps_stdout = False

        if sys.hexversion >= 0x03000000:
            if self._opts.output == '-':
                from io import TextIOWrapper

                pyfile = TextIOWrapper(sys.stdout.buffer, encoding='utf8')
                wraps_stdout = True
            else:
                pyfile = open(self._opts.output, 'wt', encoding='utf8')
                needs_close = True
        else:
            if self._opts.output == '-':
                pyfile = sys.stdout
            else:
                pyfile = open(self._opts.output, 'wt')
                needs_close = True

        try:
            compileUi(self._ui_file, pyfile, self._opts.execute,
                      self._opts.indent, self._opts.pyqt3_wrapper,
                      self._opts.from_imports, self._opts.resource_suffix)
        finally:
            if needs_close:
                pyfile.close()
            elif wraps_stdout:
                # Flush the generated text, then detach so the wrapper's
                # finalizer cannot close sys.stdout.buffer behind our back.
                pyfile.flush()
                pyfile.detach()
Пример #18
0
def run(input_file: TextIOWrapper, output_file: TextIOWrapper, jobs: int,
        max_articles: Optional[int]) -> None:
    """Process the lines of ``input_file`` in parallel batches.

    Lines are collected into batches of ``BATCH_SIZE``; each batch is run
    through ``_run`` with ``jobs`` parallel workers and the results are
    appended to ``output_file``. A trailing partial batch is processed the
    same way. Both files are closed before returning, also when a batch
    fails.

    ``max_articles`` is accepted for interface compatibility; this function
    does not read it.
    """
    log.info("Import done, now starting work")

    current_articles = []
    nb_batches = 0

    try:
        for i, line in enumerate(input_file):
            current_articles.append(line)

            if (i + 1) % BATCH_SIZE == 0:
                log.info("Starting batch %9d", nb_batches + 1)
                result = Parallel(n_jobs=jobs)(delayed(_run)(a)
                                               for a in current_articles)

                current_articles = []

                append_to(output_file, result, end="\n")
                nb_batches += 1
                log.info("%9d batches done", nb_batches)

        # leftover lines that did not fill a whole batch
        if current_articles:
            if nb_batches == 0:
                log.info("There is only one batch to process")
            result = Parallel(n_jobs=jobs)(delayed(_run)(a)
                                           for a in current_articles)

            if len(result) > 0:
                append_to(output_file, result, end="")
    finally:
        # Close both files on every exit path; the nested finally keeps a
        # failing input close from skipping the output close.
        try:
            input_file.close()
        finally:
            output_file.close()
Пример #19
0
class Snakefile:
    """Iterator over the tokens of a Snakefile given as a path or a URL.

    The source is tokenized lazily with ``tokenize.generate_tokens``. Used
    as a context manager, the underlying text stream is closed on exit.
    """

    def __init__(self, path):
        self.path = path
        self.file = self._open_text(path)
        self.tokens = tokenize.generate_tokens(self.file.readline)
        self.rulecount = 0
        self.lines = 0

    @staticmethod
    def _open_text(location):
        """Open ``location`` as UTF-8 text, locally first, then as a URL.

        Raises ``WorkflowError`` when the remote fallback fails as well.
        """
        try:
            return open(location, encoding="utf-8")
        except FileNotFoundError:
            try:
                response = urllib.request.urlopen(location)
                return TextIOWrapper(response, encoding="utf-8")
            except (HTTPError, URLError, ContentTooShortError, ValueError):
                raise WorkflowError("Failed to open {}.".format(location))

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.file.close()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.tokens)
Пример #20
0
    def _read_table(cls, tar_handle, filename, requied_columns=()):
        requied_columns = frozenset(requied_columns) | frozenset(("ID", ))
        handle = TextIOWrapper(tar_handle.extractfile(filename))
        result = {}

        try:
            header = handle.readline().rstrip("\r\n").split("\t")
            if len(header) != len(set(header)):
                raise ZonkeyDBError(
                    "Table %r does contains duplicate columns!" % (filename, ))

            if requied_columns - set(header):
                raise ZonkeyDBError("Required columns are missign in table "
                                    "%r: %s" % (filename, ", ".join()))

            for linenum, line in enumerate(handle):
                fields = line.rstrip("\r\n").split("\t")

                if len(fields) != len(header):
                    raise ZonkeyDBError(
                        "Error reading %r at line %i; "
                        "expected  %i columns, found %i "
                        "columns!" %
                        (filename, linenum, len(header), len(fields)))

                row = dict(zip(header, fields))
                if row["ID"] in result:
                    raise ZonkeyDBError("Duplicate IDs in %r: %s" %
                                        (filename, row["ID"]))

                result[row["ID"]] = row
        finally:
            handle.close()

        return result
Пример #21
0
class bom_open():
    """Context manager to open a file or stdin/stdout. Encoding can be detected
    with chardet. Pass additional arguments to `open()`.
    Python writes BOM for utf-8-sig, utf-16, or utf-32.  BOM is not written
    when endianness is specified.
    If `file=None` or `'-'`, open stdin (for reading) or stdout (for writing).
    If `encoding=None` and `mode='r'` or `'w+'`, file encoding will be detected
    using chardet. When chardet cannot name an encoding (for example for an
    empty file), the encoding the stream was opened with is kept."""
    def __init__(self,
                 file,
                 mode='r',
                 buffering=-1,
                 encoding=None,
                 *args,
                 **kwargs):
        if file == '-':
            self.file = None
        else:
            self.file = file

        self.mode = mode
        self.buffering = buffering
        self.encoding = encoding
        self.args = args
        self.kwargs = kwargs

    def __enter__(self):
        if self.file:
            self._f = open(self.file, self.mode, self.buffering, self.encoding,
                           *self.args, **self.kwargs)
        elif self.mode == 'r':
            self._f = sys.stdin
        elif self.mode == 'w':
            if self.encoding:
                sys.stdout = open(sys.stdout.fileno(),
                                  'w',
                                  encoding=self.encoding,
                                  buffering=1)
            self._f = sys.stdout
        else:
            raise StdIOError('No file specified, and mode not appropriate '
                             'for stdin (r) or stdout (w)')

        if (self.encoding is None and 'b' not in self.mode
                and ('r' in self.mode or '+' in self.mode)):
            # run chardet on buffer without advancing file position
            peek = self._f.buffer.peek()
            detected = chardet.detect(peek)
            # chardet reports None when it cannot decide; fall back to the
            # stream's own encoding instead of failing on None.lower().
            self.encoding = detected['encoding'] or self._f.encoding

            # re-attach file with detected encoding
            if self._f.encoding.lower() != self.encoding.lower():
                self._f = TextIOWrapper(self._f.detach(),
                                        encoding=self.encoding)

        return self._f

    def __exit__(self, type, value, traceback):
        self._f.close()
Пример #22
0
    def deserialize_workflow_spec(self, s_state, filename=None):
        """
        Build a workflow spec from a packaged workflow archive.

        :param s_state: a byte-string with the contents of the packaged
        workflow archive, or a file-like object. A ``dict`` or ``str`` is
        handed to the parent class' implementation instead.

        :param filename: the name of the package file.

        :return: the spec of the entry point process named in the archive's
        metadata file.
        """
        if isinstance(s_state, (dict, str)):
            return super().deserialize_workflow_spec(s_state)
        if isinstance(s_state, bytes):
            s_state = BytesIO(s_state)

        # The context manager releases the archive once parsing is done
        # (or has failed); a file object passed in by the caller is left
        # open by ZipFile.
        with zipfile.ZipFile(s_state, "r",
                             compression=zipfile.ZIP_DEFLATED) as package_zip:
            config = configparser.ConfigParser()
            with TextIOWrapper(package_zip.open(Packager.METADATA_FILE),
                               encoding="UTF-8") as ini_fp:
                config.read_file(ini_fp)

            parser_class = BpmnParser
            parser_class_module = config.get('MetaData',
                                             'parser_class_module',
                                             fallback=None)

            if parser_class_module:
                parser_class_name = config.get('MetaData', 'parser_class')
                mod = __import__(parser_class_module,
                                 fromlist=[parser_class_name])
                parser_class = getattr(
                    mod, config.get('MetaData', 'parser_class'))

            parser = parser_class()

            for info in package_zip.infolist():
                parts = os.path.split(info.filename)
                if (len(parts) == 2 and not parts[0]
                        and parts[1].lower().endswith('.bpmn')):
                    # It is in the root of the ZIP and is a BPMN file
                    try:
                        svg = package_zip.read(info.filename[:-5] + '.svg')
                    except KeyError:
                        # no drawing stored next to this BPMN file
                        svg = None

                    with package_zip.open(info) as bpmn_fp:
                        bpmn = etree.parse(bpmn_fp)

                    parser.add_bpmn_xml(bpmn,
                                        svg=svg,
                                        filename='%s:%s' %
                                        (filename, info.filename))
            spec_name = config.get('MetaData', 'entry_point_process')
            return parser.get_spec(spec_name)
Пример #23
0
def test_readline(rf, fn):
    """Read ``fn`` (opened through ``rf.open``) line by line as text.

    The lines themselves are discarded; the stream is closed at the end.
    """
    reader = TextIOWrapper(BufferedReader(rf.open(fn)))
    # iter() with a sentinel stops at the first empty string, i.e. at EOF
    for _ in iter(reader.readline, ''):
        pass
    reader.close()
Пример #24
0
def test_readline(rf, fn):
    """Consume every text line of ``fn``, opened via ``rf.open``.

    Nothing is returned; the wrapped stream is closed once the end of the
    data is reached.
    """
    raw = rf.open(fn)
    text = TextIOWrapper(BufferedReader(raw))
    # readline() returns an empty (falsy) string only at end of file
    while text.readline():
        pass
    text.close()
Пример #25
0
def close_logs_file(file: TextIOWrapper):
    """Write the closing marker to the logs file and close it.

    A status message is printed (after ``print_prefix(prefix=0)``) before
    and after the file is finalised.
    """
    def announce(message):
        print_prefix(prefix=0)
        print(message)

    announce("Finishing logs...")
    file.write("\nEnd of log entry\n")
    file.close()
    announce("Done writing logs")
Пример #26
0
 def urlopen(self, url):
     """Open ``url`` as text, serving cacheable URLs from memory.

     For a URL listed in ``self.to_cache`` the content is downloaded once
     and the cached list of lines is returned. For any other URL an open
     text wrapper around the response is returned, which the caller has to
     close. Undecodable bytes are ignored in both cases.
     """
     if url in self.to_cache:
         if url not in self._url_cache:
             # the with-block closes the response even if reading fails
             with TextIOWrapper(urlopen(url, auto_deflate=True),
                                errors="ignore") as f:
                 self._url_cache[url] = f.read().splitlines()
         return self._url_cache[url]
     return TextIOWrapper(urlopen(url, auto_deflate=True), errors="ignore")
Пример #27
0
def get_fasta_from_file(in_memory_file):
    """Parse a binary FASTA stream into ``(id, lower-cased sequence)`` tuples.

    ``in_memory_file`` is decoded as UTF-8. It is closed together with the
    text wrapper once parsing has finished, also when parsing fails.
    """
    with TextIOWrapper(in_memory_file, 'utf-8') as handle:
        return [(record.id, str(record.seq).lower())
                for record in SeqIO.parse(handle, 'fasta')]
Пример #28
0
def extract_files(
    filenames,
    header_definitions,
    transform_functions,
    output_writers,
    error_filename,
    input_encoding="latin1",
    censorship=True,
):
    """Extract files from a fixed-width file containing more than one row type

    `filenames` is expected to be a list of ZIP files having only one file
    inside each; directories and names not ending in ``.zip`` are skipped.
    The first character of every line selects the row type, which picks the
    entry of `header_definitions` used to parse the line, the entry of
    `transform_functions` applied to the parsed row and the entry of
    `output_writers` that receives the resulting rows. Lines that fail to
    parse are recorded in `error_filename`. With `censorship` enabled,
    `censor` is applied to every row before it is written.

    All files opened here are closed again, also when processing fails.
    """
    error_fobj = open_compressed(error_filename, mode="w", encoding="latin1")
    try:
        error_writer = CsvLazyDictWriter(error_fobj)

        for filename in filenames:
            # TODO: use another strategy to open this file (like using rows'
            # open_compressed when archive support is implemented)
            if os.path.isdir(filename):
                continue
            if not str(filename).endswith('.zip'):
                continue

            with ZipFile(filename) as zf:
                inner_filenames = zf.filelist
                assert (
                    len(inner_filenames) == 1
                ), f"Only one file inside the zip is expected (got {len(inner_filenames)})"
                # XXX: The current approach of decoding here and then
                # extracting fixed-width-file data will work only for
                # encodings where 1 character is represented by 1 byte, such
                # as latin1. If the encoding can represent one character
                # using more than 1 byte (like UTF-8), this approach will
                # make incorrect results.
                with TextIOWrapper(zf.open(inner_filenames[0]),
                                   encoding=input_encoding) as fobj:
                    for line in tqdm(fobj, desc=f"Extracting {filename}"):
                        row_type = line[0]
                        try:
                            row = parse_row(header_definitions[row_type], line)
                        except ParsingError as exception:
                            error_writer.writerow(
                                {"error": exception.error,
                                 "line": exception.line}
                            )
                            continue
                        for transformed in transform_functions[row_type](row):
                            if censorship:  # Clear sensitive information
                                censor(row_type, transformed)
                            output_writers[row_type].writerow(transformed)
    finally:
        error_fobj.close()
Пример #29
0
 def parse_post_body(request):
     """Parse the first line of a POST body into a ``{key: value}`` dict.

     The line read from ``request.rfile`` is split on ``&`` into
     ``key=value`` tokens. Empty tokens are skipped, a token without ``=``
     maps to an empty string, and only the first ``=`` separates key from
     value. ``request.rfile`` is closed afterwards.
     """
     text_io = TextIOWrapper(request.rfile, encoding="UTF-8")
     try:
         splitted_data = text_io.readline().split("&")
         print("Splitted data:" + str(splitted_data))
     finally:
         text_io.close()
     key_value = dict()
     for data in splitted_data:
         if not data:
             # an empty body (or a stray '&') yields empty tokens
             continue
         key, _, value = data.partition("=")
         key_value[key] = value
     return key_value
Пример #30
0
def enqueue_output(out: TextIOWrapper, queue: Queue[Any]) -> bool:
    """Put every line of ``out`` on ``queue`` and close ``out``.

    Returns ``True`` when ``out`` was closed here, ``False`` when closing
    failed or when there was nothing left to close.
    """
    try:
        for line in out.readlines():
            queue.put_nowait(line)
    finally:
        if out and not out.closed:
            # NOTE: returning from this finally block is the existing
            # contract; it also discards an error raised while reading.
            try:
                out.close()
                return True
            except Exception:
                return False
    # Reached only when the stream needed no closing: report that with an
    # explicit bool rather than an implicit None.
    return False
Пример #31
0
    def put(self, request, filename, format=None):
        """Import the uploaded CSV file and report the import status.

        The upload is read from ``request.FILES['file']`` using the
        request's encoding and is closed afterwards, also when the import
        fails. ``filename`` and ``format`` are not read here.

        Returns a ``Response`` whose data is ``{'Import status': ...}``.
        """
        file_obj = TextIOWrapper(request.FILES['file'].file,
                                 encoding=request.encoding)
        try:
            csv_importer = CSVImporter()
            import_status = csv_importer.importFromFile(
                file_obj).get_import_status()
        finally:
            file_obj.close()

        data = {'Import status': import_status}
        # NOTE(review): a Response with status=204 used to be built here
        # and then discarded; the response actually returned has always
        # used the default status, which is kept. Confirm the intended
        # status code.
        return Response(data)
Пример #32
0
	def make(self, gnuplot="gnuplot"):
		"""
		Execute gnuplot to make the plot output.

		The script produced by ``self.make_script`` is written to gnuplot's
		standard input; the call returns once gnuplot has exited.

		There is no return from this call; gnuplot should generate its output
		to a file (or something).
		"""
		p = Popen(gnuplot, stdin=PIPE)
		# Created before the try block so the cleanup below never refers to
		# an unbound name.
		wrapper = TextIOWrapper(p.stdin)
		try:
			self.make_script(wrapper)
		finally:
			try:
				wrapper.close()
				p.stdin.close()
			finally:
				# Reap the child so it does not linger as a zombie and its
				# output is complete when this method returns.
				p.wait()
Пример #33
0
class Console():
    """Context manager that captures everything written to ``sys.stdout``.

    Inside the ``with`` block ``sys.stdout`` is replaced by an in-memory
    text buffer; ``get_output()`` returns what has been captured, both
    during and after the block.
    """

    def __init__(self):
        self.console_output = TextIOWrapper(BytesIO(), sys.stdout.encoding)
        # Stream that was active before __enter__, restored on exit.
        self._previous_stdout = None
        # Snapshot of the captured text, taken just before the buffer closes.
        self._captured = ""

    def __enter__(self):
        self._previous_stdout = sys.stdout
        sys.stdout = self.console_output
        return self

    def __exit__(self, type, value, traceback):
        try:
            # Keep the text before closing: a closed buffer cannot be read.
            self._captured = self._read_buffer()
            self.console_output.close()
        finally:
            if self._previous_stdout is not None:
                sys.stdout = self._previous_stdout
            else:
                sys.stdout = sys.__stdout__

    def _read_buffer(self):
        """Return the whole content of the still-open buffer."""
        self.console_output.seek(0)
        return self.console_output.read()

    def get_output(self):
        """Return the text captured so far (or in total, once closed)."""
        if self.console_output.closed:
            return self._captured
        return self._read_buffer()
Пример #34
0
    def deserialize_workflow_spec(self, s_state, filename=None):
        """
        Build a workflow spec from a packaged workflow archive.

        The archive's metadata file (``Packager.METADATA_FILE``) selects the
        parser class and the entry point process; every ``.bpmn`` file in
        the root of the archive is handed to the parser together with the
        ``.svg`` of the same base name, if present.

        :param s_state: a byte-string with the contents of the packaged workflow archive, or a file-like object.
        :param filename: the name of the package file.
        :return: the result of ``parser.get_spec`` for the entry point process.
        """
        if isinstance(s_state, (str, bytes)):
            s_state = BytesIO(s_state)

        package_zip = zipfile.ZipFile(s_state, "r", compression=zipfile.ZIP_DEFLATED)
        try:
            # SafeConfigParser/readfp were removed in Python 3.12; use them
            # only where the modern ConfigParser.read_file is unavailable.
            if hasattr(configparser.ConfigParser, 'read_file'):
                config = configparser.ConfigParser()
                read_config = config.read_file
            else:
                config = configparser.SafeConfigParser()
                read_config = config.readfp

            ini_fp = TextIOWrapper(package_zip.open(Packager.METADATA_FILE), encoding="UTF-8")
            try:
                read_config(ini_fp)
            finally:
                ini_fp.close()

            parser_class = BpmnParser

            try:
                parser_class_module = config.get('MetaData', 'parser_class_module', fallback=None)
            except TypeError:
                # unfortunately the fallback= does not exist on python 2
                parser_class_module = config.get('MetaData', 'parser_class_module', None)

            if parser_class_module:
                mod = __import__(parser_class_module, fromlist=[config.get('MetaData', 'parser_class')])
                parser_class = getattr(mod, config.get('MetaData', 'parser_class'))

            parser = parser_class()

            for info in package_zip.infolist():
                parts = os.path.split(info.filename)
                if len(parts) == 2 and not parts[0] and parts[1].lower().endswith('.bpmn'):
                    # It is in the root of the ZIP and is a BPMN file
                    try:
                        svg = package_zip.read(info.filename[:-5]+'.svg')
                    except KeyError:
                        # No drawing shipped alongside this process.
                        svg = None

                    bpmn_fp = package_zip.open(info)
                    try:
                        bpmn = ET.parse(bpmn_fp)
                    finally:
                        bpmn_fp.close()

                    parser.add_bpmn_xml(bpmn, svg=svg, filename='%s:%s' % (filename, info.filename))

            return parser.get_spec(config.get('MetaData', 'entry_point_process'))
        finally:
            # ZipFile.close() leaves a caller-supplied file object open.
            package_zip.close()
Пример #35
0
def load(path=None):
    """Return the name -> gender statistics table, building it if needed.

    If ``names.pickle`` exists (inside ``path`` when given, otherwise in
    the current directory) it is unpickled and returned.  Otherwise the ZIP
    archive at ``NAMES_URL`` is downloaded, every member whose name
    contains ".txt" is read as CSV rows of ``name, gender, count``, the
    counts are aggregated per lower-cased name, and the result is written
    to ``names.pickle`` before being returned.

    :param path: optional directory holding (or receiving) ``names.pickle``.
    :return: dict mapping each name to a dict with the keys ``"M"``,
        ``"F"``, ``"probability"`` and ``"gender"``.
    """
    datafile = "names.pickle"
    if path:
        datafile = os.path.join(path, datafile)
    if os.path.exists(datafile):
        # NOTE: pickle must only be used on a trusted, locally built cache.
        with open(datafile, "rb") as cached:
            return pickle.load(cached)

    with urllib.request.urlopen(NAMES_URL) as response:
        archive_bytes = BytesIO(response.read())

    names = dict()
    with zipfile.ZipFile(archive_bytes, "r") as archive:
        for filename in archive.namelist():
            if ".txt" not in filename:
                continue

            with TextIOWrapper(archive.open(filename)) as text_file:
                for row in csv.reader(text_file, delimiter=","):
                    if len(row) < 3:
                        continue
                    name = row[0].lower()
                    gender = row[1]
                    count = int(row[2])

                    if name not in names:
                        names[name] = dict(M=0, F=0)
                    names[name][gender] = names[name][gender] + count

    for value in names.values():
        count = value["M"] + value["F"]
        if value["M"] > value["F"]:
            value["probability"] = float(value["M"]) / count
            value["gender"] = "M"
        else:
            value["probability"] = float(value["F"]) / count
            value["gender"] = "F"

    with open(datafile, "wb") as cache_out:
        pickle.dump(names, cache_out, -1)
    return names
Пример #36
0
def load(path=None):
    """Return the name -> gender statistics table, building it if needed.

    If ``names.pickle`` exists (inside ``path`` when given, otherwise in
    the current directory) it is unpickled and returned.  Otherwise the ZIP
    archive at ``NAMES_URL`` is downloaded, every member whose name
    contains '.txt' is read as CSV rows of ``name, gender, count``, the
    counts are aggregated per lower-cased name, and the result is written
    to ``names.pickle`` before being returned.

    :param path: optional directory holding (or receiving) ``names.pickle``.
    :return: dict mapping each name to a dict with the keys ``'M'``,
        ``'F'``, ``'probability'`` and ``'gender'``.
    """
    datafile = 'names.pickle'
    if path:
        datafile = os.path.join(path, datafile)
    if os.path.exists(datafile):
        # NOTE: pickle must only be used on a trusted, locally built cache.
        with open(datafile, 'rb') as cached:
            return pickle.load(cached)

    with urllib.request.urlopen(NAMES_URL) as response:
        archive_bytes = BytesIO(response.read())

    names = dict()
    with zipfile.ZipFile(archive_bytes, 'r') as archive:
        for filename in archive.namelist():
            if '.txt' not in filename:
                continue

            with TextIOWrapper(archive.open(filename)) as text_file:
                for row in csv.reader(text_file, delimiter=','):
                    if len(row) < 3:
                        continue
                    name = row[0].lower()
                    gender = row[1]
                    count = int(row[2])

                    if name not in names:
                        names[name] = dict(M=0, F=0)
                    names[name][gender] = names[name][gender] + count

    for value in names.values():
        count = value['M'] + value['F']
        if value['M'] > value['F']:
            value['probability'] = float(value['M']) / count
            value['gender'] = 'M'
        else:
            value['probability'] = float(value['F']) / count
            value['gender'] = 'F'

    with open(datafile, 'wb') as cache_out:
        pickle.dump(names, cache_out, -1)
    return names
Пример #37
0
def read_csvfile_rest(csvfile_handle: TextIOWrapper) -> Iterator[FileRow]:
    """Yield one row object per CSV record, then close the handle.

    A record with a single field becomes a ``CommandRow``.  Any other
    record is unpacked as ``command, hyps, goal, *predictions`` and becomes
    a ``TacticRow`` whose ``predictions`` are built from consecutive pairs
    of the remaining fields (a trailing unpaired field is ignored).

    The handle is closed when the generator finishes, is closed early by
    its consumer, or fails with an exception.
    """
    try:
        reader = csv.reader(csvfile_handle)
        for row in reader:
            if len(row) == 1:
                yield CommandRow(command=row[0])
            else:
                command, hyps, goal, *predictions = row
                yield TacticRow(command=command,
                                hyps=hyps,
                                goal=goal,
                                predictions=[
                                    PredictionResult(predictions[i],
                                                     predictions[i + 1])
                                    for i in range(0,
                                                   len(predictions) - 1, 2)
                                ])
    finally:
        # Runs on exhaustion, on generator.close() and on errors alike.
        csvfile_handle.close()
Пример #38
0
    def test_io_not_autoclose_textiowrapper(self):
        """With ``auto_close=False`` the response stays open after EOF.

        The response and the text wrapper around it must only report
        ``closed`` once the wrapper is closed explicitly.
        """
        payload = BytesIO(
            b"\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x9f\n\xce\xb1\xce\xb2\xce\xb3\xce\xb4"
        )
        response = HTTPResponse(payload, preload_content=False, auto_close=False)
        text_reader = TextIOWrapper(response, encoding="utf8")

        decoded_lines = list(text_reader)
        assert decoded_lines == ["äöüß\n", "αβγδ"]

        # Exhausted, but neither layer has been closed.
        assert not text_reader.closed
        assert not response.closed
        with pytest.raises(StopIteration):
            next(text_reader)

        # Closing the wrapper closes the wrapped response as well.
        text_reader.close()
        assert text_reader.closed
        assert response.closed
        with pytest.raises(ValueError, match="I/O operation on closed file.?"):
            next(text_reader)
Пример #39
0
    def build_fa(self, input_config: io.TextIOWrapper) -> FA.FA:
        """
        Build an FA from an open configuration file and close the file.

        Lines are consumed in this order: an integer (parsed, value
        unused), the integer number of transition rows, one ignored line,
        that many rows of space-separated integers, one ignored line, and a
        final row of space-separated integers holding the final states.

        :param input_config: input configuration file
        :return: FA built from the given configuration file
        """

        def next_tokens():
            # One line with the trailing newline removed, split on spaces.
            return input_config.readline().rstrip('\n').split(' ')

        int(input_config.readline())  # must parse as an integer; value unused
        row_count = int(input_config.readline())
        input_config.readline()  # read and ignored

        rows = [[int(token) for token in next_tokens()]
                for _ in range(row_count)]

        input_config.readline()  # read and ignored
        accepting = {int(token) for token in next_tokens()}

        input_config.close()

        return FA.FA(tuple(rows), accepting)
Пример #40
0
class SmtLibSolver(Solver):
    """Wrapper for using a solver via textual SMT-LIB interface.

    The solver is launched in a subprocess using args as arguments of
    the executable. Interaction with the solver occurs via pipe.

    NOTE(review): the subprocess' stderr is piped but never read by this
    class; a solver that writes a lot to stderr could block on a full pipe.
    """

    OptionsClass = SmtLibOptions

    def __init__(self, args, environment, logic, LOGICS=None, **options):
        """Start the solver process and send the initial configuration.

        :param args: command line (executable and arguments) of the solver.
        :param environment: environment passed on to ``Solver.__init__``.
        :param logic: logic passed to ``Solver.__init__`` and ``set_logic``.
        :param LOGICS: optional override for ``self.LOGICS``.
        :param options: extra keyword options passed to ``Solver.__init__``.
        """
        Solver.__init__(self,
                        environment,
                        logic=logic,
                        **options)
        self.to = self.environment.typeso
        if LOGICS is not None: self.LOGICS = LOGICS
        self.args = args
        self.declared_vars = set()
        self.declared_sorts = set()
        self.solver = Popen(args, stdout=PIPE, stderr=PIPE, stdin=PIPE,
                            bufsize=-1)
        # Give time to the process to start-up
        time.sleep(0.01)
        self.parser = SmtLibParser(interactive=True)
        if PY2:
            self.solver_stdin = self.solver.stdin
            self.solver_stdout = self.solver.stdout
        else:
            # On Python 3 the pipes are binary; wrap them for text I/O.
            self.solver_stdin = TextIOWrapper(self.solver.stdin)
            self.solver_stdout = TextIOWrapper(self.solver.stdout)

        # Initialize solver
        self.options(self)
        self.set_logic(logic)

    def set_option(self, name, value):
        """Send a SET_OPTION command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.SET_OPTION,
                                                [name, value]))

    def set_logic(self, logic):
        """Send a SET_LOGIC command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.SET_LOGIC, [logic]))

    def _debug(self, msg, *format_args):
        """Print ``msg % format_args`` when interaction debugging is on."""
        if self.options.debug_interaction:
            print(msg % format_args)

    def _send_command(self, cmd):
        """Sends a command to the STDIN pipe."""
        self._debug("Sending: %s", cmd.serialize_to_string())
        cmd.serialize(self.solver_stdin, daggify=True)
        self.solver_stdin.write("\n")
        self.solver_stdin.flush()

    def _send_silent_command(self, cmd):
        """Sends a command to the STDIN pipe and awaits for acknowledgment."""
        self._send_command(cmd)
        self._check_success()

    def _get_answer(self):
        """Reads a line from STDOUT pipe"""
        res = self.solver_stdout.readline().strip()
        self._debug("Read: %s", res)
        return res

    def _get_value_answer(self):
        """Reads and parses an assignment from the STDOUT pipe"""
        lst = self.parser.get_assignment_list(self.solver_stdout)
        self._debug("Read: %s", lst)
        return lst

    def _declare_sort(self, sort):
        """Declare ``sort`` to the solver and remember it as declared."""
        cmd = SmtLibCommand(smtcmd.DECLARE_SORT, [sort])
        self._send_silent_command(cmd)
        self.declared_sorts.add(sort)

    def _declare_variable(self, symbol):
        """Declare ``symbol`` to the solver and remember it as declared."""
        cmd = SmtLibCommand(smtcmd.DECLARE_FUN, [symbol])
        self._send_silent_command(cmd)
        self.declared_vars.add(symbol)

    def _check_success(self):
        """Read one answer and raise unless it is exactly "success"."""
        res = self._get_answer()
        if res != "success":
            raise UnknownSolverAnswerError("Solver returned: '%s'" % res)

    def solve(self, assumptions=None):
        """Send CHECK_SAT; return True for "sat" and False for "unsat".

        Raises SolverReturnedUnknownResultError on "unknown" and
        UnknownSolverAnswerError on any other answer.  Assumptions are not
        supported.
        """
        assert assumptions is None
        self._send_command(SmtLibCommand(smtcmd.CHECK_SAT, []))
        ans = self._get_answer()
        if ans == "sat":
            return True
        elif ans == "unsat":
            return False
        elif ans == "unknown":
            raise SolverReturnedUnknownResultError
        else:
            raise UnknownSolverAnswerError("Solver returned: " + ans)

    def reset_assertions(self):
        """Send a RESET_ASSERTIONS command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.RESET_ASSERTIONS, []))
        return

    def add_assertion(self, formula, named=None):
        """Assert ``formula``, first declaring any new sorts and variables."""
        # This is needed because Z3 (and possibly other solvers) incorrectly
        # recognize N * M * x as a non-linear term
        formula = formula.simplify()
        sorts = self.to.get_types(formula, custom_only=True)
        for s in sorts:
            if s not in self.declared_sorts:
                self._declare_sort(s)
        deps = formula.get_free_variables()
        for d in deps:
            if d not in self.declared_vars:
                self._declare_variable(d)
        self._send_silent_command(SmtLibCommand(smtcmd.ASSERT, [formula]))

    def push(self, levels=1):
        """Send a PUSH command for ``levels`` levels."""
        self._send_silent_command(SmtLibCommand(smtcmd.PUSH, [levels]))

    def pop(self, levels=1):
        """Send a POP command for ``levels`` levels."""
        self._send_silent_command(SmtLibCommand(smtcmd.POP, [levels]))

    def get_value(self, item):
        """Send GET_VALUE for ``item`` and return the value in the answer."""
        self._send_command(SmtLibCommand(smtcmd.GET_VALUE, [item]))
        lst = self._get_value_answer()
        assert len(lst) == 1
        assert len(lst[0]) == 2
        return lst[0][1]

    def print_model(self, name_filter=None):
        """Print ``name = value`` for every declared variable.

        ``name_filter`` is not supported and raises NotImplementedError.
        """
        if name_filter is not None:
            raise NotImplementedError
        for v in self.declared_vars:
            print("%s = %s" % (v, self.get_value(v)))

    def get_model(self):
        """Return an EagerModel with a value for every term symbol."""
        assignment = {}
        for s in self.environment.formula_manager.get_all_symbols():
            if s.is_term():
                v = self.get_value(s)
                assignment[s] = v
        return EagerModel(assignment=assignment, environment=self.environment)

    def _exit(self):
        """Send EXIT, close all three pipes and terminate the process.

        Every cleanup step runs even if an earlier one raises (for example
        a broken pipe because the solver already died), so the pipes and
        the process are never left behind; the exception still propagates.
        """
        try:
            self._send_command(SmtLibCommand(smtcmd.EXIT, []))
        finally:
            try:
                self.solver_stdin.close()
            finally:
                try:
                    self.solver_stdout.close()
                finally:
                    try:
                        self.solver.stderr.close()
                    finally:
                        self.solver.terminate()
        return
Пример #41
0
def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `path_or_buf` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        If the compression type is not recognized, or a ZIP archive opened
        for reading does not contain exactly one file.
    """
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, str)

    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        # GZ Compression
        if compression == 'gzip':
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    # The caller never receives `handles` when we raise, so
                    # the container has to be closed here.
                    zf.close()
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    zf.close()
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if encoding:
            # Encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # No explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # Convert BytesIO or file objects passed with an encoding
    if is_text and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles
Пример #42
0
class Reader(object):
    """ Read SAM/BAM format file as an iterable.

    The input may be a file object, a stream without a ``name`` attribute,
    or a ``Connection``.  BAM input is decoded by running
    ``samtools view`` in a subprocess.
    """
    def __init__(self, f, regions=False, kind=None, samtools_path=None):
        """
        :param f: open SAM/BAM file, stream or ``Connection``.
        :param regions: region passed to ``samtools view``; BAM only.
        :param kind: 'sam' or 'bam' to override detection by extension.
        :param samtools_path: samtools executable; defaults to "samtools".
        :raises ValueError: if ``regions`` is given for non-BAM input.
        """
        ext = None
        if samtools_path is None:
            samtools_path = "samtools"  # Get from the PATH
        self.samtools_path = samtools_path
        self.spool = None  # use this to catch alignment during reader scraping
        self.type = 'sam'
        try:
            self._f_name = f.name
            _, ext = os.path.splitext(f.name)
            if f.name == '<stdin>':  # stdin stream
                self._sam_init(f)
            elif ext == '.bam' or kind == 'bam':
                self._bam_init(f, regions)
                self.type = 'bam'
            elif ext == '.sam' or kind == 'sam':
                self._sam_init(f)
            else:
                self._sam_init(f)
            if (regions and ext != '.bam' and kind is None) or (regions and kind is not None and kind != 'bam'):
                self.__exit__()
                raise ValueError("Region support requires bam file.")
        except AttributeError:
            # No ``name`` attribute: either a pipe connection or a stream.
            self._f_name = None
            if isinstance(f, Connection):
                self._pipe_init(f)
            else:
                self._sam_init(f)

    def _pipe_init(self, f):
        """Read the header from a connection, spooling the first record."""
        header = []
        for line in iter(f.recv, ''):
            if line[0] == '@':
                header.append(line.rstrip('\n\r'))
            else:
                self.spool = line
                break
        self.header_as_dict(header)
        self.f = iter(f.recv, '')
        self._conn = 'pipe'

    def _sam_init(self, f):
        """Read the header from a text stream, spooling the first record."""
        header = []
        self.f = f
        for line in self.f:
            if line[0] == '@':
                header.append(line.rstrip('\n\r'))
            else:
                self.spool = line
                break
        self.header_as_dict(header)
        self._conn = 'file'

    def _bam_init(self, f, regions):
        """Read the header via ``samtools view -H`` and start streaming.

        When ``regions`` is given and no ``.bai`` index can be opened next
        to the BAM file, ``samtools index`` is run first.
        """
        pline = [self.samtools_path, 'view', '-H', f.name]
        try:
            p = Popen(pline, bufsize=-1, stdout=PIPE,
                      stderr=PIPE)
        except OSError:
            raise OSError('Samtools must be installed for BAM file support!\n')
        try:
            self.header_as_dict([line.decode('utf-8').rstrip('\n\r') for line in p.stdout])
            p.wait()
        finally:
            # Release both pipes of the header process.
            p.stdout.close()
            p.stderr.close()
        if regions:
            try:
                # Only probing that the index can be opened; close it again.
                with open(''.join([f.name, '.bai'])):
                    pass
            except EnvironmentError:
                sys.stderr.write("BAM index not found. Attempting to index file.\n")
                index_p = Popen([self.samtools_path, 'index', f.name], stdout=PIPE, stderr=PIPE)

                _, err = index_p.communicate()
                if index_p.returncode > 0 or re.search("fail", str(err)):
                    raise OSError("Indexing failed. Is the BAM file sorted?\n")
                else:
                    sys.stderr.write("Index created successfully.\n")
            pline = [self.samtools_path, 'view', f.name, regions]
        else:
            pline = [self.samtools_path, 'view', f.name]
        self.p = Popen(pline, bufsize=-1, stdout=PIPE,
                  stderr=PIPE)
        if PY3:
            self.f = TextIOWrapper(self.p.stdout)
        else:
            self.f = self.p.stdout

        self._conn = 'proc'

    def next(self):
        """ Returns the next :class:`.Sam` object """
        if self.spool:  # this will be the first alignment in a SAM file or stream
            line = self.spool.rstrip('\n\r')
            self.spool = None
        else:
            line = next(self.f).rstrip('\n\r')
        if line == '':
            # An empty line ends the iteration.
            raise StopIteration
        fields = line.split('\t')
        required = fields[:11]
        tags = fields[11:]
        return Sam(*required, tags=tags)

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self

    def __len__(self):
        """ Returns the number of reads in an indexed BAM file.
        Not implemented for SAM files. """
        if self.type != 'bam':
            raise NotImplementedError("len(Reader) is only implemented for BAM files.")
        return sum(bam_read_count(self._f_name, self.samtools_path))

    def subsample(self, n):
        """ Returns an interator that draws every nth read from
        the input file. Returns :class:`.Sam`.

        Only the underlying stream is read here: a first alignment held in
        ``self.spool`` is not included, and lines are split without
        stripping the line ending. """
        for i, line in enumerate(self.f):
            if i % n == 0:
                fields = line.split('\t')
                required = fields[:11]
                tags = fields[11:]
                yield Sam(*required, tags=tags)

    def header_as_dict(self, header):
        """ Parse the header list and return a nested dictionary.

        The result is stored in ``self.header``, keyed by the record type
        and then by the first field of each header line. """
        self.header = DefaultOrderedDict(OrderedDict)
        for line in header:
            line = line.split('\t')
            key, fields = (line[0], line[1:])
            try:
                self.header[key][fields[0]] = fields[1:]
            except IndexError:
                self.header[key][fields[0]] = ['']

    @property
    def seqs(self):
        """ Return just the sequence names from the @SQ library as a generator. """
        for key in self.header['@SQ'].keys():
            yield key.split(':')[1]

    def tile_genome(self, width):
        """ Return a generator of UCSC-style regions tiling ``width``. """
        assert isinstance(width, int)
        for k, v in self.header['@SQ'].items():
            rname = k.split(':')[1]
            seqlength = v[0].split(':')[1]
            for region in tile_region(rname, 1, int(seqlength), width):
                yield region

    def close(self):
        """Release the underlying file or subprocess."""
        self.__exit__()

    def __enter__(self):
        return self

    def __exit__(self, *args):
        if self._conn == 'file':
            self.f.close()
        if self._conn == 'proc':
            self.f.close()
            # stderr of the streaming process is piped but never read.
            self.p.stderr.close()
            self.p.terminate()
Пример #43
0
def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    With ``compression`` set ('gzip', 'bz2', 'zip' or 'xz') the matching
    decompressing file object is returned, wrapped in a ``TextIOWrapper``
    on Python 3; ``memory_map`` is not applied on that path.  Without
    compression the path is opened directly and, when ``memory_map`` is
    true, wrapped in an ``MMapWrapper`` if that succeeds.

    Raises ValueError for an unrecognized compression type, for a ZIP
    archive that does not contain exactly one file, and for
    encoding + compression on Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) == 1:
                file_name = zip_names.pop()
                f = zip_file.open(file_name)
            elif len(zip_names) == 0:
                # Nothing is handed back to the caller on error, so the
                # archive must be closed here.
                zip_file.close()
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(path))
            else:
                zip_file.close()
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Пример #44
0
def _get_handle(source, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    ``source`` may be a path or a buffer.  With ``compression`` set
    ('gzip', 'bz2', 'zip' or 'xz', case-insensitive) the matching
    decompressing file object is returned, wrapped in a ``TextIOWrapper``
    on Python 3; ``memory_map`` is not applied on that path.

    Raises ValueError for an unrecognized compression type and for a ZIP
    archive that does not contain exactly one file.

    NOTE(review): on Python 3 a ``BytesIO`` source is wrapped and returned
    before ``compression`` is looked at, so compressed in-memory data is
    not decompressed here; confirm this is intended.
    """

    f = source
    is_path = isinstance(source, compat.string_types)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and isinstance(source, compat.BytesIO):
        from io import TextIOWrapper

        return TextIOWrapper(source, encoding=encoding)

    elif compression is not None:
        compression = compression.lower()
        if encoding is not None and not compat.PY3 and not is_path:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip

            f = gzip.GzipFile(source, mode) \
                if is_path else gzip.GzipFile(fileobj=source)

        # BZ Compression
        elif compression == 'bz2':
            import bz2

            if is_path:
                f = bz2.BZ2File(source, mode)

            else:
                f = bz2.BZ2File(source) if compat.PY3 else StringIO(
                    bz2.decompress(source.read()))
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(source)
            zip_names = zip_file.namelist()

            if len(zip_names) == 1:
                f = zip_file.open(zip_names.pop())
            elif len(zip_names) == 0:
                # Nothing is handed back to the caller on error, so the
                # archive must be closed here.
                zip_file.close()
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(source))
            else:
                zip_file.close()
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(source, mode)

        else:
            raise ValueError('Unrecognized compression: %s' % compression)

        if compat.PY3:
            from io import TextIOWrapper

            f = TextIOWrapper(f, encoding=encoding)

        return f

    elif is_path:
        if compat.PY3:
            if encoding:
                f = open(source, mode, encoding=encoding)
            else:
                f = open(source, mode, errors='replace')
        else:
            f = open(source, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f
Пример #45
0
class SmtLibSolver(Solver):
    """Wrapper for using a solver via textual SMT-LIB interface.

    The solver is launched in a subprocess using args as arguments of
    the executable. Interaction with the solver occurs via pipe.

    NOTE(review): the subprocess' stderr is piped but never read by this
    class; a solver that writes a lot to stderr could block on a full pipe.
    """

    def __init__(self, args, environment, logic, user_options=None,
                 LOGICS=None):
        """Start the solver process and send the initial configuration.

        :param args: command line (executable and arguments) of the solver.
        :param environment: environment passed on to ``Solver.__init__``.
        :param logic: logic passed to ``Solver.__init__`` and ``set_logic``.
        :param user_options: options passed on to ``Solver.__init__``.
        :param LOGICS: optional override for ``self.LOGICS``.
        """
        Solver.__init__(self,
                        environment,
                        logic=logic,
                        user_options=user_options)
        # Flag used to debug interaction with the solver
        self.dbg = False

        if LOGICS is not None: self.LOGICS = LOGICS
        self.args = args
        self.declared_vars = set()
        self.solver = Popen(args, stdout=PIPE, stderr=PIPE, stdin=PIPE)
        self.parser = SmtLibParser(interactive=True)
        if PY2:
            self.solver_stdin = self.solver.stdin
            self.solver_stdout = self.solver.stdout
        else:
            # On Python 3 the pipes are binary; wrap them for text I/O.
            self.solver_stdin = TextIOWrapper(self.solver.stdin)
            self.solver_stdout = TextIOWrapper(self.solver.stdout)

        # Initialize solver
        self.set_option(":print-success", "true")
        if self.options.generate_models:
            self.set_option(":produce-models", "true")
        # Redirect diagnostic output to stdout
        self.set_option(":diagnostic-output-channel", '"stdout"')
        if self.options is not None:
            for o,v in iteritems(self.options):
                self.set_option(o,v)
        self.set_logic(logic)

    def set_option(self, name, value):
        """Send a SET_OPTION command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.SET_OPTION,
                                                [name, value]))

    def set_logic(self, logic):
        """Send a SET_LOGIC command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.SET_LOGIC, [logic]))

    def _send_command(self, cmd):
        """Sends a command to the STDIN pipe."""
        if self.dbg: print("Sending: " + cmd.serialize_to_string())
        cmd.serialize(self.solver_stdin, daggify=True)
        self.solver_stdin.write("\n")
        self.solver_stdin.flush()

    def _send_silent_command(self, cmd):
        """Sends a command to the STDIN pipe and awaits for acknowledgment."""
        self._send_command(cmd)
        self._check_success()

    def _get_answer(self):
        """Reads a line from STDOUT pipe"""
        res = self.solver_stdout.readline().strip()
        if self.dbg: print("Read: " + str(res))
        return res

    def _get_value_answer(self):
        """Reads and parses an assignment from the STDOUT pipe"""
        lst = self.parser.get_assignment_list(self.solver_stdout)
        if self.dbg: print("Read: " + str(lst))
        return lst

    def _declare_variable(self, symbol):
        """Declare ``symbol`` to the solver and remember it as declared."""
        cmd = SmtLibCommand(smtcmd.DECLARE_FUN, [symbol])
        self._send_silent_command(cmd)
        self.declared_vars.add(symbol)

    def _check_success(self):
        """Read one answer and raise unless it is exactly "success"."""
        res = self._get_answer()
        if res != "success":
            raise UnknownSolverAnswerError("Solver returned: '%s'" % res)

    def solve(self, assumptions=None):
        """Send CHECK_SAT; return True for "sat" and False for "unsat".

        Raises SolverReturnedUnknownResultError on "unknown" and
        UnknownSolverAnswerError on any other answer.  Assumptions are not
        supported.
        """
        assert assumptions is None
        self._send_command(SmtLibCommand(smtcmd.CHECK_SAT, []))
        ans = self._get_answer()
        if ans == "sat":
            return True
        elif ans == "unsat":
            return False
        elif ans == "unknown":
            raise SolverReturnedUnknownResultError
        else:
            raise UnknownSolverAnswerError("Solver returned: " + ans)

    def reset_assertions(self):
        """Send a RESET_ASSERTIONS command and wait for the acknowledgment."""
        self._send_silent_command(SmtLibCommand(smtcmd.RESET_ASSERTIONS, []))
        return

    def add_assertion(self, formula, named=None):
        """Assert ``formula``, first declaring any undeclared variables."""
        deps = formula.get_free_variables()
        for d in deps:
            if d not in self.declared_vars:
                self._declare_variable(d)
        self._send_silent_command(SmtLibCommand(smtcmd.ASSERT, [formula]))

    def push(self, levels=1):
        """Send a PUSH command for ``levels`` levels."""
        self._send_silent_command(SmtLibCommand(smtcmd.PUSH, [levels]))

    def pop(self, levels=1):
        """Send a POP command for ``levels`` levels."""
        self._send_silent_command(SmtLibCommand(smtcmd.POP, [levels]))

    def get_value(self, item):
        """Send GET_VALUE for ``item`` and return the value in the answer."""
        self._send_command(SmtLibCommand(smtcmd.GET_VALUE, [item]))
        lst = self._get_value_answer()
        assert len(lst) == 1
        assert len(lst[0]) == 2
        return lst[0][1]

    def print_model(self, name_filter=None):
        """Print ``name = value`` for every declared variable.

        ``name_filter`` is not supported and raises NotImplementedError.
        """
        if name_filter is not None:
            raise NotImplementedError
        for v in self.declared_vars:
            print("%s = %s" % (v, self.get_value(v)))

    def get_model(self):
        """Return an EagerModel with a value for every term symbol."""
        assignment = {}
        for s in self.environment.formula_manager.get_all_symbols():
            if s.is_term():
                v = self.get_value(s)
                assignment[s] = v
        return EagerModel(assignment=assignment, environment=self.environment)

    def _exit(self):
        """Send EXIT, close all three pipes and terminate the process.

        Every cleanup step runs even if an earlier one raises (for example
        a broken pipe because the solver already died), so the pipes and
        the process are never left behind; the exception still propagates.
        """
        try:
            self._send_command(SmtLibCommand(smtcmd.EXIT, []))
        finally:
            try:
                self.solver_stdin.close()
            finally:
                try:
                    self.solver_stdout.close()
                finally:
                    try:
                        self.solver.stderr.close()
                    finally:
                        self.solver.terminate()
        return
Пример #46
0
class MultiPageTextImporter:
    def __init__(self, mainControl):
        """
        mainControl -- Currently PersonalWikiFrame object
        """
        self.mainControl = mainControl


    def getImportTypes(self, guiparent):
        """
        Return sequence of tuples describing the import types provided
        by this object. A tuple has the form (<imp. type>,
            <human readable description>, <panel for add. options or None>)

        This importer provides the single type "multipage_text". Its
        options panel "ImportSubMultipageText" is loaded from the XML
        resources with guiparent as parent; if guiparent is false no panel
        is created and None is placed in the tuple instead.
        """
        mptPanel = None
        if guiparent:
            mptPanel = wx.xrc.XmlResource.Get().LoadPanel(guiparent,
                    "ImportSubMultipageText")

        return (("multipage_text", _("Multipage text"), mptPanel),)


    def getImportSourceWildcards(self, importType):
        """
        If an export type is intended to go to a file, this function
        returns a (possibly empty) sequence of tuples
        (wildcard description, wildcard filepattern).
        
        If an export type goes to a directory, None is returned
        """
        if importType == "multipage_text":
            return ((_("Multipage files (*.mpt)"), "*.mpt"),
                    (_("Text file (*.txt)"), "*.txt")) 

        return None


    def getAddOptVersion(self):
        """
        Returns the version of the additional options information returned
        by getAddOpt(). If the return value is -1, the version info can't
        be stored between application sessions.
        
        Otherwise, the addopt information can be stored between sessions
        and can later handled back to the doImport method of the object
        without previously showing the import dialog.
        """
        return 0


    def getAddOpt(self, addoptpanel):
        """
        Reads additional options from panel addoptpanel.
        If getAddOptVersion() > -1, the return value must be a sequence
        of simple string, unicode and/or numeric objects. Otherwise, any object
        can be returned (normally the addoptpanel itself)
        """
        if addoptpanel is None:
            return (0,)
        else:
            ctrls = XrcControls(addoptpanel)
            showImportTableAlways = boolToInt(ctrls.cbShowImportTableAlways.GetValue())

            return (showImportTableAlways,)


    def _collectContent(self):
        """
        Collect lines from current position of importFile up to separator
        or file end collect all lines and return them as list of lines.
        """
        content = []                    
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
#                 content = u"".join(content)
                break
            
            if line == self.separator:
                if len(content) > 0:
                    # Iff last line of mpt page is empty, the original
                    # page ended with a newline, so remove last
                    # character (=newline)

                    content[-1] = content[-1][:-1]
#                     content = u"".join(content)
                break

            content.append(line)
            
        return "".join(content)


    def _skipContent(self):
        """
        Skip content until reaching next separator or end of file
        """
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
                break
            
            if line == self.separator:
                break



    def doImport(self, wikiDocument, importType, importSrc,
            compatFilenames, addOpt, importData=None):
        """
        Run import operation.

        wikiDocument -- WikiDocument object
        importType -- string tag to identify how to import
        importSrc -- Path to source directory or file to import from
        compatFilenames -- Should the filenames be decoded from the lowest
                           level compatible?
        addOpt -- additional options returned by getAddOpt()
        importData -- if not None contains data to import as bytestring.
                importSrc is ignored in this case. Needed for trashcan.
        returns True if import was done (needed for trashcan), False if
                the user canceled the operation
        raises ImportException if the source can't be opened, the header
                is malformed, the format version isn't supported or any
                other error occurs during the import
        """
        if importData is not None:
            self.rawImportFile = BytesIO(importData)
        else:
            try:
                self.rawImportFile = open(pathEnc(importSrc), "rb")
            except IOError as e:
                raise ImportException(_("Opening import file failed")) from e

        self.wikiDocument = wikiDocument
        self.tempDb = None
        # Reset the text wrapper so the cleanup at the end never touches a
        # stale object of a previous import if wrapping fails this time
        self.importFile = None

        showImportTableAlways = addOpt[0]

        # TODO Do not stop on each import error, instead create error list and
        #   continue

        try:
            try:
                # Wrap input file to convert format. A UTF-8 BOM selects
                # UTF-8, otherwise the file is read in the MBCS encoding
                bom = self.rawImportFile.read(len(BOM_UTF8))
                if bom != BOM_UTF8:
                    self.rawImportFile.seek(0)
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            MBCS_ENCODING, "replace")
                else:
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            "utf-8", "replace")

                line = self.importFile.readline()
                if line.startswith("#!"):
                    # Skip initial line with #! to allow execution as shell script
                    line = self.importFile.readline()

                if not line.startswith("Multipage text format "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                # Following in the format identifier line is a version number
                # of the file format
                self.formatVer = int(line[22:-1])

                # Only versions 0 and 1 are known. Negative numbers must be
                # rejected as well, otherwise nothing would be imported
                # without any error being reported
                if self.formatVer not in (0, 1):
                    raise ImportException(
                            _("File format number %i is not supported") %
                            self.formatVer)

                # Next is the separator line
                line = self.importFile.readline()
                if not line.startswith("Separator: "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                # The separator keeps its line end so that it can be compared
                # directly with the lines read later
                self.separator = line[11:]

                startPos = self.importFile.tell()

                if self.formatVer == 0:
                    self._doImportVer0()
                    # Import was done, report that as documented
                    return True
                elif self.formatVer == 1:
                    # Create temporary database. It is mainly filled during
                    # pass 1 to check for validity and other things before
                    # actual importing in pass 2

                    # TODO Respect settings for general temp location!!!
                    self.tempDb = ConnectWrapSyncCommit(sqlite3.connect(""))
                    try:
                        self.tempDb.execSql("create table entries("
                                "unifName text primary key not null, "   # Unified name in import file
                                "seen integer not null default 0, "   # data really exists
                                "dontImport integer not null default 0, "   # don't import this (set between pass 1 and 2)
                                "missingDep integer not null default 0, "  # missing dependency(ies)
                                "importVersionData integer not null default 0, "  # versioning data present
                                "collisionWithPresent text not null default '',"  # Unif. name of present entry which collides with imported one (if any)
                                "renameImportTo text not null default ''," # Rename imported element to (if at all)
                                "renamePresentTo text not null default ''"  # Rename present element in  database to (if at all)
                                ");"
                                )

                        # Dependencies. If unifName isn't imported (or faulty), neededBy shouldn't be either
                        self.tempDb.execSql("create table depgraph("
                                "unifName text not null default '',"
                                "neededBy text not null default '',"
                                "constraint depgraphpk primary key (unifName, neededBy)"
                                ");"
                                )

                        # Recursive processing is not supported for this table
                        self.tempDb.execSql("create table renamegraph("
                                "unifName text not null default '',"
                                "dependent text not null default '',"
                                "constraint renamegraphpk primary key (unifName, dependent),"
                                "constraint renamegraphsingledep unique (dependent)"
                                ");"
                                )

                        # Collect some initial information into the temporary database
                        self._doImportVer1Pass1()

                        # Draw some logical conclusions on the temp db
                        self._markMissingDependencies()
                        self._markHasVersionData()
                        self._markCollision()

                        # Now ask user if necessary
                        if showImportTableAlways or self._isUserNeeded():
                            if not self._doUserDecision():
                                # Canceled by user
                                return False

                        # Further logical processing after possible user editing
                        self._markNonImportedVersionsData()
                        self._markNonImportedDependencies()
                        self._propagateRenames()
                        # TODO: Remove version data without ver. overview or main data

                        # Back to start of import file and import according to settings
                        # in temp db
                        self.importFile.seek(startPos)
                        self._doImportVer1Pass2()

                        return True
                    finally:
                        self.tempDb.close()
                        self.tempDb = None

            except ImportException:
                raise
            except Exception as e:
                traceback.print_exc()
                raise ImportException(str(e)) from e

        finally:
            if self.importFile is not None:
                # Closing the wrapper closes the wrapped raw file as well
                self.importFile.close()
            else:
                # Wrapper was never created, so close the raw file directly
                self.rawImportFile.close()


    def _markMissingDependencies(self):
        """
        Mark entries which depend (according to table depgraph) on an entry
        that wasn't seen in the import file or that misses a dependency
        itself. Such entries get missingDep and dontImport set.

        The update is repeated until it changes no row anymore so that the
        marking propagates along chains of dependencies.
        """
        markSql = """
                update entries set missingDep=1, dontImport=1 where (not missingDep) and 
                    unifName in (select depgraph.neededBy from depgraph inner join 
                    entries on depgraph.unifName = entries.unifName where
                    (not entries.seen) or entries.missingDep);
                """

        rowsChanged = True
        while rowsChanged:
            self.tempDb.execSql(markSql)
            rowsChanged = self.tempDb.rowcount != 0


    def _markHasVersionData(self):
        """
        Set importVersionData for each entry for which an entry
        'versioning/overview/<unifName>' exists which isn't excluded
        from import.
        """
        # TODO Take missing deps into account here?
        markSql = """
            update entries set importVersionData=1 where (not importVersionData) and 
                unifName in (select substr(unifName, 21) from entries where 
                unifName glob 'versioning/overview/*' and not dontImport)
            """
        self.tempDb.execSql(markSql)

#             self.tempDb.execSql("insert or replace into entries(unifName, importVersionData) "
#                 "values (?, 1)", (depunifName,))


    def _markCollision(self):
        """
        Record name collisions between data blocks to import and items
        already present in the wiki document.

        For each colliding entry the column collisionWithPresent is set to
        the unified name of the entry itself.
        """
        markSql = ("update entries set collisionWithPresent = ? "
                "where unifName = ?")

        # Wiki pages collide if a page of that name is already defined
        wikipageUnifNames = self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where unifName glob 'wikipage/*' "
                "and not dontImport")
        for wikipageUnifName in wikipageUnifNames:
            # Cut off the prefix "wikipage/" to get the page name
            if self.wikiDocument.isDefinedWikiPageName(wikipageUnifName[9:]):
                self.tempDb.execSql(markSql,
                        (wikipageUnifName, wikipageUnifName))

        # Saved searches collide if a data block of that name exists
        searchUnifNames = self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where (unifName glob 'savedsearch/*' "
                "or unifName glob 'savedpagesearch/*') and not dontImport")
        for unifName in searchUnifNames:
            if not self.wikiDocument.hasDataBlock(unifName):
                continue

            self.tempDb.execSql(markSql, (unifName, unifName))


    def _markNonImportedVersionsData(self):
        """
        After user dialog: For entries whose importVersionData is false
        the version data depending on them shouldn't be imported.

        Here only the entry 'versioning/overview/<unifName>' gets
        dontImport set. Propagation to the other data blocks is left to
        the next processing step.
        """
        excludeOverviewSql = """
                update entries set dontImport = 1 where 
                unifName in (select 'versioning/overview/' || unifName from 
                entries where not importVersionData)
                """
        self.tempDb.execSql(excludeOverviewSql)

#         # Vice versa the importVersionData column must be updated if
#         self.tempDb.execSql("""
#                 update entries set importVersionData = 0 where importVersionData 
#                 and ('versioning/overview/' || unifName) in (select unifName 
#                 from entries where dontImport)
#                 """)
       


    def _markNonImportedDependencies(self):
        """
        After user dialog: Entries which depend (according to table
        depgraph) on an entry marked with dontImport are marked with
        dontImport as well (especially version data).

        The update is repeated until it changes no row anymore so that the
        marking propagates along chains of dependencies.
        """
        markSql = """
                    update entries set dontImport=1 where (not dontImport) and 
                    unifName in (select depgraph.neededBy from depgraph inner join 
                    entries on depgraph.unifName = entries.unifName where
                    entries.dontImport);
                """

        rowsChanged = True
        while rowsChanged:
            self.tempDb.execSql(markSql)
            rowsChanged = self.tempDb.rowcount != 0


        

    def _propagateRenames(self):
        """
        If an item to import gets renamed, write matching rename commands
        for its dependent items (table renamegraph) which are imported too.
        Renaming of present items is not propagated.

        A dependent is only handled if its unified name ends with the
        unified name of the renamed item; that ending is then replaced by
        the new name.
        """
        renamedSql = ("select unifName, renameImportTo from entries "
                "where renameImportTo != '' and not dontImport")
        dependentsSql = ("select dependent from renamegraph where unifName = ? and "
                "dependent in (select unifName from entries where "
                "not dontImport)")
        updateSql = """
                        update entries set renameImportTo=? where unifName = ?
                        """

        for unifName, renImportTo in self.tempDb.execSqlQuery(renamedSql):
            dependents = self.tempDb.execSqlQuerySingleColumn(dependentsSql,
                    (unifName,))
            for depUnifName in dependents:
                if not depUnifName.endswith(unifName):
                    continue

                # Keep the prefix of the dependent, exchange the name part
                newName = depUnifName[:-len(unifName)] + renImportTo
                self.tempDb.execSql(updateSql, (newName, depUnifName))


    def _doUserDecision(self):
        """
        Show the modal import dialog so that the user can decide what
        to do with the entries in the temporary database.
        This method is overwritten for trashcan GUI.
        Returns False if user canceled operation
        """
        mainControl = self.mainControl
        return MultiPageTextImporterDialog.runModal(mainControl, self.tempDb,
                mainControl)


    def _isUserNeeded(self):
        """
        Return True if the user must be asked how to proceed, which is the
        case if at least one entry has a missing dependency or a name
        collision. Otherwise False is returned.
        Under some circumstances the dialog may be shown regardless of the result.
        """
        missingDep = self.tempDb.execSqlQuerySingleItem(
                "select missingDep from entries "
                "where missingDep limit 1", default=False)
        if missingDep:
            return True

        # An empty string means that no entry collides
        collision = self.tempDb.execSqlQuerySingleItem(
                "select collisionWithPresent "
                "from entries where collisionWithPresent != '' limit 1",
                default="")
        return len(collision) > 0



    def _doImportVer0(self):
        """
        Import wiki pages from a file of format version 0.

        Each entry consists of a line with the wiki word followed by the
        page content up to the next separator. The live text of the page is
        replaced by the content. An invalid wiki word raises
        ImportException.
        """
        app = wx.GetApp()
        langHelper = app.createWikiLanguageHelper(
                self.wikiDocument.getWikiDefaultWikiLanguage())

        # readline() returns "" only at the end of the file
        for line in iter(self.importFile.readline, ""):
            wikiWord = line[:-1]  # Remove line end
            errMsg = langHelper.checkForInvalidWikiWord(wikiWord,
                    self.wikiDocument)
            if errMsg:
                raise ImportException(_("Bad wiki word: %s, %s") %
                        (wikiWord, errMsg))

            content = self._collectContent()
            page = self.wikiDocument.getWikiPageNoError(wikiWord)
            page.replaceLiveText(content)


    def _doImportVer1Pass1(self):
        """
        First pass over a file of format version 1.

        Nothing is imported yet. Each entry with a known tag is recorded as
        seen in table entries of the temporary database. Versioning
        overviews are additionally evaluated to fill the dependency tables,
        the content of all other entries is skipped. Entries with unknown
        tag are skipped and not recorded.
        """
        skippedPrefixes = ("funcpage/", "savedsearch/", "savedpagesearch/",
                "wikipage/", "versioning/packet/versionNo/")

        # readline() returns "" only at the end of the file
        for tagLine in iter(self.importFile.readline, ""):
            tag = tagLine[:-1]  # Remove line end

            if tag.startswith("versioning/overview/"):
                self._doImportItemVersioningOverviewVer1Pass1(tag[20:])
            else:
                self._skipContent()
                if not tag.startswith(skippedPrefixes):
                    # Unknown tag -> was ignored until separator
                    continue

            self.tempDb.execSql("insert or replace into entries(unifName, seen) "
                    "values (?, 1)", (tag,))


    def _readHintedDatablockVer1(self):
        """
        Reads datablock and preprocesses encoding if necessary.
        Returns either (hintStrings, content) or (None, None) if either
        an unknown important hint was found or if encoding had an error.

        hintStrings is a list of hints (as unistrings) which were
        not processed by the function (therefore encoding hint is removed).
        content can be a bytestring or a unistring.
        
        If (None, None) is returned, the remaining content of the entry
        was skipped already by the function.
        """
        hintLine = self.importFile.readline()[:-1]
        hintStrings = hintLine.split("  ")
        
        resultHintStrings = []

        # Set default
        useB64 = False

        # Process hints
        for hint in hintStrings:
            if hint.startswith("important/encoding/"):
                if hint[19:] == "text":
                    useB64 = False
                elif hint[19:] == "base64":
                    useB64 = True
                else:
                    # Unknown encoding: don't read further
                    self._skipContent()
                    return None, None
            elif hint.startswith("important/"):
                # There is something important we do not understand
                self._skipContent()
                return None, None
            else:
                resultHintStrings.append(hint)

        content = self._collectContent()

        if useB64:
            try:
                content = base64BlockDecode(content)
            except TypeError:
                # base64 decoding failed
                self._skipContent()
                return None, None
        
        return (resultHintStrings, content)



    def _doImportItemVersioningOverviewVer1Pass1(self, subtag):
        """
        Pass 1 processing of a versioning overview entry.

        subtag -- unified name of the item the overview belongs to (the tag
                of the entry without the prefix "versioning/overview/")

        The overview is read and its relations are written to the
        temporary database: The overview and its dependent data blocks need
        the base item (depgraph), overview and each dependent data block
        need each other, and all of them must be renamed if the base item
        is renamed (renamegraph).
        Entries which can't be read or raise a VersioningException are
        ignored.
        """
        hintStrings, content = self._readHintedDatablockVer1()
        if content is None:
            return

        # Always encode to UTF-8 no matter what the import file encoding is.
        # According to the reader's documentation the content can be a
        # bytestring already, which has no encode() method.
        if isinstance(content, str):
            content = content.encode("utf-8")

        try:
            ovw = Versioning.VersionOverview(self.wikiDocument,
                    unifiedBasePageName=subtag)

            ovw.readOverviewFromBytes(content)

            ovwUnifName = ovw.getUnifiedName()

            self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                "values (?, ?)", (subtag, ovwUnifName))

            self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) "
                "values (?, ?)", (subtag, ovwUnifName))

            for depUnifName in ovw.getDependentDataBlocks(omitSelf=True):
                # Mutual dependency between version overview and each version packet
                self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                    "values (?, ?)", (depUnifName, ovwUnifName))
                self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                    "values (?, ?)", (ovwUnifName, depUnifName))

                self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) "
                    "values (?, ?)", (subtag, depUnifName))

        except VersioningException:
            return


    def _doImportVer1Pass2(self):
        """
        Second pass over a file of format version 1: Modify the wiki
        document according to the settings in the temporary database.

        The steps are in this order:
        1. Rename present wiki pages and then other present data blocks
           for which renamePresentTo is set.
        2. Delete the existing version overview of each present wiki page
           for which version data will be imported.
        3. Read the import file entry by entry and import each entry which
           is known to the temporary database and not marked dontImport,
           under the name renameImportTo if that is set.
        4. Let the version overview of pages with imported version data
           reread its data.

        The import file must be positioned at the first entry.
        """
        wikiDoc = self.wikiDocument
        
        # We have to rename present items
        # First wikipages because this automatically renames depending version data
        # substr(..., 10) cuts off the prefix "wikipage/"
        for pageFrom, pageTo in self.tempDb.execSqlQuery(
                """
                select substr(unifName, 10), substr(renamePresentTo, 10) 
                from entries where unifName glob 'wikipage/*' and 
                renamePresentTo glob 'wikipage/*'
                """):
            if wikiDoc.isDefinedWikiPageName(pageFrom):
                wikiDoc.renameWikiWords({pageFrom: pageTo}, Consts.ModifyText.off)
                        # TODO How to handle rename of home page?

        # Then remaining data blocks
        for oldUnifName, newUnifName in self.tempDb.execSqlQuery(
                """
                select unifName, renamePresentTo
                from entries where unifName not glob 'wikipage/*' and 
                renamePresentTo != ''
                """):
            wikiDoc.renameDataBlock(oldUnifName, newUnifName)

        # For wiki pages with versions to import, existing versions must be
        # deleted
        # The query delivers the names the imported pages will finally have
        # (original name or the name to rename the import to)

        for wikiWord in self.tempDb.execSqlQuerySingleColumn(
                """
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and 
                renameImportTo == '' and not dontImport and importVersionData
                union
                select substr(renameImportTo, 10)
                from entries where unifName glob 'wikipage/*' and 
                renameImportTo glob 'wikipage/*' and not dontImport and 
                importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.delete()


        # Now process the entries of the import file one by one
        while True:
            tag = self.importFile.readline()
            if tag == "":
                # End of file
                break
            tag = tag[:-1]  # Remove line end
            
            try:
                dontImport, renameImportTo = \
                        self.tempDb.execSqlQuery(
                        "select dontImport, renameImportTo from "
                        "entries where unifName = ?", (tag,))[0]
            except IndexError:
                # Tag has no row in the temporary database, the entry
                # is skipped
                # Maybe dangerous
                traceback.print_exc()
                self._skipContent()
                continue

            if dontImport:
                self._skipContent()
                continue
            
            # Without a rename the entry is imported under its own name
            if renameImportTo == "":
                renameImportTo = tag

            # The tag in the file selects the kind of import, the (maybe
            # renamed) unified name is the target. Functional pages are
            # imported under the tag from the file.
            if tag.startswith("wikipage/"):
                self._importItemWikiPageVer1Pass2(renameImportTo[9:])
            elif tag.startswith("funcpage/"):
                self._importItemFuncPageVer1Pass2(tag[9:])
            elif tag.startswith("savedsearch/"):
                self._importB64DatablockVer1Pass2(renameImportTo)
            elif tag.startswith("savedpagesearch/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            elif tag.startswith("versioning/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            else:
                # Unknown tag -> Ignore until separator
                self._skipContent()

        
        # Finally let the version overviews of the concerned pages read
        # their data again
        for wikiWord in self.tempDb.execSqlQuerySingleColumn(
                """
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and 
                renamePresentTo == '' and importVersionData
                union
                select substr(renamePresentTo, 10)
                from entries where unifName glob 'wikipage/*' and 
                renamePresentTo glob 'wikipage/*' and importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.readOverview()





    def _importItemWikiPageVer1Pass2(self, wikiWord):
        timeStampLine = self.importFile.readline()[:-1]
        timeStrings = timeStampLine.split("  ")
        if len(timeStrings) < 3:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        timeStrings = timeStrings[:3]

        try:
            timeStrings = [str(ts) for ts in timeStrings]
        except UnicodeEncodeError:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        try:
            timeStamps = [timegm(time.strptime(ts, "%Y-%m-%d/%H:%M:%S"))
                    for ts in timeStrings]

        except (ValueError, OverflowError):
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        content = self._collectContent()
        page = self.wikiDocument.getWikiPageNoError(wikiWord)

        # TODO How to handle versions here?
        page.replaceLiveText(content)
        if page.getTxtEditor() is not None:
            page.writeToDatabase()

        page.setTimestamps(timeStamps)



    def _importItemFuncPageVer1Pass2(self, subtag):
        """
        Import a functional page entry.

        subtag -- the functional page tag

        The live text of the functional page is replaced by the content of
        the entry. If the tag is bad or unknown (BadFuncPageTagException)
        the content is read but ignored.
        """
        try:
            funcTag = str(subtag)
        except UnicodeEncodeError:
            self._skipContent()
            return

        text = self._collectContent()
        try:
            funcPage = self.wikiDocument.getFuncPage(funcTag)
            funcPage.replaceLiveText(text)
        except BadFuncPageTagException:
            # This function tag is bad or unknown -> ignore
            return  # TODO Report error


    def _importB64DatablockVer1Pass2(self, unifName):
        """
        Import an entry whose content is base64 encoded and store the
        decoded data as data block unifName (with store hint "intern").

        If decoding fails or storing raises TypeError, the entry is
        ignored.
        """
        # Content is base64 encoded
        b64Content = self._collectContent()

        try:
            datablock = base64BlockDecode(b64Content)
        except (TypeError, ValueError):
            # base64 decoding failed. The standard library decoders report
            # that with binascii.Error (a ValueError).
            # NOTE(review): assumes base64BlockDecode doesn't swallow these
            # itself -- confirm.
            return  # TODO Report error

        try:
            self.wikiDocument.getWikiData().storeDataBlock(unifName, datablock,
                    storeHint=Consts.DATABLOCK_STOREHINT_INTERN)
        except TypeError:
            return  # TODO Report error


    def _importTextDatablockVer1Pass2(self, unifName):
        """
        Import an entry with plain text content and store the text as data
        block unifName (with store hint "intern").

        A TypeError raised while storing is ignored.
        """
        text = self._collectContent()

        try:
            wikiData = self.wikiDocument.getWikiData()
            wikiData.storeDataBlock(unifName, text,
                    storeHint=Consts.DATABLOCK_STOREHINT_INTERN)
        except TypeError:
            return  # TODO Report error


    def _importHintedDatablockVer1Pass2(self, unifName):
        """
        A hinted datablock starts with an extra line defining encoding
        (text or B64) and storage hint. It was introduced later therefore
        only versioning packets use this while saved searches don't.
        """
        hintStrings, content = self._readHintedDatablockVer1()
        if hintStrings is None:
            return
        
        # Set defaults
        storeHint = Consts.DATABLOCK_STOREHINT_INTERN

        # Process hints
        for hint in hintStrings:
            if hint.startswith("storeHint/"):
                if hint[10:] == "extern":
                    storeHint = Consts.DATABLOCK_STOREHINT_EXTERN
                elif hint[10:] == "intern":
                    storeHint = Consts.DATABLOCK_STOREHINT_INTERN
                # No else. It is not vital to get the right storage hint

        try:
            if isinstance(content, str):
                content = BOM_UTF8 + content.encode("utf-8")

            self.wikiDocument.getWikiData().storeDataBlock(unifName, content,
                    storeHint=storeHint)

        except TypeError:
            traceback.print_exc()
            return  # TODO Report error
Пример #47
0
    stimuliParser.add_argument('--ch4', type=PortPin, required=False, help='I/O Register bit to map CH4 to.')
    stimuliParser.add_argument('-s', '--threshold', type=int, default=128, required=False, help='Logic level treshold value')
    stimuliParser.add_argument('--clkFreq', type=int, required=True, help="Simulated AVR uC clock frequency in Hz")

    actionMap = {
                    "info": info,
                    "csv": csv,
                    "plot": plot,
                    "json": json,
                    "vcd": vcd,
                    "stimuli": stimuli
                }

    args = parser.parse_args()

    try:
        with args.infile as f:
            scopeData = wfm.parseRigolWFM(f, args.forgiving)
        if isinstance(args.outfile, TextIOWrapper):
            outputFile = args.outfile
        else:
            outputFile = TextIOWrapper(args.outfile, encoding="ascii")
        actionMap[args.action](args, scopeData, outputFile)
        outputFile.close()
    except wfm.FormatError as e:
        print("Format does not follow the known file format. Try the --forgiving option.", file=sys.stderr)
        print("If you'd like to help development, please report this error:\n", file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit()

Пример #48
-1
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files.

    The descriptor is wrapped in a ``GreenFileDescriptorIO`` (``self.fileio``)
    which in turn is wrapped in one of the :mod:`io` buffered classes
    (``self.io``); the file methods below delegate to ``self.io``.

    .. seealso:: :func:`gevent.os.make_nonblocking`
    """
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        :param mode: A single access character, optionally combined with
            ``'b'`` (ignored) and/or ``'U'`` (wrap the stream in a
            :class:`io.TextIOWrapper`). ``None`` or ``''`` means ``'rb'``.
        :param bufsize: Buffer size handed to the buffered stream. Negative
            values select :attr:`default_bufsize`; for ``'r'`` and ``'w'``
            a value of 0 becomes 1 and a value of 1 becomes
            :attr:`default_bufsize`.
        :param close: Passed to the descriptor wrapper as ``closefd``.
        :raises TypeError: If the fileno is not an integer.
        :raises ValueError: If *mode* does not reduce to exactly one character.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')
        if len(mode) != 1:
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # The two supported modes share the same size normalisation.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            buffered_class = BufferedReader if mode == 'r' else BufferedWriter
        else:
            # QQQ: not used
            buffered_class = BufferedRandom
        self.io = buffered_class(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close the buffered stream and the descriptor wrapper (only once)."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        """Flush the buffered stream."""
        self.io.flush()

    def fileno(self):
        """Return the file descriptor reported by the buffered stream."""
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever the underlying stream's ``write`` returns."""
        # The count used to be discarded, so callers always received None.
        return self.io.write(data)

    def writelines(self, lines):
        """Write every item of *lines* to the stream."""
        self.io.writelines(lines)

    def read(self, size=-1):
        """Read up to *size* units (everything when negative)."""
        return self.io.read(size)

    def readline(self, size=-1):
        """Read a single line, limited to *size* when non-negative."""
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        """Read lines into a list; *sizehint* is forwarded to the stream."""
        return self.io.readlines(sizehint)

    def readable(self):
        """Return whether the underlying stream is readable."""
        return self.io.readable()

    def writable(self):
        """Return whether the underlying stream is writable."""
        return self.io.writable()

    def seek(self, *args, **kwargs):
        """Forward to the underlying stream's ``seek``."""
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        """Return whether the underlying stream is seekable."""
        return self.io.seekable()

    def tell(self):
        """Return the underlying stream's position."""
        return self.io.tell()

    def truncate(self, size=None):
        """Forward to the underlying stream's ``truncate``."""
        return self.io.truncate(size)

    def __iter__(self):
        # The io stream is its own line iterator.
        return self.io

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped *fobj*, if there is one.

        :raises AttributeError: If no object is wrapped (integer fileno,
            closed file, or ``__init__`` has not run) or it lacks *name*.
        """
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        # Read the instance dict directly: ``self._fobj`` would re-enter
        # __getattr__ (and recurse without bound) while the attribute is unset.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
Пример #49
-1
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instances of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; likewise, removing
         that flag will cause this object to no longer be cooperative.
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        :keyword bool close: Passed to the descriptor wrapper as ``closefd``.
        :raises TypeError: If the fileno is not an integer.
        :raises ValueError: If *mode* is not a read or a write mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')

        if mode not in ('r', 'w'):
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # The test is an exact membership test: the previous
            # ``len(mode) != 1 and mode not in 'rw'`` was a substring test, so
            # 'rw', '' and 'a' were not rejected here.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        # mode is exactly 'r' or 'w' at this point.
        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        else:
            self.io = BufferedWriter(self.fileio, bufsize)

        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close the buffered stream and the descriptor wrapper (only once)."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        """Flush the buffered stream."""
        self.io.flush()

    def fileno(self):
        """Return the file descriptor reported by the buffered stream."""
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever the underlying stream's ``write`` returns."""
        # The count used to be discarded, so callers always received None.
        return self.io.write(data)

    def writelines(self, lines):
        """Write every item of *lines* to the stream."""
        self.io.writelines(lines)

    def read(self, size=-1):
        """Read up to *size* units (everything when negative)."""
        return self.io.read(size)

    def readline(self, size=-1):
        """Read a single line, limited to *size* when non-negative."""
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        """Read lines into a list; *sizehint* is forwarded to the stream."""
        return self.io.readlines(sizehint)

    def readable(self):
        """Return whether the underlying stream is readable."""
        return self.io.readable()

    def writable(self):
        """Return whether the underlying stream is writable."""
        return self.io.writable()

    def seek(self, *args, **kwargs):
        """Forward to the underlying stream's ``seek``."""
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        """Return whether the underlying stream is seekable."""
        return self.io.seekable()

    def tell(self):
        """Return the underlying stream's position."""
        return self.io.tell()

    def truncate(self, size=None):
        """Forward to the underlying stream's ``truncate``."""
        return self.io.truncate(size)

    def __iter__(self):
        # The io stream is its own line iterator.
        return self.io

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped *fobj*, if there is one.

        :raises AttributeError: If no object is wrapped (integer fileno,
            closed file, or ``__init__`` has not run) or it lacks *name*.
        """
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        # Read the instance dict directly: ``self._fobj`` would re-enter
        # __getattr__ (and recurse without bound) while the attribute is unset.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
Пример #50
-1
class FileObjectPosix:
    """
    A file-like object built on a ``GreenFileDescriptorIO``.

    The descriptor wrapper (``self.fileio``) is wrapped in one of the
    :mod:`io` buffered classes (``self.io``); the file methods below
    delegate to ``self.io``.
    """
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object with a
            ``fileno()`` method.
        :param mode: A single access character, optionally combined with
            ``'b'`` (ignored) and/or ``'U'`` (wrap the stream in a
            :class:`io.TextIOWrapper`). ``None`` or ``''`` means ``'rb'``.
        :param bufsize: Buffer size handed to the buffered stream. Negative
            values select :attr:`default_bufsize`; for ``'r'`` and ``'w'``
            a value of 0 becomes 1 and a value of 1 becomes
            :attr:`default_bufsize`.
        :param close: Passed to the descriptor wrapper as ``closefd``.
        :raises TypeError: If the fileno is not an integer.
        :raises ValueError: If *mode* does not reduce to exactly one character.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')
        if len(mode) != 1:
            # Validate with a real exception: an ``assert`` is stripped when
            # Python runs with -O, letting invalid modes through. ValueError
            # matches what the builtin ``open`` raises on Python 3.
            raise ValueError('mode can only be [rb, rU, wb]')

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # The two supported modes share the same size normalisation.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            buffered_class = BufferedReader if mode == 'r' else BufferedWriter
        else:
            # QQQ: not used
            buffered_class = BufferedRandom
        self.io = buffered_class(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close the buffered stream and the descriptor wrapper (only once)."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        """Flush the buffered stream."""
        self.io.flush()

    def fileno(self):
        """Return the file descriptor reported by the buffered stream."""
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever the underlying stream's ``write`` returns."""
        # The count used to be discarded, so callers always received None.
        return self.io.write(data)

    def writelines(self, list):
        """Write every item of *list* to the stream."""
        # The parameter shadows the builtin, but it is part of the public
        # signature (keyword callers), so the name is kept.
        self.io.writelines(list)

    def read(self, size=-1):
        """Read up to *size* units (everything when negative)."""
        return self.io.read(size)

    def readline(self, size=-1):
        """Read a single line, limited to *size* when non-negative."""
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        """Read lines into a list; *sizehint* is forwarded to the stream."""
        return self.io.readlines(sizehint)

    def __iter__(self):
        # The io stream is its own line iterator.
        return self.io
Пример #51
-1
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files.

    The descriptor is wrapped in a ``GreenFileDescriptorIO`` (``self.fileio``)
    which in turn is wrapped in one of the :mod:`io` buffered classes
    (``self.io``); the file methods below delegate to ``self.io``.

    .. seealso:: :func:`gevent.os.make_nonblocking`
    """
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        :param mode: A single access character, optionally combined with
            ``'b'`` (ignored) and/or ``'U'`` (wrap the stream in a
            :class:`io.TextIOWrapper`). ``None`` or ``''`` means ``'rb'``.
        :param bufsize: Buffer size handed to the buffered stream. Negative
            values select :attr:`default_bufsize`; for ``'r'`` and ``'w'``
            a value of 0 becomes 1 and a value of 1 becomes
            :attr:`default_bufsize`.
        :param close: Passed to the descriptor wrapper as ``closefd``.
        :raises TypeError: If the fileno is not an integer.
        :raises ValueError: If *mode* does not reduce to exactly one character.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')
        if len(mode) != 1:
            # Validate with a real exception: an ``assert`` is stripped when
            # Python runs with -O, letting invalid modes through. ValueError
            # matches what the builtin ``open`` raises on Python 3.
            raise ValueError('mode can only be [rb, rU, wb]')

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # The two supported modes share the same size normalisation.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            buffered_class = BufferedReader if mode == 'r' else BufferedWriter
        else:
            # QQQ: not used
            buffered_class = BufferedRandom
        self.io = buffered_class(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close the buffered stream and the descriptor wrapper (only once)."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        """Flush the buffered stream."""
        self.io.flush()

    def fileno(self):
        """Return the file descriptor reported by the buffered stream."""
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever the underlying stream's ``write`` returns."""
        # The count used to be discarded, so callers always received None.
        return self.io.write(data)

    def writelines(self, lines):
        """Write every item of *lines* to the stream."""
        self.io.writelines(lines)

    def read(self, size=-1):
        """Read up to *size* units (everything when negative)."""
        return self.io.read(size)

    def readline(self, size=-1):
        """Read a single line, limited to *size* when non-negative."""
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        """Read lines into a list; *sizehint* is forwarded to the stream."""
        return self.io.readlines(sizehint)

    def seek(self, *args, **kwargs):
        """Forward to the underlying stream's ``seek``."""
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        """Return whether the underlying stream is seekable."""
        return self.io.seekable()

    def tell(self):
        """Return the underlying stream's position."""
        return self.io.tell()

    def truncate(self, size=None):
        """Forward to the underlying stream's ``truncate``."""
        return self.io.truncate(size)

    def __iter__(self):
        # The io stream is its own line iterator.
        return self.io

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped *fobj*, if there is one.

        :raises AttributeError: If no object is wrapped (integer fileno,
            closed file, or ``__init__`` has not run) or it lacks *name*.
        """
        # Read the instance dict directly: ``self._fobj`` would re-enter
        # __getattr__ (and recurse without bound) while the attribute is unset.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. caution::
         This object is most effective wrapping files that can be used appropriately
         with :func:`select.select` such as sockets and pipes.

         In general, on most platforms, operations on regular files
         (e.g., ``open('/etc/hosts')``) are considered non-blocking
         already, even though they can take some time to complete as
         data is copied to the kernel and flushed to disk (this time
         is relatively bounded compared to sockets or pipes, though).
         A :func:`~os.read` or :func:`~os.write` call on such a file
         will still effectively block for some small period of time.
         Therefore, wrapping this class around a regular file is
         unlikely to make IO gevent-friendly: reading or writing large
         amounts of data could still block the event loop.

         If you'll be working with regular files and doing IO in large
         chunks, you may consider using
         :class:`~gevent.fileobject.FileObjectThread` or
         :func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this
         concern.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instances of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; however, removing
         that flag will cause this object to no longer be cooperative.

    .. versionchanged:: 1.1
       Now uses the :mod:`io` package internally. Under Python 2, previously
       used the undocumented class :class:`socket._fileobject`. This provides
       better file-like semantics (and portability to Python 3).
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        :keyword bool close: Passed to the descriptor wrapper as ``closefd``.
        :raises TypeError: If the fileno is not an integer.
        :raises ValueError: If *mode* is not a read or a write mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')

        if mode not in ('r', 'w'):
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # The test is an exact membership test: the previous
            # ``len(mode) != 1 and mode not in 'rw'`` was a substring test, so
            # 'rw', '' and 'a' were not rejected here.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        # mode is exactly 'r' or 'w' at this point.
        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        else:
            self.io = BufferedWriter(self.fileio, bufsize)

        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close the buffered stream and the descriptor wrapper (only once)."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        """Flush the buffered stream."""
        self.io.flush()

    def fileno(self):
        """Return the file descriptor reported by the buffered stream."""
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever the underlying stream's ``write`` returns."""
        # The count used to be discarded, so callers always received None.
        return self.io.write(data)

    def writelines(self, lines):
        """Write every item of *lines* to the stream."""
        self.io.writelines(lines)

    def read(self, size=-1):
        """Read up to *size* units (everything when negative)."""
        return self.io.read(size)

    def readline(self, size=-1):
        """Read a single line, limited to *size* when non-negative."""
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        """Read lines into a list; *sizehint* is forwarded to the stream."""
        return self.io.readlines(sizehint)

    def readable(self):
        """
        Return whether the underlying stream is readable.

        .. versionadded:: 1.1b2
        """
        return self.io.readable()

    def writable(self):
        """
        Return whether the underlying stream is writable.

        .. versionadded:: 1.1b2
        """
        return self.io.writable()

    def seek(self, *args, **kwargs):
        """Forward to the underlying stream's ``seek``."""
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        """Return whether the underlying stream is seekable."""
        return self.io.seekable()

    def tell(self):
        """Return the underlying stream's position."""
        return self.io.tell()

    def truncate(self, size=None):
        """Forward to the underlying stream's ``truncate``."""
        return self.io.truncate(size)

    def __iter__(self):
        # The io stream is its own line iterator.
        return self.io

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped *fobj*, if there is one.

        :raises AttributeError: If no object is wrapped (integer fileno,
            closed file, or ``__init__`` has not run) or it lacks *name*.
        """
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        # Read the instance dict directly: ``self._fobj`` would re-enter
        # __getattr__ (and recurse without bound) while the attribute is unset.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)