Example #1
 def _do_execute_direct(self, code):
     shell = builtins.__xonsh_shell__
     env = builtins.__xonsh_env__
     enc = env.get('XONSH_ENCODING')
     out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                encoding=enc, newline='\n')
     err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                encoding=enc, newline='\n')
     try:
         with redirect_stdout(out), redirect_stderr(err), \
              swap(builtins, '__xonsh_stdout_uncaptured__', out), \
              swap(builtins, '__xonsh_stderr_uncaptured__', err), \
              env.swap({'XONSH_STORE_STDOUT': False}):
             shell.default(code)
         interrupted = False
     except KeyboardInterrupt:
         interrupted = True
     output, error = '', ''
     if out.tell() > 0:
         out.seek(0)
         output = out.read()
     if err.tell() > 0:
         err.seek(0)
         error = err.read()
     out.close()
     err.close()
     return output, error, interrupted
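
The pattern above relies on SpooledTemporaryFile's core behavior: data is buffered in memory until max_size is exceeded, at which point it transparently rolls over to a real temporary file. A minimal sketch (standard library only; the sizes are illustrative):

from tempfile import SpooledTemporaryFile

with SpooledTemporaryFile(max_size=16, mode='w+t', encoding='utf-8') as buf:
    buf.write('short')    # still within max_size, held in memory
    buf.write('x' * 32)   # exceeds max_size, triggers rollover to disk
    buf.seek(0)
    print(buf.read())     # reads back transparently either way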
Example #2
    def do_execute(self, code, silent, store_history=True, user_expressions=None,
                   allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {'status': 'ok', 'execution_count': self.execution_count,
                    'payload': [], 'user_expressions': {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get('XONSH_ENCODING')
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode='w+t',
                                   encoding=enc, newline='\n')
        try:
            with redirect_stdout(out), redirect_stderr(err), \
                 swap(builtins, '__xonsh_stdout_uncaptured__', out), \
                 swap(builtins, '__xonsh_stderr_uncaptured__', err), \
                 env.swap({'XONSH_STORE_STDOUT': False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks('stdout', out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks('stderr', err.read())
            if hasattr(builtins, '_') and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks('stdout', pformat(builtins._))
                builtins._ = None
            if len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks('stdout', hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {'status': 'abort', 'execution_count': self.execution_count}

        rtn = 0 if len(hist) == 0 else hist.rtns[-1]
        if 0 < rtn:
            message = {'status': 'error', 'execution_count': self.execution_count,
                       'ename': '', 'evalue': str(rtn), 'traceback': []}
        else:
            message = {'status': 'ok', 'execution_count': self.execution_count,
                       'payload': [], 'user_expressions': {}}
        return message
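
A self-contained sketch of the capture pattern this do_execute uses: contextlib.redirect_stdout points sys.stdout at a text-mode spooled file, and the output is read back after the block (the 1 MB spool limit is an illustrative stand-in for MAX_SIZE):

from contextlib import redirect_stdout
from tempfile import SpooledTemporaryFile

out = SpooledTemporaryFile(max_size=1 << 20, mode='w+t', newline='\n')
with redirect_stdout(out):
    print('captured')
out.seek(0)
print(out.read())  # -> captured
out.close()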
Example #3
    def do_execute(self, code, silent, store_history=True, user_expressions=None, allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {"status": "ok", "execution_count": self.execution_count, "payload": [], "user_expressions": {}}
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get("XONSH_ENCODING")
        out = SpooledTemporaryFile(max_size=MAX_SIZE, mode="w+t", encoding=enc, newline="\n")
        err = SpooledTemporaryFile(max_size=MAX_SIZE, mode="w+t", encoding=enc, newline="\n")
        try:
            with redirect_stdout(out), redirect_stderr(err), swap(builtins, "__xonsh_stdout_uncaptured__", out), swap(
                builtins, "__xonsh_stderr_uncaptured__", err
            ), env.swap({"XONSH_STORE_STDOUT": False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks("stdout", out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks("stderr", err.read())
            if hasattr(builtins, "_") and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks("stdout", pformat(builtins._))
                builtins._ = None
            if len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks("stdout", hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {"status": "abort", "execution_count": self.execution_count}

        rtn = 0 if len(hist) == 0 else hist.rtns[-1]
        if 0 < rtn:
            message = {
                "status": "error",
                "execution_count": self.execution_count,
                "ename": "",
                "evalue": str(rtn),
                "traceback": [],
            }
        else:
            message = {"status": "ok", "execution_count": self.execution_count, "payload": [], "user_expressions": {}}
        return message
Example #4
    def graph(self):
        if self.code == '':
            return None

        # Text mode so the PlantUML source can be written as str.
        stdin = SpooledTemporaryFile(mode='w+t')
        stdout = SpooledTemporaryFile()

        stdin.write('@startuml\n')
        stdin.write(self.code)
        stdin.write('@enduml\n')

        stdin.seek(0)

        args = [
           self.java,
           '-jar',
           self.jar,
           '-p',
           '-tdot',
        ]

        p = Popen(args, stdin=stdin, stdout=stdout)

        if p.wait() != 0:
            return None

        stdout.seek(0)
        graph = stdout.read().decode('utf-8')  # binary spool yields bytes
        return graph_from_dot_data(graph)
Example #5
    def run_command(self, command, stdin=None, env=None):
        """
        Launch a shell command line.

        :param command: Command line to launch
        :type command: str
        :param stdin: Standard input of command
        :type stdin: file
        :param env: Environment variable used in command
        :type env: dict
        :return: Standard output and standard error of command
        :rtype: tuple(file, file)
        """
        cmd = shlex.split(command)
        stdout = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        stderr = SpooledTemporaryFile(max_size=settings.TMP_FILE_MAX_SIZE,
                                      dir=settings.TMP_DIR)
        full_env = self.env.copy()
        full_env.update(env or {})
        try:
            process = Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr,
                            env=full_env)
            process.wait()
            if process.poll():
                stderr.seek(0)
                raise exceptions.CommandConnectorError(
                    "Error running: {}\n{}".format(command, stderr.read()))
            stdout.seek(0)
            stderr.seek(0)
            return stdout, stderr
        except OSError as err:
            raise exceptions.CommandConnectorError(
                "Error running: {}\n{}".format(command, str(err)))
Example #6
class InputStream(object):
    """
    FCGI_STDIN or FCGI_DATA stream.
    Uses temporary file to store received data once max_mem bytes
    have been received.
    """
    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def feed(self, data):
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')
        if not data:  # EOF mark
            self._file.seek(0)
            self._eof_received.set()
        else:
            self._file.write(data)

    def __iter__(self):
        self._eof_received.wait()
        return iter(self._file)

    def read(self, size=-1):
        self._eof_received.wait()
        return self._file.read(size)

    def readlines(self, sizehint=0):
        self._eof_received.wait()
        return self._file.readlines(sizehint)

    @property
    def eof_received(self):
        return self._eof_received.is_set()
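
A hypothetical driver for the InputStream above: a producer thread feeds chunks, and an empty feed() acts as the EOF mark that unblocks readers.

from threading import Thread

stream = InputStream(max_mem=1024)

def producer():
    stream.feed(b'chunk one\n')
    stream.feed(b'chunk two\n')
    stream.feed(b'')  # empty data is the EOF mark; sets the event

Thread(target=producer).start()
print(stream.read())  # blocks until EOF, then returns both chunks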
Example #7
File: we.py Project: FomkaV/wifi-arsenal
	def _shell_command(self, cmd):
		"""Shell out a subprocess and return what it writes to stdout as a string"""
		in_mem_file = SpooledTemporaryFile(max_size=2048, mode="r+")
		check_call(cmd, shell=True, stdout=in_mem_file)
		in_mem_file.seek(0)
		stdout = in_mem_file.read()
		in_mem_file.close()
		return stdout
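
One caveat worth a sketch: subprocess needs a real file descriptor for stdout, and SpooledTemporaryFile.fileno() forces an immediate rollover, so despite its name in_mem_file always ends up backed by a real temporary file.

from subprocess import check_call
from tempfile import SpooledTemporaryFile

buf = SpooledTemporaryFile(max_size=2048, mode='r+')
check_call('echo hello', shell=True, stdout=buf)  # stdout=... calls buf.fileno()
buf.seek(0)
print(buf.read())  # -> hello
buf.close()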
Example #8
def decode_bytes(data):
    # Buffer the raw byte values through a temporary file, then disassemble.
    temp = SpooledTemporaryFile(mode='w+b')
    temp.write(bytes(data))
    temp.seek(0)  # rewind before reading back
    decoded = Decode(0, temp.read(), Decode64Bits)
    temp.close()
    return decoded
Example #9
def image(type, spec=' ', ext='png'):

    # Parameters for `suml`.
    import suml.common
    import optparse
    options = optparse.Values(({
        'scruffy': True,
        'png': ext == 'png',
        'svg': ext == 'svg' or ext == 'pdf',
        'font': os.getenv('SCRUFFY_FONT', suml.common.defaultScruffyFont()),
        'shadow': False,
    }))

    from tempfile import SpooledTemporaryFile
    fout = SpooledTemporaryFile()

    # Execute Scruffy `suml`.
    if type == 'class':
        suml.yuml2dot.transform(spec, fout, options)
    elif type == 'sequence':
        suml.suml2pic.transform(spec, fout, options)
    else:
        return HTTPError(404, 'Unhandled diagram type.')

    # Retrieve the data generated.
    fout.seek(0)
    data = fout.read()
    fout.close()

    # Convert SVG to PDF?
    if ext == 'pdf':
        # Load SVG file.
        doc = xml.dom.expatbuilder.parseString(data)

        # Convert to a RLG drawing
        svg_renderer = svglib.svglib.SvgRenderer()
        svg_renderer.render(doc.documentElement)
        drawing = svg_renderer.finish()

        # Generate PDF.
        data = reportlab.graphics.renderPDF.drawToString(drawing)

    # Serve the generated image.
    if ext == 'png':
        response.content_type = 'image/png'
    elif ext == 'svg':
        response.content_type = 'image/svg+xml'
    elif ext == 'pdf':
        response.content_type = 'application/pdf'
    else:
        return HTTPError(500, 'Unhandled extension type.')
    return data
Example #10
class StartFinish(object):
    def __init__(self):
        pass

    def start(self, args):
        self.outFile = SpooledTemporaryFile()
        self.errFile = SpooledTemporaryFile()
        self.cmdline = list2cmdline(args)
        print('starting: ' + self.cmdline)
        self.process = Popen(args,
            stderr=self.errFile, stdout=self.outFile, universal_newlines=False)
        self.process_start = time()

    def finish(self, timeout):
        print('finishing "' + self.cmdline + '"')
        kill_time = self.process_start + timeout
        self.process_end = time()
        while self.process.poll() is None:
            self.process_end = time()
            if self.process_end < kill_time:
                sleep(0.1)
            else:
                self.process.kill()
                raise Exception('timeout "' + self.cmdline + '"')
        # read temp streams from start
        self.outFile.seek(0)
        self.errFile.seek(0)
        self.out = self.outFile.read()
        self.err = self.errFile.read()
        self.outFile.close()
        self.errFile.close()
        print('out:', self.out)
        print('err:', self.err)
        print('returncode:', self.process.returncode)
        print('elapsed:', self.process_end - self.process_start)
        return self.process.returncode
Example #11
 def csv_fn(pydata):
     csvfile = SpooledTemporaryFile(mode='w+t', newline='')  # text mode for csv
     csvw = csv.writer(csvfile)
     fieldnames=[]
     for h in pydata:
         for k in pydata[h]:
             if k not in fieldnames:
                 fieldnames.append(k)
     if showheader:
         csvw.writerow(['system_name'] + fieldnames)
     for system_name in pydata:
         csvw.writerow([system_name] + [pydata[system_name].get(k, None) for k in fieldnames])
     csvfile.seek(0)
     results=csvfile.read()
     csvfile.close()
     return results
Example #12
File: util.py Project: certik/lab-dev
def run(args, timeout = 10):
    '''
    Run a command with a timeout after which it will be forcibly
    killed.
    '''
    out_file = SpooledTemporaryFile()
    p = Popen(args, shell=True, stdout=out_file, stderr=STDOUT)
    wait_remaining_sec = timeout
    while p.poll() is None and wait_remaining_sec > 0:
        sleep(1)
        wait_remaining_sec -= 1
    if wait_remaining_sec <= 0:
        kill_proc(p.pid)
        r = -9
    else:
        r = p.returncode
    out_file.seek(0)
    out = out_file.read()
    return out, r
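
For comparison, a hedged alternative: subprocess.run (Python 3.5+) implements the same kill-on-timeout logic internally, so the polling loop and the temporary file can be dropped.

import subprocess

def run(args, timeout=10):
    '''Run a shell command, returning (output, returncode); -9 on timeout.'''
    try:
        proc = subprocess.run(args, shell=True, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, timeout=timeout)
        return proc.stdout, proc.returncode
    except subprocess.TimeoutExpired as exc:
        return exc.output or b'', -9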
Example #13
class LLDB:
  def _parseStackTrace(self, jibberish):
    not_jibberish = re.findall(r'\(lldb\) bt(.*)\(lldb\)', jibberish, re.DOTALL)
    if len(not_jibberish) != 0:
      return not_jibberish[0].replace(' frame ', '')
    else:
      return 'Stack Trace failed: ' + jibberish

  def _waitForResponse(self, wait=True):
    while wait:
      self.lldb_stdout.seek(self.last_position)
      for line in self.lldb_stdout:
        if line == '(lldb) ':
          self.last_position = self.lldb_stdout.tell()
          return True
      time.sleep(0.05)
    time.sleep(0.05)
    return True

  def getStackTrace(self, pid):
    lldb_commands = [ 'attach -p ' + pid + '\n', 'bt\n', 'quit\n', 'Y\n' ]
    self.lldb_stdout = SpooledTemporaryFile()
    self.last_position = 0
    lldb_process = subprocess.Popen(['lldb', '-x'], stdin=subprocess.PIPE, stdout=self.lldb_stdout, stderr=self.lldb_stdout)
    while lldb_process.poll() is None:
      for command in lldb_commands:
        if command == lldb_commands[-1]:
          lldb_commands = []
          if self._waitForResponse(False):
            # I have seen LLDB exit out from under us
            try:
              lldb_process.stdin.write(command)
            except Exception:
              pass
        elif self._waitForResponse():
          lldb_process.stdin.write(command)
    self.lldb_stdout.seek(0)
    stack_trace = self._parseStackTrace(self.lldb_stdout.read())
    self.lldb_stdout.close()
    return stack_trace
Example #14
class GDB:
  def _parseStackTrace(self, jibberish):
    not_jibberish = re.findall(r'\(gdb\) (#.*)\(gdb\)', jibberish, re.DOTALL)
    if len(not_jibberish) != 0:
      return not_jibberish[0]
    else:
      return 'Stack Trace failed: ' + jibberish

  def _waitForResponse(self, wait=True):
    while wait:
      self.gdb_stdout.seek(self.last_position)
      for line in self.gdb_stdout:
        if line == '(gdb) ':
          self.last_position = self.gdb_stdout.tell()
          return True
      time.sleep(0.05)
    time.sleep(0.05)
    return True

  def getStackTrace(self, pid):
    gdb_commands = [ 'attach ' + pid + '\n', 'set verbose off\n', 'thread\n', 'apply\n', 'all\n', 'bt\n', 'quit\n', 'y\n' ]
    self.gdb_stdout = SpooledTemporaryFile()
    self.last_position = 0
    gdb_process = subprocess.Popen(['gdb', '-nx'], stdin=subprocess.PIPE, stdout=self.gdb_stdout, stderr=self.gdb_stdout)
    while gdb_process.poll() is None:
      for command in gdb_commands:
        if command == gdb_commands[-1]:
          gdb_commands = []
        elif self._waitForResponse():
          # I have seen GDB exit out from under us
          try:
            gdb_process.stdin.write(command)
          except Exception:
            pass
    self.gdb_stdout.seek(0)
    stack_trace = self._parseStackTrace(self.gdb_stdout.read())
    self.gdb_stdout.close()
    return stack_trace
Example #15
def load_stix(stix):
    # Just save the pain and load it if the first character is a <
    log.debug("Loading STIX...")
    if sys.version_info < (3, 5):
        json_exception = ValueError
    else:
        json_exception = json.JSONDecodeError

    if isinstance(stix, STIXPackage):
        log.debug("Argument was already STIX package, ignoring.")
        # Oh cool we're ok
        # Who tried to load this? Honestly.
        return stix

    elif hasattr(stix, 'read'):
        log.debug("Argument has 'read' attribute, assuming file-like.")

        # It's a file!
        # But somehow, sometimes, reading it returns a bytes stream
        # and the loader dies on python 3.4.
        # Luckily, STIXPackage.from_json (which is mixbox.Entity.from_json)
        # will happily load a string.
        # So we're going to play dirty.

        data = stix.read()
        log.debug("Read file, type %s.", type(data))

        if isinstance(data, bytes):
            data = data.decode()
        try:
            log.debug("Attempting to load from JSON...")
            # Try loading from JSON
            stix_package = STIXPackage.from_json(data)
        except json_exception:
            log.debug("Attempting to load from XML...")
            # Ok then try loading from XML
            # Loop zoop
            # Read the STIX into an Etree
            stix.seek(0)
            stix_xml = etree.fromstring(stix.read())

            ns_map = stix_xml.nsmap

            # Remove any "marking" sections because the US-Cert is evil
            log.debug("Removing Marking elements...")
            pattern = ".//{http://data-marking.mitre.org/Marking-1}Marking"
            for element in stix_xml.findall(pattern):
                element.getparent().remove(element)

            log.debug("Writing cleaned XML to Tempfile")
            f = SpooledTemporaryFile(max_size=10 * 1024)
            f.write(etree.tostring(stix_xml))
            f.seek(0)

            # Pray to anything you hold sacred
            ns_objmap = map(lambda x: Namespace(ns_map[x], x), ns_map)

            for ns in ns_objmap:
                log.debug("Trying to add namespace %s", ns)
                try:
                    nsparser.STIX_NAMESPACES.add_namespace(ns)
                    mixbox.namespaces.register_namespace(ns)
                except Exception as ex:
                    log.exception(ex)
            try:
                log.debug("Attempting to read clean XML into STIX...")
                stix_package = STIXPackage.from_xml(f)
            except Exception as ex:
                # No joy. Quit.
                log.fatal("Could not :<")
                log.exception(ex)
                f.seek(0)
                with open("FAILED_STIX.xml", "wb") as g:
                    g.write(f.read())
                raise STIXLoadError("Could not load stix file. {}".format(ex))

        return stix_package

    elif isinstance(stix, (str, bytes)):
        if isinstance(stix, bytes):
            stix = stix.decode()

        # It's text, we'll need to use a temporary file

        # Create a temporary file to load from
        # Y'know I should probably give it a max size before jumping to disk
        # idk, 10MB? Sounds reasonable.
        f = SpooledTemporaryFile(max_size=10 * 1024 * 1024)

        # O I have idea for sneak
        # Will be very sneak
        # Write the (probably) XML to file
        f.write(stix.encode("utf-8"))

        # Reset the file so we can read from it
        f.seek(0)

        # AHA SNEAK DIDN'T EXPECT RECURSION DID YOU
        return load_stix(f)
Example #16
class Buffer(FileWrapper):
    """Class implementing buffering of input and output streams.
    
    This class uses a separate buffer file to hold the contents of the
    underlying file while they are being manipulated.  As data is read
    it is duplicated into the buffer, and data is written from the buffer
    back to the file on close.
    """

    def __init__(self, fileobj, mode=None, max_size_in_memory=1024 * 8):
        """Buffered file wrapper constructor."""
        self._buffer = SpooledTemporaryFile(max_size=max_size_in_memory)
        self._in_eof = False
        self._in_pos = 0
        self._was_truncated = False
        super(Buffer, self).__init__(fileobj, mode)

    def _buffer_size(self):
        try:
            return len(self._buffer.file.getvalue())
        except AttributeError:
            return os.fstat(self._buffer.fileno()).st_size

    def _buffer_chunks(self):
        chunk = self._buffer.read(16 * 1024)
        if chunk == "":
            yield chunk
        else:
            while chunk != "":
                yield chunk
                chunk = self._buffer.read(16 * 1024)

    def _write_out_buffer(self):
        if self._check_mode("r"):
            self._read_rest()
            if "a" in self.mode:
                self._buffer.seek(self._in_pos)
                self._fileobj.seek(self._in_pos)
            else:
                self._fileobj.seek(0)
                self._buffer.seek(0)
        else:
            self._buffer.seek(0)
        if self._was_truncated:
            self._fileobj.truncate(0)
            self._was_truncated = False
        for chunk in self._buffer_chunks():
            self._fileobj.write(chunk)

    def flush(self):
        # flush the buffer; we only write to the underlying file on close
        self._buffer.flush()

    def close(self):
        if self.closed:
            return
        if self._check_mode("w"):
            self._write_out_buffer()
        super(Buffer, self).close()
        self._buffer.close()

    def _read(self, sizehint=-1):
        #  First return any data available from the buffer.
        #  Since we don't flush the buffer after every write, certain OSes
        #  (guess which!) will happily read junk data from the end of it.
        #  Instead, we explicitly read only up to self._in_pos.
        if not self._in_eof:
            buffered_size = self._in_pos - self._buffer.tell()
            if sizehint >= 0:
                buffered_size = min(sizehint, buffered_size)
        else:
            buffered_size = sizehint
        data = self._buffer.read(buffered_size)
        if data != "":
            return data
        # Then look for more data in the underlying file
        if self._in_eof:
            return None
        data = self._fileobj.read(sizehint)
        self._in_pos += len(data)
        self._buffer.write(data)
        if sizehint < 0 or len(data) < sizehint:
            self._in_eof = True
            self._buffer.flush()
        return data

    def _write(self, data, flushing=False):
        self._buffer.write(data)
        if self._check_mode("r") and not self._in_eof:
            diff = self._buffer.tell() - self._in_pos
            if diff > 0:
                junk = self._fileobj.read(diff)
                self._in_pos += len(junk)
                if len(junk) < diff:
                    self._in_eof = True
                    self._buffer.flush()

    def _seek(self, offset, whence):
        # Ensure we've read enough to simply do the seek on the buffer
        if self._check_mode("r") and not self._in_eof:
            if whence == 0:
                if offset > self._in_pos:
                    self._read_rest()
            if whence == 1:
                if self._buffer.tell() + offset > self._in_pos:
                    self._read_rest()
            if whence == 2:
                self._read_rest()
        # Then just do it on the buffer...
        self._buffer.seek(offset, whence)

    def _tell(self):
        return self._buffer.tell()

    def _truncate(self, size):
        if self._check_mode("r") and not self._in_eof:
            if size > self._in_pos:
                self._read_rest()
        self._in_eof = True
        try:
            self._buffer.truncate(size)
        except TypeError:
            et, ev, tb = sys.exc_info()
            # SpooledTemporaryFile.truncate() doesn't accept a size parameter.
            try:
                self._buffer._file.truncate(size)
            except Exception:
                raise et, ev, tb
        # StringIO objects don't truncate to larger size correctly.
        if hasattr(self._buffer, "_file"):
            _file = self._buffer._file
            if hasattr(_file, "getvalue"):
                if len(_file.getvalue()) != size:
                    curpos = _file.tell()
                    _file.seek(0, 2)
                    _file.write("\x00" * (size - len(_file.getvalue())))
                    _file.seek(curpos)
        self._was_truncated = True

    def _read_rest(self):
        """Read the rest of the input stream."""
        if self._in_eof:
            return
        pos = self._buffer.tell()
        self._buffer.seek(0, 2)
        data = self._fileobj.read(self._bufsize)
        while data:
            self._in_pos += len(data)
            self._buffer.write(data)
            data = self._fileobj.read(self._bufsize)
        self._in_eof = True
        self._buffer.flush()
        self._buffer.seek(pos)
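
A side note on the TypeError fallback in _truncate above: it works around old interpreters where the stdlib's SpooledTemporaryFile.truncate() took no size argument (a size parameter was added in Python 3.3). A quick check on a modern interpreter:

from tempfile import SpooledTemporaryFile

buf = SpooledTemporaryFile(max_size=64)
buf.write(b'0123456789')
buf.truncate(4)    # accepts a size on Python 3.3+
buf.seek(0)
print(buf.read())  # -> b'0123'
buf.close()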
Example #17
class UploadFile:
    """
    An uploaded file included as part of the request data.
    """

    __slots__ = ("filename", "content_type", "file")

    spool_max_size = 1024 * 1024

    def __init__(self, filename: str, content_type: str = "") -> None:
        self.filename = filename
        self.content_type = content_type
        self.file = SpooledTemporaryFile(max_size=self.spool_max_size,
                                         mode="w+b")

    @property
    def in_memory(self) -> bool:
        rolled_to_disk = getattr(self.file, "_rolled", True)
        return not rolled_to_disk

    def write(self, data: bytes) -> None:
        self.file.write(data)

    async def awrite(self, data: bytes) -> None:
        if self.in_memory:
            self.write(data)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.write, data)

    def read(self, size: int = -1) -> bytes:
        return self.file.read(size)

    async def aread(self, size: int = -1) -> bytes:
        if self.in_memory:
            return self.read(size)
        return await asyncio.get_event_loop().run_in_executor(
            None, self.read, size)

    def seek(self, offset: int) -> None:
        self.file.seek(offset)

    async def aseek(self, offset: int) -> None:
        if self.in_memory:
            self.seek(offset)
        else:
            await asyncio.get_event_loop().run_in_executor(
                None, self.seek, offset)

    def close(self) -> None:
        self.file.close()

    async def aclose(self) -> None:
        if self.in_memory:
            self.close()
        else:
            await asyncio.get_event_loop().run_in_executor(None, self.close)

    def save(self, filepath: str) -> None:
        """
        Save file to disk.
        """
        # from shutil.COPY_BUFSIZE
        copy_bufsize = 1024 * 1024 if os.name == "nt" else 64 * 1024
        file_position = self.file.tell()
        self.file.seek(0, 0)
        try:
            with open(filepath, "wb+") as target_file:
                source_read = self.file.read
                target_write = target_file.write
                while True:
                    buf = source_read(copy_bufsize)
                    if not buf:
                        break
                    target_write(buf)
        finally:
            self.file.seek(file_position)

    async def asave(self, filepath: str) -> None:
        """
        Save file to disk, work in threading pool.
        """
        await asyncio.get_event_loop().run_in_executor(None, self.save,
                                                       filepath)
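
Hypothetical usage of the UploadFile wrapper above: small uploads stay in the in-memory spool, and save() copies the data out without losing the current file position (the path is illustrative).

upload = UploadFile('report.csv', content_type='text/csv')
upload.write(b'col_a,col_b\n1,2\n')
print(upload.in_memory)          # True while under spool_max_size (1 MB)
upload.save('/tmp/report.csv')   # copies the spooled bytes to disk
upload.close()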
Example #18
    def do_execute(self,
                   code,
                   silent,
                   store_history=True,
                   user_expressions=None,
                   allow_stdin=False):
        """Execute user code."""
        if len(code.strip()) == 0:
            return {
                'status': 'ok',
                'execution_count': self.execution_count,
                'payload': [],
                'user_expressions': {}
            }
        env = builtins.__xonsh_env__
        shell = builtins.__xonsh_shell__
        hist = builtins.__xonsh_history__
        enc = env.get('XONSH_ENCODING')
        out = SpooledTemporaryFile(max_size=MAX_SIZE,
                                   mode='w+t',
                                   encoding=enc,
                                   newline='\n')
        err = SpooledTemporaryFile(max_size=MAX_SIZE,
                                   mode='w+t',
                                   encoding=enc,
                                   newline='\n')
        try:
            with redirect_stdout(out), redirect_stderr(err), \
                 swap(builtins, '__xonsh_stdout_uncaptured__', out), \
                 swap(builtins, '__xonsh_stderr_uncaptured__', err), \
                 env.swap({'XONSH_STORE_STDOUT': False}):
                shell.default(code)
            interrupted = False
        except KeyboardInterrupt:
            interrupted = True

        if not silent:  # stdout response
            if out.tell() > 0:
                out.seek(0)
                self._respond_in_chunks('stdout', out.read())
            if err.tell() > 0:
                err.seek(0)
                self._respond_in_chunks('stderr', err.read())
            if hasattr(builtins, '_') and builtins._ is not None:
                # rely on sys.displayhook functionality
                self._respond_in_chunks('stdout', pformat(builtins._))
                builtins._ = None
            if len(hist) > 0 and out.tell() == 0 and err.tell() == 0:
                self._respond_in_chunks('stdout', hist.outs[-1])

        out.close()
        err.close()

        if interrupted:
            return {'status': 'abort', 'execution_count': self.execution_count}

        rtn = 0 if len(hist) == 0 else hist.rtns[-1]
        if 0 < rtn:
            message = {
                'status': 'error',
                'execution_count': self.execution_count,
                'ename': '',
                'evalue': str(rtn),
                'traceback': []
            }
        else:
            message = {
                'status': 'ok',
                'execution_count': self.execution_count,
                'payload': [],
                'user_expressions': {}
            }
        return message
Example #19
 def process(self, host, s):
     encrypted_options = b''
     while True:
         data = s.recv(8192)
         if data == b'END_EVENT':
             break
         else:
             encrypted_options = encrypted_options + data
     try:
         options = self.decrypt_options(encrypted_options)
     except:
         s.close()
         return
     soc = socket(AF_INET, SOCK_STREAM)
     soc.setblocking(0)
     soc.settimeout(int(self.config['Event_Receiver']['PCAP']['timeout']))
     db = self.core.get_db()
     sensors = db.sensors
     client_info = sensors.find_one( { "SERVER": options['sensor'] })
     client_cert = client_info['cert']
     cert_tmp = NamedTemporaryFile(mode='w+b', suffix='.pem')
     cert_tmp.write(client_cert)
     cert_tmp.flush()
     soc_ssl = ssl.wrap_socket(soc, ca_certs=cert_tmp.name, cert_reqs=ssl.CERT_REQUIRED, ssl_version=ssl.PROTOCOL_SSLv3)
     encrypted_options = self.encrypt_requests(self.config, options)
     try:
         soc_ssl.connect((client_info['IP'], int(client_info['sensor_port'])))
     except error:
         s.send(b'Sensor cannot be reached')
         s.close()
         return 
     soc_ssl.send(encrypted_options)
     soc_ssl.send('END_EVENT')
     tmp_file = SpooledTemporaryFile(mode='w+b')  # readable for the send loop below
     while True:
         try:
             data = soc_ssl.recv(8192)
         except ssl.SSLError:
             s.send(b'Request Timed Out')
             s.close()
             return
         if data == b'END_EVENT':
             break
         elif data != b'No Packets Found':
             tmp_file.write(data)
         else:
             s.send(b'No Packets Found')
             s.close()
             cert_tmp.close()
             return
     tmp_file.seek(0)
     pcap = tmp_file.read(8192)
     try:
         while (pcap):
             s.send(pcap)
             pcap = tmp_file.read(8192)
         s.send(b'END_EVENT')
     except error:
         s.close()
         return 
     s.close()
     cert_tmp.close()
     return
Example #20
def binary_benchmark(self, function):
    """

    :param function:
    :type function:
    """

    getattr(self, function)()

    # Strip
    stripper = os.environ[self.compiler_bin].replace("gcc", "strip")

    flags = []
    flags.append(self.filename[1])

    cmd = [stripper] + flags

    tfile = SpooledTemporaryFile()

    try:
        error_code = subprocess.check_call(cmd,
                                           stdout=tfile,
                                           stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        # check_call raises on a nonzero exit; catch it so the diagnostic
        # print below is reachable (mirrors compile_benchmark).
        error_code = exc.returncode

    if error_code != 0:
        tfile.seek(0)
        print(tfile.read())

    self.assertEqual(error_code, 0)

    # Disassemble
    disassembler = os.environ[self.compiler_bin].replace("gcc", "objdump")

    if self.dump_flags in os.environ:
        flags = os.environ[self.dump_flags].split(" ")
    else:
        flags = []

    flags.append("-w")
    flags.append("-d")
    flags.append(self.filename[1])

    cmd = [disassembler] + flags
    output = subprocess.check_output(cmd)
    if six.PY3:
        output = output.decode()
    output_lines = output.split("\n")

    asmline = ""
    asmline_bin = ""
    for idx, line in enumerate(output_lines[6:]):

        if line.strip() == "":
            continue

        line = line.split(":")[1].strip()
        line_bin = line.split("\t")[0]

        if idx % 2 == 1:
            asmline = line
            asmline_bin = line_bin
        else:
            print(self.filename[1])
            print("'%s' <-> '%s'" % (line, asmline))
            print("'%s' = '%s' ?" % (line_bin, asmline_bin))
            self.assertEqual(line_bin, asmline_bin)
Example #21
def compile_benchmark(self, function):
    """

    :param function:
    :type function:
    """

    getattr(self, function)()

    # Compile
    compiler = os.environ[self.compiler_bin]
    if self.compiler_flags in os.environ:
        flags = os.environ[self.compiler_flags].split(" ")
    else:
        flags = []

    flags.append("-c")
    flags.append(self.filename[0])
    flags.append("-o")
    flags.append(self.filename[0].replace(".s", ".o"))

    cmd = [compiler] + flags

    tfile = SpooledTemporaryFile()

    try:
        error_code = subprocess.check_call(cmd,
                                           stdout=tfile,
                                           stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        error_code = exc.returncode

    if error_code == 0:
        self.filename.append(self.filename[0].replace(".s", ".o"))
        return
    else:
        tfile.seek(0)
        print("Compiler output:")
        print(tfile.read())

    # Assemble directly (it might be needed for some situations, where
    # the gcc is not updated but GNU gas is updated

    # Assemble
    assembler = os.environ[self.compiler_bin].replace("gcc", "as")
    if self.asm_flags in os.environ:
        flags = os.environ[self.asm_flags].split(" ")
    else:
        flags = []

    flags.append(self.filename[0])
    flags.append("-o")
    flags.append(self.filename[0].replace(".s", ".o"))

    cmd = [assembler] + flags

    error_code = 0
    try:
        cmd_output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as exc:
        error_code = exc.returncode
        cmd_output = exc.output

    if six.PY3:
        cmd_output = cmd_output.decode()

    if error_code == 0:
        self.filename.append(self.filename[0].replace(".s", ".o"))
        return

    # Analyze the output to check if this fail is related to microprobe
    # or related to the tool-chain used
    if cmd_output.find("Error: unrecognized opcode:") > -1:
        # Compiler not new enough or check compilation options
        # DO NOT REPORT FAILURE but not PASS
        self.fail(msg="Update your toolchain: %s not supported\n%s" %
                  (self.instr_name, _process_as_output(cmd_output)))

    self.fail(msg="Error compiling using cmd: %s. Output: %s" %
              (cmd, _process_as_output(cmd_output)))
Example #22
class IncrementalWriter:
    """A streaming file writer for point clouds.

    Using the IncrementalWriter with spooled temporary files, which are
    only flushed to disk if they go above the given size, allows for
    streaming points to disk even when the header is unknown in advance.
    This allows some nice tricks, including splitting a point cloud into
    multiple files in a single pass, without memory issues.
    """

    # pylint:disable=too-few-public-methods

    def __init__(self,
                 filename: str,
                 header: PlyHeader,
                 utm: UTM_Coord = None,
                 buffer=2**22) -> None:
        """
        Args:
            filename: final place to save the file on disk.
            header: PlyHeader describing the vertex format and names.
            utm: optional UTM coordinate, written as a header comment.
            buffer (int): The number of bytes to hold in RAM before flushing
                the temporary file to disk.  Default 2**22 bytes (4MB) -
                enough for most objects but still practical to hold
                thousands of points in memory.  Set a smaller buffer for
                large forests.
        """
        self.filename = filename
        self.temp_storage = SpooledTemporaryFile(max_size=buffer, mode='w+b')
        self.count = 0
        self.utm = utm
        self.header = header
        # Always write in big-endian mode; only store type information
        self.binary = struct.Struct('>' + header.form_str[1:])

    def __call__(self, point) -> None:
        """Add a single point to this pointcloud, saving in binary format.

        Args:
            point (namedtuple): vertex attributes for the point, eg xyzrgba.
        """
        self.temp_storage.write(self.binary.pack(*point))
        self.count += 1

    def __del__(self):
        """Flush data to disk and clean up."""
        to_ply_types = {v: k for k, v in PLY_TYPES.items()}
        properties = [
            'property {t} {n}'.format(t=t, n=n) for t, n in zip((
                to_ply_types[p]
                for p in self.header.form_str[1:]), self.header.names)
        ]
        head = [
            'ply', 'format binary_big_endian 1.0',
            'element vertex {}'.format(self.count), '\n'.join(properties),
            'end_header'
        ]
        if self.utm is not None:
            head.insert(
                -1, 'comment UTM x y zone north ' +
                '{0.x} {0.y} {0.zone} {0.north}'.format(self.utm))
        if not os.path.isdir(os.path.dirname(self.filename)):
            os.makedirs(os.path.dirname(self.filename))
        with open(self.filename, 'wb') as f:
            f.write(('\n'.join(head) + '\n').encode('ascii'))
            self.temp_storage.seek(0)
            chunk = self.temp_storage.read(8192)
            while chunk:
                f.write(chunk)
                chunk = self.temp_storage.read(8192)
        self.temp_storage.close()
Example #23
class TestFile:
    CACHE_LIMIT = 0x80000  # data cache limit per file: 512KB
    XFER_BUF = 0x10000  # transfer buffer size: 64KB

    __slots__ = ("_file_name", "_fp")

    def __init__(self, file_name):
        # This is a naive fix for a larger path issue. This is a simple sanity
        # check and does not check if invalid characters are used. If an invalid
        # file name is used an exception will be raised when trying to write
        # that file to the file system.
        if "\\" in file_name:
            file_name = file_name.replace("\\", "/")
        if file_name.startswith("/"):
            file_name = file_name.lstrip("/")
        if file_name.endswith("."):
            file_name = file_name.rstrip(".")
        if not file_name \
                or ("/" in file_name and not file_name.rsplit("/", 1)[-1]) \
                or file_name.startswith("../"):
            raise TypeError("file_name is invalid %r" % (file_name, ))
        # name including path relative to wwwroot
        self._file_name = normpath(file_name)
        self._fp = SpooledTemporaryFile(dir=grz_tmp("storage"),
                                        max_size=self.CACHE_LIMIT,
                                        prefix="testfile_")

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        self.close()

    def clone(self):
        """Make a copy of the TestFile.

        Args:
            None

        Returns:
            TestFile: A copy of the TestFile instance
        """
        cloned = type(self)(self._file_name)
        self._fp.seek(0)
        copyfileobj(self._fp, cloned._fp, self.XFER_BUF)  # pylint: disable=protected-access
        return cloned

    def close(self):
        """Close the TestFile.

        Args:
            None

        Returns:
            None
        """
        self._fp.close()

    @property
    def data(self):
        """Get the data from the TestFile. Not recommenced for large files.

        Args:
            None

        Returns:
            bytes: Data from the TestFile
        """
        pos = self._fp.tell()
        self._fp.seek(0)
        data = self._fp.read()
        self._fp.seek(pos)
        return data

    def dump(self, path):
        """Write TestFile data to the filesystem.

        Args:
            path (str): Path to output data.

        Returns:
            None
        """
        target_path = pathjoin(path, dirname(self._file_name))
        if not isdir(target_path):
            makedirs(target_path)
        self._fp.seek(0)
        with open(pathjoin(path, self._file_name), "wb") as dst_fp:
            copyfileobj(self._fp, dst_fp, self.XFER_BUF)

    @property
    def file_name(self):
        return self._file_name

    @classmethod
    def from_data(cls, data, file_name, encoding="UTF-8"):
        """Create a TestFile and add it to the test case.

        Args:
            data (bytes or str): Data to write to file. If data is of type str
                                 encoding must be given.
            file_name (str): Name for the TestFile.
            encoding (str): Encoding to be used.

        Returns:
            TestFile: A TestFile.
        """
        t_file = cls(file_name)
        if data:
            if isinstance(data, bytes) or not encoding:
                t_file.write(data)
            else:
                t_file.write(data.encode(encoding))
        return t_file

    @classmethod
    def from_file(cls, input_file, file_name=None):
        """Create a TestFile from an existing file.

        Args:
            input_file (str): Path to existing file to use.
            file_name (str): Name for the TestFile. If file_name is not given
                             the name of the input_file will be used.

        Returns:
            TestFile: A TestFile.
        """
        if file_name is None:
            file_name = basename(input_file)
        t_file = cls(file_name)
        with open(input_file, "rb") as src_fp:
            copyfileobj(src_fp, t_file._fp, cls.XFER_BUF)  # pylint: disable=protected-access
        return t_file

    @property
    def size(self):
        """Size of the file in bytes.

        Args:
            None

        Returns:
            int: Size in bytes.
        """
        pos = self._fp.tell()
        self._fp.seek(0, SEEK_END)
        size = self._fp.tell()
        self._fp.seek(pos)
        return size

    def write(self, data):
        """Add data to the TestFile.

        Args:
            data (bytes): Data to add to the TestFile.

        Returns:
            None
        """
        self._fp.write(data)
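
Hypothetical usage of TestFile above (it assumes the project's grz_tmp temporary directory helper is importable; paths are illustrative):

with TestFile.from_data(b'<html></html>', 'pages/index.html') as t_file:
    print(t_file.file_name, t_file.size)  # pages/index.html 13
    t_file.dump('/tmp/wwwroot')           # writes /tmp/wwwroot/pages/index.html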
Example #24
class TPCTemporaryStorage(object):
    __slots__ = (
        '_queue',
        '_queue_contents',
    )

    def __init__(self):
        # start with a fresh in-memory buffer instead of reusing one that might
        # already be spooled to disk.
        # TODO: An alternate idea would be a temporary sqlite database.
        self._queue = SpooledTemporaryFile(max_size=10 * 1024 * 1024)
        # {oid: (startpos, endpos, prev_tid_int)}
        self._queue_contents = OidObjectMap()

    def reset(self):
        self._queue_contents.clear()
        self._queue.seek(0)

    def store_temp(self, oid_int, state, prev_tid_int=0):
        """
        Queue an object for caching.

        Typically, we can't actually cache the object yet, because its
        transaction ID is not yet chosen.
        """
        queue = self._queue
        queue.seek(0, 2)  # seek to end
        startpos = queue.tell()
        queue.write(state)
        endpos = queue.tell()
        self._queue_contents[oid_int] = (startpos, endpos, prev_tid_int)

    def __len__(self):
        # How many distinct OIDs have been stored?
        # This also lets us be used in a boolean context to see
        # if we've actually stored anything or are closed.
        return len(self._queue_contents)

    @property
    def stored_oids(self):
        return self._queue_contents

    @property
    def max_stored_oid(self):
        return OidObjectMap_max_key(self._queue_contents)

    def _read_temp_state(self, startpos, endpos):
        self._queue.seek(startpos)
        length = endpos - startpos
        state = self._queue.read(length)
        if len(state) != length:
            raise AssertionError("Queued cache data is truncated")
        return state

    def read_temp(self, oid_int):
        """
        Return the bytes for a previously stored temporary item.
        """
        startpos, endpos, _ = self._queue_contents[oid_int]
        return self._read_temp_state(startpos, endpos)

    def __iter__(self):
        return self.iter_for_oids(None)

    def iter_for_oids(self, oids):
        read_temp_state = self._read_temp_state
        for startpos, endpos, oid_int, prev_tid_int in self.items(oids):
            state = read_temp_state(startpos, endpos)
            yield state, oid_int, prev_tid_int

    def items(self, oids=None):
        # Order the queue by file position, which should help
        # if the file is large and needs to be read
        # sequentially from disk.
        items = [(startpos, endpos, oid_int, prev_tid_int)
                 for (oid_int,
                      (startpos, endpos,
                       prev_tid_int)) in iteroiditems(self._queue_contents)
                 if oids is None or oid_int in oids]
        items.sort()
        return items

    def close(self):
        if self._queue is not None:
            self._queue.close()
            self._queue = None
            self._queue_contents = ()  # Not None so len() keeps working

    def __repr__(self):
        approx_size = 0
        if self._queue is not None:
            self._queue.seek(0, 2)  # seek to end
            # The number of bytes we stored isn't necessarily the
            # number of bytes we send to the server, if there are duplicates
            approx_size = self._queue.tell()
        return "<%s at 0x%x count=%d bytes=%d>" % (
            type(self).__name__, id(self), len(self), approx_size)

    def __str__(self):
        base = repr(self)
        if not self:
            return base

        out = NStringIO()

        div = '=' * len(base)
        headings = ['OID', 'Length', 'Previous TID']
        col_width = (len(base) - 5) // len(headings)

        print(base, file=out)
        print(div, file=out)
        print('| ', file=out, end='')
        for heading in headings:
            print('%-*s' % (col_width, heading), end='', file=out)
            print('| ', end='', file=out)
        out.seek(out.tell() - 3)
        print('|', file=out)
        print(div, file=out)

        items = sorted(
            (oid_int, endpos - startpos, prev_tid_int)
            for (startpos, endpos, oid_int, prev_tid_int) in self.items())

        for oid_int, length, prev_tid_int in items:
            print('%*d  |%*d |%*d' % (col_width, oid_int, col_width, length,
                                      col_width, prev_tid_int),
                  file=out)

        return out.getvalue()
Example #25
class IncrementalWriter:
    """A streaming file writer for point clouds.

    Using the IncrementalWriter with spooled temporary files, which are
    only flushed to disk if they go above the given size, allows for
    streaming points to disk even when the header is unknown in advance.
    This allows some nice tricks, including splitting a point cloud into
    multiple files in a single pass, without memory issues.
    """
    # pylint:disable=too-few-public-methods

    def __init__(self, filename: str, header: PlyHeader,
                 utm: UTM_Coord=None, buffer=2**22) -> None:
        """
        Args:
            filename: final place to save the file on disk.
            header: PlyHeader describing the vertex format and names.
            utm: optional UTM coordinate, written as a header comment.
            buffer (int): The number of bytes to hold in RAM before flushing
                the temporary file to disk.  Default 2**22 bytes (4MB) -
                enough for most objects but still practical to hold
                thousands of points in memory.  Set a smaller buffer for
                large forests.
        """
        self.filename = filename
        self.temp_storage = SpooledTemporaryFile(max_size=buffer, mode='w+b')
        self.count = 0
        self.utm = utm
        self.header = header
        # Always write in big-endian mode; only store type information
        self.binary = struct.Struct('>' + header.form_str[1:])

    def __call__(self, point) -> None:
        """Add a single point to this pointcloud, saving in binary format.

        Args:
            point (namedtuple): vertex attributes for the point, eg xyzrgba.
        """
        self.temp_storage.write(self.binary.pack(*point))
        self.count += 1

    def __del__(self):
        """Flush data to disk and clean up."""
        to_ply_types = {v: k for k, v in PLY_TYPES.items()}
        properties = ['property {t} {n}'.format(t=t, n=n) for t, n in zip(
            (to_ply_types[p] for p in self.header.form_str[1:]),
            self.header.names)]
        head = ['ply',
                'format binary_big_endian 1.0',
                'element vertex {}'.format(self.count),
                '\n'.join(properties),
                'end_header']
        if self.utm is not None:
            head.insert(-1, 'comment UTM x y zone north ' +
                        '{0.x} {0.y} {0.zone} {0.north}'.format(self.utm))
        if not os.path.isdir(os.path.dirname(self.filename)):
            os.makedirs(os.path.dirname(self.filename))
        with open(self.filename, 'wb') as f:
            f.write(('\n'.join(head) + '\n').encode('ascii'))
            self.temp_storage.seek(0)
            chunk = self.temp_storage.read(8192)
            while chunk:
                f.write(chunk)
                chunk = self.temp_storage.read(8192)
        self.temp_storage.close()
Example #26
class Buffer(FileWrapper):
    """Class implementing buffereing of input and output streams.
    
    This class uses a separate buffer file to hold the contents of the
    underlying file while they are being manipulated.  As data is read
    it is duplicated into the buffer, and data is written from the buffer
    back to the file on close.
    """
    
    def __init__(self,fileobj,mode=None,max_size_in_memory=1024*8):
        """Buffered file wrapper constructor."""
        self._buffer = SpooledTemporaryFile(max_size=max_size_in_memory)
        self._in_eof = False
        self._in_pos = 0
        super(Buffer,self).__init__(fileobj,mode)

    def _buffer_chunks(self):
        chunk = self._buffer.read(16*1024)
        if chunk == "":
            yield chunk
        else:
            while chunk != "":
                yield chunk
                chunk = self._buffer.read(16*1024)

    def _write_out_buffer(self):
        if self._check_mode("r"):
            self._read_rest()
            if "a" in self.mode:
                self._buffer.seek(self._in_pos)
                self._fileobj.seek(self._in_pos)
            else:
                self._fileobj.seek(0)
                self._buffer.seek(0)
        else:
            self._buffer.seek(0)
        for chunk in self._buffer_chunks():
            self._fileobj.write(chunk)
 
    def flush(self):
        # flush the buffer; we only write to the underlying file on close
        self._buffer.flush()

    def close(self):
        if self.closed:
            return
        if self._check_mode("w"):
            self._write_out_buffer()
        super(Buffer,self).close()
        self._buffer.close()

    def _read(self,sizehint=-1):
        #  First return any data available from the buffer.
        #  Since we don't flush the buffer after every write, certain OSes
        #  (guess which!) will happily read junk data from the end of it.
        #  Instead, we explicitly read only up to self._in_pos.
        if not self._in_eof:
            buffered_size = self._in_pos - self._buffer.tell()
            if sizehint >= 0:
                buffered_size = min(sizehint,buffered_size)
        else:
            buffered_size = sizehint
        data = self._buffer.read(buffered_size)
        if data != "":
            return data
        # Then look for more data in the underlying file
        if self._in_eof:
            return None
        data = self._fileobj.read(sizehint)
        self._in_pos += len(data)
        self._buffer.write(data)
        if sizehint < 0 or len(data) < sizehint:
            self._in_eof = True
            self._buffer.flush()
        return data

    def _write(self,data,flushing=False):
        self._buffer.write(data)
        if self._check_mode("r") and not self._in_eof:
            diff = self._buffer.tell() - self._in_pos
            if diff > 0:
                junk = self._fileobj.read(diff)
                self._in_pos += len(junk)
                if len(junk) < diff:
                    self._in_eof = True
                    self._buffer.flush()
    
    def _seek(self,offset,whence):
        # Ensure we've read enough to simply do the seek on the buffer
        if self._check_mode("r") and not self._in_eof:
            if whence == 0:
                if offset > self._in_pos:
                    self._read_rest()
            if whence == 1:
                if self._buffer.tell() + offset > self._in_pos:
                    self._read_rest()
            if whence == 2:
                self._read_rest()
        # Then just do it on the buffer...
        self._buffer.seek(offset,whence)

    def _tell(self):
        return self._buffer.tell()
        
    def _read_rest(self):
        """Read the rest of the input stream."""
        if self._in_eof:
            return
        pos = self._buffer.tell()
        self._buffer.seek(0,2)
        data = self._fileobj.read(self._bufsize)
        while data:
            self._in_pos += len(data)
            self._buffer.write(data)
            data = self._fileobj.read(self._bufsize)
        self._in_eof = True 
        self._buffer.flush()
        self._buffer.seek(pos)
Example #27
print(len(manyCaptions), ' records found to be vectorized..')

with open('vocab_from_allEnglish_captions_and_some_texts.pickle', 'rb') as f:
    v = pickle.load(f)

print('length of vocabulary dictionary used is: ', len(v))

vectorizer = TfidfVectorizer(tokenizer=tokenizeText,
                             ngram_range=(1, 1),
                             vocabulary=v)

pipe = Pipeline([('cleanText', CleanTextTransformer()),
                 ('vectorizer', vectorizer)])

timer1.timer()

for item in manyCaptions:
    document = [item['captionsText']]
    captionsID = item['id']
    p = pipe.fit_transform(document)
    f = SpooledTemporaryFile(max_size=1000000000)
    scipyio.mmwrite(f, p[0])
    f.seek(0)  # rewind so read() returns the serialized matrix
    fileContent = f.read()
    f.close()
    with connection.cursor() as cursor:
        sql = """UPDATE captions SET tfidfVector=%s WHERE id=%s"""
        cursor.execute(sql, (fileContent, captionsID))
        connection.commit()
connection.close()
timer1.timer()