Example no. 1
    def dataentry(self):
        self.toaster.msgblockbegin("writing to temporary file")
        f_tmp = TemporaryFile()
        try:
            total_padding = self.data.write(f_tmp)
            # comparing the files will usually be different because blocks may
            # have been written back in a different order, so cheaply just compare
            # file sizes
            self.toaster.msg("comparing file sizes")
            self.stream.seek(0, 2)
            f_tmp.seek(0, 2)
            if self.stream.tell() != f_tmp.tell():
                self.toaster.msg("original size: %i" % self.stream.tell())
                self.toaster.msg("written size:  %i" % f_tmp.tell())
                self.toaster.msg("padding:       %i" % total_padding)
                if self.stream.tell() > f_tmp.tell() or self.stream.tell() + total_padding < f_tmp.tell():
                    f_tmp.seek(0)
                    f_debug = open("debug.cgf", "wb")
                    f_debug.write(f_tmp.read(-1))
                    f_debug.close()
                    raise Exception('write check failed: file sizes differ by more than padding')
        finally:
            f_tmp.close()
        self.toaster.msgblockend()

        # spell is finished: prevent recursing into the tree
        return False
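
The pattern above, distilled: write the data to a TemporaryFile, seek both streams to their ends, and compare tell() values instead of re-reading either file. Below is a minimal sketch written for this collection (the helper name and the bytes-only input are assumptions; the toaster/data objects above belong to the original project):

from tempfile import TemporaryFile

def sizes_match(stream, data, max_padding=0):
    # Write `data` (bytes) to a scratch file and compare sizes without
    # re-reading either file's contents.
    with TemporaryFile() as f_tmp:
        f_tmp.write(data)
        stream.seek(0, 2)   # end of the original stream
        f_tmp.seek(0, 2)    # end of the freshly written copy
        original, written = stream.tell(), f_tmp.tell()
        # Mirrors the check above: the written copy may exceed the
        # original by at most `max_padding` bytes.
        return original <= written <= original + max_padding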
Example no. 3
 def test_execute_non_blocking(self):
     self.sandbox = Sandbox()
     output = TemporaryFile(mode="w+b")
     start_time = perf_counter()
     self.sandbox.execute(command="sleep 0.2 ; echo foo",
                          stdin_fd=None,
                          stdout_fd=output,
                          stderr_fd=None,
                          blocking=False)
     self.assertLess(perf_counter() - start_time, 0.1)
     self.assertEqual(output.tell(), 0)  # Haven't yet printed anything
     sleep(0.3)
     self.assertEqual(output.tell(), 4)  # But "foo\n" has been printed by now
Example no. 4
class T(threading.Thread):
    _shutdown_msg = "shutdown"

    def __init__(self):
        threading.Thread.__init__(self)
        self._fd = TemporaryFile()
        self._comm_fd = TemporaryFile()
        self._run = False

    def get_file_handle(self):
        return self._fd

    def run(self):
        self._run = True
        while self._run:
            t1 = time.time()
            r, _, _ = select.select([self._fd.fileno(), self._comm_fd.fileno()], [], [])
            print "select time:", time.time()-t1
            for elem in r:
                if elem == self._fd.fileno():
                    s = self._fd.tell()
                    self._fd.seek(0, os.SEEK_END)  # to the end
                    e = self._fd.tell()
                    if s == e:  # nothing new
                        continue
                    self._fd.seek(-(e-s), os.SEEK_END)
                    diff = self._fd.read(e-s)
                    if True:
                        sys.stdout.write(diff)
                        sys.stdout.flush()

                # exit
                elif elem == self._comm_fd.fileno():
                    self._comm_fd.seek(0, os.SEEK_END)
                    if self._comm_fd.tell() == len(T._shutdown_msg):
                        self._run = False
        self._comm_fd.write(T._shutdown_msg)
        self._comm_fd.flush()

    def stop(self):
        self._comm_fd.seek(0, os.SEEK_END)
        if self._comm_fd.tell() != 0:
            return
        self._comm_fd.write(T._shutdown_msg)
        self._comm_fd.flush()
        while self._comm_fd.tell() != 2*len(T._shutdown_msg):
            self._comm_fd.seek(0, os.SEEK_END)

    def __del__(self, ):
        self._fd.close()
Example no. 5
    def books(self, oncard=False, end_session=True):
        """
        Return a list of ebooks on the device.
        @param oncard: If True return a list of ebooks on the storage card,
                            otherwise return list of ebooks in main memory of device

        @return: L{BookList}
        """
        root = "/Data/media/"
        tfile = TemporaryFile()
        if oncard:
            try:
                self.get_file("a:" + self.CACHE_XML, tfile, end_session=False)
                root = "a:/"
            except PathError:
                try:
                    self.get_file("b:" + self.CACHE_XML,
                                  tfile,
                                  end_session=False)
                    root = "b:/"
                except PathError:
                    pass
            if tfile.tell() == 0:
                tfile = None
        else:
            self.get_file(self.MEDIA_XML, tfile, end_session=False)
        bl = BookList(root=root, sfile=tfile)
        paths = bl.purge_corrupted_files()
        for path in paths:
            try:
                self.del_file(path, end_session=False)
            except PathError:  # In case this is a refetch without a sync in between
                continue
        return bl
Example no. 6
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1
        archive.close()
        file_length = buff.tell()

        retry = 3
        while retry:
            try:
                with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, debug=self.settings.debug):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception, e:
                Log.warning("could not push data to s3", cause=e)
                retry -= 1
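
The core of the upload path above, in isolation: GzipFile writes straight into the underlying TemporaryFile, so buff.tell() after closing the archive is the compressed length, and a seek(0) is needed before sending. A sketch written for this collection (the helper name is invented; the S3 bucket, Timer and Log objects above belong to the surrounding project and are omitted):

import gzip
from tempfile import TemporaryFile

def gzip_lines_to_tempfile(lines):
    # Gzip an iterable of text lines into a TemporaryFile and return
    # (file, compressed_size) with the file rewound to position 0.
    buff = TemporaryFile()
    with gzip.GzipFile(fileobj=buff, mode='wb') as archive:
        for line in lines:
            archive.write(line.encode('utf8'))
            archive.write(b'\n')
    size = buff.tell()   # bytes actually written by the compressor
    buff.seek(0)
    return buff, size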
Example no. 7
class CandidateUploadFile(BaseHandler):
    def initialize(self):
        self.tempfile = TemporaryFile()

    @tornado.web.authenticated
    @granted()
    @tornado.web.asynchronous
    def post(self):
        fp_url = self.get_argument("url")
        mime_type = self.get_argument("data[type]")
        size = int(self.get_argument("data[size]"))
        candidate_id = self.get_argument("id")
        self.candidate = self.db.query(Candidate).get(int(candidate_id))
        logging.info("type: %s, size: %r", mime_type, size)
        if mime_type == "image/jpeg" and size < MAX_UPLOAD_SIZE:
            http_client = tornado.httpclient.AsyncHTTPClient()
            request = tornado.httpclient.HTTPRequest(url=fp_url, streaming_callback=self.streaming_callback)
            http_client.fetch(request, self.on_download)
        else:
            self.finish(dict(status=0))

    def streaming_callback(self, data):
        self.tempfile.write(data)
        logging.info("This is the streaming_callback file tell function: %r", self.tempfile.tell())

    def on_download(self, response):
        img_path = os.path.join(os.path.dirname(__file__), "static/profiles/img/" + str(self.candidate.backup_id) + '.jpg')
        self.tempfile.seek(0)
        ptr = open(img_path, 'wb')
        ptr.write(self.tempfile.read())
        ptr.close()
        self.tempfile.close()
        self.finish(dict(src="/static/profiles/img/" + str(self.candidate.backup_id) + '.jpg', status=1))
Example no. 8
def output(x, y, value):
    global img, index

    pixel = index // 3

    x = x  #pixel % W
    y = y  #pixel // W

    rgbindex = index % 3

    pixelvalue = list(img.getpixel((x, y)))
    pixelvalue[rgbindex] = value
    pixelvalue = tuple(pixelvalue)

    colorcounter[str((x, y))] += 1
    #if rgbindex == 2:
    #    colorcounter[pixelvalue] += 1

    img.putpixel((x, y), pixelvalue)

    index += 1

    if index // 3 >= W * H:

        tmpfile = TemporaryFile()
        img.save(tmpfile, "png")
        compressed_imgbytes = tmpfile.tell()
        imgbytes = W * H * 3
        ratio = compressed_imgbytes / imgbytes
        print("Result .png compression ratio:", ratio)

        img = img.resize((W * SCALE, H * SCALE))
        img.save(IMAGEDIR + "/" + str(int(time() * 1000)) + ".png")
        exit(0)
Example no. 9
    def retr(self, path, retry=3):
        """
        Retrieve a file from the FTP server into a temporary file.
        :param path: path of the file on the server
        :param retry: number of attempts remaining
        :return: the temporary file, or None on failure
        """
        tmp = TemporaryFile()
        try:
            size = self.ftp.size(path)
            self.ftp.retrbinary('RETR ' + path, tmp.write)
        except Exception:
            if retry > 0:
                tmp.close()
                # Recursively retry the download (retry attempts remaining)
                return self.retr(path, retry - 1)
            else:
                tmp.close()
                return None

        # Compare the size of the temporary file with the size of the original file
        if size != tmp.tell():
            if retry > 0:
                tmp.close()
                return self.retr(path, retry - 1)
            else:
                tmp.close()
                return None
        return tmp
Example no. 11
    def read_file(self, data):
        temp_file = TemporaryFile(mode="w+b")

        if "content-length" in self.current_headers:
            temp_file.write(data.read(self.current_headers["content-length"]))
        else:
            bytes = data.readline()

            while not bytes[-2:] == "\r\n":
                temp_file.write(bytes)
                bytes = data.readline()
            
            temp_file.write(bytes.rstrip())
        
        filesize     = temp_file.tell()

        if filesize == 0:
            self.read_boundry(data)
            return

        key          = self.current_headers["content-disposition"]["name"]
        filename     = self.current_headers["content-disposition"].get("filename", "")
        content_type = self.current_headers["content-type"]
        
        if key not in self.files:
            self.files[key] = []

        temp_file.seek(0)
        self.files[key].append({"filename":filename, "filesize":filesize, "content-type":content_type, "data":temp_file})
        
        self.read_boundry(data)
Example no. 12
    def index_html (self, icon=0, preview=0, width=None, height=None,
                    REQUEST=None):
        """ Return the file with it's corresponding MIME-type """

        if REQUEST is not None:
            if self._if_modified_since_request_handler(REQUEST):
                self.ZCacheable_set(None)
                return ''

            if self._redirect_default_view_request_handler(icon, preview, REQUEST):
                return ''

        filename, content_type, icon, preview = self._get_file_to_serve(icon, preview)
        filename = self._get_fsname(filename)

        if _debug > 1: logger.info('serving %s, %s, %s, %s' %(filename, content_type, icon, preview))

        if filename:
            size = os.stat(filename)[6]
        else:
            filename = self._get_icon_file(broken=True)
            size = os.stat(filename)[6]
            content_type = 'image/gif'
            icon = 1

        if icon==0 and width is not None and height is not None:
            data = TemporaryFile() # hold resized image
            try:
                from PIL import Image
                im = Image.open(filename)
                if im.mode!='RGB':
                    im = im.convert('RGB')
                filter = Image.BICUBIC
                if hasattr(Image, 'ANTIALIAS'): # PIL 1.1.3
                    filter = Image.ANTIALIAS
                im = im.resize((int(width),int(height)), filter)
                im.save(data, 'JPEG', quality=85)
            except:
                data = open(filename, 'rb')
            else:
                data.seek(0,2)
                size = data.tell()
                data.seek(0)
                content_type = 'image/jpeg'
        else:
            data = open(filename, 'rb')

        if REQUEST is not None:
            last_mod = rfc1123_date(self._p_mtime)
            REQUEST.RESPONSE.setHeader('Last-Modified', last_mod)
            REQUEST.RESPONSE.setHeader('Content-Type', content_type)
            REQUEST.RESPONSE.setHeader('Content-Length', size)
            self.ZCacheable_set(None)
            return stream_iterator(data)

        try:
            return data.read()
        finally:
            data.close()
Example no. 13
def savefile(fd,fname,bfirmid,bclientid):
    # Encrypt each chunk from fd as it is read into a 
    # tmpfile which will be uploaded to Dropbox using
    # the given filename. 
    r = requests.get("%s/keyserv/key/%s/%s" % (app.config['KEYSERVER_URI'],bfirmid,bclientid)) 
    print "%s/keyserv/key/%s/%s" % (app.config['KEYSERVER_URI'],bfirmid,bclientid) 
    keyobj = r.json()
    encrkey = keyobj['key']
    print "Got key %s" % encrkey
    # Carve out a 32byte/256 bit key from the keyserver
    # but convert base64 back to binary first
    bkey = binascii.a2b_base64(encrkey)
    key = bkey[0:32]

    try:
        print "Starting encryption"
        # Setup our AES cipher
        iv = Random.new().read(AES.block_size)
        cipher = AES.new(key,AES.MODE_CFB,iv)        
        #cipher = XORCipher.new(key)        
        print "Cipher created using iv %s" % binascii.hexlify(iv)
    except:
        raise

    try:
        f = TemporaryFile()
        f.write(iv)         
   
        for chunk in chunkfd(fd,blocksize=4194304):
            f.write(cipher.encrypt(chunk))

        f.flush()
        f.seek(0,os.SEEK_END)
        fsize = f.tell()
        f.seek(0)

    except Exception as e:
        print e

    print "Getting ready for Dropbox upload"
    # Get a Dropbox uploader
    try:
        access_token = config.get('Credentials','access_token')
        dclient = DropboxClient(access_token)
        uploader = dclient.get_chunked_uploader(f,fsize)

        while uploader.offset < fsize:
            try:
                upload = uploader.upload_chunked()
            except Exception as e:
                print e
    except Exception as e:
        print e
    
    f.close()
    
    return uploader.finish(secure_filename("/%s_encr" % fname))    
Example no. 14
def thumb_img(img, width=None, height=None, name='thumb.jpg'):
    io = TemporaryFile()
    thumb = img.copy()
    thumb.thumbnail(image_width_height(img, width, height), Image.ANTIALIAS)
    thumb.save(io, format='JPEG', quality=100)
    del thumb
    size = io.tell()
    io.seek(0)
    return InMemoryUploadedFile(io, None, name, 'image/jpeg', size, None)
Example no. 15
 def exportContentInTempFile(self, context, obj_paths=None, filename=None):
     """ Export content to a zip file.
     """
     objects_list = self._createObjectList(context, obj_paths)
     tfile = TemporaryFile()
     self._getAllObjectsData(context, objects_list, tfile)
     size = tfile.tell()
     tfile.seek(0)
     return tfile, size
Example no. 16
class GmlZBuffer(io.IOBase):
    '''
    
    '''
    def __init__(self):
        self.buffer = StringIO()
        self.compressor = zlib.compressobj(6, ZIP_DEFLATED, -9)
        self.overflow = False
        self.crc = 0
        self.uncompressed_size = 0
        self.compressed_size = 0
        self.compressed_chunk_mark = 0
        self.mark = 0  # last flushed position in the buffer, read back in flush()

    def write(self, data):
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        self.uncompressed_size += len(data)
        compressed_data = self.compressor.compress(data)
        if not (compressed_data and len(compressed_data)):
            return 0
        compressed_size = len(compressed_data)
        self._prepare_write(compressed_size)
        self.buffer.write(compressed_data)
        self.compressed_size += compressed_size
        return compressed_size

    def _prepare_write(self, length):
        if self.overflow: return
        if self.buffer.tell() + length > _GMLZBUFFER_MAX_SIZE:
            data = self.buffer.getvalue()
            self.buffer.close()
            self.buffer = TemporaryFile()
            self.buffer.write(data)
            self.overflow = True

    def flush(self):
        prev_mark = self.mark
        compressed_data = self.compressor.flush(zlib.Z_FULL_FLUSH)
        compressed_size = len(compressed_data)
        self._prepare_write(compressed_size)
        self.buffer.write(compressed_data)
        self.compressed_size += compressed_size
        self.mark = self.buffer.tell()
        return prev_mark, self.mark
Example no. 17
 def test_execute_blocking(self):
     self.sandbox = Sandbox()
     output = TemporaryFile(mode="w+b")
     start_time = perf_counter()
     self.sandbox.execute(command="sleep 0.2 ; echo foo",
                          stdin_fd=None,
                          stdout_fd=output,
                          stderr_fd=None,
                          blocking=True)
     self.assertGreaterEqual(perf_counter() - start_time, 0.2)
     self.assertEqual(output.tell(), 4)  # Already printed "foo\n"
Example no. 18
    def handleExport(self, action):
        data, errors = self.extractData()

        if errors:
            self.status = self.formErrorsMessage
            return

        if data['paths']:
            objs = data['paths']
        else:
            objs = [self.context]

        message = model.MessageT1()
        message.action = data['action']
        message.recipient_id = data['recipients']
        if data['subject'] is not None:
            message.subjects = [data['subject']]
        if data['comment'] is not None:
            message.comments = [data['comment']]

        if data['directive']:
            directive = model.Directive(data['directive'])
            directive.priority = data['priority']
            directive.deadline = data['deadline']
            message.directive = directive

        journal_entry = _(u'label_exported_as_ech0147',
                          default=u'Exported as eCH-0147 message')
        for obj in objs:
            message.add_object(obj)
            journal_entry_factory(obj, 'eCH-0147 Export', journal_entry)

        header_dom = message.header().toDOM(element_name='eCH-0147T0:header')
        message_dom = message.binding().toDOM()

        tmpfile = TemporaryFile()
        with ZipFile(tmpfile, 'w', ZIP_DEFLATED, True) as zipfile:
            zipfile.writestr(
                'header.xml', header_dom.toprettyxml(encoding='UTF-8'))
            zipfile.writestr(
                'message.xml', message_dom.toprettyxml(encoding='UTF-8'))
            message.add_to_zip(zipfile)

        size = tmpfile.tell()

        response = self.request.response
        response.setHeader(
            "Content-Disposition",
            'inline; filename="message.zip"')
        response.setHeader("Content-type", "application/zip")
        response.setHeader("Content-Length", size)

        self.response_body = TempfileStreamIterator(tmpfile, size)
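
A reduced sketch of the download pattern this handler uses: build the ZIP directly inside a TemporaryFile, take tell() after the ZipFile is closed as the Content-Length, then rewind before streaming. The helper name is invented for this collection; TempfileStreamIterator and the Zope response object above are project-specific and left out:

from tempfile import TemporaryFile
from zipfile import ZipFile, ZIP_DEFLATED

def zip_payload(entries):
    # entries: iterable of (arcname, bytes_or_str) pairs.
    # Returns (tmpfile, size) with tmpfile rewound for streaming.
    tmpfile = TemporaryFile()
    with ZipFile(tmpfile, 'w', ZIP_DEFLATED) as zf:
        for arcname, payload in entries:
            zf.writestr(arcname, payload)
    size = tmpfile.tell()   # total bytes written, including the central directory
    tmpfile.seek(0)
    return tmpfile, size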
Example no. 19
def check_requirements():
    "Check requirements"
    output = TemporaryFile(mode='rwt')
    pos = 0
    for req in REQUIREMENTS:
        if 0 != call(['which', req], stdout=output, stderr=output):
            # get call output
            output.seek(pos)
            err = output.read()
            print "ERROR: %s is not satisfied (%s)" % (req, err)
            sys.exit(1)
        pos = output.tell()
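
The same capture idea without the incremental position bookkeeping: because call() blocks until the command finishes, the temporary file can simply be rewound and read afterwards. A small sketch written for this collection (the helper name and the example command are assumptions):

from subprocess import call
from tempfile import TemporaryFile

def command_output(cmd):
    # Run `cmd`, capturing stdout and stderr in a TemporaryFile, and
    # return (exit_code, captured_text).
    with TemporaryFile() as out:
        code = call(cmd, stdout=out, stderr=out)
        out.seek(0)   # the child advanced the shared file position while writing
        return code, out.read().decode()

# e.g. command_output(['which', 'bash'])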
Example no. 21
def test_dup_stdout(selenium):
    # Test redirecting stdout using low level os.dup operations.
    # This sort of redirection is used in pytest.
    import os
    import sys
    from tempfile import TemporaryFile

    tf = TemporaryFile(buffering=0)
    save_stdout = os.dup(sys.stdout.fileno())
    os.dup2(tf.fileno(), sys.stdout.fileno())
    print("hi!!")
    print("there...")
    assert tf.tell() == len("hi!!\nthere...\n")
    os.dup2(save_stdout, sys.stdout.fileno())
    print("not captured")
    os.dup2(tf.fileno(), sys.stdout.fileno())
    print("captured")
    assert tf.tell() == len("hi!!\nthere...\ncaptured\n")
    os.dup2(save_stdout, sys.stdout.fileno())
    os.close(save_stdout)
    tf.seek(0)
    assert tf.read(1000).decode() == "hi!!\nthere...\ncaptured\n"
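
The dup/dup2 dance in this test can be packaged as a context manager. Below is a sketch of the same technique written for this collection, not taken from the test suite; the helper name is invented:

import os
import sys
from contextlib import contextmanager
from tempfile import TemporaryFile

@contextmanager
def capture_stdout_fd():
    # Temporarily redirect the process-level stdout (fd 1) into an
    # unbuffered TemporaryFile and yield that file; tf.tell() is the
    # number of bytes captured so far.
    tf = TemporaryFile(buffering=0)
    saved = os.dup(sys.stdout.fileno())
    os.dup2(tf.fileno(), sys.stdout.fileno())
    try:
        yield tf
    finally:
        sys.stdout.flush()
        os.dup2(saved, sys.stdout.fileno())
        os.close(saved)
        tf.close()

# usage:
# with capture_stdout_fd() as tf:
#     print("hi!!"); sys.stdout.flush()
#     assert tf.tell() == len("hi!!\n")
#     tf.seek(0); captured = tf.read()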
Example no. 22
class BadBoyResponseFilter:
    def __init__(self, client):
        if not os.path.exists(BAD_CONTENT_TMP_DIR) :
            try:
                os.makedirs(BAD_CONTENT_TMP_DIR)
            except:
                pass
        
        self.fd_orig = TemporaryFile(mode='rw+b', dir=BAD_CONTENT_TMP_DIR)
        self.fd_filtered = TemporaryFile(mode='rw+b', dir=BAD_CONTENT_TMP_DIR)
        
        self.client = client

    def feed(self, data):
        self.fd_orig.write(data)

    def filter(self):
        pass
        
    def send_response(self):
        self.fd_orig.seek(0)
        self.filter()
        self.client.father.transport.write(self.client.bb_status)
        for key,value in self.client.bb_headers :
            if key.lower() == "content-length" :
                value = self.fd_filtered.tell()
            self.client.father.transport.write("%s: %s\r\n" % (key, value))

        self.client.father.transport.write("\r\n")
            
        file_len = self.fd_filtered.tell()
        self.fd_filtered.seek(0)

        while self.fd_filtered.tell() < file_len :
            self.client.father.transport.write(self.fd_filtered.read(1024))

        self.fd_orig.close()
        self.fd_filtered.close()
Example no. 24
def thumb_crop_img(img, width=None, height=None, name='thumb.jpg'):
    """
    Resize the image and crop it if the target proportions require it
    """
    io = TemporaryFile()
    thumb = img.copy()
    thumb.thumbnail(image_width_height(img, width=width), Image.ANTIALIAS)
    if thumb.size[1] >= height:
        thumb = thumb.crop((0, 0, width, height))
    else:
        thumb = thumb.resize((width, height), Image.ANTIALIAS)
    thumb.save(io, format='JPEG', quality=100)
    del thumb
    size = io.tell()
    io.seek(0)
    return InMemoryUploadedFile(io, None, name, 'image/jpeg', size, None)
Example no. 25
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1

        archive.close()
        file_length = buff.tell()

        retry = 3
        while retry:
            try:
                with Timer(
                        "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                        {"key": key, "file_length": file_length, "count": count},
                        verbose=self.settings.debug):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3", cause=e)

        if self.settings.public:
            storage.set_acl('public-read')
        return
Example no. 26
    def compress(self, path, writer, ext, mimetype):
        tf = TemporaryFile()

        writer(path, tf)

        tf.seek(0, io.SEEK_END)
        size = tf.tell()
        tf.seek(0)
        name = path.name if path.name else "top"
        resp = self.response_json(200, "OK")
        self.response = "200 OK"
        self.headers = [
            ("Content-length", str(size)),
            ("Content-type", mimetype),
            ("Content-disposition", "attachment; filename=" + name + ext),
        ]
        self.result = FileWrapper(tf)
Example no. 27
def test_dup_temp_file(selenium):
    # See https://github.com/emscripten-core/emscripten/issues/15012
    import os
    from tempfile import TemporaryFile

    tf = TemporaryFile(buffering=0)
    fd1 = os.dup(tf.fileno())
    os.dup2(tf.fileno(), 50)
    s = b"hello there!"
    tf.write(s)
    tf2 = open(fd1, "w+")
    assert tf2.tell() == len(s)
    # This next assertion actually demonstrates a bug in dup: the correct value
    # to return should be b"".
    assert os.read(fd1, 50) == b""
    tf2.seek(1)
    assert tf.tell() == 1
    assert tf.read(100) == b"ello there!"
Example no. 28
def retr(ftp, path, retry=3): # retrieve file via FTP and return
	tmp = TemporaryFile()
	try:
		size = ftp.size(path)
		ftp.retrbinary('RETR ' + path, tmp.write)
	except:
		if retry > 0:
			tmp.close()
			return retr(ftp, path, retry-1) # recursively call retr until retry is 0 (retry times)
		else:
			tmp.close()
			return None
	if size != tmp.tell(): # check if downloaded file size != file size on ftp
		if retry > 0:
			tmp.close()
			return retr(ftp, path, retry-1)
		else:
			tmp.close()
			return None
	return tmp
Example no. 29
class ContentReceiver:
    "Write-only file object used to receive data from FTP"

    def __init__(self,callback,*args):
        from tempfile import TemporaryFile
        self.data = TemporaryFile('w+b')
        self.callback = callback
        self.args = args

    def write(self,data):
        self.data.write(data)

    def close(self):
        size = self.data.tell()
        self.data.seek(0)
        args = self.args + (self.data, size)
        c = self.callback
        self.callback = None
        self.args = None
        c(*args)
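
A hypothetical usage sketch for the receiver above (the FTP host, filename and callback are made up for illustration): retrbinary feeds write(), and close() measures the download with tell(), rewinds, and hands the file plus its size to the callback:

from ftplib import FTP

def on_downloaded(path, fileobj, size):
    # Receives the rewound temporary file and its size in bytes.
    print("%s: %d bytes" % (path, size))

ftp = FTP('ftp.example.com')   # hypothetical server
ftp.login()
receiver = ContentReceiver(on_downloaded, 'remote.bin')
ftp.retrbinary('RETR remote.bin', receiver.write)
receiver.close()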
Example no. 32
File: s3.py Project: rv404674/TUID
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1

        archive.close()
        file_length = buff.tell()

        retry = 3
        while retry:
            try:
                with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, silent=not self.settings.debug):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3", cause=e)

        if self.settings.public:
            storage.set_acl('public-read')
        return
Example no. 33
 def run(self):
     while self.isRunning:
         tmp = TemporaryFile()
         try:
             masscan = 'masscan' if config.OS == 'windows' else 'sudo masscan'
             self.sub = Popen(f'{masscan} {" ".join(self.parameters)}',
                              stdout=tmp,
                              stderr=STDOUT,
                              shell=True)
             while self.sub.poll() is None:
                 where = tmp.tell()
                 lines = tmp.read()
                 if not lines:
                     sleep(0.01)
                     tmp.seek(where)
                 else:
                     lines = lines.decode().split('\n')
                     for line in lines:
                         if line.startswith('Discovered'):
                             self.analyse_queue.put(
                                 self._line_processing(line))
                             progress.increment('discover_total')
                         elif line.startswith('rate'):
                             state = re.findall(r'\d{1,2}.\d{1,2}%', line)
                             state = ''.join(state).replace('%', '')
                             self.sig.send_change_progressBar(
                                 int(float(state)))
                             self.sig.change_actual_action.emit(line)
             sys.__stdout__.write((tmp.read()).decode())
             sys.__stdout__.flush()
         except Exception as e:
             print(f'{__name__} - {e}')
         finally:
             self.sub.kill()
             progress.increment('action', value='discovering')
             self.sig.change_actual_action.emit('Discovering...')
             break
Example no. 35
    def index_html(self, icon=0, preview=0, width=None, height=None, as_attachment=False, REQUEST=None):
        """Return the file with it's corresponding MIME-type.

            @param as_attachment: if not None, return the file as an attachment
                                  using its title or id as a suggested filename;
                                  see RFC 2616 section 19.5.1 for more details
        """

        if REQUEST is not None:
            if self._if_modified_since_request_handler(REQUEST):
                self.ZCacheable_set(None)
                return ""

            if self._redirect_default_view_request_handler(icon, preview, REQUEST):
                return ""

        filename, content_type, icon, preview = self._get_file_to_serve(icon, preview)
        filename = self._get_fsname(filename)

        if _debug > 1:
            LOG(_SUBSYS, INFO, "serving %s, %s, %s, %s" % (filename, content_type, icon, preview))

        cant_read_exc = "Can't read: "
        if filename:
            try:
                size = os.stat(filename)[6]
            except:
                raise cant_read_exc, ("%s (%s)" % (self.id, filename))
        else:
            filename = join(package_home(globals()), "icons", "broken.gif")
            try:
                size = os.stat(filename)[6]
            except:
                raise cant_read_exc, ("%s (%s)" % (self.id, filename))
            content_type = "image/gif"
            icon = 1

        if icon == 0 and width is not None and height is not None:
            data = TemporaryFile()  # hold resized image
            try:
                from PIL import Image

                im = Image.open(filename)
                if im.mode != "RGB":
                    im = im.convert("RGB")
                filter = Image.BICUBIC
                if hasattr(Image, "ANTIALIAS"):  # PIL 1.1.3
                    filter = Image.ANTIALIAS
                im = im.resize((int(width), int(height)), filter)
                im.save(data, "JPEG", quality=85)
            except:
                data = open(filename, "rb")
            else:
                data.seek(0, 2)
                size = data.tell()
                data.seek(0)
                content_type = "image/jpeg"
        else:
            data = open(filename, "rb")

        close_data = 1
        try:
            if REQUEST is not None:
                last_mod = rfc1123_date(self._p_mtime)
                if as_attachment:
                    REQUEST.RESPONSE.setHeader(
                        "Content-Disposition", 'attachment; filename="%s"' % (self.title_or_id(),)
                    )
                REQUEST.RESPONSE.setHeader("Last-Modified", last_mod)
                REQUEST.RESPONSE.setHeader("Content-Type", content_type)
                REQUEST.RESPONSE.setHeader("Content-Length", size)
                self.ZCacheable_set(None)

                # Support Zope 2.7.1 IStreamIterator
                if IStreamIterator is not None:
                    close_data = 0
                    return stream_iterator(data)

                blocksize = 2 << 16
                while 1:
                    buffer = data.read(blocksize)
                    REQUEST.RESPONSE.write(buffer)
                    if len(buffer) < blocksize:
                        break
                return ""
            else:
                return data.read()
        finally:
            if close_data:
                data.close()
Example no. 36
class TestZipSubFile(unittest.TestCase):
    """ Tests ZipSubFile """
    def setUp(self):
        self.zipper = ZipFile(ZIP_TEMP_FILE)
        self.subfile = ZipSubFile(self.zipper, FILE_NAME)
        self.subfile.open()

        # create a file in memory for comparison
        self.compare = TemporaryFile(prefix='oletools-test-ZipSubFile-',
                                     suffix='.bin')
        self.compare.write(FILE_CONTENTS)
        self.compare.seek(0)  # re-position to start

        self.assertEqual(self.subfile.tell(), 0)
        self.assertEqual(self.compare.tell(), 0)
        if DEBUG:
            print('created comparison file {0!r} in memory'.format(
                self.compare.name))

    def tearDown(self):
        self.compare.close()
        self.subfile.close()
        self.zipper.close()
        if DEBUG:
            print('\nall files closed')

    def test_read(self):
        """ test reading """
        # read from start
        self.assertEqual(self.subfile.read(4), self.compare.read(4))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # read a bit more
        self.assertEqual(self.subfile.read(4), self.compare.read(4))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # create difference
        self.subfile.read(1)
        self.assertNotEqual(self.subfile.read(4), self.compare.read(4))
        self.compare.read(1)
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # read all the rest
        self.assertEqual(self.subfile.read(), self.compare.read())
        self.assertEqual(self.subfile.tell(), self.compare.tell())

    def test_seek_forward(self):
        """ test seeking forward """
        self.subfile.seek(10)
        self.compare.seek(10)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek 2 forward
        self.subfile.seek(2, os.SEEK_CUR)
        self.compare.seek(2, os.SEEK_CUR)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek backward (only implemented case: back to start)
        self.subfile.seek(-self.subfile.tell(), os.SEEK_CUR)
        self.compare.seek(-self.compare.tell(), os.SEEK_CUR)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek to end
        self.subfile.seek(0, os.SEEK_END)
        self.compare.seek(0, os.SEEK_END)
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek back to start
        self.subfile.seek(0)
        self.compare.seek(0)
        self.assertEqual(self.subfile.tell(), self.compare.tell())
        self.assertEqual(self.subfile.tell(), 0)

    def test_check_size(self):
        """ test usual size check: seek to end, tell, seek to start """
        # seek to end
        self.subfile.seek(0, os.SEEK_END)
        self.assertEqual(self.subfile.tell(), len(FILE_CONTENTS))

        # seek back to start
        self.subfile.seek(0)

        # read first few bytes
        self.assertEqual(self.subfile.read(10), FILE_CONTENTS[:10])

    def test_error_read(self):
        """ test correct behaviour if read beyond end (no exception) """
        self.subfile.seek(0, os.SEEK_END)
        self.compare.seek(0, os.SEEK_END)

        self.assertEqual(self.compare.read(10), self.subfile.read(10))
        self.assertEqual(self.compare.tell(), self.subfile.tell())

        self.subfile.seek(0)
        self.compare.seek(0)
        self.subfile.seek(len(FILE_CONTENTS) - 1)
        self.compare.seek(len(FILE_CONTENTS) - 1)
        self.assertEqual(self.compare.read(10), self.subfile.read(10))
        self.assertEqual(self.compare.tell(), self.subfile.tell())

    def test_error_seek(self):
        """ test correct behaviour if seek beyond end (no exception) """
        self.subfile.seek(len(FILE_CONTENTS) + 10)
        self.compare.seek(len(FILE_CONTENTS) + 10)
Example no. 37
def results_page(request, campaign_id=None):
    error_title = None
    error_message = None
    result_filter = None
    if campaign_id is not None:
        campaign = models.campaign.objects.get(id=campaign_id)
    else:
        campaign = None
    if request.method == 'GET' and 'view_output' in request.GET and \
            'view_all' not in request.GET and 'select_box' in request.GET:
        result_ids = map(int, dict(request.GET)['select_box'])
        results = models.result.objects.filter(
            id__in=result_ids).order_by('-id')
    else:
        if campaign_id is not None:
            campaign_items_ = campaign_items
            output_file = 'campaign-data/{}/gold_{}'.format(
                campaign_id, campaign.output_file)
            if exists(output_file) and guess_type(output_file)[0] is not None:
                output_file = True
            else:
                output_file = False
            results = campaign.result_set.all()
        else:
            campaign_items_ = None
            output_file = True
            results = models.result.objects.all()
        result_filter = filters.result(request.GET, queryset=results)
        if not result_filter.qs.count() and results.count():
            error_title = 'Filter Error'
            error_message = 'Filter did not return any results and was ignored.'
            result_filter = filters.result(None, queryset=results)
        else:
            results = result_filter.qs.order_by('-id')
    if request.method == 'GET' and 'view_output' in request.GET:
        if 'view_dut_output' in request.GET:
            if 'view_download' in request.GET:
                temp_file = TemporaryFile()
                start = perf_counter()
                with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                    for result in results:
                        with BytesIO(result.dut_output.encode('utf-8')) as \
                                byte_file:
                            info = TarInfo('{}_dut_output.txt'.format(
                                result.id))
                            info.size = len(result.dut_output)
                            archive.addfile(info, byte_file)
                print('archive created', round(perf_counter() - start, 2),
                      'seconds')
                response = FileResponse(
                    temp_file, content_type='application/x-compressed')
                response['Content-Disposition'] = \
                    'attachment; filename=dut_outputs.tar.gz'
                response['Content-Length'] = temp_file.tell()
                temp_file.seek(0)
                return response
            else:
                return render(
                    request, 'output.html', {
                        'campaign': campaign,
                        'campaign_items': campaign_items if campaign else None,
                        'navigation_items': navigation_items,
                        'results': results,
                        'type': 'dut_output'
                    })
        elif 'view_aux_output' in request.GET:
            if 'view_download' in request.GET:
                temp_file = TemporaryFile()
                start = perf_counter()
                with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                    for result in results:
                        with BytesIO(result.aux_output.encode('utf-8')) as \
                                byte_file:
                            info = TarInfo('{}_aux_output.txt'.format(
                                result.id))
                            info.size = len(result.aux_output)
                            archive.addfile(info, byte_file)
                print('archive created', round(perf_counter() - start, 2),
                      'seconds')
                response = FileResponse(
                    temp_file, content_type='application/x-compressed')
                response['Content-Disposition'] = \
                    'attachment; filename=aux_outputs.tar.gz'
                response['Content-Length'] = temp_file.tell()
                temp_file.seek(0)
                return response
            else:
                return render(
                    request, 'output.html', {
                        'campaign': campaign,
                        'campaign_items': campaign_items if campaign else None,
                        'navigation_items': navigation_items,
                        'results': results,
                        'type': 'aux_output'
                    })
        elif 'view_debugger_output' in request.GET:
            if 'view_download' in request.GET:
                temp_file = TemporaryFile()
                start = perf_counter()
                with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                    for result in results:
                        with BytesIO(
                                result.debugger_output.encode('utf-8')) as \
                                byte_file:
                            info = TarInfo('{}_debugger_output.txt'.format(
                                result.id))
                            info.size = len(result.debugger_output)
                            archive.addfile(info, byte_file)
                print('archive created', round(perf_counter() - start, 2),
                      'seconds')
                response = FileResponse(
                    temp_file, content_type='application/x-compressed')
                response['Content-Disposition'] = \
                    'attachment; filename=debugger_outputs.tar.gz'
                response['Content-Length'] = temp_file.tell()
                temp_file.seek(0)
                return response
            else:
                return render(
                    request, 'output.html', {
                        'campaign': campaign,
                        'campaign_items': campaign_items if campaign else None,
                        'navigation_items': navigation_items,
                        'results': results,
                        'type': 'debugger_output'
                    })
        elif 'view_output_file' in request.GET:
            result_ids = []
            for result in results:
                if exists('campaign-data/{}/results/{}/{}'.format(
                        result.campaign_id, result.id,
                        result.campaign.output_file)):
                    result_ids.append(result.id)
            results = models.result.objects.filter(
                id__in=result_ids).order_by('-id')
            if 'view_download' in request.GET:
                temp_file = TemporaryFile()
                start = perf_counter()
                with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                    for result in results:
                        archive.add(
                            'campaign-data/{}/results/{}/{}'.format(
                                result.campaign_id, result.id,
                                result.campaign.output_file),
                            '{}_{}'.format(result.id,
                                           result.campaign.output_file))
                print('archive created', round(perf_counter() - start, 2),
                      'seconds')
                response = FileResponse(
                    temp_file, content_type='application/x-compressed')
                response['Content-Disposition'] = \
                    'attachment; filename=output_files.tar.gz'
                response['Content-Length'] = temp_file.tell()
                temp_file.seek(0)
                return response
            else:
                return render(
                    request, 'output.html', {
                        'campaign': campaign,
                        'campaign_items': campaign_items if campaign else None,
                        'navigation_items': navigation_items,
                        'results': results,
                        'type': 'output_file'
                    })
        elif 'view_log_file' in request.GET:
            if 'view_download' in request.GET:
                temp_file = TemporaryFile()
                start = perf_counter()
                with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                    for result in results:
                        for log_file in result.campaign.log_files:
                            archive.add(
                                'campaign-data/{}/results/{}/{}'.format(
                                    result.campaign_id, result.id, log_file),
                                '{}_{}'.format(result.id, log_file))
                print('archive created', round(perf_counter() - start, 2),
                      'seconds')
                response = FileResponse(
                    temp_file, content_type='application/x-compressed')
                response['Content-Disposition'] = \
                    'attachment; filename=log_files.tar.gz'
                response['Content-Length'] = temp_file.tell()
                temp_file.seek(0)
                return response
            else:
                return render(
                    request, 'output.html', {
                        'campaign': campaign,
                        'campaign_items': campaign_items if campaign else None,
                        'navigation_items': navigation_items,
                        'results': results,
                        'type': 'log_file'
                    })
    elif request.method == 'POST':
        if 'new_outcome_category' in request.POST:
            results.values('outcome_category').update(
                outcome_category=request.POST['new_outcome_category'])
        elif 'new_outcome' in request.POST:
            results.values('outcome').update(
                outcome=request.POST['new_outcome'])
        elif 'delete' in request.POST and 'results[]' in request.POST:
            result_ids = [
                int(result_id) for result_id in dict(request.POST)['results[]']
            ]
            results_to_delete = models.result.objects.filter(id__in=result_ids)
            for result in results_to_delete:
                if exists('campaign-data/{}/results/{}'.format(
                        result.campaign_id, result.id)):
                    rmtree('campaign-data/{}/results/{}'.format(
                        result.campaign_id, result.id))
            results_to_delete.delete()
        elif 'delete_all' in request.POST:
            for result in results:
                if exists('campaign-data/{}/results/{}'.format(
                        result.campaign_id, result.id)):
                    rmtree('campaign-data/{}/results/{}'.format(
                        result.campaign_id, result.id))
            results.delete()
            if campaign_id:
                return redirect('/campaign/{}/results'.format(campaign_id))
            else:
                return redirect('/results')
    result_table = tables.results(results)
    RequestConfig(request, paginate={
        'per_page': table_length
    }).configure(result_table)
    return render(
        request, 'results.html', {
            'campaign': campaign,
            'campaign_items': campaign_items_,
            'error_message': error_message,
            'error_title': error_title,
            'filter': result_filter,
            'filter_tabs': True,
            'navigation_items': navigation_items,
            'output_file': output_file,
            'result_count': '{:,}'.format(results.count()),
            'result_table': result_table
        })
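
Each download branch above repeats the same archive-then-measure pattern. Here it is reduced to a standalone sketch written for this collection (the helper name is invented; the Django FileResponse plumbing is omitted): write a .tar.gz into a TemporaryFile, read tell() for the Content-Length header, then seek(0) before streaming.

import tarfile
from io import BytesIO
from tempfile import TemporaryFile

def tar_gz_texts(named_texts):
    # named_texts: iterable of (filename, str). Build a .tar.gz in a
    # TemporaryFile and return (tmpfile, size), rewound for streaming.
    tmp = TemporaryFile()
    with tarfile.open(fileobj=tmp, mode='w:gz') as archive:
        for name, text in named_texts:
            payload = text.encode('utf-8')
            info = tarfile.TarInfo(name)
            info.size = len(payload)    # size of the encoded bytes
            archive.addfile(info, BytesIO(payload))
    size = tmp.tell()   # Content-Length before rewinding
    tmp.seek(0)
    return tmp, size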
Example no. 38
class ZipNumClusterJob(MRJob):
    HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.CombineTextInputFormat'

    PARTITIONER = 'org.apache.hadoop.mapred.lib.TotalOrderPartitioner'

    INPUT_PROTOCOL = RawValueProtocol
    OUTPUT_PROTOCOL = RawValueProtocol
    INTERNAL_PROTOCOL = RawProtocol

    JOBCONF =  {'mapreduce.task.timeout': '9600000',
                'mapreduce.input.fileinputformat.split.maxsize': '50000000',
                'mapreduce.map.speculative': 'false',
                'mapreduce.reduce.speculative': 'false',
                'mapreduce.output.fileoutputformat.compress': 'false',
                'mapreduce.job.reduce.slowstart.completedmaps': '0.8',
                'mapreduce.job.jvm.numtasks': '-1'
               }

    def configure_options(self):
        """Custom command line options for indexing"""
        super(ZipNumClusterJob, self).configure_options()

        self.add_passthrough_option('--numlines', dest='numlines',
                                    type=int,
                                    default=3000,
                                    help='Number of lines per gzipped block')

        self.add_passthrough_option('--splitfile', dest='splitfile',
                                    help='Split file to use for CDX shard split')

        self.add_passthrough_option('--convert', dest='convert',
                                    action='store_true',
                                    default=False,
                                    help='Convert CDX through _convert_line() function')

        self.add_passthrough_option('--shards', dest='shards',
                                    type=int,
                                    help='Num ZipNum Shards to create, ' +
                                         '= num of entries in splits + 1 ' +
                                         '= num of reducers used')

    def jobconf(self):
        orig_jobconf = super(ZipNumClusterJob, self).jobconf()
        custom_jobconf = {'mapreduce.job.reduces': self.options.shards,
                          'mapreduce.totalorderpartitioner.path': self.options.splitfile}

        combined = combine_dicts(orig_jobconf, custom_jobconf)
        return combined

    def mapper_init(self):
        pass

    def mapper(self, _, line):
        line = line.split('\t')[-1]
        if not line.startswith(' CDX'):
            if self.options.convert:
                line = self._convert_line(line)
            yield line, ''

    def _convert_line(self, line):
        key, ts, url, length, offset, warc = line.split(' ')
        key = key.replace(')', ',)', 1)

        vals = {'o': offset, 's': length, 'w': warc, 'u': url}

        return key + ' ' + ts + ' ' + json.dumps(vals)

    def _get_prop(self, proplist):
        for p in proplist:
            res = os.environ.get(p)
            if res:
                return res

    def reducer_init(self):
        self.curr_lines = []
        self.curr_key = ''

        self.part_num = self._get_prop(['mapreduce_task_partition', 'mapred_task_partition'])
        assert(self.part_num)

        self.part_name = 'cdx-%05d.gz' % int(self.part_num)

        self.output_dir = self._get_prop(['mapreduce_output_fileoutputformat_outputdir',
                                          'mapred.output.dir',
                                          'mapred_work_output_dir'])

        assert(self.output_dir)
        self.gzip_temp = TemporaryFile(mode='w+b')

    def reducer(self, key, values):
        if key:
            self.curr_lines.append(key)

        for x in values:
            if x:
                self.curr_lines.append(x)

        if len(self.curr_lines) == 1:
            self.curr_key = ' '.join(key.split(' ', 2)[0:2])

        if len(self.curr_lines) >= self.options.numlines:
            yield '', self._write_part()

    def reducer_final(self):
        if len(self.curr_lines) > 0:
            yield '', self._write_part()

        self._do_upload()

    def _do_upload(self):
        self.gzip_temp.flush()
        #TODO: move to generalized put() function
        if self.output_dir.startswith('s3://'):
            import boto
            conn = boto.connect_s3()
            parts = urlparse.urlsplit(self.output_dir)

            bucket = conn.lookup(parts.netloc)

            cdxkey = bucket.new_key(parts.path + '/' + self.part_name)
            cdxkey.set_contents_from_file(self.gzip_temp, rewind=True)
        else:
            path = os.path.join(self.output_dir, self.part_name)

            self.gzip_temp.seek(0)

            with open(path, 'w+b') as target:
                shutil.copyfileobj(self.gzip_temp, target)

        self.gzip_temp.close()

    def _write_part(self):
        z = zlib.compressobj(6, zlib.DEFLATED, zlib.MAX_WBITS + 16)

        offset = self.gzip_temp.tell()

        buff = '\n'.join(self.curr_lines) + '\n'
        self.curr_lines = []

        buff = z.compress(buff)
        self.gzip_temp.write(buff)

        buff = z.flush()
        self.gzip_temp.write(buff)
        self.gzip_temp.flush()

        length = self.gzip_temp.tell() - offset

        partline = '{0}\t{1}\t{2}\t{3}'.format(self.curr_key, self.part_name, offset, length)

        return partline
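Each call to _write_part above appends a standalone gzip member to the temporary file and reports its (offset, length) in the summary line, so a reader can later fetch one block with a single ranged read. A self-contained sketch of that round trip (the sample lines are invented):

import zlib
from tempfile import TemporaryFile

def write_member(fobj, lines):
    # Append one standalone gzip member; return (offset, length) for the index.
    z = zlib.compressobj(6, zlib.DEFLATED, zlib.MAX_WBITS + 16)
    offset = fobj.tell()
    fobj.write(z.compress(('\n'.join(lines) + '\n').encode('utf-8')))
    fobj.write(z.flush())
    return offset, fobj.tell() - offset

tmp = TemporaryFile(mode='w+b')
blocks = [write_member(tmp, ['a 1', 'b 2']), write_member(tmp, ['c 3', 'd 4'])]

offset, length = blocks[1]          # pick the second block from the "index"
tmp.seek(offset)
print(zlib.decompress(tmp.read(length), zlib.MAX_WBITS + 16).decode('utf-8'))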
Exemplo n.º 39
0
files = []  # fill in with the paths of the JSON dimension files to check
dicts = []
for fil in files:
    dic = json.load(open(fil))
    dicts.append(dic)
    if 'dimensions' in dic:
        dim = dic['dimensions']
        # dim += np.random.randint(0,10000000000000,[len(dim)])  # just to check - should be 6-8x longer than dim
        if len(unique(diff(dim))) > 1:
            print('{fil} has at least {n:,}/{t:,} anomalies'
                  .format(fil=fil, n=len(unique(diff(dim))), t=len(dim)))
        else:
            print('{fil} is clean!!'.format(fil=fil))
        outfile = TemporaryFile()
        savez(outfile, dim=diff(dim))
        print('compressed file length = {l:,}/{raw:,} bytes'.format(l=outfile.tell(),raw=8*len(dim)))

"""
#pyfusion.config.set('global','localdatapath','local_data') 

# this request translates to a json file which is stored locally - see below for complete example
# need to disable all networks
xx=dev.acq.getdata([20160302,23],'W7X_L53_LP10_I',no_cache=1)
http://archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.190_DATASTREAM/5/Channel_5/scaled/_signal.json?from=1457536002136103981&upto=1457536069136103980

# complete example - assuming you have a cache under the working directory e.g.
# /home/bdb112/pyfusion/working/pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.190_DATASTREAM/5/Channel_5/scaled/_signal.json?from=1456930821345103981&upto=1456930888345103980
#
import pyfusion
pyfusion.LAST_DNS_TEST=-1
pyfusion.CACHE=1
"""
Exemplo n.º 40
0
def parseMultipart(fp, pdict, memfile_max=1024 * 1000, len_max=0):
    """
    Parse multipart content
    """

    # TODO: Do not store whole part contents in memory

    boundary = ''
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if not isBoundaryValid(boundary):
        raise ValueError(
            'Invalid boundary in multipart form: {0}'.format(boundary))

    nextpart = b'--' + boundary.encode()
    lastpart = b'--' + boundary.encode() + b'--'
    partdict = {}
    terminator = b''

    while terminator != lastpart:
        nbytes = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = parse_headers(fp, memfile_max)
            clength = headers.get('content-length')
            if clength is not None:
                try:
                    nbytes = int(clength)
                except ValueError:
                    pass
            if nbytes > 0:
                if len_max and nbytes > len_max:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b''
        # Read lines until end of part.
        part_fp = TemporaryFile(mode='w+b')
        while 1:
            line = fp.readline(memfile_max)

            if line == b'':
                terminator = lastpart  # End outer loop
                break

            if _is_termline(line, nextpart):
                terminator = nextpart
                break

            if _is_termline(line, lastpart):
                terminator = lastpart
                break

            part_fp.write(line)
            while not line.endswith(b"\n"):
                line = fp.readline(memfile_max)

                if line == b'':
                    break

                part_fp.write(line)

        # Done with part.
        if data is None:
            continue
        if nbytes < 0:
            last = pre_last = None

            # Strip final line terminator
            if part_fp.tell() >= 1:
                part_fp.seek(-1, os.SEEK_END)
                last = part_fp.read(1)

            if part_fp.tell() >= 2:
                part_fp.seek(-2, os.SEEK_END)
                pre_last = part_fp.read(1)

            trunc = 0
            if pre_last == b"\r" and last == b"\n":
                trunc = 2
            elif last == b"\n":
                trunc = 1

            if trunc > 0:
                part_fp.seek(-trunc, os.SEEK_END)
                part_fp.truncate()

        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' in params:
            name = params['name']
        else:
            continue

        part_fp.seek(0, os.SEEK_SET)

        part = {'fp': part_fp}
        if 'filename' in params:
            part['filename'] = params['filename']

        if name in partdict:
            partdict[name].append(part)
        else:
            partdict[name] = [part]

    return partdict
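The parser buffers each part body in a TemporaryFile and then trims the trailing line terminator that really belongs to the boundary line. A self-contained sketch of just that trimming step, with a made-up part body:

import os
from tempfile import TemporaryFile

part_fp = TemporaryFile(mode='w+b')
part_fp.write(b'field value\r\n')            # body as read from the stream, CRLF included

part_fp.seek(-1, os.SEEK_END)
last = part_fp.read(1)
part_fp.seek(-2, os.SEEK_END)
pre_last = part_fp.read(1)

trunc = 2 if (pre_last, last) == (b'\r', b'\n') else 1 if last == b'\n' else 0
if trunc:
    part_fp.seek(-trunc, os.SEEK_END)        # drop the terminator ...
    part_fp.truncate()                       # ... so only the payload remains

part_fp.seek(0)
print(part_fp.read())                        # b'field value'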
Exemplo n.º 41
0
class Agent:
  """
Each agent object contains its own sampled log data. The Agent class is responsible for
collecting and storing data. machine_id is used to identify the agent.

machine_id is supplied by the client class. This allows for multiple agents if desired
"""
  def __init__(self, arguments, machine_id):
    self.arguments = arguments
    self.my_uuid = machine_id
    self.track_process = ''

    # This log object is for stdout purposes
    self.log = TemporaryFile()
    self.log_position = 0

    # Discover if --recover is being used. If so, we need to obtain the
    # timestamp of the last entry in the outfile log... a little bulky
    # to do... and not a very good place to do it.
    if self.arguments.recover:
      if os.path.exists(self.arguments.outfile[-1]):
        memory_list = []
        history_file = open(self.arguments.outfile[-1], 'r')
        reader = csv.reader(history_file, delimiter=',', quotechar='|', escapechar='\\', quoting=csv.QUOTE_MINIMAL)

        # Get the last item in the list. Unfortunately, there is no way to do
        # this without reading the entire file. Luckily, most memory log files
        # are only single-digit megabytes.
        for row in reader:
          memory_list.append(row)
        history_file.close()
        last_entry = float(memory_list[-1][0]) + self.arguments.repeat_rate[-1]
        self.delta = (GetTime().now - last_entry)
      else:
        print 'Recovery options detected, but I could not find your previous memory log file.'
        sys.exit(1)
    else:
      self.delta = 0

    # Create the dictionary in which all sampled data will be stored.
    # NOTE: REQUEST dictionary items are instructions (arguments) we will
    # ask the server to provide (if we are running with --pbs).
    # Simply add them here. We _can not_ make the arguments match the
    # server exactly; doing so would cause every agent launched to behave
    # like a server... bad stuff

    # Example: we added repeat_rate (see the dictionary below). Now every
    # agent updates its repeat_rate according to what the user supplied
    # as an argument (--repeat_rate 0.02)
    self.agent_data = { self.my_uuid :
                        { 'HOSTNAME'  : socket.gethostname(),
                          'STDOUT'    : '',
                          'STACK'     : '',
                          'MEMORY'    : 0,
                          'TIMESTAMP' : GetTime().now - self.delta,
                          'REQUEST'   : { 'run'          : '',
                                          'pstack'       : '',
                                          'repeat_rate'  : '',
                                          'cwd'          : ''},
                          'STOP'      : False,
                          'TOTAL'     : 0,
                          'DEBUG_LOG' : ''
                        }
                      }

  # NOTE: This is the only function that should be called in this class
  def takeSample(self):
    if self.arguments.pstack:
      self.agent_data[self.my_uuid]['STACK'] = self._getStack()

    # Always do the following
    self.agent_data[self.my_uuid]['MEMORY'] = self._getMemory()
    self.agent_data[self.my_uuid]['STDOUT'] = self._getStdout()
    if self.arguments.recover:
      self.agent_data[self.my_uuid]['TIMESTAMP'] = GetTime().now - self.delta
    else:
      self.agent_data[self.my_uuid]['TIMESTAMP'] = GetTime().now

    # Return the data to whom ever asked for it
    return self.agent_data

  def _getStdout(self):
    self.log.seek(self.log_position)
    output = self.log.read()
    self.log_position = self.log.tell()
    sys.stdout.write(output)
    return output

  def _getMemory(self):
    tmp_pids = self._getPIDs()
    memory_usage = 0
    if tmp_pids != {}:
      for single_pid in tmp_pids.iteritems():
        memory_usage += int(single_pid[1][0])
      if memory_usage == 0:
        # Memory usage hit zero? Then assume the binary being tracked has exited, so let's begin doing the same.
        self.agent_data[self.my_uuid]['DEBUG_LOG'] = 'I found the total memory usage of all my processes hit 0. Stopping'
        self.agent_data[self.my_uuid]['STOP'] = True
        return 0
      return int(memory_usage)
    # No binary even detected? Let's assume it exited, so we should begin doing the same.
    self.agent_data[self.my_uuid]['STOP'] = True
    self.agent_data[self.my_uuid]['DEBUG_LOG'] = 'I found no processes running. Stopping'
    return 0

  def _getStack(self):
    if self._darwin() == True:
      stack_trace = LLDB()
    else:
      stack_trace = GDB()
    tmp_pids = self._getPIDs()
    if tmp_pids != {}:
      last_pid = sorted([x for x in tmp_pids.keys()])[-1]
      return stack_trace.getStackTrace(str(last_pid))
    else:
      return ''

  def _getPIDs(self):
    pid_list = {}

    # Determine the binary to sample and store it. Doing the findCommand is a little expensive.
    if self.track_process == '':
      self.track_process = self._findCommand(''.join(self.arguments.run))

    # A quick way to safely check for the availability of needed tools
    self._verifyCommand(['ps'])

    # If we are tracking a binary
    if self.arguments.run:
      command = [which('ps'), '-e', '-o', 'pid,rss,user,args']
      tmp_proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
      all_pids = tmp_proc.communicate()[0].split('\n')
      # Figure out what we are allowed to track (strip away mpiexec, processes not owned by us, etc)
      for single_pid in all_pids:
        if single_pid.find(self.track_process) != -1 and \
           single_pid.find(__file__) == -1 and \
           single_pid.find('mpirun') == -1 and \
           single_pid.find(os.getenv('USER')) != -1 and \
           single_pid.find('mpiexec') == -1:
          pid_list[int(single_pid.split()[0])] = []
          pid_list[int(single_pid.split()[0])].extend([single_pid.split()[1], single_pid.split()[3]])
    return pid_list

  def _verifyCommand(self, command_list):
    for command in command_list:
      if which(command) == None:
        print 'Command not found:', command
        sys.exit(1)

  # determine if we are running on a darwin kernel
  def _darwin(self):
    if platform.platform(0, 1).split('-')[:-1][0].find('Darwin') != -1:
      return True

  # Determine the command we are going to track
  # A few things are happening here; first we strip off any MPI commands
  # we then loop through the remaining items until we find a matching path
  # exp:  mpiexec -n 12 ../../../moose_test-opt -i simple_diffusion.i -r 6
  # would first strip off mpiexec, check for the presence of -n in our
  # current directory, then 12, then ../../../moose_test-opt   <- found. It would
  # stop and return the base name (moose_test-opt).
  def _findCommand(self, command):
    if command.find('mpiexec') == 0 or command.find('mpirun') == 0:
      for binary in command.split():
        if os.path.exists(binary):
          return os.path.split(binary)[1]
    elif os.path.exists(command.split()[0]):
      return os.path.split(command.split()[0])[1]
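_getStdout above returns only what has been appended to the temporary log since the previous sample by remembering the last tell() position. A stripped-down sketch of that bookkeeping:

from tempfile import TemporaryFile

log = TemporaryFile()
log_position = 0

def read_new_output():
    # Return only the bytes written since the previous call.
    global log_position
    log.seek(log_position)
    output = log.read()
    log_position = log.tell()
    return output

log.write(b'first chunk\n')
print(read_new_output())     # b'first chunk\n'
log.write(b'second chunk\n')
print(read_new_output())     # b'second chunk\n'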
Exemplo n.º 42
0
    def transfer_yaml():
        print(" * Transferring yml")
        upload_folder = os.path.join(app.root_path,
                                     app.config['UPLOAD_FOLDER'])
        if request.method == 'GET':
            tarfile_backend = TemporaryFile(mode='wb+')
            yamlfile = TemporaryFile(mode='wb+')
            tarball = tarfile.open(fileobj=tarfile_backend, mode='w')

            yamlfile.write(
                bytes(
                    export_challenges('export.yaml', 'export.d', upload_folder,
                                      tarball, False), "UTF-8"))

            tarinfo = tarfile.TarInfo('export.yaml')
            tarinfo.size = yamlfile.tell()
            yamlfile.seek(0)
            tarball.addfile(tarinfo, yamlfile)
            tarball.close()
            yamlfile.close()

            gzipfile_backend = TemporaryFile(mode='wb+')
            gzipfile = GzipFile(fileobj=gzipfile_backend, mode='wb')

            tarfile_backend.seek(0)
            shutil.copyfileobj(tarfile_backend, gzipfile)

            tarfile_backend.close()
            gzipfile.close()
            gzipfile_backend.seek(0)
            return send_file(gzipfile_backend,
                             as_attachment=True,
                             attachment_filename='export.tar.gz')

        if request.method == 'POST':
            if 'file' not in request.files:
                abort(400)

            file = request.files['file']

            readmode = 'r:gz'
            if file.filename.endswith('.tar'):
                readmode = 'r'
            if file.filename.endswith('.bz2'):
                readmode = 'r:bz2'

            tempdir = mkdtemp()
            try:
                archive = tarfile.open(fileobj=file.stream, mode=readmode)
                archive.extractall(path=tempdir)
                events = import_challenges(tempdir, upload_folder, move=True)
            except tarfile.TarError:
                print('b')
                abort(400)
            finally:
                shutil.rmtree(tempdir)

            return json.dumps(
                {'log': [{
                    'msg': e.msg,
                    'type': e.type
                } for e in events]})
Exemplo n.º 43
0
def parseMultipart(fp, pdict, memfile_max=1024 * 1000, len_max=0):
    """
    Parse multipart content
    """

    # TODO: Do not store whole part contents in memory

    boundary = ''
    if 'boundary' in pdict:
        boundary = pdict['boundary']
    if not isBoundaryValid(boundary):
        raise ValueError(
            'Invalid boundary in multipart form: {0}'.format(boundary))

    nextpart = b'--' + boundary.encode()
    lastpart = b'--' + boundary.encode() + b'--'
    partdict = {}
    terminator = b''

    while terminator != lastpart:
        nbytes = -1
        data = None
        if terminator:
            # At start of next part.  Read headers first.
            headers = parse_headers(fp, memfile_max)
            clength = headers.get('content-length')
            if clength is not None:
                try:
                    nbytes = int(clength)
                except ValueError:
                    pass
            if nbytes > 0:
                if len_max and nbytes > len_max:
                    raise ValueError('Maximum content length exceeded')
                data = fp.read(nbytes)
            else:
                data = b''
        # Read lines until end of part.
        part_fp = TemporaryFile(mode='w+b')
        while 1:
            line = fp.readline(memfile_max)

            if line == b'':
                terminator = lastpart  # End outer loop
                break

            if _is_termline(line, nextpart):
                terminator = nextpart
                break

            if _is_termline(line, lastpart):
                terminator = lastpart
                break

            part_fp.write(line)
            while not line.endswith(b"\n"):
                line = fp.readline(memfile_max)

                if line == b'':
                    break

                part_fp.write(line)

        # Done with part.
        if data is None:
            continue
        if nbytes < 0:
            last = pre_last = None

            # Strip final line terminator
            if part_fp.tell() >= 1:
                part_fp.seek(-1, os.SEEK_END)
                last = part_fp.read(1)

            if part_fp.tell() >= 2:
                part_fp.seek(-2, os.SEEK_END)
                pre_last = part_fp.read(1)

            trunc = 0
            if pre_last == b"\r" and last == b"\n":
                trunc = 2
            elif last == b"\n":
                trunc = 1

            if trunc > 0:
                part_fp.seek(-trunc, os.SEEK_END)
                part_fp.truncate()

        line = headers['content-disposition']
        if not line:
            continue
        key, params = parse_header(line)
        if key != 'form-data':
            continue
        if 'name' in params:
            name = params['name']
        else:
            continue

        part_fp.seek(0, os.SEEK_SET)

        part = {'fp': part_fp}
        if 'filename' in params:
            part['filename'] = params['filename']

        if name in partdict:
            partdict[name].append(part)
        else:
            partdict[name] = [part]

    return partdict
Exemplo n.º 44
0
    with app.app_context():
        args = process_args(args)
        from CTFd.models import db

        app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
        db.init_app(app)

        app.db = db

        out_stream.write(
            export_challenges(args.out_file, args.dst_attachments,
                              args.src_attachments, args.visible_only,
                              args.remove_flags, tarfile))

    if args.tar:
        print("Tarballing exported files")
        tarinfo = TarInfo(args.out_file)
        tarinfo.size = out_stream.tell()
        out_stream.seek(0)
        tarfile.addfile(tarinfo, out_stream)
        tarfile.close()

        if args.gz:
            print("Compressing tarball with gzip")
            with gzip.open('export.tar.gz', 'wb') as gz:
                tempfile.seek(0)
                shutil.copyfileobj(tempfile, gz)

    out_stream.close()
Exemplo n.º 45
0
    def index_html(self,
                   icon=0,
                   preview=0,
                   width=None,
                   height=None,
                   REQUEST=None):
        """ Return the file with it's corresponding MIME-type """

        if REQUEST is not None:
            if self._if_modified_since_request_handler(REQUEST):
                self.ZCacheable_set(None)
                return ''

            if self._redirect_default_view_request_handler(
                    icon, preview, REQUEST):
                return ''

        filename, content_type, icon, preview = self._get_file_to_serve(
            icon, preview)
        filename = self._get_fsname(filename)

        if _debug > 1:
            logger.info('serving %s, %s, %s, %s' %
                        (filename, content_type, icon, preview))

        if filename:
            size = os.stat(filename)[6]
        else:
            filename = self._get_icon_file(broken=True)
            size = os.stat(filename)[6]
            content_type = 'image/gif'
            icon = 1

        if icon == 0 and width is not None and height is not None:
            data = TemporaryFile()  # hold resized image
            try:
                from PIL import Image
                im = Image.open(filename)
                if im.mode != 'RGB':
                    im = im.convert('RGB')
                filter = Image.BICUBIC
                if hasattr(Image, 'ANTIALIAS'):  # PIL 1.1.3
                    filter = Image.ANTIALIAS
                im = im.resize((int(width), int(height)), filter)
                im.save(data, 'JPEG', quality=85)
            except:
                data = open(filename, 'rb')
            else:
                data.seek(0, 2)
                size = data.tell()
                data.seek(0)
                content_type = 'image/jpeg'
        else:
            data = open(filename, 'rb')

        if REQUEST is not None:
            last_mod = rfc1123_date(self._p_mtime)
            REQUEST.RESPONSE.setHeader('Last-Modified', last_mod)
            REQUEST.RESPONSE.setHeader('Content-Type', content_type)
            REQUEST.RESPONSE.setHeader('Content-Length', size)
            self.ZCacheable_set(None)
            return stream_iterator(data)

        try:
            return data.read()
        finally:
            data.close()
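The resize branch above saves the scaled JPEG into a TemporaryFile and then measures it with seek(0, 2)/tell() to fill in Content-Length. A minimal sketch of that idiom, assuming Pillow is installed and using a generated image instead of a file on disk:

from tempfile import TemporaryFile
from PIL import Image

data = TemporaryFile()
im = Image.new('RGB', (640, 480), color=(200, 30, 30))   # made-up source image
im.resize((64, 48)).save(data, 'JPEG', quality=85)

data.seek(0, 2)          # jump to the end ...
size = data.tell()       # ... to learn the Content-Length
data.seek(0)             # ... then rewind before serving the bytes
print(size)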
Exemplo n.º 46
0
class TestZipSubFile(unittest.TestCase):
    """ Tests ZipSubFile """

    def setUp(self):
        self.zipper = ZipFile(ZIP_TEMP_FILE)
        self.subfile = ZipSubFile(self.zipper, FILE_NAME)
        self.subfile.open()

        # create a file in memory for comparison
        self.compare = TemporaryFile(prefix='oletools-test-ZipSubFile-',
                                     suffix='.bin')
        self.compare.write(FILE_CONTENTS)
        self.compare.seek(0)   # re-position to start

        self.assertEqual(self.subfile.tell(), 0)
        self.assertEqual(self.compare.tell(), 0)
        if DEBUG:
            print('created comparison file {0!r} in memory'
                  .format(self.compare.name))

    def tearDown(self):
        self.compare.close()
        self.subfile.close()
        self.zipper.close()
        if DEBUG:
            print('\nall files closed')

    def test_read(self):
        """ test reading """
        # read from start
        self.assertEqual(self.subfile.read(4), self.compare.read(4))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # read a bit more
        self.assertEqual(self.subfile.read(4), self.compare.read(4))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # create difference
        self.subfile.read(1)
        self.assertNotEqual(self.subfile.read(4), self.compare.read(4))
        self.compare.read(1)
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # read all the rest
        self.assertEqual(self.subfile.read(), self.compare.read())
        self.assertEqual(self.subfile.tell(), self.compare.tell())

    def test_seek_forward(self):
        """ test seeking forward """
        self.subfile.seek(10)
        self.compare.seek(10)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek 2 forward
        self.subfile.seek(2, os.SEEK_CUR)
        self.compare.seek(2, os.SEEK_CUR)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek backward (only implemented case: back to start)
        self.subfile.seek(-self.subfile.tell(), os.SEEK_CUR)
        self.compare.seek(-self.compare.tell(), os.SEEK_CUR)
        self.assertEqual(self.subfile.read(1), self.compare.read(1))
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek to end
        self.subfile.seek(0, os.SEEK_END)
        self.compare.seek(0, os.SEEK_END)
        self.assertEqual(self.subfile.tell(), self.compare.tell())

        # seek back to start
        self.subfile.seek(0)
        self.compare.seek(0)
        self.assertEqual(self.subfile.tell(), self.compare.tell())
        self.assertEqual(self.subfile.tell(), 0)

    def test_check_size(self):
        """ test usual size check: seek to end, tell, seek to start """
        # seek to end
        self.subfile.seek(0, os.SEEK_END)
        self.assertEqual(self.subfile.tell(), len(FILE_CONTENTS))

        # seek back to start
        self.subfile.seek(0)

        # read first few bytes
        self.assertEqual(self.subfile.read(10), FILE_CONTENTS[:10])

    def test_error_read(self):
        """ test correct behaviour if read beyond end (no exception) """
        self.subfile.seek(0, os.SEEK_END)
        self.compare.seek(0, os.SEEK_END)

        self.assertEqual(self.compare.read(10), self.subfile.read(10))
        self.assertEqual(self.compare.tell(), self.subfile.tell())

        self.subfile.seek(0)
        self.compare.seek(0)
        self.subfile.seek(len(FILE_CONTENTS) - 1)
        self.compare.seek(len(FILE_CONTENTS) - 1)
        self.assertEqual(self.compare.read(10), self.subfile.read(10))
        self.assertEqual(self.compare.tell(), self.subfile.tell())

    def test_error_seek(self):
        """ test correct behaviour if seek beyond end (no exception) """
        self.subfile.seek(len(FILE_CONTENTS) + 10)
        self.compare.seek(len(FILE_CONTENTS) + 10)
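test_check_size exercises the usual size-check idiom: seek to the end, tell(), seek back to the start. The same three calls on a plain TemporaryFile:

import os
from tempfile import TemporaryFile

f = TemporaryFile()
f.write(b'0123456789')

f.seek(0, os.SEEK_END)   # jump to the end ...
size = f.tell()          # ... read the size ...
f.seek(0)                # ... and rewind for normal reading
assert size == 10 and f.read(4) == b'0123'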
Exemplo n.º 47
0
class S3File(io.IOBase):
    """File like proxy for s3 files, manages upload and download of locally managed temporary file
    """

    def __init__(self, bucket, key, mode='w+b', *args, **kwargs):
        super(S3File, self).__init__(*args, **kwargs)
        self.bucket = bucket
        self.key = key
        self.mode = mode
        self.path = self.bucket + '/' + self.key

        # converts mode to readable/writable to enable the temporary file to have S3 data
        # read or written to it even if the S3File is read/write/append
        # i.e. "r" => "r+", "ab" => "a+b"
        updatable_mode = re.sub(r'^([rwa]+)(b?)$', r'\1+\2', mode)
        self._tempfile = TemporaryFile(updatable_mode)

        try:
            with s3errors(self.path):
                if 'a' in mode:
                    # File is in an appending mode, start with the content in file
                    s3.Object(bucket, key).download_fileobj(self._tempfile)
                    self.seek(0, os.SEEK_END)
                elif 'a' not in mode and 'w' not in mode and 'x' not in mode:
                    # file is not in a create mode, so it is in read mode
                    # start with the content in the file, and seek to the beginning
                    s3.Object(bucket, key).download_fileobj(self._tempfile)
                    self.seek(0, os.SEEK_SET)
        except Exception:
            self.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        try:
            if self.writable():
                self.seek(0)
                with s3errors(self.path):
                    s3.Object(self.bucket, self.key).upload_fileobj(self._tempfile)
        finally:
            self._tempfile.close()

    @property
    def closed(self):
        return self._tempfile.closed

    def fileno(self):
        return self._tempfile.fileno()

    def flush(self):
        return self._tempfile.flush()

    def isatty(self):
        return self._tempfile.isatty()

    def readable(self):
        return 'r' in self.mode or '+' in self.mode

    def read(self, n=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.read(n)

    def readinto(self, b):
        return self._tempfile.readinto(b)

    def readline(self, limit=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.readline(limit)

    def readlines(self, hint=-1):
        if not self.readable():
            raise IOError('not open for reading')
        return self._tempfile.readlines(hint)

    def seek(self, offset, whence=os.SEEK_SET):
        self._tempfile.seek(offset, whence)
        return self.tell()

    def seekable(self):
        return True

    def tell(self):
        return self._tempfile.tell()

    def writable(self):
        return 'w' in self.mode or 'a' in self.mode or '+' in self.mode or 'x' in self.mode

    def write(self, b):
        if not self.writable():
            raise IOError('not open for writing')
        self._tempfile.write(b)
        return len(b)

    def writelines(self, lines):
        if not self.writable():
            raise IOError('not open for writing')
        return self._tempfile.writelines(lines)

    def truncate(self, size=None):
        if not self.writable():
            raise IOError('not open for writing')

        if size is None:
            size = self.tell()

        self._tempfile.truncate(size)
        return size
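The mode normalisation in __init__ above widens the requested mode so the backing TemporaryFile can always be both read and written. A tiny stand-alone check of that regex (the modes listed are just examples):

import re

def updatable(mode):
    # Same substitution as in S3File.__init__: "r" -> "r+", "ab" -> "a+b", etc.
    return re.sub(r'^([rwa]+)(b?)$', r'\1+\2', mode)

for m in ('r', 'rb', 'w', 'ab', 'w+b'):
    print(m, '->', updatable(m))   # r -> r+, rb -> r+b, w -> w+, ab -> a+b, w+b -> w+b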
Exemplo n.º 48
0
    def transfer_yaml():
        upload_folder = os.path.join(app.root_path, app.config['UPLOAD_FOLDER'])
        if request.method == 'GET':
            tarfile_backend = TemporaryFile(mode='wb+')
            yamlfile = TemporaryFile(mode='wb+')
            tarball = tarfile.open(fileobj=tarfile_backend, mode='w')
            print(upload_folder)

            yamlfile.write(export_challenges('export.yaml', 'export.d', upload_folder, tarball))

            tarinfo = tarfile.TarInfo('export.yaml')
            tarinfo.size = yamlfile.tell()
            yamlfile.seek(0)
            tarball.addfile(tarinfo, yamlfile)
            tarball.close()
            yamlfile.close()


            gzipfile_backend = TemporaryFile(mode='wb+')
            gzipfile = GzipFile(fileobj=gzipfile_backend, mode='wb')

            tarfile_backend.seek(0)
            shutil.copyfileobj(tarfile_backend, gzipfile)

            tarfile_backend.close()
            gzipfile.close()
            gzipfile_backend.seek(0)
            return send_file(gzipfile_backend, as_attachment=True, attachment_filename='export.tar.gz')

        if request.method == 'POST':
            if 'file' not in request.files:
                abort(400)

            file = request.files['file']

            readmode = 'r:gz'
            if file.filename.endswith('.tar'):
                readmode = 'r'
            if file.filename.endswith('.bz2'):
                readmode = 'r:bz2'

            tempdir = mkdtemp()
            try:
                archive = tarfile.open(fileobj=file.stream, mode=readmode)

                if 'export.yaml' not in archive.getnames():
                    shutil.rmtree(tempdir)
                    abort(400)

                # Check for attempts to escape to higher dirs
                for member in archive.getmembers():
                    memberpath = os.path.normpath(member.name)
                    if memberpath.startswith('/') or '..' in memberpath.split('/'):
                        shutil.rmtree(tempdir)
                        abort(400)

                    if member.linkname:
                        linkpath = os.path.normpath(member.linkname)
                        if linkpath.startswith('/') or '..' in linkpath.split('/'):
                            shutil.rmtree(tempdir)
                            abort(400)


                archive.extractall(path=tempdir)

            except tarfile.TarError:
                shutil.rmtree(tempdir)
                print('b')
                abort(400)

            in_file = os.path.join(tempdir, 'export.yaml')
            import_challenges(in_file, upload_folder, move=True)

            shutil.rmtree(tempdir)

            return '1'
Exemplo n.º 49
0
class ExpandableOutput(Reader, Writer, Seeker):
    """
    Write-only output object.

    Will store data in a BytesIO, until more than ``bufsize`` bytes are
    written, at which point it will switch to storing data in a real file
    object.
    """
    def __init__(self, bufsize=16384):
        """
        Initialize an ``ExpandableOutput`` instance.
        """
        self._raw = BytesIO()
        self.bufsize = bufsize
        self.write = self.write_stringio
        self.exceeded_bufsize = False

    def getstorage(self):
        """\
        Return the underlying stream (either a BytesIO or file object)
        """
        return self._raw

    def seek(self, pos, whence=0):
        return self._raw.seek(pos, whence)

    def tell(self):
        return self._raw.tell()

    def read(self, size=-1):
        return self._raw.read(size)

    def readline(self, size=-1):
        return self._raw.readline(size)

    def write_stringio(self, data):
        """
        ``write``, optimized for the BytesIO backend.
        """
        if isinstance(self._raw, BytesIO) \
           and self._raw.tell() + len(data) > self.bufsize:
            self.switch_to_file_storage()
            return self.write_file(data)
        return self._raw.write(data)

    def write_file(self, data):
        """
        ``write``, optimized for the TemporaryFile backend
        """
        return self._raw.write(data)

    def switch_to_file_storage(self):
        """
        Switch the storage backend to an instance of ``TemporaryFile``.
        """
        self.exceeded_bufsize = True
        oldio = self._raw
        try:
            self._raw.seek(0)
            self._raw = TemporaryFile()
            copyfileobj(oldio, self._raw)
        finally:
            oldio.close()
        self.write = self.write_file

    def __enter__(self):
        """
        Support for context manager ``__enter__``/``__exit__`` blocks
        """
        return self

    def __exit__(self, type, value, traceback):
        """
        Support for context manager ``__enter__``/``__exit__`` blocks
        """
        self._raw.close()
        # propagate exceptions
        return False
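The spill-over behaviour described in the docstring (a BytesIO until bufsize is exceeded, then a real file) can be reproduced in a few lines without the Reader/Writer/Seeker mixins; a self-contained sketch:

from io import BytesIO
from shutil import copyfileobj
from tempfile import TemporaryFile

BUFSIZE = 16
raw = BytesIO()

def spill_write(data):
    # Keep writes in memory until BUFSIZE would be exceeded, then migrate to disk.
    global raw
    if isinstance(raw, BytesIO) and raw.tell() + len(data) > BUFSIZE:
        old = raw
        old.seek(0)
        raw = TemporaryFile()
        copyfileobj(old, raw)
        old.close()
    return raw.write(data)

spill_write(b'small')                        # stays in the BytesIO
spill_write(b'large enough to spill over')   # triggers the switch to a TemporaryFile
print(isinstance(raw, BytesIO))              # False once the data has spilled to disk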
Exemplo n.º 50
0
def result_page(request, result_id):
    result = models.result.objects.get(id=result_id)
    if request.method == 'GET':
        if 'get_dut_output' in request.GET:
            response = HttpResponse(result.dut_output,
                                    content_type='text/plain')
            response['Content-Disposition'] = \
                'attachment; filename="{}_dut_output.txt"'.format(
                    result_id)
            return response
        elif 'get_debugger_output' in request.GET:
            response = HttpResponse(result.debugger_output,
                                    content_type='text/plain')
            response['Content-Disposition'] = \
                'attachment; filename="{}_debugger_output.txt"'.format(
                    result_id)
            return response
        elif 'get_aux_output' in request.GET:
            response = HttpResponse(result.aux_output,
                                    content_type='text/plain')
            response['Content-Disposition'] = \
                'attachment; filename="{}_aux_output.txt"'.format(
                    result_id)
            return response
        elif 'get_output_file' in request.GET:
            response = get_file(result.campaign.output_file, result_id)
            response['Content-Disposition'] = \
                'attachment; filename={}_{}'.format(
                    result_id, result.campaign.output_file)
            return response
        elif 'get_log_file' in request.GET:
            temp_file = TemporaryFile()
            with open_tar(fileobj=temp_file, mode='w:gz') as archive:
                for log_file in result.campaign.log_files:
                    archive.add(
                        'campaign-data/{}/results/{}/{}'.format(
                            result.campaign_id, result.id, log_file),
                        '{}_{}'.format(result.id, log_file))
            response = FileResponse(temp_file,
                                    content_type='application/x-compressed')
            response['Content-Disposition'] = \
                'attachment; filename={}_log_files.tar.gz'.format(result.id)
            response['Content-Length'] = temp_file.tell()
            temp_file.seek(0)
            return response
    campaign_items_ = [(item[0],
                        '/campaign/{}/{}'.format(result.campaign_id,
                                                 item[1]), item[2], item[3])
                       for item in campaign_items]
    if result.campaign.output_file:
        output_file = 'campaign-data/{}/results/{}/{}'.format(
            result.campaign_id, result_id, result.campaign.output_file)
        output_file = \
            exists(output_file) and guess_type(output_file)[0] is not None
    else:
        output_file = False
    result_table = tables.result(models.result.objects.filter(id=result_id))
    events = result.event_set.all()
    event_table = tables.event(events)
    if request.method == 'POST' and 'launch' in request.POST:
        Popen([
            argv[0], '--campaign_id',
            str(result.campaign_id), 'regenerate', result_id
        ])
    if request.method == 'POST' and 'save' in request.POST:
        result.outcome = request.POST['outcome']
        result.outcome_category = request.POST['outcome_category']
        result.save()
    elif request.method == 'POST' and 'delete' in request.POST:
        if exists('campaign-data/{}/results/{}'.format(result.campaign_id,
                                                       result.id)):
            rmtree('campaign-data/{}/results/{}'.format(
                result.campaign_id, result.id))
        result.delete()
        return HttpResponse('Result deleted')
    injections = result.injection_set.all()
    if result.campaign.simics:
        if injections.count():
            injection_table = tables.injection(injections)
        else:
            injection_table = None
        register_diffs = result.simics_register_diff_set.all()
        register_filter = filters.simics_register_diff(request.GET,
                                                       queryset=register_diffs)
        register_diff_count = register_filter.qs.count()
        register_table = tables.simics_register_diff(register_filter.qs)
        RequestConfig(request, paginate={
            'per_page': table_length
        }).configure(register_table)
        memory_diffs = result.simics_memory_diff_set.all()
        memory_diff_count = memory_diffs.count()
        memory_table = tables.simics_memory_diff(memory_diffs)
        RequestConfig(request, paginate={
            'per_page': table_length
        }).configure(memory_table)
    else:
        register_filter = None
        memory_diff_count = 0
        memory_table = None
        register_diff_count = 0
        register_table = None
        if injections.count():
            injection_table = tables.injection(injections)
        else:
            injection_table = None
    RequestConfig(request, paginate=False).configure(result_table)
    RequestConfig(request, paginate=False).configure(event_table)
    if injection_table:
        RequestConfig(request, paginate=False).configure(injection_table)
    return render(
        request, 'result.html', {
            'campaign_items': campaign_items_,
            'event_count': '{:,}'.format(events.count()),
            'event_table': event_table,
            'filter': register_filter,
            'injection_table': injection_table,
            'memory_diff_count': '{:,}'.format(memory_diff_count),
            'memory_table': memory_table,
            'navigation_items': navigation_items,
            'output_file': output_file,
            'register_diff_count': '{:,}'.format(register_diff_count),
            'register_table': register_table,
            'result': result,
            'result_table': result_table
        })
class ZipNumClusterJob(MRJob):
    HADOOP_INPUT_FORMAT = 'org.apache.hadoop.mapred.lib.CombineTextInputFormat'

    PARTITIONER = 'org.apache.hadoop.mapred.lib.TotalOrderPartitioner'

    INPUT_PROTOCOL = RawValueProtocol
    OUTPUT_PROTOCOL = RawValueProtocol
    INTERNAL_PROTOCOL = RawProtocol

    JOBCONF =  {'mapreduce.task.timeout': '9600000',
                'mapreduce.input.fileinputformat.split.maxsize': '50000000',
                'mapreduce.map.speculative': 'false',
                'mapreduce.reduce.speculative': 'false',
                'mapreduce.output.fileoutputformat.compress': 'false',
                'mapreduce.job.reduce.slowstart.completedmaps': '1.0',
                'mapreduce.job.jvm.numtasks': '-1'
               }

    def configure_args(self):
        """Custom command line options for indexing"""
        super(ZipNumClusterJob, self).configure_args()

        self.add_passthru_arg('--numlines', dest='numlines',
                              type=int,
                              default=3000,
                              help='Number of lines per gzipped block')

        self.add_passthru_arg('--splitfile', dest='splitfile',
                              help='Split file to use for CDX shard split')

        self.add_passthru_arg('--convert', dest='convert',
                              action='store_true',
                              default=False,
                              help='Convert CDX through _convert_line() function')

        self.add_passthru_arg('--shards', dest='shards',
                              type=int,
                              help='Num ZipNum Shards to create, ' +
                                   '= num of entries in splits + 1 ' +
                                   '= num of reducers used')

        self.add_passthru_arg('--s3-upload-acl', dest='s3acl',
                              help='S3 access permissions (ACL) to be applied to CDX files')

    def jobconf(self):
        orig_jobconf = super(ZipNumClusterJob, self).jobconf()
        custom_jobconf = {'mapreduce.job.reduces': self.options.shards,
                          'mapreduce.totalorderpartitioner.path': self.options.splitfile}

        combined = combine_dicts(orig_jobconf, custom_jobconf)
        return combined

    def mapper_init(self):
        pass

    def mapper(self, _, line):
        line = line.split('\t')[-1]
        if not line.startswith(' CDX'):
            if self.options.convert:
                line = self._convert_line(line)
            yield line, ''

    def _convert_line(self, line):
        key, ts, url, length, offset, warc = line.split(' ')
        key = key.replace(')', ',)', 1)

        vals = {'o': offset, 's': length, 'w': warc, 'u': url}

        return key + ' ' + ts + ' ' + json.dumps(vals)

    def _get_prop(self, proplist):
        for p in proplist:
            res = os.environ.get(p)
            if res:
                return res

    def reducer_init(self):
        self.curr_lines = []
        self.curr_key = ''

        self.part_num = self._get_prop(['mapreduce_task_partition', 'mapred_task_partition'])
        assert(self.part_num)

        self.part_name = 'cdx-%05d.gz' % int(self.part_num)

        self.output_dir = self._get_prop(['mapreduce_output_fileoutputformat_outputdir',
                                          'mapred.output.dir',
                                          'mapred_work_output_dir'])

        assert(self.output_dir)
        self.gzip_temp = TemporaryFile(mode='w+b')

    def reducer(self, key, values):
        if key:
            self.curr_lines.append(key)

        for x in values:
            if x:
                self.curr_lines.append(x)

        if len(self.curr_lines) == 1:
            self.curr_key = ' '.join(key.split(' ', 2)[0:2])

        if len(self.curr_lines) >= self.options.numlines:
            yield '', self._write_part()

    def reducer_final(self):
        if len(self.curr_lines) > 0:
            yield '', self._write_part()

        self._do_upload()

    def _do_upload(self):
        self.gzip_temp.flush()
        self.gzip_temp.seek(0)
        #TODO: move to generalized put() function
        if self.output_dir.startswith('s3://') or self.output_dir.startswith('s3a://'):
            import boto3
            import botocore
            boto_config = botocore.client.Config(
                read_timeout=180,
                retries={'max_attempts' : 20})
            s3client = boto3.client('s3', config=boto_config)
            s3args = None
            if self.options.s3acl:
                s3args = {'ACL': self.options.s3acl}

            parts = urlparse.urlsplit(self.output_dir)
            s3key = parts.path.strip('/') + '/' + self.part_name
            s3url = parts.scheme + '://' + parts.netloc + '/' + s3key

            LOG.info('Uploading index to ' + s3url)
            try:
                s3client.upload_fileobj(self.gzip_temp, parts.netloc, s3key,
                                        ExtraArgs=s3args)
            except botocore.client.ClientError as exception:
                LOG.error('Failed to upload {}: {}'.format(s3url, exception))
                return
            LOG.info('Successfully uploaded index file: ' + s3url)
        else:
            path = os.path.join(self.output_dir, self.part_name)

            with open(path, 'w+b') as target:
                shutil.copyfileobj(self.gzip_temp, target)

        self.gzip_temp.close()

    def _write_part(self):
        z = zlib.compressobj(6, zlib.DEFLATED, zlib.MAX_WBITS + 16)

        offset = self.gzip_temp.tell()

        buff = '\n'.join(self.curr_lines) + '\n'
        self.curr_lines = []

        buff = z.compress(buff)
        self.gzip_temp.write(buff)

        buff = z.flush()
        self.gzip_temp.write(buff)
        self.gzip_temp.flush()

        length = self.gzip_temp.tell() - offset

        partline = '{0}\t{1}\t{2}\t{3}'.format(self.curr_key, self.part_name, offset, length)

        return partline