Example #1
def main(argv):
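    # NOTE: this excerpt assumes `options` (parsed command-line flags) and the
    # helpers walker/putter/statter, walk_filesystem, put_update, repeatedly
    # and FileObjectCache are defined elsewhere in the module.
    # Each -v lowers the effective log level by 10, each -q raises it by 10.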
    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 *
                        (options.quiet - options.verbose))
    log = logging.getLogger(os.path.basename(sys.argv[0]))
    FORMAT = '%(asctime)s|%(levelname)s|%(process)d|%(module)s.py|%(funcName)s|%(lineno)d|  %(message)s'
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter(FORMAT, datefmt="%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)
    log.addHandler(handler)

    file_object_cache = FileObjectCache()
    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()
    walk = {'filesystem': walk_filesystem}[options.walk]
    args = [
        '/auto/fina-data/share/FARepository/prod/CIGActgS11/position/processing/Priority_2/PositionSide/122654_DESK_CDRG183872PositionSide.bcp.SSrvr'
    ]
    walker_process = Process(target=walker,
                             args=(walk, put_queue, args, options))
    walker_process.start()

    put = {'update': put_update}[options.put]
    putter_processes = list(
        islice(
            repeatedly(Process,
                       target=putter,
                       args=(put, put_queue, stat_queue, options)),
            options.processes))
    for putter_process in putter_processes:
        putter_process.start()
    walker_process.join()
    statter_process = Process(target=statter,
                              args=(stat_queue, start, options))
    statter_process.start()

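    # One None sentinel per putter process tells each worker to exit.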
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()

    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
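None of the excerpts on this page includes the walker/putter/statter helpers they spawn. As a rough sketch only, with the signature taken from the call sites and the body guessed, a putter compatible with this shutdown protocol could look like:

def putter(put, put_queue, stat_queue, options):
    # Hypothetical body: drain put_queue until the None sentinel that the
    # parent enqueues once per putter process.
    while True:
        item = put_queue.get()
        try:
            if item is None:
                break
            # `put` is the selected strategy (e.g. put_update); its exact
            # signature is an assumption here.
            stat_queue.put(put(item, options))
        finally:
            # Every get() is matched with task_done() so the JoinableQueue
            # bookkeeping stays consistent.
            put_queue.task_done()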
Example #2
	def insert_files(self, out, cfg, producer, return_dict, skip_header=0, rec_delim=os.linesep):
		self.opt.skip_header = skip_header
		self.opt.rec_delim = rec_delim
		log = logging.getLogger('cli')
		self.scfg, self.tcfg = cfg
		file_object_cache = FileObjectCache()
		start = time.time()
		
		stat_queue = JoinableQueue()
		put_queue = JoinableQueue(1024 * self.opt.processes)

		put = {'update': self.put_update}[self.opt.put]
		putter_processes = list(islice(repeatedly(Process, target=self.putter, args=(put, put_queue, stat_queue, return_dict)), self.opt.processes))
		for putter_process in putter_processes:
			putter_process.start()
		statter_process = Process(target=self.statter, args=(stat_queue, start))
		statter_process.start()

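		# `producer` is a (callable, args) pair; the callable yields file
		# names that are streamed to the already-running putters.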
		out_names = []
		for file_name in producer[0](*producer[1]):
			out_names.append(file_name)
			put_queue.put(file_name)
		out.dump_files = out_names

		for putter_process in putter_processes:
			put_queue.put(None)
		put_queue.close()
		for putter_process in putter_processes:
			putter_process.join()
			
		stat_queue.put(None)
		stat_queue.close()
		statter_process.join()
		put_queue.join_thread()
		stat_queue.join_thread()
		log.info('counter: %s', counter.value())
		log.info('total inserted: %s', self.total_ins)
		log.info('return values: %s', return_dict.values())
Example #3
	def insert_files(self, file_names, out, cfg, skip_header=0, rec_delim=os.linesep):
		self.opt.skip_header = skip_header
		self.opt.rec_delim = rec_delim
		log = logging.getLogger('cli')
		self.scfg, self.tcfg = cfg
		file_object_cache = FileObjectCache()
		start = time.time()
		put_queue = JoinableQueue(1024 * self.opt.processes)
		stat_queue = JoinableQueue()
		for file_name in file_names.file_names:
			put_queue.put(file_name)

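		# Note: every file is enqueued before any putter starts; if the file
		# count ever exceeded the queue's maxsize, the put() loop above would
		# block forever with no consumer running.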
		put = {'update': self.put_update}[self.opt.put]
		putter_processes = list(islice(repeatedly(Process, target=self.putter, args=(put, put_queue, stat_queue)), self.opt.processes))
		for putter_process in putter_processes:
			putter_process.start()
		statter_process = Process(target=self.statter, args=(stat_queue, start))
		statter_process.start()
		
		for putter_process in putter_processes:
			put_queue.put(None)
		put_queue.close()
		for putter_process in putter_processes:
			putter_process.join()
			
		stat_queue.put(None)
		stat_queue.close()
		statter_process.join()
		put_queue.join_thread()
		stat_queue.join_thread()
	
		out.file_names = ['%s.gz' % os.path.basename(x[0]) for x in file_names.file_names]
		out.file_keys = ['%s.gz' % x[0] for x in file_names.file_names]
		out.file_location = os.path.dirname(file_names.file_names[0][0])
Example #4
def __run_chm_test_procs(mems, model, regions, ntasks, nthreads):
    """Starts ntasks processes running __run_chm_test_proc then calls __run_chm_test_parallel."""
    from multiprocessing import JoinableQueue, Process
    from time import sleep
    print("Running CHM test with %d task%s and %d thread%s per task" %
          (ntasks, 's' if ntasks > 1 else '', nthreads,
           's' if nthreads > 1 else ''))
    nthreads_full = ntasks * nthreads

    # Start the child processes
    q = JoinableQueue()
    args = (mems, model, nthreads, q)
    processes = [
        Process(target=__run_chm_test_proc, name="CHM-test-%d" % p, args=args)
        for p in xrange(ntasks)
    ]
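    # Daemonic children are terminated automatically if the parent exits.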
    for p in processes:
        p.daemon = True
        p.start()
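    # Yield once so the freshly started children get a chance to run.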
    sleep(0)

    # Run the CHM-test in parallel
    try:
        out = __run_chm_test_parallel(mems, model, regions, q, processes,
                                      nthreads_full)
    except:
        __clear_queue(q)
        __kill_processes(processes)
        raise

    # Tell all processes we are done and make sure they all actually terminate
    for _ in xrange(ntasks):
        q.put_nowait(None)
    q.close()
    q.join()
    q.join_thread()
    for p in processes:
        p.join()

    # Done! Return the output image
    return out
Example #5
def parallel_for(a, cls, args=[], kwargs={}, num_processes=None):
    from multiprocessing import Process, JoinableQueue, cpu_count, Pipe
    if num_processes is None:
        num_processes = cpu_count()
    # Note that JoinableQueue uses an integer for tracking locations in the queue.
    # Because it's using shared memory it's not terribly flexible and gives annoyingly
    # unclear errors if you go over the limit. We'd like the queue to be as large as
    # possible so that we can avoid contention, but without allocating a max possible
    # size queue unless we need it, thus the calculation below. 32767 is a hard limit.
    q = JoinableQueue(maxsize=min(len(a)+num_processes, 2**15 - 1))

    output_pipes = [Pipe(duplex=False) for _ in range(num_processes)]
    send_pipes = [p for _, p in output_pipes]
    recv_pipes = [p for p, _ in output_pipes]
    pool = [Process(target=_parallel_for, args=(q, cls, pipe) + tuple(args), kwargs=kwargs)
            for pipe in send_pipes]
    output_watcher = MultiPipeWatcher(recv_pipes)
    try:
        for p in pool:
            p.start()
        output_watcher.start()
        for x in a:
            q.put(x)
        for _ in range(num_processes):
            q.put(None) # End markers
        q.close()
        q.join_thread()
        q.join()
        for p in pool:
            p.join()
        output_watcher.flush()
        output_watcher.join()
        combined_output = output_watcher.merged
        return combined_output
    except KeyboardInterrupt:
        print("Interrupted -- terminating worker processes")
        for p in pool:
            p.terminate()
        for p in pool:
            p.join()
        raise
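The _parallel_for worker is not shown in the excerpt; from the call site it receives the queue, the class, the send end of a Pipe, and the forwarded args/kwargs. A minimal sketch under those assumptions (the per-item protocol of cls is guessed):

def _parallel_for(q, cls, pipe, *args, **kwargs):
    worker = cls(*args, **kwargs)  # assumption: cls instances are callable
    results = []
    while True:
        x = q.get()
        try:
            if x is None:  # one end marker per process
                break
            results.append(worker(x))
        finally:
            # task_done() is what allows the parent's q.join() to return.
            q.task_done()
    pipe.send(results)  # MultiPipeWatcher merges these on the parent side
    pipe.close()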
Example #6
def parexec(signal, out, num_consumers, iterator):
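    # `iterator` must be a sized sequence: len(iterator) below decides how
    # many results to collect from the output queue.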
    t = time.time()
    tasks = JoinableQueue()
    results = Queue()
    print 'starting consumers'
    consumers = [Consumer(tasks, results, [signal]) for _ in range(num_consumers)]
    for w in consumers:
        w.start()
    print 'adding tasks'
    for i in iterator:
        tasks.put(Task(i, signal))
    for i in range(num_consumers):
        tasks.put(None)
    print 'collecting'
    for n in range(len(iterator)):
        out.append(results.get())
        if n % 100000 == 0:
            print n
    tasks.close()
    tasks.join_thread()
    print 'closing'
    for w in consumers:
        w.join()
    print time.time() - t
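Consumer and Task are defined outside this excerpt. Following the usual multiprocessing pattern, Consumer would be a Process subclass that drains the task queue until it sees the None sentinel; a hypothetical reconstruction:

from multiprocessing import Process

class Consumer(Process):
    def __init__(self, tasks, results, shared):
        Process.__init__(self)
        self.tasks = tasks      # JoinableQueue of Task objects
        self.results = results  # plain Queue for outputs
        self.shared = shared    # extra state, e.g. [signal]

    def run(self):
        while True:
            task = self.tasks.get()
            try:
                if task is None:  # sentinel pushed by parexec
                    break
                self.results.put(task())  # assumes Task objects are callable
            finally:
                self.tasks.task_done()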
Example #7
def main(argv):
    parser = OptionParser()
    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET',
            help='set bucket')
    group.add_option('--insecure', action='store_false', dest='secure',
            help='use insecure connection')
    group.add_option('--secure', action='store_true', default=True, dest='secure',
            help='use secure connection')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk', choices=('filesystem', 'tar'), default='filesystem', metavar='MODE',
            help='set walk mode (filesystem or tar)')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Put options')
    group.add_option('--content-type', metavar='CONTENT-TYPE',
            help='set content type')
    group.add_option('--gzip', action='store_true',
            help='gzip values and set content encoding')
    group.add_option('--put', choices=('add', 'stupid', 'update'), default='update', metavar='MODE',
            help='set put mode (add, stupid, or update)')
    group.add_option('--prefix', default='', metavar='PREFIX',
            help='set key prefix')
    group.add_option('--resume', action='append', default=[], metavar='FILENAME',
            help='resume from log file')
    group.add_option('--grant', metavar='GRANT', default=None, choices=CannedACLStrings,
            help='A canned ACL policy to be applied to each file uploaded.\nChoices: %s' %
            ', '.join(CannedACLStrings))
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename', metavar='FILENAME',
            help='set log filename')
    group.add_option('--quiet', '-q', action='count', default=0,
            help='less output')
    group.add_option('--verbose', '-v', action='count', default=0,
            help='more output')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run', action='store_true',
            help='don\'t write to S3')
    group.add_option('--limit', metavar='N', type=int,
            help='set maximum number of keys to put')
    group.add_option('--processes', default=8, metavar='PROCESSES', type=int,
            help='set number of putter processes')
    parser.add_option_group(group)
    options, args = parser.parse_args(argv[1:])
    logging.basicConfig(filename=options.log_filename, level=logging.INFO + 10 * (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))
    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1
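    # Sanity check: verify credentials and the bucket exist before any
    # workers are started; the connection is dropped immediately after.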
    connection = S3Connection(is_secure=options.secure)
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection
    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()
    walk = {'filesystem': walk_filesystem, 'tar': walk_tar}[options.walk]
    walker_process = Process(target=walker, args=(walk, put_queue, args, options))
    walker_process.start()
    put = {'add': put_add, 'stupid': put_stupid, 'update': put_update}[options.put]
    putter_processes = list(islice(repeatedly(Process, target=putter, args=(put, put_queue, stat_queue, options)), options.processes))
    for putter_process in putter_processes:
        putter_process.start()
    statter_process = Process(target=statter, args=(stat_queue, start, options))
    statter_process.start()
    walker_process.join()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
Example #8
def main(argv):
    parser = OptionParser()
    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option('--host',
                     default='s3.amazonaws.com',
                     help='set AWS host name')
    group.add_option('--insecure',
                     action='store_false',
                     dest='secure',
                     help='use insecure connection')
    group.add_option('--secure',
                     action='store_true',
                     default=True,
                     dest='secure',
                     help='use secure connection')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk',
                     choices=('filesystem', 'tar'),
                     default='filesystem',
                     metavar='MODE',
                     help='set walk mode (filesystem or tar)')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Put options')
    group.add_option(
        '--content-type',
        metavar='CONTENT-TYPE',
        help='set content type, set to "guess" to guess based on file name')
    group.add_option('--gzip',
                     action='store_true',
                     help='gzip values and set content encoding')
    group.add_option('--put',
                     choices=('add', 'stupid', 'update'),
                     default='update',
                     metavar='MODE',
                     help='set put mode (add, stupid, or update)')
    group.add_option('--prefix',
                     default='',
                     metavar='PREFIX',
                     help='set key prefix')
    group.add_option('--resume',
                     action='append',
                     default=[],
                     metavar='FILENAME',
                     help='resume from log file')
    group.add_option(
        '--grant',
        metavar='GRANT',
        default=None,
        choices=CannedACLStrings,
        help=
        'A canned ACL policy to be applied to each file uploaded.\nChoices: %s'
        % ', '.join(CannedACLStrings))
    group.add_option(
        '--header',
        metavar='HEADER:VALUE',
        dest='headers',
        action='append',
        help='extra headers to add to the file, can be specified multiple times'
    )
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename',
                     metavar='FILENAME',
                     help='set log filename')
    group.add_option('--quiet',
                     '-q',
                     action='count',
                     default=0,
                     help='less output')
    group.add_option('--verbose',
                     '-v',
                     action='count',
                     default=0,
                     help='more output')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run',
                     action='store_true',
                     help='don\'t write to S3')
    group.add_option('--limit',
                     metavar='N',
                     type=int,
                     help='set maximum number of keys to put')
    group.add_option('--processes',
                     default=8,
                     metavar='PROCESSES',
                     type=int,
                     help='set number of putter processes')
    parser.add_option_group(group)
    options, args = parser.parse_args(argv[1:])
    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 *
                        (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))
    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1
    connection = S3Connection(is_secure=options.secure)
    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection
    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()
    walk = {'filesystem': walk_filesystem, 'tar': walk_tar}[options.walk]
    walker_process = Process(target=walker,
                             args=(walk, put_queue, args, options))
    walker_process.start()
    put = {
        'add': put_add,
        'stupid': put_stupid,
        'update': put_update
    }[options.put]
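    # repeatedly() is assumed to yield endless Process instances built with
    # these kwargs; islice() takes exactly options.processes of them.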
    putter_processes = list(
        islice(
            repeatedly(Process,
                       target=putter,
                       args=(put, put_queue, stat_queue, options)),
            options.processes))
    for putter_process in putter_processes:
        putter_process.start()

    statter_process = Process(target=statter,
                              args=(stat_queue, start, options))
    statter_process.start()
    walker_process.join()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()
Example #9
def main(argv=None):
    if argv is None:
        argv = sys.argv
    parser = OptionParser()
    group = OptionGroup(parser, 'S3 options')
    group.add_option('--bucket', metavar='BUCKET', help='set bucket')
    group.add_option(
        '--bucket_region',
        default='us-east-1',
        help='set bucket region if not in us-east-1 (default new bucket region)'
    )
    group.add_option('--host',
                     default='s3.amazonaws.com',
                     help='set AWS host name')
    group.add_option('--insecure',
                     action='store_false',
                     dest='secure',
                     help='use insecure connection')
    group.add_option('--secure',
                     action='store_true',
                     default=True,
                     dest='secure',
                     help='use secure connection')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Source options')
    group.add_option('--walk',
                     choices=('filesystem', 'tar', 's3'),
                     default='filesystem',
                     metavar='MODE',
                     help='set walk mode (filesystem or tar)')
    group.add_option('--exclude',
                     action='append',
                     default=[],
                     metavar='PATTERN',
                     help='exclude files matching PATTERN')
    group.add_option('--include',
                     action='append',
                     default=[],
                     metavar='PATTERN',
                     help='don\'t exclude files matching PATTERN')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Put options')
    group.add_option(
        '--content-type',
        default='guess',
        metavar='CONTENT-TYPE',
        help='set content type, set to "guess" to guess based on file name '
        'or "magic" to guess by filename and libmagic.')
    group.add_option('--gzip',
                     action='store_true',
                     help='gzip values and set content encoding')
    group.add_option(
        '--gzip-type',
        action='append',
        default=[],
        help='if --gzip is set, sets what content-type to gzip, defaults '
        'to a list of known text content types, "all" will gzip everything.'
        ' Specify multiple times for multiple content types. '
        '[default: "guess"]')
    group.add_option('--put',
                     choices=('add', 'stupid', 'update', 'copy'),
                     default='update',
                     metavar='MODE',
                     help='set put mode (add, stupid, copy or update)')
    group.add_option('--prefix',
                     default='',
                     metavar='PREFIX',
                     help='set key prefix')
    group.add_option('--resume',
                     action='append',
                     default=[],
                     metavar='FILENAME',
                     help='resume from log file')
    group.add_option(
        '--grant',
        metavar='GRANT',
        default=None,
        choices=CannedACLStrings,
        help=
        'A canned ACL policy to be applied to each file uploaded.\nChoices: %s'
        % ', '.join(CannedACLStrings))
    group.add_option(
        '--header',
        metavar='HEADER:VALUE',
        dest='headers',
        action='append',
        help='extra headers to add to the file, can be specified multiple times'
    )
    group.add_option('--encrypt-key',
                     action='store_true',
                     default=False,
                     dest='encrypt_key',
                     help='use server side encryption')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Logging options')
    group.add_option('--log-filename',
                     metavar='FILENAME',
                     help='set log filename')
    group.add_option('--quiet',
                     '-q',
                     action='count',
                     default=0,
                     help='less output')
    group.add_option('--verbose',
                     '-v',
                     action='count',
                     default=0,
                     help='more output')
    parser.add_option_group(group)
    group = OptionGroup(parser, 'Debug and performance tuning options')
    group.add_option('--dry-run',
                     action='store_true',
                     help='don\'t write to S3')
    group.add_option('--limit',
                     metavar='N',
                     type=int,
                     help='set maximum number of keys to put')
    group.add_option('--processes',
                     default=8,
                     metavar='PROCESSES',
                     type=int,
                     help='set number of putter processes')
    parser.add_option_group(group)
    options, args = parser.parse_args(argv[1:])
    logging.basicConfig(filename=options.log_filename,
                        level=logging.INFO + 10 *
                        (options.quiet - options.verbose))
    logger = logging.getLogger(os.path.basename(sys.argv[0]))
    if len(args) < 1:
        logger.error('missing source operand')
        return 1
    if not options.bucket:
        logger.error('missing bucket')
        return 1
    if not options.bucket_region:
        options.bucket_region = 'us-east-1'
    connection = boto.s3.connect_to_region(
        options.bucket_region,
        aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
        is_secure=options.secure,
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
    )

    import ssl
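    # WARNING: this makes unverified HTTPS contexts the default for the
    # whole process, i.e. certificate verification is disabled.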
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    bucket = connection.get_bucket(options.bucket)
    del bucket
    del connection
    start = time.time()
    put_queue = JoinableQueue(1024 * options.processes)
    stat_queue = JoinableQueue()
    walk = {
        'filesystem': walk_filesystem,
        'tar': walk_tar,
        's3': walk_s3
    }[options.walk]
    walker_process = Process(target=walker,
                             args=(walk, put_queue, args, options))
    walker_process.start()
    put = {
        'add': put_add,
        'stupid': put_stupid,
        'update': put_update,
        'copy': put_copy
    }[options.put]
    putter_processes = list(
        islice(
            repeatedly(Process,
                       target=putter,
                       args=(put, put_queue, stat_queue, options)),
            options.processes))
    for putter_process in putter_processes:
        putter_process.start()
    statter_process = Process(target=statter,
                              args=(stat_queue, start, options))
    statter_process.start()
    walker_process.join()
    for putter_process in putter_processes:
        put_queue.put(None)
    put_queue.close()
    for putter_process in putter_processes:
        putter_process.join()
    stat_queue.put(None)
    stat_queue.close()
    statter_process.join()
    put_queue.join_thread()
    stat_queue.join_thread()