def OpenDeduplicateConnection(dedupe_namespace):
  """Connect to the isolate server used for symbol deduplication.

  The connection attempt runs under timeout_util.Timeout(DEDUPE_TIMEOUT).
  Any Exception raised while connecting is logged as a warning (with the
  traceback) and swallowed, so callers only need to check for None.

  Args:
    dedupe_namespace: String id for the comparison namespace.

  Returns:
    Connection proxy, or None on failure.
  """
  try:
    with timeout_util.Timeout(DEDUPE_TIMEOUT):
      storage = isolateserver.get_storage_api(
          constants.ISOLATESERVER, dedupe_namespace)
  except Exception:
    logging.warning('initializing isolate server connection failed',
                    exc_info=True)
    # Reset explicitly: the failure may have happened after the assignment
    # above (e.g. while leaving the timeout context).
    storage = None
  return storage
def SymbolDeduplicatorNotify(dedupe_namespace, dedupe_queue):
  """Tell the dedupe server about symbols that were uploaded successfully.

  Elements are read from |dedupe_queue| until a None sentinel shows up. Each
  element that carries a symbol item and push state is pushed to the server.

  Notification is best effort: if anything fails we log a warning, stop
  notifying, and merely keep draining the queue so it does not fill up. A
  missed notification just means the symbol gets uploaded again later on, and
  the symbol server will handle that graciously.

  This func runs in a different process from the main one, so we cannot share
  the storage object. Instead, we create our own. This func stays alive for
  the life of the process, so we only create one here overall.

  Args:
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    dedupe_queue: The queue to read SymbolElements from
  """
  if dedupe_queue is None:
    return

  # Tracked outside the loop so the failure message can name the last file.
  sym_file = ''
  try:
    with timeout_util.Timeout(DEDUPE_TIMEOUT):
      storage = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                              dedupe_namespace)

    for element in iter(dedupe_queue.get, None):
      # Skip empty elements and elements without a symbol item.
      if not (element and element.symbol_item):
        continue

      item = element.symbol_item
      state = element.opaque_push_state
      sym_file = item.sym_file or ''

      # Without push state there is nothing to send for this element.
      if state is None:
        continue

      with timeout_util.Timeout(DEDUPE_TIMEOUT):
        logging.debug('sending %s to dedupe server', sym_file)
        item.prepare(SymbolItem.ALGO)
        storage.push(item, state, item.content())
        logging.debug('sent %s', sym_file)

    logging.info('dedupe notification finished; exiting')
  except Exception:
    logging.warning('posting %s to dedupe server failed',
                    os.path.basename(sym_file), exc_info=True)

    # Keep draining the queue though so it doesn't fill up.
    while dedupe_queue.get() is not None:
      pass
def UploadSymbols(board=None, official=False, server=None, breakpad_dir=None,
                  file_limit=DEFAULT_FILE_LIMIT, sleep=DEFAULT_SLEEP_DELAY,
                  upload_limit=None, sym_paths=None, failed_list=None,
                  root=None, retry=True, dedupe_namespace=None,
                  product_name='ChromeOS'):
  """Upload all the generated symbols for |board| to the crash server

  You can use in a few ways:
    * pass |board| to locate all of its symbols
    * pass |breakpad_dir| to upload all the symbols in there
    * pass |sym_paths| to upload specific symbols (or dirs of symbols)

  Symbols found by SymbolFinder are batched, filtered through
  SymbolDeduplicator, and handed to a single background UploadSymbol worker.
  Files that failed in the first pass are retried once, unless |retry| is
  false or ErrorLimitHit() says too many errors were seen. A
  SymbolDeduplicatorNotify process is always started and is given the dedupe
  queue, which is None when |dedupe_namespace| is not set.

  Args:
    board: The board whose symbols we wish to upload
    official: Use the official symbol server rather than the staging one
    server: Explicit server to post symbols to
    breakpad_dir: The full path to the breakpad directory where symbols live
    file_limit: The max file size of a symbol file before we try to strip it
    sleep: How long to sleep in between uploads
    upload_limit: If set, only upload this many symbols (meant for testing)
    sym_paths: Specific symbol files (or dirs of sym files) to upload,
      otherwise search |breakpad_dir|
    failed_list: Write the names of all sym files we did not upload; can be a
      filename or file-like object.
    root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set)
    retry: Whether we should retry failures.
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    product_name: A string for stats purposes. Usually 'ChromeOS' or 'Android'.

  Returns:
    The number of errors that were encountered.

  Raises:
    ValueError: If |sym_paths| is empty and neither |breakpad_dir| nor |root|
      is given.
  """
  # An explicit |server| wins; otherwise |official| selects the default URL.
  if server is None:
    if official:
      upload_url = OFFICIAL_UPLOAD_URL
    else:
      logging.warning('unofficial builds upload to the staging server')
      upload_url = STAGING_UPLOAD_URL
  else:
    upload_url = server

  if sym_paths:
    logging.info('uploading specified symbols to %s', upload_url)
  else:
    # No explicit paths: fall back to the (possibly derived) breakpad dir.
    if breakpad_dir is None:
      if root is None:
        raise ValueError('breakpad_dir requires root to be set')
      breakpad_dir = os.path.join(
          root,
          cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip('/'))
    logging.info('uploading all symbols to %s from %s', upload_url,
                 breakpad_dir)
    sym_paths = [breakpad_dir]

  # We use storage_query to ask the server about existing symbols. The
  # storage_notify_proc process is used to post updates to the server. We
  # cannot safely share the storage object between threads/processes, but
  # we also want to minimize creating new ones as each object has to init
  # new state (like server connections).
  storage_query = None
  if dedupe_namespace:
    dedupe_limit = DEDUPE_LIMIT
    dedupe_queue = multiprocessing.Queue()
    try:
      with timeout_util.Timeout(DEDUPE_TIMEOUT):
        storage_query = isolateserver.get_storage_api(constants.ISOLATESERVER,
                                                      dedupe_namespace)
    except Exception:
      # Best effort: on failure storage_query stays None and we carry on.
      logging.warning('initializing dedupe server connection failed',
                      exc_info=True)
  else:
    # Without dedupe, every symbol forms its own batch of one.
    dedupe_limit = 1
    dedupe_queue = None
  # Can't use parallel.BackgroundTaskRunner because that'll create multiple
  # processes and we want only one the whole time (see comment above).
  storage_notify_proc = multiprocessing.Process(
      target=SymbolDeduplicatorNotify, args=(dedupe_namespace, dedupe_queue))

  # State shared with the background uploader: an int error counter, a float
  # error watermark, and a queue of the files that failed to upload.
  bg_errors = multiprocessing.Value('i')
  watermark_errors = multiprocessing.Value('f')
  failed_queue = multiprocessing.Queue()
  uploader = functools.partial(
      UploadSymbol, upload_url, product_name=product_name,
      file_limit=file_limit, sleep=sleep, num_errors=bg_errors,
      watermark_errors=watermark_errors, failed_queue=failed_queue,
      passed_queue=dedupe_queue)

  start_time = datetime.datetime.now()
  Counters = cros_build_lib.Collection(
      'Counters', upload_limit=upload_limit, uploaded_count=0, deduped_count=0)
  counters = Counters()

  def _Upload(queue, counters, files):
    """Queue the items SymbolDeduplicator yields for |files|; update counters.

    Args:
      queue: The background task queue each item is put on (as a 1-tuple).
      counters: The Counters object tracking limits and totals.
      files: The batch of symbol files to run through SymbolDeduplicator.
    """
    if not files:
      return

    missing_count = 0
    for item in SymbolDeduplicator(storage_query, files):
      missing_count += 1

      # Limit exhausted: keep counting yielded items, but stop queueing them.
      if counters.upload_limit == 0:
        continue

      queue.put((item,))
      counters.uploaded_count += 1
      if counters.upload_limit is not None:
        counters.upload_limit -= 1

    # Whatever SymbolDeduplicator did not yield is counted as deduped.
    counters.deduped_count += (len(files) - missing_count)

  try:
    storage_notify_proc.start()

    with osutils.TempDir(prefix='upload_symbols.') as tempdir:
      # For the first run, we collect the symbols that failed. If the
      # overall failure rate was low, we'll retry them on the second run.
      # Note that this loop rebinds the |retry| parameter: the first pass uses
      # the caller's value and the second pass never retries again.
      for retry in (retry, False):
        # We need to limit ourselves to one upload at a time to avoid the server
        # kicking in DoS protection. See these bugs for more details:
        # http://crbug.com/209442
        # http://crbug.com/212496
        with parallel.BackgroundTaskRunner(uploader, processes=1) as queue:
          dedupe_list = []
          for sym_file in SymbolFinder(tempdir, sym_paths):
            dedupe_list.append(sym_file)
            dedupe_len = len(dedupe_list)
            # Keep accumulating until the batch reaches dedupe_limit, or (when
            # an upload limit is set) until it reaches the remaining limit.
            if dedupe_len < dedupe_limit:
              if (counters.upload_limit is None or
                  dedupe_len < counters.upload_limit):
                continue

            # We check the counter before _Upload so that we don't keep talking
            # to the dedupe server. Otherwise, we end up sending one symbol at
            # a time to it and that slows things down a lot.
            if counters.upload_limit == 0:
              break

            _Upload(queue, counters, dedupe_list)
            dedupe_list = []
          # Flush whatever is left in the final (partial) batch.
          _Upload(queue, counters, dedupe_list)

        # See if we need to retry, and if we haven't failed too many times yet.
        if not retry or ErrorLimitHit(bg_errors, watermark_errors):
          break

        # Collect the failed files; the None we append marks where to stop.
        sym_paths = []
        failed_queue.put(None)
        while True:
          sym_path = failed_queue.get()
          if sym_path is None:
            break
          sym_paths.append(sym_path)

        if sym_paths:
          logging.warning('retrying %i symbols', len(sym_paths))
          # Make room in the limit for the files we are about to retry.
          if counters.upload_limit is not None:
            counters.upload_limit += len(sym_paths)
          # Decrement the error count in case we recover in the second pass.
          assert bg_errors.value >= len(sym_paths), \
                 'more failed files than errors?'
          bg_errors.value -= len(sym_paths)
        else:
          # No failed symbols, so just return now.
          break

    # If the user has requested it, save all the symbol files that we failed to
    # upload to a listing file. This should help with recovery efforts later.
    failed_queue.put(None)
    WriteQueueToFile(failed_list, failed_queue, breakpad_dir)

  finally:
    logging.info('finished uploading; joining background process')
    # None is the sentinel that ends SymbolDeduplicatorNotify's read loop.
    if dedupe_queue:
      dedupe_queue.put(None)

    # The notification might be slow going, so give it some time to finish.
    # We have to poll here as the process monitor is watching for output and
    # will kill us if we go silent for too long.
    # Each iteration joins for at most 60 seconds and counts as one minute.
    wait_minutes = DEDUPE_NOTIFY_TIMEOUT
    while storage_notify_proc.is_alive() and wait_minutes > 0:
      if dedupe_queue:
        qsize = str(dedupe_queue.qsize())
      else:
        qsize = '[None]'
      logging.info('waiting up to %i minutes for ~%s notifications',
                   wait_minutes, qsize)
      storage_notify_proc.join(60)
      wait_minutes -= 1

    # The process is taking too long, so kill it and complain.
    if storage_notify_proc.is_alive():
      logging.warning('notification process took too long')
      logging.PrintBuildbotStepWarnings()

      # Kill it gracefully first (traceback) before taking it down harder.
      pid = storage_notify_proc.pid
      for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
        logging.warning('sending %s to %i', signals.StrSignal(sig), pid)
        # The process might have exited between the last check and the
        # actual kill below, so ignore ESRCH errors.
        try:
          os.kill(pid, sig)
        except OSError as e:
          if e.errno == errno.ESRCH:
            break
          else:
            raise
        # Give the process a moment to react before escalating the signal.
        time.sleep(5)
        if not storage_notify_proc.is_alive():
          break

      # Drain the queue so we don't hang when we finish.
      try:
        logging.warning('draining the notify queue manually')
        with timeout_util.Timeout(60):
          try:
            # Stops at the first falsy item or once the queue reports empty.
            while dedupe_queue.get_nowait():
              pass
          except Queue.Empty:
            pass
      except timeout_util.TimeoutError:
        logging.warning('draining the notify queue failed; trashing it')
        dedupe_queue.cancel_join_thread()

  logging.info('uploaded %i symbols (%i were deduped) which took: %s',
               counters.uploaded_count, counters.deduped_count,
               datetime.datetime.now() - start_time)

  return bg_errors.value
def main():
  """Drive the isolate server load test described by the command line.

  Parses the options, then queues send_and_receive tasks on a thread pool:
  either exactly --items tasks, or tasks until the generated sizes add up to
  at least --max-size. The sorted pool results are passed to print_results()
  and, when --dump is given, written to that file as JSON.

  Returns:
    0 once the run completes. Invalid options are reported through
    parser.error().
  """
  colorama.init()

  opt_parser = optparse.OptionParser(
      description=sys.modules[__name__].__doc__)
  opt_parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default='',
      help='Isolate server to use')
  opt_parser.add_option(
      '--namespace',
      default='temporary%d-gzip' % time.time(),
      metavar='XX',
      help='Namespace to use on the server, default: %default')
  opt_parser.add_option(
      '--threads',
      type='int', default=16, metavar='N',
      help='Parallel worker threads to use, default:%default')
  graph.unit_option(
      opt_parser, '--items',
      default=0,
      help='Number of items to upload')
  graph.unit_option(
      opt_parser, '--max-size',
      default=0,
      help='Loop until this amount of data was transferred')
  graph.unit_option(
      opt_parser, '--mid-size',
      default=100 * 1024,
      help='Rough average size of each item, default:%default')
  opt_parser.add_option(
      '--columns',
      type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  opt_parser.add_option(
      '--buckets',
      type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  opt_parser.add_option(
      '--dump',
      metavar='FOO.JSON',
      help='Dumps to json file')
  opt_parser.add_option(
      '--dry-run',
      action='store_true',
      help='Do not send anything')
  opt_parser.add_option(
      '-v', '--verbose',
      action='store_true',
      help='Enable logging')
  opts, leftover = opt_parser.parse_args()

  log_level = logging.INFO if opts.verbose else logging.FATAL
  logging.basicConfig(level=log_level)

  if leftover:
    opt_parser.error('Unsupported args: %s' % leftover)
  # Exactly one of --max-size / --items has to be set.
  if bool(opts.max_size) == bool(opts.items):
    opt_parser.error(
        'Use one of --max-size or --items.\n'
        ' Use --max-size if you want to run it until NN bytes where '
        'transfered.\n'
        ' Otherwise use --items to run it for NN items.')
  if not opts.dry_run:
    opts.isolate_server = opts.isolate_server.rstrip('/')
    if not opts.isolate_server:
      opt_parser.error('--isolate-server is required.')

  summary = (opts.threads, opts.items, opts.max_size, opts.mid_size)
  print(' - Using %d thread, items=%d, max-size=%d, mid-size=%d' % summary)
  if opts.dry_run:
    print(' - %sDRY RUN MODE%s' % (colorama.Fore.GREEN, colorama.Fore.RESET))

  began = time.time()

  random_pool = Randomness()
  print(' - Generated pool after %.1fs' % (time.time() - began))

  progress = Progress([('index', 0), ('data', 0), ('size', opts.items)])
  storage = isolateserver.get_storage_api(opts.isolate_server, opts.namespace)
  compressed = isolateserver.is_namespace_with_compression(opts.namespace)
  do_item = functools.partial(
      send_and_receive, random_pool, opts.dry_run, compressed, storage,
      progress)

  # TODO(maruel): Handle Ctrl-C should:
  # - Stop adding tasks.
  # - Stop scheduling tasks in ThreadPool.
  # - Wait for the remaining ongoing tasks to complete.
  # - Still print details and write the json file.
  with threading_utils.ThreadPoolWithProgress(
      progress, opts.threads, opts.threads, 0) as pool:
    if opts.items:
      for _ in xrange(opts.items):
        pool.add_task(0, do_item, gen_size(opts.mid_size))
        progress.print_update()
    elif opts.max_size:
      # This one is approximate.
      queued_bytes = 0
      while True:
        item_size = gen_size(opts.mid_size)
        progress.update_item('', size=1)
        progress.print_update()
        pool.add_task(0, do_item, item_size)
        queued_bytes += item_size
        if queued_bytes >= opts.max_size:
          break
    results = sorted(pool.join())

  print('')
  print(' - Took %.1fs.' % (time.time() - began))
  print('')
  print_results(results, opts.columns, opts.buckets)
  if opts.dump:
    with open(opts.dump, 'w') as dump_file:
      json.dump(results, dump_file, separators=(',', ':'))
  return 0
def main():
  """Drive the isolate server load test described by the command line.

  Parses the options, then queues send_and_receive tasks on a thread pool:
  either exactly --items tasks, or tasks until the generated sizes add up to
  at least --max-size. The sorted pool results are passed to print_results()
  and, when --dump is given, written to that file as JSON.

  Returns:
    0 once the run completes. Invalid options are reported through
    parser.error().
  """
  colorama.init()

  opt_parser = optparse.OptionParser(
      description=sys.modules[__name__].__doc__)
  opt_parser.add_option(
      '-I', '--isolate-server',
      metavar='URL', default='',
      help='Isolate server to use')
  opt_parser.add_option(
      '--namespace',
      default='temporary%d-gzip' % time.time(),
      metavar='XX',
      help='Namespace to use on the server, default: %default')
  opt_parser.add_option(
      '--threads',
      type='int', default=16, metavar='N',
      help='Parallel worker threads to use, default:%default')
  graph.unit_option(
      opt_parser, '--items',
      default=0,
      help='Number of items to upload')
  graph.unit_option(
      opt_parser, '--max-size',
      default=0,
      help='Loop until this amount of data was transferred')
  graph.unit_option(
      opt_parser, '--mid-size',
      default=100*1024,
      help='Rough average size of each item, default:%default')
  opt_parser.add_option(
      '--columns',
      type='int', default=graph.get_console_width(), metavar='N',
      help='For histogram display, default:%default')
  opt_parser.add_option(
      '--buckets',
      type='int', default=20, metavar='N',
      help='Number of buckets for histogram display, default:%default')
  opt_parser.add_option(
      '--dump',
      metavar='FOO.JSON',
      help='Dumps to json file')
  opt_parser.add_option(
      '--dry-run',
      action='store_true',
      help='Do not send anything')
  opt_parser.add_option(
      '-v', '--verbose',
      action='store_true',
      help='Enable logging')
  opts, leftover = opt_parser.parse_args()

  log_level = logging.INFO if opts.verbose else logging.FATAL
  logging.basicConfig(level=log_level)

  if leftover:
    opt_parser.error('Unsupported args: %s' % leftover)
  # Exactly one of --max-size / --items has to be set.
  if bool(opts.max_size) == bool(opts.items):
    opt_parser.error(
        'Use one of --max-size or --items.\n'
        ' Use --max-size if you want to run it until NN bytes where '
        'transfered.\n'
        ' Otherwise use --items to run it for NN items.')
  if not opts.dry_run:
    opts.isolate_server = opts.isolate_server.rstrip('/')
    if not opts.isolate_server:
      opt_parser.error('--isolate-server is required.')

  summary = (opts.threads, opts.items, opts.max_size, opts.mid_size)
  print(' - Using %d thread, items=%d, max-size=%d, mid-size=%d' % summary)
  if opts.dry_run:
    print(' - %sDRY RUN MODE%s' % (colorama.Fore.GREEN, colorama.Fore.RESET))

  began = time.time()

  random_pool = Randomness()
  print(' - Generated pool after %.1fs' % (time.time() - began))

  progress = Progress([('index', 0), ('data', 0), ('size', opts.items)])
  storage = isolateserver.get_storage_api(opts.isolate_server, opts.namespace)
  compressed = isolateserver.is_namespace_with_compression(opts.namespace)
  do_item = functools.partial(
      send_and_receive, random_pool, opts.dry_run, compressed, storage,
      progress)

  # TODO(maruel): Handle Ctrl-C should:
  # - Stop adding tasks.
  # - Stop scheduling tasks in ThreadPool.
  # - Wait for the remaining ongoing tasks to complete.
  # - Still print details and write the json file.
  with threading_utils.ThreadPoolWithProgress(
      progress, opts.threads, opts.threads, 0) as pool:
    if opts.items:
      for _ in xrange(opts.items):
        pool.add_task(0, do_item, gen_size(opts.mid_size))
        progress.print_update()
    elif opts.max_size:
      # This one is approximate.
      queued_bytes = 0
      while True:
        item_size = gen_size(opts.mid_size)
        progress.update_item('', size=1)
        progress.print_update()
        pool.add_task(0, do_item, item_size)
        queued_bytes += item_size
        if queued_bytes >= opts.max_size:
          break
    results = sorted(pool.join())

  print('')
  print(' - Took %.1fs.' % (time.time() - began))
  print('')
  print_results(results, opts.columns, opts.buckets)
  if opts.dump:
    with open(opts.dump, 'w') as dump_file:
      json.dump(results, dump_file, separators=(',',':'))
  return 0
def UploadSymbols(board=None, official=False, server=None, breakpad_dir=None,
                  file_limit=DEFAULT_FILE_LIMIT, sleep=DEFAULT_SLEEP_DELAY,
                  upload_limit=None, sym_paths=None, failed_list=None,
                  root=None, retry=True, dedupe_namespace=None,
                  product_name='ChromeOS'):
  """Upload all the generated symbols for |board| to the crash server

  You can use in a few ways:
    * pass |board| to locate all of its symbols
    * pass |breakpad_dir| to upload all the symbols in there
    * pass |sym_paths| to upload specific symbols (or dirs of symbols)

  Symbols found by SymbolFinder are batched, filtered through
  SymbolDeduplicator, and handed to a single background UploadSymbol worker.
  Files that failed in the first pass are retried once, unless |retry| is
  false or ErrorLimitHit() says too many errors were seen. A
  SymbolDeduplicatorNotify process is always started and is given the dedupe
  queue, which is None when |dedupe_namespace| is not set.

  Args:
    board: The board whose symbols we wish to upload
    official: Use the official symbol server rather than the staging one
    server: Explicit server to post symbols to
    breakpad_dir: The full path to the breakpad directory where symbols live
    file_limit: The max file size of a symbol file before we try to strip it
    sleep: How long to sleep in between uploads
    upload_limit: If set, only upload this many symbols (meant for testing)
    sym_paths: Specific symbol files (or dirs of sym files) to upload,
      otherwise search |breakpad_dir|
    failed_list: Write the names of all sym files we did not upload; can be a
      filename or file-like object.
    root: The tree to prefix to |breakpad_dir| (if |breakpad_dir| is not set)
    retry: Whether we should retry failures.
    dedupe_namespace: The isolateserver namespace to dedupe uploaded symbols.
    product_name: A string for stats purposes. Usually 'ChromeOS' or 'Android'.

  Returns:
    The number of errors that were encountered.

  Raises:
    ValueError: If |sym_paths| is empty and neither |breakpad_dir| nor |root|
      is given.
  """
  # An explicit |server| wins; otherwise |official| selects the default URL.
  if server is None:
    if official:
      upload_url = OFFICIAL_UPLOAD_URL
    else:
      logging.warning('unofficial builds upload to the staging server')
      upload_url = STAGING_UPLOAD_URL
  else:
    upload_url = server

  if sym_paths:
    logging.info('uploading specified symbols to %s', upload_url)
  else:
    # No explicit paths: fall back to the (possibly derived) breakpad dir.
    if breakpad_dir is None:
      if root is None:
        raise ValueError('breakpad_dir requires root to be set')
      breakpad_dir = os.path.join(
          root,
          cros_generate_breakpad_symbols.FindBreakpadDir(board).lstrip(
              '/'))
    logging.info('uploading all symbols to %s from %s', upload_url,
                 breakpad_dir)
    sym_paths = [breakpad_dir]

  # We use storage_query to ask the server about existing symbols. The
  # storage_notify_proc process is used to post updates to the server. We
  # cannot safely share the storage object between threads/processes, but
  # we also want to minimize creating new ones as each object has to init
  # new state (like server connections).
  storage_query = None
  if dedupe_namespace:
    dedupe_limit = DEDUPE_LIMIT
    dedupe_queue = multiprocessing.Queue()
    try:
      with timeout_util.Timeout(DEDUPE_TIMEOUT):
        storage_query = isolateserver.get_storage_api(
            constants.ISOLATESERVER, dedupe_namespace)
    except Exception:
      # Best effort: on failure storage_query stays None and we carry on.
      logging.warning('initializing dedupe server connection failed',
                      exc_info=True)
  else:
    # Without dedupe, every symbol forms its own batch of one.
    dedupe_limit = 1
    dedupe_queue = None
  # Can't use parallel.BackgroundTaskRunner because that'll create multiple
  # processes and we want only one the whole time (see comment above).
  storage_notify_proc = multiprocessing.Process(
      target=SymbolDeduplicatorNotify, args=(dedupe_namespace, dedupe_queue))

  # State shared with the background uploader: an int error counter, a float
  # error watermark, and a queue of the files that failed to upload.
  bg_errors = multiprocessing.Value('i')
  watermark_errors = multiprocessing.Value('f')
  failed_queue = multiprocessing.Queue()
  uploader = functools.partial(UploadSymbol,
                               upload_url,
                               product_name=product_name,
                               file_limit=file_limit,
                               sleep=sleep,
                               num_errors=bg_errors,
                               watermark_errors=watermark_errors,
                               failed_queue=failed_queue,
                               passed_queue=dedupe_queue)

  start_time = datetime.datetime.now()
  Counters = cros_build_lib.Collection('Counters',
                                       upload_limit=upload_limit,
                                       uploaded_count=0,
                                       deduped_count=0)
  counters = Counters()

  def _Upload(queue, counters, files):
    """Queue the items SymbolDeduplicator yields for |files|; update counters.

    Args:
      queue: The background task queue each item is put on (as a 1-tuple).
      counters: The Counters object tracking limits and totals.
      files: The batch of symbol files to run through SymbolDeduplicator.
    """
    if not files:
      return

    missing_count = 0
    for item in SymbolDeduplicator(storage_query, files):
      missing_count += 1

      # Limit exhausted: keep counting yielded items, but stop queueing them.
      if counters.upload_limit == 0:
        continue

      queue.put((item, ))
      counters.uploaded_count += 1
      if counters.upload_limit is not None:
        counters.upload_limit -= 1

    # Whatever SymbolDeduplicator did not yield is counted as deduped.
    counters.deduped_count += (len(files) - missing_count)

  try:
    storage_notify_proc.start()

    with osutils.TempDir(prefix='upload_symbols.') as tempdir:
      # For the first run, we collect the symbols that failed. If the
      # overall failure rate was low, we'll retry them on the second run.
      # Note that this loop rebinds the |retry| parameter: the first pass uses
      # the caller's value and the second pass never retries again.
      for retry in (retry, False):
        # We need to limit ourselves to one upload at a time to avoid the server
        # kicking in DoS protection. See these bugs for more details:
        # http://crbug.com/209442
        # http://crbug.com/212496
        with parallel.BackgroundTaskRunner(uploader, processes=1) as queue:
          dedupe_list = []
          for sym_file in SymbolFinder(tempdir, sym_paths):
            dedupe_list.append(sym_file)
            dedupe_len = len(dedupe_list)
            # Keep accumulating until the batch reaches dedupe_limit, or (when
            # an upload limit is set) until it reaches the remaining limit.
            if dedupe_len < dedupe_limit:
              if (counters.upload_limit is None
                  or dedupe_len < counters.upload_limit):
                continue

            # We check the counter before _Upload so that we don't keep talking
            # to the dedupe server. Otherwise, we end up sending one symbol at
            # a time to it and that slows things down a lot.
            if counters.upload_limit == 0:
              break

            _Upload(queue, counters, dedupe_list)
            dedupe_list = []
          # Flush whatever is left in the final (partial) batch.
          _Upload(queue, counters, dedupe_list)

        # See if we need to retry, and if we haven't failed too many times yet.
        if not retry or ErrorLimitHit(bg_errors, watermark_errors):
          break

        # Collect the failed files; the None we append marks where to stop.
        sym_paths = []
        failed_queue.put(None)
        while True:
          sym_path = failed_queue.get()
          if sym_path is None:
            break
          sym_paths.append(sym_path)

        if sym_paths:
          logging.warning('retrying %i symbols', len(sym_paths))
          # Make room in the limit for the files we are about to retry.
          if counters.upload_limit is not None:
            counters.upload_limit += len(sym_paths)
          # Decrement the error count in case we recover in the second pass.
          assert bg_errors.value >= len(sym_paths), \
                 'more failed files than errors?'
          bg_errors.value -= len(sym_paths)
        else:
          # No failed symbols, so just return now.
          break

    # If the user has requested it, save all the symbol files that we failed to
    # upload to a listing file. This should help with recovery efforts later.
    failed_queue.put(None)
    WriteQueueToFile(failed_list, failed_queue, breakpad_dir)

  finally:
    logging.info('finished uploading; joining background process')
    # None is the sentinel that ends SymbolDeduplicatorNotify's read loop.
    if dedupe_queue:
      dedupe_queue.put(None)

    # The notification might be slow going, so give it some time to finish.
    # We have to poll here as the process monitor is watching for output and
    # will kill us if we go silent for too long.
    # Each iteration joins for at most 60 seconds and counts as one minute.
    wait_minutes = DEDUPE_NOTIFY_TIMEOUT
    while storage_notify_proc.is_alive() and wait_minutes > 0:
      if dedupe_queue:
        qsize = str(dedupe_queue.qsize())
      else:
        qsize = '[None]'
      logging.info('waiting up to %i minutes for ~%s notifications',
                   wait_minutes, qsize)
      storage_notify_proc.join(60)
      wait_minutes -= 1

    # The process is taking too long, so kill it and complain.
    if storage_notify_proc.is_alive():
      logging.warning('notification process took too long')
      logging.PrintBuildbotStepWarnings()

      # Kill it gracefully first (traceback) before taking it down harder.
      pid = storage_notify_proc.pid
      for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
        logging.warning('sending %s to %i', signals.StrSignal(sig), pid)
        # The process might have exited between the last check and the
        # actual kill below, so ignore ESRCH errors.
        try:
          os.kill(pid, sig)
        except OSError as e:
          if e.errno == errno.ESRCH:
            break
          else:
            raise
        # Give the process a moment to react before escalating the signal.
        time.sleep(5)
        if not storage_notify_proc.is_alive():
          break

      # Drain the queue so we don't hang when we finish.
      try:
        logging.warning('draining the notify queue manually')
        with timeout_util.Timeout(60):
          try:
            # Stops at the first falsy item or once the queue reports empty.
            while dedupe_queue.get_nowait():
              pass
          except Queue.Empty:
            pass
      except timeout_util.TimeoutError:
        logging.warning(
            'draining the notify queue failed; trashing it')
        dedupe_queue.cancel_join_thread()

  logging.info('uploaded %i symbols (%i were deduped) which took: %s',
               counters.uploaded_count, counters.deduped_count,
               datetime.datetime.now() - start_time)

  return bg_errors.value