Example #1
from itertools import repeat
from multiprocessing import Pool, cpu_count
from multiprocessing.pool import ThreadPool


def ingest(
        dataset,
        cls,
        skip_if_exists=True,
        multi_process=False,
        multi_threaded=False,
        cores=None):

    pool = None

    if multi_process:
        pool = Pool(cores or cpu_count())
        map_func = pool.imap_unordered
    elif multi_threaded:
        pool = ThreadPool(cores or cpu_count())
        map_func = pool.imap_unordered
    else:
        map_func = map

    cls_args = repeat(cls)
    skip_args = repeat(skip_if_exists)

    # exhaust the iterator so every item is actually processed: the builtin map
    # used in the serial case is lazy in Python 3, and consuming the pool's
    # iterator also surfaces any worker exceptions
    for _ in map_func(ingest_one, zip(dataset, cls_args, skip_args)):
        pass

    if pool is not None:
        # if we're ingesting using multiple processes or threads, the processing
        # should be parallel, but this method should be synchronous from the
        # caller's perspective
        pool.close()
        pool.join()
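
A minimal usage sketch for ingest(). The worker ingest_one and the record class below are hypothetical stand-ins, not part of the original; the one thing the sketch relies on is that zip() delivers a single (item, cls, skip_if_exists) tuple to each call.

from pathlib import Path

# Hypothetical record class and worker; neither appears in the original module.
class JsonRecord:
    def __init__(self, item):
        self.path = Path("out") / "{}.json".format(item["id"])
        self.item = item

    def exists(self):
        return self.path.exists()

    def save(self):
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(str(self.item))

def ingest_one(work_item):
    # zip(dataset, cls_args, skip_args) delivers one (item, cls, skip) tuple per call
    item, cls, skip_if_exists = work_item
    record = cls(item)
    if skip_if_exists and record.exists():
        return
    record.save()

if __name__ == "__main__":
    dataset = [{"id": i} for i in range(10)]
    ingest(dataset, JsonRecord)                        # serial
    ingest(dataset, JsonRecord, multi_threaded=True)   # one thread per core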
Example #2
    def test2_pReader(self):
        print("test2 pReader r")
        reader = PlainReader("{}/trace.txt".format(DAT_FOLDER))
        cH = CHeatmap()
        bpv = cH.get_breakpoints(reader, 'v', time_interval=1000)
        self.assertEqual(bpv[10], 10000)

        cH.heatmap(reader, 'v', "hit_ratio_start_time_end_time",
                   time_interval=1000, num_of_threads=os.cpu_count(), cache_size=2000,
                   figname="pReader_hr_st_et_LRU.png")
        cH.heatmap(reader, 'v', "rd_distribution",
                   time_interval=1000, num_of_threads=os.cpu_count(),
                   figname="pReader_rd_dist.png")
        cH.heatmap(reader, 'v', "future_rd_distribution",
                   time_interval=1000, num_of_threads=os.cpu_count(),
                   figname="pReader_frd_dist.png")
        cH.heatmap(reader, 'v', "hit_ratio_start_time_end_time",
                   time_interval=10000, algorithm="FIFO",
                   num_of_threads=os.cpu_count(), cache_size=2000,
                   figname="pReader_hr_st_et_FIFO.png")

        cH.diff_heatmap(reader, 'v', "hit_ratio_start_time_end_time",
                        time_interval=10000, cache_size=200,
                        algorithm1="LFU", algorithm2="Optimal",
                        cache_params2=None, num_of_threads=os.cpu_count(),
                        figname="pReader_diff_hr_st_et.png")
Example #3
def _export_datasets(dataset, features, classes, origin, sufix):
    from itertools import combinations
    from tasks.linker import params
    from multiprocessing.pool import Pool
    
    nfolds = 3
    folds = [i for i in range(nfolds)]
    partitions = [list(c) + list((set(folds) - set(c))) for c in combinations(folds, 2)]
    datasets = _fold(dataset, nfolds)
    
    for pt in partitions:
        training = []
        for i in pt[:-1]: 
            training.extend(datasets[i])
        test = datasets[pt[-1]]
         
        name_ = 'all{}{}{}'.format(origin, sufix + '_tr', pt[-1])
        filename = get_path('datasets', '{}.arff'.format(name_))
        classes_ = [next((v['short_name'] for k, v in params.items() if v['metadata_uri'] == c), None) for c in classes]    
        dataset_ = ([d, classes_] for d in _chunks(training, os.cpu_count()))
        with Pool(os.cpu_count()) as p:
            sets_ = p.starmap(_expand, dataset_)
        dataset_ = []
        for s in sets_:
            dataset_.extend(s)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.map(_flatten, dataset_)
        _save(dataset_, features, 'class', name_, filename)
        
        
        name_ = 'all{}{}{}'.format(origin, sufix + '_tt', pt[-1])
        filename = get_path('datasets', '{}.arff'.format(name_))
        dataset_ = ([l, classes_] for l in test)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.starmap(_concat, dataset_)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.map(_flatten, dataset_)
        _save_test(dataset_, features, 'class', name_, filename)
Example #4
    def test5_bReader(self):
        print("bReader")
        reader = BinaryReader("{}/trace.vscsi".format(DAT_FOLDER),
                              init_params={"label":6, "real_time":7, "fmt": "<3I2H2Q"})

        cH = CHeatmap()
        bpr = cH.get_breakpoints(reader, 'r', time_interval=1000000)
        self.assertEqual(bpr[10], 53)
        bpv = cH.get_breakpoints(reader, 'v', time_interval=1000)
        self.assertEqual(bpv[10], 10000)

        cH.heatmap(reader, 'r', "hit_ratio_start_time_end_time",
                   num_of_pixel_of_time_dim=100, num_of_threads=os.cpu_count(), cache_size=2000,
                   figname="hr_st_et_LRU_bReader.png")

        cH.heatmap(reader, 'r', "rd_distribution",
                   num_of_pixel_of_time_dim=1000, num_of_threads=os.cpu_count())
        cH.heatmap(reader, 'r', "future_rd_distribution",
                   num_of_pixel_of_time_dim=1000, num_of_threads=os.cpu_count())
        cH.heatmap(reader, 'r', "hit_ratio_start_time_end_time",
                   num_of_pixel_of_time_dim=100, algorithm="FIFO",
                   num_of_threads=os.cpu_count(), cache_size=200)

        cH.diff_heatmap(reader, 'r', "hit_ratio_start_time_end_time",
                        num_of_pixel_of_time_dim=24, cache_size=200,
                        algorithm1="LRU", algorithm2="Optimal",
                        cache_params2=None, num_of_threads=os.cpu_count())
Example #5
def run_in_parallel(input_list, args, func, kwargs_dict = None, workers = None, onebyone = False):
    '''
    Take an input list, divide into chunks and then apply a function to each of the chunks in parallel.
    '''
    if not workers:
        # divide by two to get the number of physical cores,
        # subtract one to leave one core free (but never drop below one worker)
        workers = max(1, int(os.cpu_count() / 2 - 1))
    elif workers == "all":
        workers = os.cpu_count()
    arg_to_parallelize = args.index("foo")
    if not onebyone:
        chunk_list = [input_list[i::workers] for i in range(workers)]
    else:
        #each element in the input list will constitute a chunk of its own.
        chunk_list = input_list
    pool = mp.Pool(workers)
    results = []
    for i in chunk_list:
        current_args = args.copy()
        current_args[arg_to_parallelize] = i
        if kwargs_dict:
            process = pool.apply_async(func, tuple(current_args), kwargs_dict)
        else:
            process = pool.apply_async(func, tuple(current_args))            
        results.append(process)
    pool.close()
    pool.join()
    # note: these are AsyncResult objects; call .get() on each to retrieve the return values
    return results
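
A hedged usage sketch for run_in_parallel(). The literal string "foo" in args marks the positional slot that each chunk replaces; count_long_words and the word list are made up for illustration, and the snippet's own module is assumed to import os and multiprocessing as mp.

def count_long_words(words, min_len):
    # Hypothetical worker: count words longer than min_len in one chunk.
    return sum(1 for w in words if len(w) > min_len)

if __name__ == "__main__":
    words = ["parallel", "pool", "chunk", "cpu", "scheduler"] * 1000
    # "foo" is the placeholder that gets swapped for each chunk of the input list
    async_results = run_in_parallel(words, ["foo", 4], count_long_words, workers=4)
    print(sum(r.get() for r in async_results))  # each entry is an AsyncResult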
Example #6
 def __init__(self, size):
     self.size = size
     self._time = pyfftw.empty_aligned(size, 'float64')
     self._freq = pyfftw.empty_aligned(size//2 + 1, 'complex128')
     self.fft = pyfftw.FFTW(self._time, self._freq, threads=os.cpu_count(),
                            direction='FFTW_FORWARD')
     self.ifft = pyfftw.FFTW(self._freq, self._time, threads=os.cpu_count(),
                             direction='FFTW_BACKWARD')
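
The class above only builds the FFTW plans; here is a standalone sketch of how such plans are typically driven, assuming pyfftw and NumPy are installed (the test signal is made up).

import os

import numpy as np
import pyfftw

size = 4096
time_buf = pyfftw.empty_aligned(size, 'float64')
freq_buf = pyfftw.empty_aligned(size // 2 + 1, 'complex128')
fft = pyfftw.FFTW(time_buf, freq_buf, threads=os.cpu_count() or 1,
                  direction='FFTW_FORWARD')

# Fill the aligned input buffer only after planning (the default FFTW_MEASURE
# flag may scribble over the buffers while planning), then call the plan.
time_buf[:] = np.random.default_rng(0).standard_normal(size)
spectrum = fft()  # executes the plan and returns a view of freq_buf
print(np.allclose(spectrum, np.fft.rfft(time_buf)))  # True: matches NumPy's rfft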
Example #7
def _main(filename):
    import os
    import csv
    import sys
    import math
    import concurrent.futures
    from datetime import datetime
    from datetime import timedelta
    from utils.db.schema import session_execute
    from utils.db.schema import session_close
    
    users = session_execute('select distinct user_id from "Twitter"."Trajectory" where trajectory_id in (select distinct trajectory_id from "Twitter"."Stop")', {})
    users = [r[0] for r in users]
    # session_close()
    
    with open(filename, 'w', encoding='utf-8', errors='replace') as csvfile:
        fieldnames = ['trajectory_id', 'tweet_id', 'created_at', 'coordinates', 'text', 'place', 'category_name', 'category_uri', 'section']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, dialect='unix')
        # writer.writeheader()        
        
        counter1 = 0
        counter2 = 0
        total = len(users)
        is_ = 0
        nsets = 3
        return_code = -1
        split_factor = 1 / nsets
        block_size = math.ceil(total * split_factor)
        blocks = math.ceil(total / block_size)
        partition = [users[i * block_size: (i + 1) * block_size] for i in range(0, blocks)]
        users = None
        
        for subset in partition:
            is_ += 1
            subtotal = len(subset)
            split_factor = 1 / os.cpu_count()
            block_size = math.ceil(subtotal * split_factor)
            blocks = math.ceil(subtotal / block_size)
            partition2 = [subset[i * block_size: (i + 1) * block_size] for i in range(0, blocks)]
            
            start = datetime.now()
            with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            # with concurrent.futures.ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:    
                
                tasks = []
                print('Start exporting.')
                for subset2 in partition2:
                    tasks.append(executor.submit(_task, subset2))
            
                print('Waiting for subprocesses to finish.')
                for t in concurrent.futures.as_completed(tasks):
                    rows, counter2_, counter1_ = t.result()
                    counter2 += counter2_
                    counter1 += counter1_
                    if return_code < 0 and counter2 > 0:
                        return_code = 0
                    for row in rows:
                        writer.writerow(row)
                    
                    print (str(counter2) + '/' + str(counter1) + '/' + str(total) + ' done. ') 
                
            current = datetime.now() 
            elapsed = (current - start).total_seconds() / 3600
            timeleft = ((total - counter1) * elapsed) / subtotal
            finish = current + timedelta(seconds=timeleft * 3600)
            print ('Subset {}/{} done.'.format(is_, nsets)) 
            print ('Elapsed time: {} hours'.format(round(elapsed, 1)))
            print ('Remaining time: {} hours'.format(round(timeleft, 1))) 
            print ('Estimated finish time: {}\n'.format(finish.strftime('%H:%M')))
             
    sys.exit(0) 
Example #8
def match(label):
    result = []
    query = (str(n) for n in re.findall(r'\d+', label))
    answer = [i for q in query for i in (_index[q] if q in _index else [])]
    answer = (x for x, c in collections.Counter(answer).items() if c > 1)
    patterns = [[_patterns[j], label] for j in answer]
    with Pool(os.cpu_count()) as p:
        results = p.map(submatch, _chunks2(patterns, os.cpu_count()))
    for r in results:
        result += r
    return result
Example #9
    def fitPeaks(self, fitwidth, nproc=0, par_func=None, **kwargs):
        """
        Fit all peaks found by peak finder, has the ability to split the peaks
        among multiple processors

        Parameters
        ----------
        fitwidth : int
            Sets the size of the fitting window
        nproc : int
            number of processors to use

        Returns
        -------
        list : list of DataFrames
            A list of DataFrames with each DataFrame holding the fits of
            one peak
        """
        blobs = self.peakfinder.blobs

        if nproc > 1:
            # make sure we don't try to use more processors than we have
            if nproc > os.cpu_count():
                nproc = os.cpu_count()
            # save the data type character
            dtype_char = self.stack.dtype.char
            # allocate shared memory for the array
            shared_array_base = mp.RawArray(dtype_char, self.stack.size)
            # assign the array; this operates through a memoryview
            # so it's very fast but has no checking
            mv_array = memoryview(shared_array_base)
            # we have to cast through bytes because of Py3 peculiarities
            mv_array.cast("B").cast(dtype_char)[:] = self.stack.ravel()
            # start pool, initialize shared array on each worker.
            with mp.Pool(nproc, _init_func,
                         (par_func, shared_array_base, self.stack.shape)) as p:
                print('Multiprocessing engaged with {} cores'.format(nproc))
                # farm out the tasks
                results = [p.apply_async(
                    par_func,
                    args=(fitwidth, blob, None),
                    kwds=kwargs
                ) for blob in blobs]
                # collect results
                fits = [pp.get() for pp in results]
        else:
            # serial version, just list comprehension
            fits = [par_func(
                fitwidth, blob, self.stack, **kwargs)
                for blob in blobs]

        # clear nones (i.e. unsuccessful fits)
        fits = [fit for fit in fits if fit is not None]
        self.fits = fits
        return fits
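
_init_func and par_func are defined elsewhere in this module. Pool initializers used with a shared RawArray typically just stash the buffer and shape in module-level globals so each worker can rebuild a zero-copy NumPy view; below is a hedged sketch of that pattern with made-up helper names (the real initializer may differ, e.g. in what it does with par_func).

import numpy as np

# Hypothetical module-level slots filled in once per worker process.
_shared_buf = None
_shared_shape = None

def _init_func(par_func, shared_array_base, shape):
    # Remember the shared buffer and stack shape; par_func is accepted only to
    # mirror the initargs passed by fitPeaks above.
    global _shared_buf, _shared_shape
    _shared_buf = shared_array_base
    _shared_shape = shape

def _shared_stack():
    # Rebuild an ndarray view onto the shared buffer without copying; the dtype
    # is inferred from the RawArray's typecode (an assumption, not checked here).
    dtype = np.dtype(memoryview(_shared_buf).format)
    return np.frombuffer(_shared_buf, dtype=dtype).reshape(_shared_shape)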
Example #10
 def compress():
     # Get core count, with a fallback for platforms where it is undetermined
     cores = str(os.cpu_count() or 1)  # thank you Python 3.4
     for file in os.listdir(localdir):
         if file.endswith(".exe") and file.startswith(("Q10", "Z10", "Z30", "Z3", "Passport")):
             print("\nCOMPRESSING: " + os.path.splitext(os.path.basename(file))[0] + ".exe @mmt" + cores)
             if amd64:
                 os.system(sevenzip + " a -mx9 -mmt" + cores + " -m0=lzma2:d128m:fb128 " + '"' + os.path.splitext(os.path.basename(file))[0]   + '.7z" "' + file + '"')
             else:
                 os.system(sevenzip + " a -mx9 -mmt" + cores + " " + '"' + os.path.splitext(os.path.basename(file))[0]   + '.7z" "' + file + '"')
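
The 7-Zip command above is assembled by string concatenation and run through os.system, which is fragile around quoting. A hedged sketch of the same invocation built with subprocess and an argument list (compress_one is a made-up helper; sevenzip, cores and amd64 correspond to the names used above):

import os
import subprocess

def compress_one(file, sevenzip, cores, amd64):
    # Same switches as above (-mx9, -mmt<cores>, optional lzma2 tuning), but
    # passed as a list so no shell quoting is needed.
    archive = os.path.splitext(os.path.basename(file))[0] + ".7z"
    cmd = [sevenzip, "a", "-mx9", "-mmt" + cores]
    if amd64:
        cmd.append("-m0=lzma2:d128m:fb128")
    cmd += [archive, file]
    subprocess.run(cmd, check=True)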
Example #11
def _export_datasets(dataset, features, classes, origin, sufix):
    from tasks.linker import params
    from multiprocessing.pool import Pool
    
    name_ = 'all' + str(origin) + sufix
    filename = get_path('datasets', '{}.arff'.format(name_))
    classes_ = [next((v['short_name'] for k, v in params.items() if v['metadata_uri'] == c), None) for c in classes]
        
    dataset_ = ([d, classes_] for d in _chunks(dataset, os.cpu_count()))
    with Pool(os.cpu_count()) as p:
        sets_ = p.starmap(_expand, dataset_)
    dataset_ = []
    for s in sets_:
        dataset_.extend(s)
    with Pool(os.cpu_count()) as p:
        dataset_ = p.map(_flatten, dataset_)
    _save(dataset_, features, 'class', name_, filename)
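
_chunks (like _expand and _flatten) is defined elsewhere in this project. A common shape for such a helper splits the list into one slice per CPU so starmap hands each worker a chunk; a hedged sketch, noting that the real helper may chunk by a fixed size instead:

def _chunks(items, n):
    # Split items into at most n roughly equal, contiguous slices.
    n = max(1, n)
    size = -(-len(items) // n)  # ceiling division
    return [items[i:i + size] for i in range(0, len(items), size)]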
Example #12
def _export_datasets(dataset, features, classes, origin, sufix):
    from copy import deepcopy 
    from tasks.linker import params
    from multiprocessing.pool import Pool
    
    for name in  ['schools', 'ies', 'attractions', 'museums', 'theaters', 'hospitals', 'hotels', 'offices']: 
        name_ = name + str(origin) + sufix
        filename = get_path('datasets', '{}.arff'.format(name_))
        class_ = next((v['metadata_uri'] for k, v in params.items() if v['short_name'] == name), None)
        index_ = classes.index(class_)
        
        dataset_ = ([l, index_] for l in deepcopy(dataset))
        with Pool(os.cpu_count()) as p:
            dataset_ = p.starmap(_crop, dataset_)
        with Pool(os.cpu_count()) as p:
            dataset_ = p.map(_flatten, dataset_)
        _save(dataset_, features, class_, name_, filename)
Example #13
 async def _execute_queuing_tasks(self, queue, loop, executor, func):
     """Execute queue tasks"""
     tasks = [
         self._set_task_queue_for_executor(queue, loop, executor, func)
         for i in range(os.cpu_count())
     ]
     return await asyncio.wait(tasks)
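
_set_task_queue_for_executor is not included in the snippet. Given that it is fanned out once per CPU core, a plausible shape is a coroutine that drains the queue and pushes each item through loop.run_in_executor; this is only a hedged guess at that method:

 async def _set_task_queue_for_executor(self, queue, loop, executor, func):
     """Hypothetical: drain the queue, running func on each item in the executor."""
     results = []
     while not queue.empty():
         item = queue.get_nowait()
         results.append(await loop.run_in_executor(executor, func, item))
     return results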
Example #14
    def __init__(self, uid, access_token, output_folder, album):
        self.uid = uid
        self.access_token = access_token
        self.output_folder = output_folder
        self.album = album
        self.cpu_count = os.cpu_count()

        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)

        # saved files are named '<index:04>_<aid>.mp3'; strip the 5-char index prefix and '.mp3' suffix to recover aids
        self.folder_aids = {x[5:-4] for x in os.listdir(self.output_folder)}

        url = (
            "https://api.vkontakte.ru/method/audio.get.json?"
            "uid={uid}&access_token={access_token}"
        ).format(uid=self.uid, access_token=self.access_token)
        response = urllib.request.urlopen(url)
        content = response.read()
        self._content = json.loads(content.decode('utf-8'))
        self.music_list = self._content['response']

        self.tracks_map = {}
        for ind, track in enumerate(reversed(self.music_list)):
            self.tracks_map[str(track['aid'])] = {
                'index': ind,
                'artist': unescape(track['artist']),
                'title': unescape(track['title']),
                'url': track['url'],
                'output_path': os.path.join(output_folder, '{}_{}.mp3'.format(format(ind, '04'), track['aid'])),
            }
Example #15
def load_patterns(tablename):
    global _patterns, _index
    
    print ('Loading patterns...')
    _patterns = (p for p in _db[tablename].find(fields={'_id':False, 'label':True, 'expansion':True}))
    with Pool(200) as p:
        _patterns = p.map(_prepare_rules, _patterns)
    print ('Done.')
    
    print ('Indexing patterns...')
    with Pool(os.cpu_count()) as p:
        keywords = p.map(_extract_keywords, _patterns)
    with Pool(os.cpu_count()) as p:
        indexes = (index for index in p.starmap(_subindex, _chunks(keywords, os.cpu_count())))
    for index in indexes:
        for k in index:
            if k in _index:
                _index[k].extend(index[k])
            else:
                _index[k] = index[k]
    print ('Done.')
Example #16
def get_num_workers(jobs):
    """
    Parameters
    ----------
    jobs How many jobs to be paralleled. Negative or 0 means number of cpu cores left.

    Returns
    -------
    How many subprocess to be used
    """
    num_workers = jobs
    if num_workers <= 0:
        num_workers = os.cpu_count() + jobs
    if num_workers < 0 or num_workers > os.cpu_count():
        raise RuntimeError("System doesn't have so many cpu cores: {} vs {}".format(jobs, os.cpu_count()))
    return num_workers
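
A short usage sketch of get_num_workers (the printed values depend on the machine; an 8-core host is assumed here):

import os

print(os.cpu_count())       # e.g. 8
print(get_num_workers(4))   # 4  -> use exactly four subprocesses
print(get_num_workers(0))   # 8  -> zero means "use all cores"
print(get_num_workers(-2))  # 6  -> negative means "leave that many cores free"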
Example #17
    def create_arg_parser() -> argparse.ArgumentParser:
        parser = argparse.ArgumentParser()

        parser.add_argument('-bp',
                            '--build-path',
                            help='Path to build directory, automatically detected if possible.',
                            metavar='<path>')

        parser.add_argument('-j',
                            '--jobs',
                            default=os.cpu_count() or 1,
                            type=int_argument_greater_than_zero,
                            help='Run N jobs in parallel. Defaults to number of logical cores '
                                 '(%(default)s detected).',
                            metavar='N')

        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            help='More verbose output.')

        subparsers = parser.add_subparsers(dest='command',
                                           help='-h or --help after <command> for more help',
                                           metavar='<command>')
        # Fix for bug introduced in 3.3.5. See http://bugs.python.org/issue9253#msg186387 .
        subparsers.required = True

        commands.analyze.add_argparse_subparser(subparsers, _SOURCE_PATHS_ARG_NAME)
        commands.assembler.add_argparse_subparser(subparsers, _SOURCE_PATHS_ARG_NAME)
        commands.check.add_argparse_subparser(subparsers, _SOURCE_PATHS_ARG_NAME)

        return parser
Example #18
 def __init__(self, key, repository, manifest, archive, cached_repo):
     super().__init__()
     self._inode_count = 0
     self.key = key
     self.repository = cached_repo
     self.items = {}
     self.parent = {}
     self.contents = defaultdict(dict)
     self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
     self.pending_archives = {}
     self.accounted_chunks = {}
     self.cache = ItemCache()
     data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
     logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
     self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
     if archive:
         self.process_archive(archive)
     else:
         # Create root inode
         self.parent[1] = self.allocate_inode()
         self.items[1] = self.default_dir
         for archive_name in manifest.archives:
             # Create archive placeholder inode
             archive_inode = self.allocate_inode()
             self.items[archive_inode] = self.default_dir
             self.parent[archive_inode] = 1
             self.contents[1][os.fsencode(archive_name)] = archive_inode
             self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
Example #19
def tests_for_sha(path, disable_blacklist=False):
    """List all tests wich evaluate in the repo, as a set of (attr, drvPath)"""
    num_jobs = 32
    # at this size, each job takes 1~1.7 GB mem
    max_workers = max(1, psutil.virtual_memory().available//(1700*1024*1024))
    # a job is also cpu hungry
    try:
        max_workers = min(max_workers, os.cpu_count())
    except TypeError:
        # os.cpu_count() can return None, in which case min() raises TypeError
        pass

    def eval(i):
        output = subprocess.check_output(['nix-instantiate', '--eval',
            '--json', '--strict', '-I', "nixpkgs="+str(path), enumerate_tests,
            '--arg', "jobIndex", str(i), '--arg', 'numJobs', str(num_jobs),
            '--arg', 'disableBlacklist', str(disable_blacklist).lower(),
            '--show-trace'], universal_newlines=True)
        return json.loads(output)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        evals = executor.map(eval, range(num_jobs))

    path = ("<nixpkgs/nixos/release.nix>", "--arg", "supportedSystems", "[builtins.currentSystem]")
    attrs = set()
    for partial in evals:
        for test in partial:
            b = Buildable(test["attr"], test["drv"], path=path)
            attrs.add(b)

    return attrs
Example #20
    def build_libuv(self):
        env = os.environ.copy()

        env['CFLAGS'] = ('-fPIC ' +
                         env.get('CFLAGS', '-O2') +
                         ' ' +
                         env.get('ARCHFLAGS', ''))

        j_flag = '-j{}'.format(os.cpu_count() or 1)

        if not os.path.exists(os.path.join(LIBUV_DIR, 'configure')):
            subprocess.run(['/bin/sh', 'autogen.sh'], cwd=LIBUV_DIR, env=env,
                           check=True)

        # Sometimes pip fails to preserve the timestamps correctly,
        # in which case, make will try to run autotools again.
        subprocess.run(['touch', 'configure.ac', 'aclocal.m4',
                        'configure', 'Makefile.am', 'Makefile.in'],
                       cwd=LIBUV_DIR, env=env, check=True)

        subprocess.run(['./configure'], cwd=LIBUV_DIR, env=env, check=True)

        c_flag = "CFLAGS={}".format(env['CFLAGS'])
        subprocess.run(['make', j_flag, c_flag],
                       cwd=LIBUV_DIR, env=env, check=True)
Example #21
def batch(func, iterable, chunksize=1000, max_value=None):
    bar = Bar(max_value=max_value).start()
    workers = int(config.get("WORKERS", os.cpu_count()))
    with ThreadPoolExecutor(max_workers=workers) as executor:
        for i, res in enumerate(executor.map(func, iterable)):
            bar.update(i)
        bar.finish()
Example #22
def run_tests(args):
    log_file = "test-suite.log"
    tasks = []
    task_queue = queue.Queue()

    prefix = ""
    if args.valgrind:
        prefix = " ".join([args.valgrind, args.valgrind_supp, VALGRIND_OPTS])
        log_file = "test-suite-memcheck.log"

    for test in args.tests.split():
        cmd = test
        if prefix:
            cmd = prefix + " " + cmd
        task = Task(test, cmd)
        tasks.append(task)
        task_queue.put(task)

    for i in range(os.cpu_count()):
        threading.Thread(target=run_test_program, args=(task_queue,)).start()

    task_queue.join()
    print_log(log_file, tasks)

    for task in tasks:
        if not task.success:
            sys.exit(1)
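
Task and run_test_program are defined elsewhere in this script. The worker is presumably a standard queue-consumer loop; a hedged sketch of that pattern follows (the subprocess/shlex handling and the Task attributes other than cmd and success are assumptions):

import queue
import shlex
import subprocess

def run_test_program(task_queue):
    # Hypothetical worker loop: run queued test commands until the queue drains.
    while True:
        try:
            task = task_queue.get_nowait()
        except queue.Empty:
            return
        proc = subprocess.run(shlex.split(task.cmd),
                              capture_output=True, text=True)
        task.output = proc.stdout + proc.stderr
        task.success = proc.returncode == 0
        task_queue.task_done()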
Example #23
 def __init__(self, outdir, lang="de"):
     self.lang = lang
     # Create output directory
     self.outdir = os.path.join(outdir, lang)
     os.makedirs(self.outdir, exist_ok=True)
     # Async executor
     self.executor = concurrent.futures.ThreadPoolExecutor(os.cpu_count())
     # Modified queue behaviour so new rules are run before reading new PO files,
     # in effect saving a ton of RAM
     self.executor._work_queue = queue.LifoQueue(512)
     # Load rules for language
     rules, rule_errors = importRulesForLanguage(lang)
     self.rules = sorted(rules, reverse=True)
     self.rule_errors = rule_errors
     # Get timestamp
     self.timestamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
      # Process lastdownload date (copied into the template)
     lastdownloadPath = os.path.join("cache", "lastdownload-{0}.txt".format(lang))
     if os.path.isfile(lastdownloadPath):
         with open(lastdownloadPath) as infile:
             self.downloadTimestamp = infile.read().strip()
     else:
         self.downloadTimestamp = None
     # Initialize translation ID/URL map
     translationFilemapCache = getTranslationFilemapCache(lang)
     self.translationURLs = {
         v["path"]:
             "https://crowdin.com/translate/khanacademy/{0}/enus-{1}".format(v["id"], lang)
         for v in translationFilemapCache.values()
     }
Example #24
def _prepare_dataset():
    import math
    import concurrent.futures
    from utils.db.schema import session_execute
    from utils.db.schema import session_close
    
    dataset = []
    trajectories = session_execute('select distinct trajectory_id from "Twitter"."Stop"', {})
    trajectories = [r[0] for r in trajectories]
    total = len(trajectories)
    session_close()
    print ('Trajectories loaded.')
    
    split_factor = 1 / 3; block_size = math.ceil(total * split_factor); blocks = math.ceil(total / block_size)
    partition = [trajectories[i * block_size: (i + 1) * block_size] for i in range(0, blocks)]
    trajectories = None
    
    counter1, counter2 = 0, 0
    for subset in partition:
        
        with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
            
            tasks = []
            for trajectory_id in subset: tasks.append(executor.submit(_prepare_trajectory, trajectory_id))
            
            print('Waiting for processes termination.')
            for t in concurrent.futures.as_completed(tasks):
                counter1 += 1; trajectory = t.result()
                if trajectory:      
                    dataset.append(trajectory)
                    if trajectory['datasets']:
                        counter2 += 1
                        if (counter2 % 200) == 0: print((str(trajectory['datasets']) + ' (' + str(counter2) + '/' + str(counter1) + '/' + str(total) + ')')) 
                        
    return dataset
Example #25
    def find_all_control_points():
        # Create control points for each ring
        # TODO: use order from settings
        project.control_points.clear()

        quickypano.lowpriority()

        if args.debug:
            exec_class = DummyExecutor
        else:
            exec_class = concurrent.futures.ThreadPoolExecutor

        with exec_class(os.cpu_count()) as executor:
            sett = project.settings
            find_cpoints_for_ring(sett.ROW_MIDDLE, sett.start_offset('MIDDLE'), executor)
            find_cpoints_for_ring(sett.ROW_DOWN, sett.start_offset('DOWN'), executor)
            find_cpoints_for_ring(sett.ROW_UP, sett.start_offset('UP'), executor)

            # Connect rings
            connect_rings('MIDDLE', 'DOWN', executor)
            connect_rings('MIDDLE', 'UP', executor)

            # TODO: zenith & nadir shots

        sys.stderr.flush()
        sys.stdout.flush()
        quickypano.normalpriority()

        log.info('Found a total of %i control points', len(project.control_points))
Example #26
 def __init__(self, config, atStorage, userStorage,
         filterRequest=False, threadPoolNumber=None):
     # init configurations
     self.config = config
     self.atStorage = atStorage or TestStorage()
     self.userStorage = userStorage
     self.filterRequest = filterRequest
      if threadPoolNumber:
          self.threadPoolNumber = threadPoolNumber
      else:
          cores = os.cpu_count() if hasattr(os, 'cpu_count') else None
          self.threadPoolNumber = (cores or 1) * 5
     try:
         self.ioLoop = tornado.ioloop.IOLoop.current()
     except:
         self.ioLoop = None
     self.isWsgi = True
     self.debug = True
     self._replyFnDict = {}
     # init apis
     self.application = Application(self)
     self.chat = Chat(self)
     self.common = Common(self)
     self.customerservice = CustomerService(self)
     self.menu = Menu(self)
     self.messages = Messages(self)
     self.oauth2 = Oauth2(self)
     self.statistics = Statistics(self)
     self.templatemsgs = TemplateMsgs(self)
     self.users = Users(self)
     self.utils = Utils(self)
     self.wrapped = Wrapped(self)
Example #27
 def cpu_count(self):
     '''Returns the number of CPUs in the system'''
     num = os.cpu_count()
     if num is None:
         raise NotImplementedError('cannot determine number of cpus')
     else:
         return num
Example #28
 def start_working(self, start_time, count):
     for i in range(os.cpu_count()):
         threading.Thread(target=self.workerbee_do, daemon=True, name="workerbee " + str(i + 1)).start()
     queue.join()
     # measure the elapsed time only after all queued work has been processed
     stop_all = time.time() - start_time
     with lock:
         print("Adding", str(count), "tracks took", round(stop_all, 5), "seconds")
Example #29
    def build_libuv(self):
        env = _libuv_build_env()

        # Make sure configure and friends are present in case
        # we are building from a git checkout.
        _libuv_autogen(env)

        # Copy the libuv tree to build/ so that its build
        # products don't pollute sdist accidentally.
        if os.path.exists(LIBUV_BUILD_DIR):
            shutil.rmtree(LIBUV_BUILD_DIR)
        shutil.copytree(LIBUV_DIR, LIBUV_BUILD_DIR)

        # Sometimes pip fails to preserve the timestamps correctly,
        # in which case, make will try to run autotools again.
        subprocess.run(
            ['touch', 'configure.ac', 'aclocal.m4', 'configure',
             'Makefile.am', 'Makefile.in'],
            cwd=LIBUV_BUILD_DIR, env=env, check=True)

        if 'LIBUV_CONFIGURE_HOST' in env:
            cmd = ['./configure', '--host=' + env['LIBUV_CONFIGURE_HOST']]
        else:
            cmd = ['./configure']
        subprocess.run(
            cmd,
            cwd=LIBUV_BUILD_DIR, env=env, check=True)

        j_flag = '-j{}'.format(os.cpu_count() or 1)
        c_flag = "CFLAGS={}".format(env['CFLAGS'])
        subprocess.run(
            ['make', j_flag, c_flag],
            cwd=LIBUV_BUILD_DIR, env=env, check=True)
Example #30
def list_configs():
    rc_list = []
    for c in range(1, os.cpu_count() - 1):
        rc = get_config('localhost_{}'.format(c))
        rc_list.append(rc)

    return rc_list
Example #31
    def __init__(self):
        # set up basic geometry of main window
        super().__init__()
        self.title = 'yt2mp3 - Your simple Youtube to MP3 converter'
        self.left = 0
        self.top = 0
        self.width = 900
        self.height = 385
        self.setWindowTitle(self.title)
        self.setGeometry(self.left, self.top, self.width, self.height)

        ###################################################################################
        # set up threading work backend, leaving at least one cpu for the OS and other crap
        ###################################################################################
        self.worker_thread_pool = ThreadPoolExecutor(
            max_workers=max(1,
                            os.cpu_count() - 1))
        self.gui_communicator_thread_pool = ThreadPoolExecutor(
            max_workers=max(1,
                            os.cpu_count() - 1))

        ###############################
        # set up layout of main window
        ###############################

        # first, some buttons to the right
        self.add_tab_button = QPushButton(
            '+ Tab')  # button for adding new tabs (see below)
        self.run_all_jobs_button = QPushButton(
            'Run all (?)'
        )  # button to run all runnable (not yet running and un-run) jobs
        self.stop_all_jobs_button = QPushButton(
            'Stop all (?)')  # button to terminate all running jobs

        # labels for summarizing job statuses
        self.previous_job_stati = None  # memorize the previously received status to avoid unnecessary rendering.
        self.status_widget = QWidget()
        self.job_status_layout = QGridLayout()
        self.status_widget.setLayout(self.job_status_layout)

        self.n_jobs_idle_label = QLabel('Idle:')
        self.n_jobs_idle_number_label = QLabel('?')
        self.n_jobs_submitted_label = QLabel('Submitted:')
        self.n_jobs_submitted_number_label = QLabel('?')
        self.n_jobs_running_label = QLabel('Running:')
        self.n_jobs_running_number_label = QLabel('?')
        self.n_jobs_finished_label = QLabel('Finished:')
        self.n_jobs_finished_number_label = QLabel('?')
        self.n_jobs_stopped_label = QLabel('Stopped:')
        self.n_jobs_stopped_number_label = QLabel('?')
        self.n_jobs_failed_label = QLabel('Failed:')
        self.n_jobs_failed_number_label = QLabel('?')

        self.job_status_layout.addWidget(self.n_jobs_idle_label, 0, 0)
        self.job_status_layout.addWidget(self.n_jobs_idle_number_label, 0, 1)
        self.job_status_layout.addWidget(self.n_jobs_submitted_label, 1, 0)
        self.job_status_layout.addWidget(self.n_jobs_submitted_number_label, 1,
                                         1)
        self.job_status_layout.addWidget(self.n_jobs_running_label, 2, 0)
        self.job_status_layout.addWidget(self.n_jobs_running_number_label, 2,
                                         1)
        self.job_status_layout.addWidget(self.n_jobs_stopped_label, 3, 0)
        self.job_status_layout.addWidget(self.n_jobs_stopped_number_label, 3,
                                         1)
        self.job_status_layout.addWidget(self.n_jobs_failed_label, 4, 0)
        self.job_status_layout.addWidget(self.n_jobs_failed_number_label, 4, 1)
        self.job_status_layout.addWidget(self.n_jobs_finished_label, 5, 0)
        self.job_status_layout.addWidget(self.n_jobs_finished_number_label, 5,
                                         1)

        self.button_panel = QWidget()  # widget and layout to group buttons
        button_layout = QVBoxLayout(self)

        button_layout.addWidget(self.add_tab_button)  # assemble buttons
        button_layout.addStretch()  # add some spacing
        button_layout.addWidget(self.status_widget)
        button_layout.addStretch()  # add some spacing
        button_layout.addWidget(self.run_all_jobs_button)
        button_layout.addWidget(self.stop_all_jobs_button)
        self.button_panel.setLayout(button_layout)

        # second, a tab panel for job specification to the left
        # list of icons for the tabs to use.
        self.tab_icons = {
            JobPanel.STATUS_IDLE: QIcon('resources/edit.png'),
            JobPanel.STATUS_SUBMITTED: QIcon('resources/stopwatch.png'),
            JobPanel.STATUS_RUNNING: QIcon('resources/download.png'),
            JobPanel.STATUS_STOPPED: QIcon('resources/stop.png'),
            JobPanel.STATUS_FINISHED: QIcon('resources/finish-flag.png'),
            JobPanel.STATUS_FAILED: QIcon('resources/alarm.png')
        }

        self.tab_status = {}  # keep track of each tab's job's status
        self.tab_icon_size = QSize(24, 24)
        self.tab_panel = QTabWidget()
        self.tab_panel.setTabsClosable(True)  # make tabs closable.
        self.tab_panel.setIconSize(self.tab_icon_size)
        self.tabs_created = 0  # count how many tabs have been created

        # assemble gui elements
        window_layout = QHBoxLayout(self)
        window_layout.addWidget(self.tab_panel)  # tab panel to the left
        window_layout.addWidget(self.button_panel)  # controls to the right
        self.setLayout(window_layout)

        # add initial tab from argparse_namespace input and show
        self.add_tab()

        # create process output monitor
        self.process_output_monitor = ProcessOutputMonitor(self.tab_panel)

        ################################
        # add functionality and controls
        ################################
        self.add_tab_button.clicked.connect(self.add_tab)
        self.tab_panel.tabCloseRequested.connect(self.close_tab)
        self.run_all_jobs_button.clicked.connect(self.run_all_jobs)
        self.stop_all_jobs_button.clicked.connect(self.stop_all_jobs)

        self.process_output_monitor.update_output.connect(
            self.handle_process_output)
        self.process_output_monitor.update_tabinfo.connect(
            self.handle_tabinfo_change)
        self.process_output_monitor.update_stati.connect(
            self.handle_process_status_summary)
        self.process_output_monitor.update_runnable_stoppable_count.connect(
            self.handle_runnable_stoppable_count)
        self.process_output_monitor.monitor_outputs()

        self.show()
Example #32
def main():
    cpu_count = os.cpu_count() or 8
    version = "20191107"
    packages = [
        "hummingbot",
        "hummingbot.client",
        "hummingbot.client.command",
        "hummingbot.client.config",
        "hummingbot.client.liquidity_bounty",
        "hummingbot.client.ui",
        "hummingbot.core",
        "hummingbot.core.data_type",
        "hummingbot.core.event",
        "hummingbot.core.management",
        "hummingbot.core.utils",
        "hummingbot.data_feed",
        "hummingbot.logger",
        "hummingbot.market",
        "hummingbot.market.bamboo_relay",
        "hummingbot.market.binance",
        "hummingbot.market.bittrex",
        "hummingbot.market.coinbase_pro",
        "hummingbot.market.ddex",
        "hummingbot.market.huobi",
        "hummingbot.market.idex",
        "hummingbot.market.radar_relay",
        "hummingbot.strategy",
        "hummingbot.strategy.arbitrage",
        "hummingbot.strategy.cross_exchange_market_making",
        "hummingbot.strategy.discovery",
        "hummingbot.strategy.pure_market_making",
        "hummingbot.templates",
        "hummingbot.wallet",
        "hummingbot.wallet.ethereum",
        "hummingbot.wallet.ethereum.uniswap",
        "hummingbot.wallet.ethereum.watcher",
        "hummingbot.wallet.ethereum.zero_ex",
    ]
    package_data = {
        "hummingbot": [
            "core/cpp/*", "client/liquidity_bounty/*.txt",
            "wallet/ethereum/zero_ex/zero_ex_coordinator_abi.json",
            "wallet/ethereum/zero_ex/zero_ex_coordinator_registry_abi.json",
            "wallet/ethereum/zero_ex/zero_ex_exchange_abi.json",
            "wallet/ethereum/token_abi/*.json",
            "wallet/ethereum/erc20_tokens.json", "VERSION",
            "templates/*TEMPLATE.yml"
        ],
    }
    install_requires = [
        "aioconsole",
        "aiokafka",
        "attrdict",
        "cytoolz",
        "eth-abi",
        "eth-account",
        "eth-hash",
        "eth-keyfile",
        "eth-keys",
        "eth-rlp",
        "eth-utils",
        "hexbytes",
        "kafka-python",
        "lru-dict",
        "parsimonious",
        "pycryptodome",
        "requests",
        "rlp",
        "toolz",
        "tzlocal",
        "urllib3",
        "web3",
        "websockets",
        "aiohttp",
        "async-timeout",
        "attrs",
        "certifi",
        "chardet",
        "cython==0.29.5",
        "idna",
        "idna_ssl",
        "multidict",
        "numpy",
        "pandas",
        "pytz",
        "pyyaml",
        "python-binance==0.6.9",
        "sqlalchemy",
        "ujson",
        "yarl",
    ]

    if "DEV_MODE" in os.environ:
        version += ".dev1"
        package_data[""] = ["*.pxd", "*.pyx", "*.h"]
        package_data["hummingbot"].append("core/cpp/*.cpp")

    if len(sys.argv) > 1 and sys.argv[1] == "build_ext":
        sys.argv.append(f"--parallel={cpu_count}")

    setup(
        name="hummingbot",
        version=version,
        description="CoinAlpha Hummingbot",
        url="https://github.com/CoinAlpha/hummingbot",
        author="Martin Kou",
        author_email="*****@*****.**",
        license="Proprietary",
        packages=packages,
        package_data=package_data,
        install_requires=install_requires,
        ext_modules=cythonize(["hummingbot/**/*.pyx"],
                              language="c++",
                              language_level=3,
                              nthreads=cpu_count),
        include_dirs=[
            np.get_include(),
        ],
        scripts=["bin/hummingbot.py", "bin/hummingbot_quickstart.py"],
    )
Example #33
        try:
            testing_names = os.listdir(args.testingSetPath)
        except OSError:
            log.error(
                "No such directory {}. Check if the directory exists".format(
                    args.testingSetPath))
            exit()
        for testing_name in testing_names:
            directory_name = os.path.join(args.testingSetPath, testing_name)
            class_path = imutils.imlist(directory_name)
            image_paths += class_path
    else:
        image_paths = [args.image]

    # Get the number of CPUs
    cpus = os.cpu_count()

    # Take the set size
    set_size = len(image_paths)

    # Calculates the number of subsets required for the quantity of cpus
    subset_size = int(numpy.ceil(set_size / cpus))

    # Divide the set into subsets according to the quantity of cpus
    log.info("Dividing feature detection and extraction between {} processes".
             format(cpus))
    image_paths_parts = [
        image_paths[i:i + subset_size] for i in range(0, set_size, subset_size)
    ]

    # Create feature extraction and keypoint detector objects
Example #34
async def main() -> None:
    parser = argparse.ArgumentParser(description="Runs CP2K regression test suite.")
    parser.add_argument("--mpiranks", type=int, default=2)
    parser.add_argument("--ompthreads", type=int)
    parser.add_argument("--maxtasks", type=int, default=os.cpu_count())
    parser.add_argument("--timeout", type=int, default=400)
    parser.add_argument("--maxerrors", type=int, default=50)
    parser.add_argument("--mpiexec", default="mpiexec")
    parser.add_argument("--keepalive", dest="keepalive", action="store_true")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--restrictdir", action="append")
    parser.add_argument("--workbasedir", type=Path)
    parser.add_argument("arch")
    parser.add_argument("version")
    cfg = Config(parser.parse_args())

    print("*************************** Testing started ****************************")
    start_time = time.perf_counter()

    # Query CP2K binary for feature flags.
    version_bytes, _ = await (await cfg.launch_exe("cp2k", "--version")).communicate()
    version_output = version_bytes.decode("utf8", errors="replace")
    flags_line = re.search(r" cp2kflags:(.*)\n", version_output)
    if not flags_line:
        print(version_output + "\nCould not parse feature flags.")
        sys.exit(1)
    else:
        flags = flags_line.group(1).split()

    print("\n----------------------------- Settings ---------------------------------")
    print(f"MPI ranks:      {cfg.mpiranks}")
    print(f"OpenMP threads: {cfg.ompthreads}")
    print(f"GPU devices:    {cfg.num_gpus}")
    print(f"Workers:        {cfg.num_workers}")
    print(f"Timeout [s]:    {cfg.timeout}")
    print(f"Work base dir:  {cfg.work_base_dir}")
    print(f"MPI exec:       {cfg.mpiexec}")
    print(f"Keepalive:      {cfg.keepalive}")
    print(f"Debug:          {cfg.debug}")
    print(f"ARCH:           {cfg.arch}")
    print(f"VERSION:        {cfg.version}")
    print(f"Flags:          " + ",".join(flags))

    # Have to copy everything upfront because the test dirs are not self-contained.
    print("------------------------------------------------------------------------")
    print("Copying test files ...", end="")
    shutil.copytree(cfg.cp2k_root / "tests", cfg.work_base_dir)
    print(" done")

    # Discover unit tests.
    unittest_batch = Batch("UNIT", cfg)
    unittest_batch.workdir.mkdir()
    unittest_glob = (cfg.cp2k_root / "exe" / cfg.arch).glob(f"*_unittest.{cfg.version}")
    for exe in unittest_glob:
        unittest_batch.unittests.append(Unittest(exe.stem, unittest_batch.workdir))

    # Read TEST_TYPES.
    test_types_fn = cfg.cp2k_root / "tests" / "TEST_TYPES"
    test_types: List[Optional[TestType]] = [None]  # test type zero
    lines = test_types_fn.read_text(encoding="utf8").split("\n")
    test_types += [TestType(l) for l in lines[1 : int(lines[0]) + 1]]

    # Read TEST_DIRS.
    batches: List[Batch] = [unittest_batch]
    test_dirs_fn = cfg.cp2k_root / "tests" / "TEST_DIRS"
    for line in test_dirs_fn.read_text(encoding="utf8").split("\n"):
        line = line.split("#", 1)[0].strip()
        if not line:
            continue
        batch = Batch(line, cfg)

        # Read TEST_FILES.
        test_files_fn = Path(batch.src_dir / "TEST_FILES")
        for line in test_files_fn.read_text(encoding="utf8").split("\n"):
            line = line.split("#", 1)[0].strip()
            if not line:
                continue
            batch.regtests.append(Regtest(line, test_types, batch.workdir))
        batches.append(batch)

    # Create async tasks.
    tasks = []
    num_restrictdirs = 0
    for batch in batches:
        if not batch.requirements_satisfied(flags, cfg.mpiranks):
            print(f"Skipping {batch.name} because its requirements are not satisfied.")
        elif not any(re.match(p, batch.name) for p in cfg.restrictdirs):
            num_restrictdirs += 1
        else:
            tasks.append(asyncio.get_event_loop().create_task(run_batch(batch, cfg)))

    if num_restrictdirs:
        print(f"Skipping {num_restrictdirs} test directories because of --restrictdir.")
    if not tasks:
        print("\nNo test directories selected, check --restrictdir filter.")
        sys.exit(1)

    # Wait for tasks to finish and print their results.
    print(f"Launched {len(tasks)} test directories and {cfg.num_workers} worker...\n")
    all_results: List[TestResult] = []
    with open(cfg.error_summary, "wt", encoding="utf8", errors="replace") as err_fh:
        for num_done, task in enumerate(asyncio.as_completed(tasks)):
            batch_result = await task
            all_results += batch_result.results
            print(f">>> {batch_result.batch.workdir}")
            print("\n".join(str(r) for r in batch_result.results))
            print(f"<<< {batch_result.batch.workdir} ({num_done + 1}", end="")
            print(f" of {len(tasks)}) done in {batch_result.duration:.2f} sec")
            sys.stdout.flush()
            err_fh.write("\n".join(r.error for r in batch_result.results if r.error))
            err_fh.flush()
            if sum(r.status != "OK" for r in all_results) > cfg.max_errors:
                print(f"\nGot more than {cfg.max_errors} errors, aborting...")
                break

    print("------------------------------- Errors ---------------------------------")
    print("\n".join(r.error for r in all_results if r.error))

    print("\n------------------------------- Timings --------------------------------")
    timings = sorted(r.duration for r in all_results)
    print('Plot: name="timings", title="Timing Distribution", ylabel="time [s]"')
    for p in (100, 99, 98, 95, 90, 80):
        v = percentile(timings, p / 100.0)
        print(f'PlotPoint: name="{p}th_percentile", plot="timings", ', end="")
        print(f'label="{p}th %ile", y={v:.2f}, yerr=0.0')

    print("\n------------------------------- Summary --------------------------------")
    total_duration = time.perf_counter() - start_time
    num_tests = len(all_results)
    num_failed = sum(r.status in ("TIMED OUT", "RUNTIME FAIL") for r in all_results)
    num_wrong = sum(r.status == "WRONG RESULT" for r in all_results)
    num_ok = sum(r.status == "OK" for r in all_results)
    print(f"Number of FAILED  tests {num_failed}")
    print(f"Number of WRONG   tests {num_wrong}")
    print(f"Number of CORRECT tests {num_ok}")
    print(f"Total number of   tests {num_tests}")
    summary = f"\nSummary: correct: {num_ok} / {num_tests}"
    summary += f"; wrong: {num_wrong}" if num_wrong > 0 else ""
    summary += f"; failed: {num_failed}" if num_failed > 0 else ""
    summary += f"; {total_duration/60.0:.0f}min"
    print(summary)
    print("Status: " + ("OK" if num_ok == num_tests else "FAILED") + "\n")

    print("*************************** Testing ended ******************************")
    sys.exit(num_tests - num_ok)
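
percentile() is a small helper defined elsewhere in this script; the list passed to it is already sorted and the second argument is a fraction in [0, 1]. A minimal nearest-rank sketch under those assumptions (the real helper may interpolate differently):

from typing import List

def percentile(sorted_values: List[float], fraction: float) -> float:
    # Nearest-rank percentile over an already-sorted list; 0.0 for an empty list.
    if not sorted_values:
        return 0.0
    index = int(round(fraction * (len(sorted_values) - 1)))
    return sorted_values[index]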
Example #35
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  random.seed(2077)
  log.set_level(FLAGS.log_level)

  n_jobs = os.cpu_count()
  assert FLAGS.max_threads != 0
  if FLAGS.max_threads > 0:
    n_jobs = FLAGS.max_threads

  mesh_directory = FLAGS.mesh_directory
  if mesh_directory[-1] == '/':
    mesh_directory = mesh_directory[:-1]

  files = glob.glob(f'{mesh_directory}/*/*/*.ply')

  if not files and not FLAGS.optimize_only:
    raise ValueError(f"Didn't find any ply files in {mesh_directory}. "
                     "Please make sure the directory structure is "
                     "[mesh_directory]/[splits]/[class names]/[ply files]")

  # Make the directories first because it's not threadsafe and also might fail.
  if files and not FLAGS.optimize_only:
    log.info('Creating directories...')
    for i, f in tqdm.tqdm(enumerate(files)):
      relpath = f.replace(mesh_directory, '')
      # log.info(f'Relpath: {relpath}')
      assert relpath[0] == '/'
      relpath = relpath[1:]
      split, synset = relpath.split('/')[:2]
      if not os.path.isdir(f'{FLAGS.dataset_directory}/{split}'):
        os.makedirs(f'{FLAGS.dataset_directory}/{split}')
      if not os.path.isdir(f'{FLAGS.dataset_directory}/{split}/{synset}'):
        os.mkdir(f'{FLAGS.dataset_directory}/{split}/{synset}')
    log.info('Making dataset...')
    # Flags can't be pickled:
    output_dirs = Parallel(n_jobs=n_jobs)(
        delayed(process_one)(f, mesh_directory, FLAGS.dataset_directory,
                             FLAGS.skip_existing, FLAGS.log_level) for f in tqdm.tqdm(files))
    log.info('Making dataset registry...')
  else:
    output_dirs = glob.glob(f'{FLAGS.dataset_directory}/*/*/*/surface_samples_from_dodeca.pts')
    output_dirs = [os.path.dirname(f) + '/' for f in output_dirs]
  output_dirs.sort()  # So randomize with a fixed seed always results in the same order
  splits = {x.split('/')[-4] for x in output_dirs}
  if 'optimized' in splits:
    raise ValueError(f'The keyword "optimized" cannot be used for a split name, it is reserved.')
  for split in splits:
    elements_of_split = [x for x in output_dirs if x.split('/')[-4] == split]
    with open(f'{FLAGS.dataset_directory}/{split}.txt', 'wt') as f:
      f.write('\n'.join(elements_of_split) + '\n')
  log.info('Done!')

  if FLAGS.optimize:
    log.info('Precomputing optimized tfrecord files...')
    opt_dir = f'{FLAGS.dataset_directory}/optimized'
    if FLAGS.trample_optimized and os.path.isdir(opt_dir):
      for f in os.listdir(opt_dir):
        if f.endswith('.tfrecords'):
          os.remove(os.path.join(opt_dir, f))
    if not os.path.isdir(opt_dir):
      os.mkdir(opt_dir)
    for split in splits:
      log.info(f'Optimizing split {split}...')
      elements_of_split = [x for x in output_dirs if x.split('/')[-4] == split]
      examples_per_shard=64
      # Make sure shards are totally random:
      random.shuffle(elements_of_split)
      n_shards = int(len(elements_of_split) / examples_per_shard)
      if len(elements_of_split) % examples_per_shard:
        n_shards += 1
      shard_dir = f'{FLAGS.dataset_directory}/optimized/{split}'
      if not os.path.isdir(shard_dir):
        os.mkdir(shard_dir)
      for shard_idx in tqdm.tqdm(range(n_shards)):
        shard_name = f'{shard_dir}/{split}-%.5d-of-%.5d.tfrecords' % (shard_idx, n_shards)
        if not FLAGS.trample_optimized and os.path.isfile(shard_name):
          continue
        start_idx = shard_idx * examples_per_shard
        end_idx = (shard_idx + 1) * examples_per_shard
        options = tf.io.TFRecordOptions(tf.compat.v1.io.TFRecordCompressionType.GZIP)
        with tf.io.TFRecordWriter(shard_name, options=options) as writer:
          to_process = elements_of_split[start_idx:end_idx]
          serialized = Parallel(n_jobs=n_jobs)(delayed(serialize)(d, FLAGS.log_level)
                for d in to_process)
          for s in serialized:
            writer.write(s)
Example #36
def create_all_dags(args, neural_factory):
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
        # normalize_transcripts=False
    )

    N = len(data_layer)
    steps_per_epoch = math.ceil(N / (args.batch_size * args.iter_per_step * args.num_gpus))
    nemo.logging.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"],
    )

    multiply_batch_config = jasper_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config)

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        nemo.logging.warning("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"],
    )

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab)
    )

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    nemo.logging.info('================================')
    nemo.logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    nemo.logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    nemo.logging.info(
        f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}"
    )
    nemo.logging.info('================================')

    # Train DAG
    (audio_signal_t, a_sig_length_t, transcript_t, transcript_len_t,) = data_layer()
    processed_signal_t, p_length_t = data_preprocessor(input_signal=audio_signal_t, length=a_sig_length_t)

    if multiply_batch_config:
        (processed_signal_t, p_length_t, transcript_t, transcript_len_t,) = multiply_batch(
            in_x=processed_signal_t, in_x_len=p_length_t, in_y=transcript_t, in_y_len=transcript_len_t,
        )

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(input_spec=processed_signal_t)

    encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t, length=p_length_t)
    log_probs_t = jasper_decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(
        log_probs=log_probs_t, targets=transcript_t, input_length=encoded_len_t, target_length=transcript_len_t,
    )

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress, labels=vocab),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir, load_from_folder=args.load_dir, step_freq=args.checkpoint_save_freq,
    )

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        (audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e,) = eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(input_signal=audio_signal_e, length=a_sig_length_e)
        encoded_e, encoded_len_e = jasper_encoder(audio_signal=processed_signal_e, length=p_length_e)
        log_probs_e = jasper_decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(
            log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e,
        )

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[loss_e, predictions_e, transcript_e, transcript_len_e,],
            user_iter_callback=partial(process_evaluation_batch, labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch, tag=tagname),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer,
        )

        callbacks.append(eval_callback)
    return loss_t, callbacks, steps_per_epoch
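
The sizing logic above reduces to two small calculations: split the host's CPUs across distributed ranks to get a per-data-layer worker count, and derive steps_per_epoch from the effective global batch size. A minimal standalone sketch of that arithmetic (the concrete numbers are hypothetical, not taken from any real run):

import math
import os

def loader_workers(world_size):
    # Spread the host's CPUs across distributed ranks; never drop below 1 worker.
    return max((os.cpu_count() or 1) // world_size, 1)

def steps_per_epoch(num_examples, batch_size, iter_per_step, num_gpus):
    # One optimizer step consumes batch_size * iter_per_step samples on each of num_gpus processes.
    return math.ceil(num_examples / (batch_size * iter_per_step * num_gpus))

# Hypothetical values for illustration only.
print(loader_workers(world_size=4))
print(steps_per_epoch(num_examples=281241, batch_size=32, iter_per_step=1, num_gpus=8))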
Exemplo n.º 37
0
def main(args):

    # Prepare for distributed training
    yolo.init_distributed_mode(args)
    begin_time = time.time()
    print(time.asctime(time.localtime(begin_time)))

    device = torch.device(
        "cuda" if torch.cuda.is_available() and args.use_cuda else "cpu")
    cuda = device.type == "cuda"
    if cuda: yolo.get_gpu_prop(show=True)
    print("\ndevice: {}".format(device))

    # Automatic mixed precision
    args.amp = False
    if cuda and torch.__version__ >= "1.6.0":
        capability = torch.cuda.get_device_capability()[0]
        if capability >= 7:  # compute capability >= 7.0 (Volta/Turing or newer, e.g. V100, RTX 20-series)
            args.amp = True
            print("Automatic mixed precision (AMP) is enabled!")

    # ---------------------- prepare data loader ------------------------------- #

    # NVIDIA DALI: a much faster data loader.
    DALI = cuda and yolo.DALI and args.dali and (args.dataset == "coco")

    # The code below is for COCO 2017 dataset
    # If you're using VOC dataset or COCO 2012 dataset, remember to revise the code
    splits = ("train2017", "val2017")
    file_roots = [os.path.join(args.data_dir, x) for x in splits]
    ann_files = [
        os.path.join(args.data_dir, "annotations/instances_{}.json".format(x))
        for x in splits
    ]
    if DALI:
        # Currently only supports the COCO dataset; works with distributed training

        # DALICOCODataLoader behaves like PyTorch's DataLoader.
        # It combines Dataset, DataLoader and DataPrefetcher, so it yields CUDA tensors directly.
        print("Nvidia DALI is utilized!")
        d_train = yolo.DALICOCODataLoader(file_roots[0],
                                          ann_files[0],
                                          args.batch_size,
                                          collate_fn=yolo.collate_wrapper,
                                          drop_last=True,
                                          shuffle=True,
                                          device_id=args.gpu,
                                          world_size=args.world_size)

        d_test = yolo.DALICOCODataLoader(file_roots[1],
                                         ann_files[1],
                                         args.batch_size,
                                         collate_fn=yolo.collate_wrapper,
                                         device_id=args.gpu,
                                         world_size=args.world_size)
    else:
        #transforms = yolo.RandomAffine((0, 0), (0.1, 0.1), (0.9, 1.1), (0, 0, 0, 0))
        dataset_train = yolo.datasets(args.dataset,
                                      file_roots[0],
                                      ann_files[0],
                                      train=True)
        dataset_test = yolo.datasets(args.dataset,
                                     file_roots[1],
                                     ann_files[1],
                                     train=True)  # set train=True for eval

        if args.distributed:
            sampler_train = torch.utils.data.distributed.DistributedSampler(
                dataset_train)
            sampler_test = torch.utils.data.distributed.DistributedSampler(
                dataset_test)
        else:
            sampler_train = torch.utils.data.RandomSampler(dataset_train)
            sampler_test = torch.utils.data.SequentialSampler(dataset_test)

        batch_sampler_train = yolo.GroupedBatchSampler(
            sampler_train,
            dataset_train.aspect_ratios,
            args.batch_size,
            drop_last=True)
        batch_sampler_test = yolo.GroupedBatchSampler(
            sampler_test, dataset_test.aspect_ratios, args.batch_size)

        args.num_workers = min(os.cpu_count() // 2, 8,
                               args.batch_size if args.batch_size > 1 else 0)
        data_loader_train = torch.utils.data.DataLoader(
            dataset_train,
            batch_sampler=batch_sampler_train,
            num_workers=args.num_workers,
            collate_fn=yolo.collate_wrapper,
            pin_memory=cuda)

        data_loader_test = torch.utils.data.DataLoader(
            dataset_test,
            batch_sampler=batch_sampler_test,
            num_workers=args.num_workers,
            collate_fn=yolo.collate_wrapper,
            pin_memory=cuda)

        # CUDA-aware wrapper around DataLoader: same interface, but prefetches batches to the GPU
        # (requires the DataLoader's pin_memory to be True)
        d_train = yolo.DataPrefetcher(
            data_loader_train) if cuda else data_loader_train
        d_test = yolo.DataPrefetcher(
            data_loader_test) if cuda else data_loader_test

    args.warmup_iters = max(1000, 3 * len(d_train))

    # -------------------------------------------------------------------------- #

    print(args)
    yolo.setup_seed(args.seed)

    model_sizes = {
        "small": (0.33, 0.5),
        "medium": (0.67, 0.75),
        "large": (1, 1),
        "extreme": (1.33, 1.25)
    }
    num_classes = len(d_train.dataset.classes)
    model = yolo.YOLOv5(num_classes,
                        model_sizes[args.model_size],
                        img_sizes=args.img_sizes).to(device)
    model.transformer.mosaic = args.mosaic

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = {"conv_weights": [], "biases": [], "others": []}
    for n, p in model_without_ddp.named_parameters():
        if p.requires_grad:
            if p.dim() == 4:
                params["conv_weights"].append(p)
            elif ".bias" in n:
                params["biases"].append(p)
            else:
                params["others"].append(p)

    args.accumulate = max(1, round(64 / args.batch_size))
    wd = args.weight_decay * args.batch_size * args.accumulate / 64
    optimizer = torch.optim.SGD(params["biases"],
                                lr=args.lr,
                                momentum=args.momentum,
                                nesterov=True)
    optimizer.add_param_group({
        "params": params["conv_weights"],
        "weight_decay": wd
    })
    optimizer.add_param_group({"params": params["others"]})
    lr_lambda = lambda x: math.cos(math.pi * x / (
        (x // args.period + 1) * args.period) / 2)**2 * 0.9 + 0.1

    print("Optimizer param groups: ", end="")
    print(", ".join("{} {}".format(len(v), k) for k, v in params.items()))
    del params
    torch.cuda.empty_cache()

    ema = yolo.ModelEMA(model)
    ema_without_ddp = ema.ema.module if args.distributed else ema.ema

    start_epoch = 0
    ckpts = yolo.find_ckpts(args.ckpt_path)
    if ckpts:
        checkpoint = torch.load(ckpts[-1],
                                map_location=device)  # load last checkpoint
        model_without_ddp.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        start_epoch = checkpoint["epochs"]
        ema_without_ddp.load_state_dict(checkpoint["ema"][0])
        ema.updates = checkpoint["ema"][1]
        del checkpoint
        torch.cuda.empty_cache()

    since = time.time()
    print("\nalready trained: {} epochs; to {} epochs".format(
        start_epoch, args.epochs))

    # ------------------------------- train ------------------------------------ #

    for epoch in range(start_epoch, args.epochs):
        print("\nepoch: {}".format(epoch + 1))

        if not DALI and args.distributed:
            sampler_train.set_epoch(epoch)

        A = time.time()
        args.lr_epoch = lr_lambda(epoch) * args.lr
        print("lr_epoch: {:.4f}, factor: {:.4f}".format(
            args.lr_epoch, lr_lambda(epoch)))
        iter_train = yolo.train_one_epoch(model, optimizer, d_train, device,
                                          epoch, args, ema)
        A = time.time() - A

        B = time.time()
        eval_output, iter_eval = yolo.evaluate(ema.ema, d_test, device, args)
        B = time.time() - B

        trained_epoch = epoch + 1
        if yolo.get_rank() == 0:
            print("training: {:.2f} s, evaluation: {:.2f} s".format(A, B))
            yolo.collect_gpu_info(
                "yolov5s",
                [args.batch_size / iter_train, args.batch_size / iter_eval])
            print(eval_output.get_AP())

            yolo.save_ckpt(model_without_ddp,
                           optimizer,
                           trained_epoch,
                           args.ckpt_path,
                           eval_info=str(eval_output),
                           ema=(ema_without_ddp.state_dict(), ema.updates))

            # Training creates many checkpoint files, so prune the oldest ones.
            ckpts = yolo.find_ckpts(args.ckpt_path)
            remaining = 60
            if len(ckpts) > remaining:
                for ckpt in ckpts[:len(ckpts) - remaining]:
                    os.remove(ckpt)

    # -------------------------------------------------------------------------- #

    print("\ntotal time of this training: {:.2f} s".format(time.time() -
                                                           since))
    if start_epoch < args.epochs:
        print("already trained: {} epochs\n".format(trained_epoch))
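
The lr_lambda above is a cosine schedule with partial restarts: within each period the factor decays from 1.0 toward 0.1, and at every period boundary it jumps back up, each restart peaking lower than the last. A minimal sketch of just that factor (the period and epoch count below are hypothetical):

import math

def cosine_restart_factor(epoch, period):
    # End of the period this epoch falls into, e.g. epoch 7 with period 4 -> 8.
    period_end = (epoch // period + 1) * period
    # Squared cosine decays from 1.0 at epoch 0 toward 0.1 near the period end.
    return math.cos(math.pi * epoch / period_end / 2) ** 2 * 0.9 + 0.1

for epoch in range(12):  # hypothetical 12-epoch run with a 4-epoch period
    print(epoch, round(cosine_restart_factor(epoch, period=4), 3))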
Exemplo n.º 38
0
import torchvision
import yaml

from utils.google_utils import gsutil_getsize
from utils.metrics import fitness
from utils.torch_utils import init_torch_seeds

# Settings
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320,
                    formatter={'float_kind': '{:11.5g}'.format
                               })  # format short g, %precision=5
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count() or 1, 8))  # NumExpr max threads


def set_logging(rank=-1):
    logging.basicConfig(
        format="%(message)s",
        level=logging.INFO if rank in [-1, 0] else logging.WARN)


def init_seeds(seed=0):
    # Initialize random number generator (RNG) seeds
    random.seed(seed)
    np.random.seed(seed)
    init_torch_seeds(seed)
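
A hedged usage sketch for the two helpers above; the rank value is hypothetical and would normally come from the distributed launcher. Ranks other than -1/0 are quietened to warnings, and the seed can be offset per process so augmentation streams stay distinct:

rank = -1  # hypothetical: -1 for single-process runs, otherwise the DDP rank
set_logging(rank)                  # only rank -1/0 keeps INFO-level logging
init_seeds(seed=0 + max(rank, 0))  # per-rank offset keeps RNG streams distinct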
Exemplo n.º 39
0
    for i in range(0, 5):
        start_time = time.perf_counter_ns()
        contextSwitch()
        resultsw.append(time.perf_counter_ns() - start_time)
        print(resultsw[i], "ns which is", resultsw[i] * (10 ** (-9)), "seconds")
    sumThreadsw = 0.0

    for i in range(0, 5):
        sumThreadsw = sumThreadsw + resultsw[i]

    averageTimesw = sumThreadsw / 5

    return averageTimesw


print("Number of CPUs:", os.cpu_count())

def migration():
    id1=1
    id2=2
    thread1 = thread("Thread" + str(id1), id1)
    print(str(thread1.thread_name) + " " + str(thread1.thread_ID))

    thread2 = thread("Thread" + str(id2), id2)
    print(str(thread2.thread_name) + " " + str(thread2.thread_ID))
    pid = 0
    aff1 = affinity.get_process_affinity_mask(pid)
    print("Thread1 is eligible to run on:", aff1)

    affinity.set_process_affinity_mask(pid, 100)
    print("CPU affinity mask is modified for process id %s" % pid)
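
The snippet above depends on the third-party affinity package. On Linux, the standard library offers the same control; a minimal sketch, assuming a Linux host (the CPU id chosen below is hypothetical):

import os

pid = 0  # 0 means "the calling process"
if hasattr(os, "sched_getaffinity"):
    eligible = os.sched_getaffinity(pid)
    print("Eligible CPUs before:", sorted(eligible))

    os.sched_setaffinity(pid, {0})       # pin to CPU 0 (hypothetical choice)
    print("Eligible CPUs after:", sorted(os.sched_getaffinity(pid)))

    os.sched_setaffinity(pid, eligible)  # restore the original mask
else:
    print("CPU affinity control is not available on this platform")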
Exemplo n.º 40
0
def read_config(path, analyse):
    config = {}

    if not os.path.exists(path):
        _LOGGER.error('%s does not exist' % path)
        exit(-1)
    try:
        with open(path, 'r') as configFile:
            config = json.load(configFile)
    except ValueError:
        _LOGGER.error('Failed to parse config file')
        exit(-1)
    except IOError:
        _LOGGER.error('Failed to read config file')
        exit(-1)

    for key in ['libmusly', 'paths']:
        if key not in config:
            _LOGGER.error("'%s' not in config file" % key)
            exit(-1)

    for key in ['musly', 'lms', 'db']:
        if key not in config['paths']:
            _LOGGER.error("'paths.%s' not in config file" % key)
            exit(-1)
        if (key == 'db' and not os.path.exists(config['paths'][key])) or (
                analyse and key == 'musly'
                and not os.path.exists(config['paths'][key])):
            _LOGGER.error("'%s' does not exist" % config['paths'][key])
            exit(-1)

    for key in config['paths']:
        if not config['paths'][key].endswith('/'):
            config['paths'][key] = config['paths'][key] + '/'

    if 'tmp' in config['paths'] and not os.path.exists(config['paths']['tmp']):
        _LOGGER.error("'%s' does not exist" % config['paths']['tmp'])
        exit(-1)

    config.setdefault('port', 11000)
    config.setdefault('host', '0.0.0.0')
    config.setdefault('threads', os.cpu_count())
    config.setdefault('extractlen', 30)
    config.setdefault('extractstart', -48)
    config.setdefault('styletracks', 1000)

    if 'genres' in config:
        config['all_genres'] = []
        for genres in config['genres']:
            for g in genres:
                if g not in config['all_genres']:
                    config['all_genres'].append(g)

    if 'ignoregenre' in config:
        if isinstance(config['ignoregenre'], list):
            ignore = []
            for item in config['ignoregenre']:
                ignore.append(metadata_db.normalize_artist(item))
            config['ignoregenre'] = ignore
        else:
            config['ignoregenre'] = [config['ignoregenre']]

    if 'normalize' in config:
        metadata_db.set_normalize_options(config['normalize'])

    return config
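
For reference, a minimal configuration that read_config would accept might look like the sketch below; every path is hypothetical, the db file must already exist, and 'threads' is deliberately omitted so it falls back to os.cpu_count():

minimal_config = {
    "libmusly": "libmusly.so",                   # hypothetical library name
    "paths": {
        "musly": "/usr/local/share/musly",       # hypothetical, checked only when analysing
        "lms": "/var/lib/squeezeboxserver",      # hypothetical
        "db": "/home/user/music-similarity.db",  # hypothetical, must exist
    },
    "port": 11000,
}
# Written to JSON and passed to read_config(path, analyse=False), this would come back with
# host='0.0.0.0', threads=os.cpu_count(), extractlen=30, extractstart=-48 and
# styletracks=1000 filled in, and every path suffixed with a trailing '/'.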
Exemplo n.º 41
0
from typing import Any, Dict, List, NamedTuple

import click
import yaml
from dotenv import load_dotenv
from pygitguardian.models import PolicyBreak

from .git_shell import get_git_root, is_git_dir
from .text_utils import display_error

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])

# max file size to accept
MAX_FILE_SIZE = 1048576

CPU_COUNT = os.cpu_count() or 1


class Attribute(NamedTuple):
    name: str
    default: Any


def replace_in_keys(data: Dict, old_char: str, new_char: str) -> None:
    """ Replace old_char with new_char in data keys. """
    for key in list(data):
        if old_char in key:
            new_key = key.replace(old_char, new_char)
            data[new_key] = data.pop(key)
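
A quick usage sketch for replace_in_keys, normalising dashed keys to the underscore form; the sample keys are hypothetical:

data = {"show-secrets": True, "exit-zero": False}  # hypothetical keys
replace_in_keys(data, "-", "_")
print(data)  # {'show_secrets': True, 'exit_zero': False}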

Exemplo n.º 42
0
import os
import time

import boost_histogram as bh
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
import pandas as pd
import uproot
from awkward import to_pandas

hep.style.use('ATLAS')

N_THREADS = os.cpu_count()
OUT_DIR = '../outputs/quick_script_outputs/'
DATAFILE = '../data/mc16a_wmintaunu_SLICES/*.root'
LUMI_DATA = 32988.1 + 3219.56
BRANCHES = [
    'MC_WZmu_el_pt_born',
    'MC_WZneutrino_pt_born',
    'MC_WZmu_el_phi_born',
    'MC_WZneutrino_phi_born',
    'MC_WZ_dilep_m_born',
    'mcChannelNumber',
    'weight_mc',
    'KFactor_weight_truth',
    'weight_pileup',
    'eventNumber',
]

# pull root data
Exemplo n.º 43
0
# var='zhangyu'
# if var == 'zhangyu':
#     print("good")

# testname = input("input a name\n")
# print("hi ",testname.upper())

pi = 3.14159
import inspect
inspect.getmembers(pi, predicate=inspect.isbuiltin)
pi.is_integer()
pi.as_integer_ratio()

import os  # importing module
print(os.getcwd())  # gets current working directory
os.cpu_count()
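
A slightly more explicit version of the float-introspection calls above, printing the results so the behaviour is visible; the sample values are arbitrary:

import os

print((4.0).is_integer())        # True
print((2.5).is_integer())        # False
print((2.5).as_integer_ratio())  # (5, 2)
print(os.getcwd())               # current working directory
print(os.cpu_count())            # machine-dependent; may be None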
Exemplo n.º 44
0
from sys import platform
if platform == 'darwin': # OSX backend does not support blitting
    import matplotlib
    matplotlib.use('Qt5Agg')
import pickle
import argparse
from multiprocessing import Pool
from SALib.sample import saltelli
from SALib.analyze import sobol
import numpy as np
import pandas as pd
from matplotlib import gridspec
import matplotlib.pyplot as plt
import analytic

default_N = os.cpu_count()
parser = argparse.ArgumentParser()
parser.add_argument("-N", type=int, default=1000,
                    help="obtain N*(2D+2) samples from parameter space")
parser.add_argument("-n", "--ncores", type=int, default=default_N,
                    help="number of cores, defaults to {} on this machine".format(default_N))
parser.add_argument("-o", "--filename", type=str, 
                    help="filename to write output to, no extension",
                    default='analysis')

def run_model(N, Rplus, v, loglam, logDelta, eta_alpha, theta_beta, beta,
              gamma, delta):
    '''This is a wrapper around the analytic traveling wave solution based on the
    parameter space that we are examining. It takes in each parameter we are
    testing and calls the solver.
    It then parses the result into whatever we are testing for sensitivity, and
Exemplo n.º 45
0
def log_execution_env_state(config_paths=None, logdir=None):
    """Log information about the execution environment.

    Files in 'config_paths' will be copied to directory 'logdir'. A common use-case
    is passing the path to a (compression) schedule YAML file. Storing a copy
    of the schedule file, with the experiment logs, is useful in order to
    reproduce experiments.

    Args:
        config_paths: path(s) to config file(s), used only when logdir is set
        logdir: log directory
    """
    def log_git_state():
        """Log the state of the git repository.

        It is useful to know what git tag we're using, and if we have outstanding code.
        """
        try:
            repo = Repo(os.path.join(os.path.dirname(__file__), '..', '..'))
            assert not repo.bare
        except InvalidGitRepositoryError:
            logger.debug(
                "Cannot find a Git repository.  You probably downloaded an archive of CACP."
            )
            return

        if repo.is_dirty():
            logger.debug("Git is dirty")
        try:
            branch_name = repo.active_branch.name
        except TypeError:
            branch_name = "None, Git is in 'detached HEAD' state"
        logger.debug("Active Git branch: %s", branch_name)
        logger.debug("Git commit: %s" % repo.head.commit.hexsha)

    try:
        num_cpus = len(os.sched_getaffinity(0))
    except AttributeError:
        num_cpus = os.cpu_count()
    logger.debug("Number of CPUs: %d", num_cpus)
    logger.debug("Number of GPUs: %d", torch.cuda.device_count())
    logger.debug("CUDA version: %s", torch.version.cuda)
    logger.debug("CUDNN version: %s", torch.backends.cudnn.version())
    logger.debug("Kernel: %s", platform.release())
    if HAVE_LSB:
        logger.debug("OS: %s",
                     lsb_release.get_lsb_information()['DESCRIPTION'])
    logger.debug("Python: %s", sys.version)
    try:
        logger.debug("PYTHONPATH: %s", os.environ['PYTHONPATH'])
    except KeyError:
        pass

    def _pip_freeze():
        return {
            x.key: x.version
            for x in sorted(pkg_resources.working_set,
                            key=operator.attrgetter('key'))
        }

    logger.debug("pip freeze: {}".format(_pip_freeze()))
    log_git_state()
    logger.debug("Command line: %s", " ".join(sys.argv))

    if (logdir is None) or (config_paths is None):
        return

    # clone configuration files to output directory
    configs_dest = os.path.join(logdir, 'configs')

    if isinstance(config_paths, str) or not hasattr(config_paths, '__iter__'):
        config_paths = [config_paths]
    for cpath in config_paths:
        os.makedirs(configs_dest, exist_ok=True)

        if os.path.exists(os.path.join(configs_dest, os.path.basename(cpath))):
            logger.debug('{} already exists in logdir'.format(
                os.path.basename(cpath) or cpath))
        else:
            try:
                shutil.copy(cpath, configs_dest)
            except OSError as e:
                logger.debug('Failed to copy config file: {}'.format(str(e)))
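
The CPU-count logic in the function above is a reusable pattern: os.sched_getaffinity(0) reports only the CPUs this process may actually use (e.g. under taskset or a container CPU limit), while os.cpu_count() is the portable fallback. A minimal sketch:

import os

def usable_cpu_count():
    """CPUs the current process may run on, falling back to the machine total."""
    try:
        return len(os.sched_getaffinity(0))
    except AttributeError:  # not available on Windows / macOS
        return os.cpu_count() or 1

print("Number of CPUs:", usable_cpu_count())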
Exemplo n.º 46
0
    def __init__(self, worker_procs, backend_socket):
        if isinstance(worker_procs, (int, str)):
            self._worker_procs = [str(worker_procs)]
        elif worker_procs is None:
            # from sos notebook with no value specified
            self._worker_procs = [str(min(max(os.cpu_count() // 2, 2), 8))]
        else:
            # should be a sequence
            self._worker_procs = worker_procs

        # the first item in self._worker_procs is always considered to be the localhost, which is where
        # the router lives. The rest of the hosts will be considered as remote workers.
        try:

            self._worker_hosts = []
            self._max_workers = []
            for worker_proc in self._worker_procs:
                if ":" in worker_proc:
                    worker_host, max_workers = worker_proc.rsplit(":", 1)
                    if not max_workers.isdigit():
                        raise ValueError(
                            f'Invalid worker specification {worker_proc}: number of processes expected after ":"'
                        )
                    self._worker_hosts.append(worker_host)
                    self._max_workers.append(int(max_workers))
                elif worker_proc.isdigit():
                    self._worker_hosts.append(get_localhost_ip())
                    self._max_workers.append(int(worker_proc))
                else:
                    self._worker_hosts.append(worker_proc)
                    # assume all nodes have the same number of cores, so reuse
                    # the master node's default for all computing nodes
                    self._max_workers.append(min(max(os.cpu_count() // 2, 2), 8))

            self._num_workers = [0 for x in self._worker_procs]
        except Exception:
            raise RuntimeError(
                f"Incorrect format for option -j ({self._worker_procs}), which should be one or more [host:]nproc"
            )

        self._local_workers = []
        self._remote_connections = []

        self._num_remote_workers = {}

        self._n_requested = 0
        self._n_processed = 0

        self._local_worker_alive_time = time.time()
        # self._last_pending_time = {}

        self._substep_requests = []
        self._task_requests = []
        self._step_requests = {}

        self._worker_backend_socket = backend_socket

        # ports of workers working for blocking workflow
        self._blocking_ports = set()

        self._available_ports = set()
        self._claimed_ports = set()

        self._last_pending_msg = {}

        # start a worker, note that we do not start all workers for performance
        # considerations
        self.start_worker()
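
To make the [host:]nproc convention concrete, a hedged sketch that applies the same parsing rules to a hypothetical -j value (the host names and counts are made up, and 'localhost' stands in for get_localhost_ip()):

import os

def parse_worker_procs(worker_procs):
    # Mirrors the branching above: "host:count", bare count, or bare host.
    default_workers = min(max((os.cpu_count() or 1) // 2, 2), 8)
    hosts, max_workers = [], []
    for spec in worker_procs:
        if ":" in spec:
            host, count = spec.rsplit(":", 1)
            hosts.append(host)
            max_workers.append(int(count))
        elif spec.isdigit():
            hosts.append("localhost")
            max_workers.append(int(spec))
        else:
            hosts.append(spec)
            max_workers.append(default_workers)
    return hosts, max_workers

# Hypothetical specification: 4 local workers plus two remote hosts.
print(parse_worker_procs(["4", "node1:8", "node2"]))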
Exemplo n.º 47
0
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataloader import default_collate
from tqdm import tqdm

import learnergy4video.utils.logging as l
from learnergy4video.utils.collate import collate_fn
from learnergy4video.models.binary import ConvRBM
from learnergy4video.models.real import GaussianConvRBM, SpecConvRBM
from learnergy4video.core import Dataset, Model
import time

import os
workers = os.cpu_count()
if workers is None:
    workers = 0
else:
    workers = max(workers - 2, 0)  # leave a couple of cores free, never go negative

logger = l.get_logger(__name__)

MODELS = {
    'conv_rbm': ConvRBM,
    'cont_conv_rbm': GaussianConvRBM,
    'spec_conv_rbm': SpecConvRBM
}


class SpecCDBN(Model):
Exemplo n.º 48
0
    parser.add_argument('--jobs',
                        '-j',
                        action="store",
                        default=default_num_jobs,
                        type=int,
                        help="Number of jobs to use for running the tests")
    args = parser.parse_args()

    print_progress = print_status_verbose if args.verbose else print_progress_succint

    custom_seastar_args = {
        "sstable_test": ['-c1'],
        'sstable_datafile_test': ['-c1'],
        "sstable_3_x_test": ['-c1'],
        "mutation_reader_test":
        ['-c{}'.format(min(os.cpu_count(), 3)), '-m2G'],
    }

    test_to_run = []
    modes_to_run = all_modes if not args.modes else args.modes
    for mode in modes_to_run:
        prefix = os.path.join('build', mode, 'tests')
        standard_args = '--overprovisioned --unsafe-bypass-fsync 1 --blocked-reactor-notify-ms 2000000'.split()
        seastar_args = '-c2 -m2G'.split()
        for test in other_tests:
            test_to_run.append(
                (os.path.join(prefix, test), 'other',
                 custom_seastar_args.get(test, seastar_args) + standard_args))
        for test in boost_tests:
            test_to_run.append(