Example #1
def dump_to_file(entries, package, version_code, version_name):
    lock_acquired = False
    while not lock_acquired:
        try:
            filename = dest + LOCK_PREFIX
            lock = Lock(filename, lifetime=datetime.timedelta(seconds=6000))  # expires in 100 minutes
            if not lock.is_locked:
                lock.lock(timeout=datetime.timedelta(milliseconds=350))
                lock_acquired = True
                with open(dest, 'a') as f:
                    first = True
                    if os.path.exists(dest) and os.path.getsize(dest) > 0:
                        first = False
                    for entry in entries:
                        entry_dict = entry.__dict__
                        entry_dict['package'] = package
                        entry_dict['versionCode'] = version_code
                        entry_dict['versionName'] = version_name
                        if first:
                            first = False
                        else:
                            f.write(",\n")
                        json.dump(entry_dict, f, indent=4)
                if lock.is_locked:
                    lock.unlock()
        except (AlreadyLockedError, TimeOutError):
            # some other process is analyzing the file; go ahead and look for another file
            pass
Example #2
 def dump_strings(self, entries):
     lock_acquired = False
     while not lock_acquired:
         try:
             filename = self.strings_dest + LOCK_PREFIX
             lock = Lock(filename,
                         lifetime=datetime.timedelta(
                              seconds=6000))  # expires in 100 minutes
             if not lock.is_locked:
                 lock.lock(timeout=datetime.timedelta(milliseconds=350))
                 lock_acquired = True
                 with open(self.strings_dest, 'a') as f:
                     first = True
                     if os.path.exists(
                             self.strings_dest) and os.path.getsize(
                                 self.strings_dest) > 0:
                         first = False
                     for entry in entries:
                         entry_dict = entry.__dict__
                         if first:
                             first = False
                         else:
                             f.write("\n")
                         json.dump(entry_dict, f)
                 if lock.is_locked:
                     lock.unlock()
         except (AlreadyLockedError, TimeOutError):
             # some other process is analyzing the file; go ahead and look for another file
             pass
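The two dump methods above repeat the same acquire-retry-append-release dance. Below is a minimal, hedged sketch of that pattern factored into a standalone helper; it assumes the same `LOCK_PREFIX` suffix and flufl.lock exceptions used in the examples, and the helper name `append_json_lines` is illustrative rather than taken from the original code.

import datetime
import json

from flufl.lock import AlreadyLockedError, Lock, TimeOutError

LOCK_PREFIX = ".lock"  # assumed value; the examples above only reference the name


def append_json_lines(dest, records):
    """Append one JSON document per line to ``dest`` under an NFS-safe lock."""
    while True:
        lock = Lock(dest + LOCK_PREFIX,
                    lifetime=datetime.timedelta(minutes=10))
        try:
            # Give up quickly if another process holds the lock, then retry.
            lock.lock(timeout=datetime.timedelta(milliseconds=350))
        except (AlreadyLockedError, TimeOutError):
            continue
        try:
            with open(dest, "a") as f:
                for record in records:
                    json.dump(record, f)
                    f.write("\n")
        finally:
            lock.unlock(unconditionally=True)
        return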
Example #3
def acquire_lock_1(force, lock_file=None):
    """Try to acquire the master lock.

    :param force: Flag that controls whether to force acquisition of the lock.
    :type force: bool
    :param lock_file: Path to the lock file, otherwise `config.LOCK_FILE`.
    :type lock_file: str
    :return: The master lock.
    :raises: `TimeOutError` if the lock could not be acquired.
    """
    if lock_file is None:
        lock_file = config.LOCK_FILE
    lock = Lock(lock_file, LOCK_LIFETIME)
    try:
        lock.lock(timedelta(seconds=0.1))
        return lock
    except TimeOutError:
        if not force:
            raise
        # Force removal of lock first.
        hostname, pid, tempfile = lock.details
        os.unlink(lock_file)
        # Also remove any stale claim files.
        dname = os.path.dirname(lock_file)
        for fname in os.listdir(dname):
            fpath = os.path.join(dname, fname)
            if fpath.startswith(lock_file):
                os.unlink(fpath)
        return acquire_lock_1(force=False)
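A hedged usage sketch for `acquire_lock_1`: the caller names (`become_master`, `run_master_loop`) are illustrative placeholders, while `TimeOutError` and the unlock call follow the flufl.lock API used in the example above.

from flufl.lock import TimeOutError


def become_master(force=False):
    # Illustrative caller; acquire_lock_1() is the function defined above.
    try:
        lock = acquire_lock_1(force)
    except TimeOutError:
        print("Another process already holds the master lock")
        return
    try:
        run_master_loop()  # placeholder for the real work
    finally:
        lock.unlock(unconditionally=True)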
Example #4
def get_next_apk(apks_dir):
    """
    Gets the first available (not-locked) apk file and locks it

    :param apks_dir: directory to scan
    :return: a tuple containing the apk's path and the acquired lock
    :rtype: (str, lockfile.LockFile)
    """
    try:
        files = os.listdir(apks_dir)
    except FileNotFoundError:
        # folder doesn't exist
        return None, None
    for f in files:
        if not f.endswith(".apk"):
            continue
        f = os.path.join(apks_dir, f)
        try:
            # lock file should not exist
            filename = f + LOCK_PREFIX

            lock = Lock(filename, lifetime=datetime.timedelta(
                seconds=6000))  # expires in 100 minutes
            if not lock.is_locked:
                lock.lock(timeout=datetime.timedelta(milliseconds=350))
                if os.path.isfile(
                        f
                ):  # the original file could be deleted in the meantime
                    return f, lock
                if lock.is_locked:
                    lock.unlock()
        except (AlreadyLockedError, TimeOutError):
            # some other process is analyzing the file; go ahead and look for another file
            pass
    return None, None
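A hedged consumer sketch pairing `get_next_apk` with an explicit release; the `analyze` callable is a hypothetical callback, and everything else reuses the names from the example above.

def process_all_apks(apks_dir, analyze):
    # Illustrative driver loop: claim an apk, analyze it, release its lock.
    while True:
        apk_path, lock = get_next_apk(apks_dir)
        if apk_path is None:
            break  # no unlocked apk left (or the folder does not exist)
        try:
            analyze(apk_path)
        finally:
            if lock.is_locked:
                lock.unlock()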
Example #5
def load_json(path, require_exclusive=True, lock_path=None):
    """
    Shortcut for loading json from a file path.

    :param path: The full path to the file
    :type path: str
    :param require_exclusive: lock file for exclusive read
    :type require_exclusive: bool
    :param lock_path: path for the lock file to use
    :type lock_path: string
    :returns: loaded json
    :rtype: dict
    :raises: IOError, ValueError
    """
    lock = None
    if require_exclusive:
        if not lock_path:
            lock_path = get_lock_path(path)
        lock = Lock(lock_path)
        lock.lock()
    try:
        with open(path) as f:
            return json.load(f)
    finally:
        if lock:
            lock.unlock(unconditionally=True)
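A brief, hedged usage note for `load_json`: by default the read is serialized through a lock file derived by `get_lock_path`, while `require_exclusive=False` skips locking entirely. The path below is illustrative.

# Illustrative calls; the path is hypothetical.
state = load_json("/var/lib/myapp/state.json")            # exclusive, locked read
snapshot = load_json("/var/lib/myapp/state.json",
                     require_exclusive=False)             # plain, unlocked read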
Example #6
    def run_capsule(self, run_info, gpu_idx):
        lock_path = os.path.join(run_info['trial_dir'], 'lock')
        lock = Lock(lock_path, datetime.timedelta(days=365))
        lock.lock(timeout=datetime.timedelta(seconds=1))
        if not lock.is_locked:
            self.print_log('locking failed for', run_info['trial_id'])
            return None

        # run capsule

        env = {'CUDA_VISIBLE_DEVICES': str(gpu_idx),
               'INFR_TRIAL': run_info['trial_id'],
               'INFR_EXP_PATH': self.experiment_dir,
               'INFR_MODE': self.mode,
               'INFR_REDIRECT_IO': '1',
               'INFR_START_STATE': os.path.join(run_info['trial_dir'], 'start_state.json')}
        if self.cuda_sync:
            env['CUDA_LAUNCH_BLOCKING'] = '1'

        proc = subprocess.Popen([sys.executable, '-m', run_info['start_state']['module_name']],
                                env=env)

        self.print_log('started worker', proc.pid, 'for', run_info['trial_id'], self.mode)

        return {'trial_dir': run_info['trial_dir'],
                'trial_id': run_info['trial_id'],
                'start_at': time.time(),
                'lock': lock,
                'gpu_idx': gpu_idx,
                'proc': proc,
                'pid': proc.pid,
                'ret_code': None}
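The record returned by `run_capsule` carries the lock so it can be released once the worker exits. A minimal sketch of that cleanup step, assuming the same record layout; the helper name `reap_capsule` is illustrative and not part of the original class.

def reap_capsule(record):
    # Wait for the worker started by run_capsule(), then release its trial lock.
    record['ret_code'] = record['proc'].wait()
    if record['lock'].is_locked:
        record['lock'].unlock()
    return record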
Example #7
class NFSFileLock(object):

    def __init__(self, filename):
        self.lock_obj = Lock(filename + '.lock', timedelta(days = 999))

    def lock(self):
        while True:
            try:
                self.lock_obj.lock()
                break
            except OSError as error:
                if errno.ESTALE != error.errno:
                    raise

        # Disowning the lock allows us to delete the flufl.lock.Lock object
        # without it calling the object destructor, which releases the lock. We
        # want to be able to delete the object but keep the lock in order to
        # store the lock inside a file.
        self.lock_obj.disown()

    def unlock(self):
        self.lock_obj.unlock()

    def is_locked(self):
        return self.lock_obj.is_locked
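A hedged usage sketch for the `NFSFileLock` wrapper above; the shared path is illustrative. Because `lock()` disowns the underlying flufl lock, the caller is responsible for calling `unlock()` explicitly.

lock = NFSFileLock("/mnt/shared/jobs/queue.json")  # hypothetical shared file
lock.lock()
try:
    pass  # read or modify the shared file here
finally:
    lock.unlock()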
Example #8
 def test_acquire_lock_1_force(self):
     # Create the lock and lock it.
     my_lock = Lock(self.lock_file)
     my_lock.lock(timedelta(seconds=60))
     # Try to acquire it again with force.
     lock = master.acquire_lock_1(True, self.lock_file)
     self.assertTrue(lock.is_locked)
     lock.unlock()
Example #9
    def setupTest(self, param, testDataName):

        # Lock to prevent race condition
        lock_file = "lift/lib/.locks/setup.lock"
        lock = Lock(lock_file)
        lock.lifetime = timedelta(days=2)
        lock.lock()

        # ---------------------------------------------------------------------
        # Base path
        # for dataset
        self.dataset = os.getenv('PROJ_DATA_DIR', '')
        if self.dataset == '':
            self.dataset = os.path.expanduser("~/Datasets")
        self.dataset += "/" + testDataName
        # for temp
        self.temp = os.getenv('PROJ_TEMP_DIR', '')
        if self.temp == '':
            self.temp = os.path.expanduser("~/Temp")
        self.temp += "/" + testDataName
        # for volatile temp
        self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '')
        if self.volatile_temp == '':
            self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp"
        self.volatile_temp += "/" + testDataName

        # ---------------------------------------------------------------------
        # Path for data loading
        self.train_data = None  # block these as they should not be used
        self.train_mask = None  # block these as they should not be used
        self.debug = self.dataset + "/debug/" + self.prefix_dataset(param)

        # ---------------------------------------------------------------------
        # Path for the model learning
        resdir = os.getenv('PROJ_RES_DIR', '')
        if resdir == '':
            resdir = os.path.expanduser("~/Results")
        self.result = (resdir + "/" + self.getResPrefix(param) +
                       self.prefix_dataset(param) + self.prefix_patch(param) +
                       self.prefix_learning(param))
        # Check if the un-sorted prefix exists
        unsorted_hash_path = (resdir + "/" +
                              self.getResPrefix(param, do_sort=False) +
                              self.prefix_dataset(param, do_sort=False) +
                              self.prefix_patch(param, do_sort=False) +
                              self.prefix_learning(param, do_sort=False))

        if os.path.exists(unsorted_hash_path):
            shutil.copytree(unsorted_hash_path, self.result)
            shutil.rmtree(unsorted_hash_path)

        lock.unlock()
Example #10
 def test_master_state(self):
     my_lock = Lock(self.lock_file)
     # Mailman is not running.
     state, lock = master.master_state(self.lock_file)
     self.assertEqual(state, master.WatcherState.none)
     # Acquire the lock as if another process had already started the
     # master.  Use a timeout to avoid this test deadlocking.
     my_lock.lock(timedelta(seconds=60))
     try:
         state, lock = master.master_state(self.lock_file)
     finally:
         my_lock.unlock()
     self.assertEqual(state, master.WatcherState.conflict)
Example #11
 def test_master_state(self):
     my_lock = Lock(self.lock_file)
     # Mailman is not running.
     state, lock = master.master_state(self.lock_file)
     self.assertEqual(state, master.WatcherState.none)
     # Acquire the lock as if another process had already started the
     # master.
     my_lock.lock()
     try:
         state, lock = master.master_state(self.lock_file)
     finally:
         my_lock.unlock()
     self.assertEqual(state, master.WatcherState.conflict)
Example #12
 def test_master_state(self):
     my_lock = Lock(self.lock_file)
     # Mailman is not running.
     state, lock = master.master_state(self.lock_file)
     self.assertEqual(state, master.WatcherState.none)
     # Acquire the lock as if another process had already started the
     # master.
     my_lock.lock()
     try:
         state, lock = master.master_state(self.lock_file)
     finally:
         my_lock.unlock()
     self.assertEqual(state, master.WatcherState.conflict)
Example #13
    def setupTrain(self, param, setID):

        # Lock to prevent race condition
        lock_file = "lift/lib/.locks/setup.lock"
        lock = Lock(lock_file)
        lock.lifetime = timedelta(days=2)
        # print(lock.is_locked)
        lock.lock()

        # ---------------------------------------------------------------------
        # Base path
        # for dataset
        self.dataset = os.getenv('PROJ_DATA_DIR', '')
        if self.dataset == '':
            self.dataset = os.path.expanduser("~/Datasets")
        self.dataset += "/" + param.dataset.trainSetList[setID]
        # for temp
        self.temp = os.getenv('PROJ_TEMP_DIR', '')
        if self.temp == '':
            self.temp = os.path.expanduser("~/Temp")
        self.temp += "/" + param.dataset.trainSetList[setID]
        # for volatile temp
        self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '')
        if self.volatile_temp == '':
            self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp"
        self.volatile_temp += "/" + param.dataset.trainSetList[setID]

        # ---------------------------------------------------------------------
        # Path for the model learning
        resdir = os.getenv('PROJ_RES_DIR', '')
        if resdir == '':
            resdir = os.path.expanduser("~/Results")
        self.result = (resdir + "/" + self.getResPrefix(param) +
                       self.prefix_dataset(param) + self.prefix_patch(param) +
                       self.prefix_learning(param))
        if not os.path.exists(self.result):
            # Check if the un-sorted prefix exists
            unsorted_hash_path = (resdir + "/" +
                                  self.getResPrefix(param, do_sort=False) +
                                  self.prefix_dataset(param, do_sort=False) +
                                  self.prefix_patch(param, do_sort=False) +
                                  self.prefix_learning(param, do_sort=False))

            if os.path.exists(unsorted_hash_path):
                shutil.copytree(unsorted_hash_path, self.result)
                shutil.rmtree(unsorted_hash_path)

        lock.unlock()
Example #14
    def archive_message(mlist, message):
        """See `IArchiver`.

        This archiver saves messages into a maildir.
        """
        archive_dir = os.path.join(config.ARCHIVE_DIR, 'prototype')
        try:
            os.makedirs(archive_dir, 0o775)
        except OSError as error:
            # If this already exists, then we're fine
            if error.errno != errno.EEXIST:
                raise

        # Maildir will throw an error if the directories are partially created
        # (for instance the toplevel exists but cur, new, or tmp do not)
        # therefore we don't create the toplevel as we did above.
        list_dir = os.path.join(archive_dir, mlist.fqdn_listname)
        mailbox = Maildir(list_dir, create=True, factory=None)
        lock_file = os.path.join(
            config.LOCK_DIR, '{0}-maildir.lock'.format(mlist.fqdn_listname))
        # Lock the maildir as Maildir.add() is not threadsafe.  Don't use the
        # context manager because it's not an error if we can't acquire the
        # archiver lock.  We'll just log the problem and continue.
        #
        # XXX 2012-03-14 BAW: When we extend the chain/pipeline architecture
        # to other runners, e.g. the archive runner, it would be better to let
        # any TimeOutError propagate up.  That would cause the message to be
        # re-queued and tried again later, rather than being discarded as
        # happens now below.
        lock = Lock(lock_file)
        try:
            lock.lock(timeout=timedelta(seconds=1))
            # Add the message to the maildir.  The return value could be used
            # to construct the file path if necessary.  E.g.
            #
            # os.path.join(archive_dir, mlist.fqdn_listname, 'new',
            #              message_key)
            mailbox.add(message)
        except TimeOutError:
            # Log the error and go on.
            log.error('Unable to acquire prototype archiver lock for {0}, '
                      'discarding: {1}'.format(
                          mlist.fqdn_listname,
                          message.get('message-id', 'n/a')))
        finally:
            lock.unlock(unconditionally=True)
        # Can we return the URL of the archived message?
        return None
Example #15
    def archive_message(mlist, message):
        """See `IArchiver`.

        This archiver saves messages into a maildir.
        """
        archive_dir = os.path.join(config.ARCHIVE_DIR, 'prototype')
        try:
            os.makedirs(archive_dir, 0o775)
        except OSError as error:
            # If this already exists, then we're fine
            if error.errno != errno.EEXIST:
                raise

        # Maildir will throw an error if the directories are partially created
        # (for instance the toplevel exists but cur, new, or tmp do not)
        # therefore we don't create the toplevel as we did above.
        list_dir = os.path.join(archive_dir, mlist.fqdn_listname)
        mailbox = Maildir(list_dir, create=True, factory=None)
        lock_file = os.path.join(
            config.LOCK_DIR, '{0}-maildir.lock'.format(mlist.fqdn_listname))
        # Lock the maildir as Maildir.add() is not threadsafe.  Don't use the
        # context manager because it's not an error if we can't acquire the
        # archiver lock.  We'll just log the problem and continue.
        #
        # XXX 2012-03-14 BAW: When we extend the chain/pipeline architecture
        # to other runners, e.g. the archive runner, it would be better to let
        # any TimeOutError propagate up.  That would cause the message to be
        # re-queued and tried again later, rather than being discarded as
        # happens now below.
        lock = Lock(lock_file)
        try:
            lock.lock(timeout=timedelta(seconds=1))
            # Add the message to the maildir.  The return value could be used
            # to construct the file path if necessary.  E.g.
            #
            # os.path.join(archive_dir, mlist.fqdn_listname, 'new',
            #              message_key)
            mailbox.add(message)
        except TimeOutError:
            # Log the error and go on.
            log.error('Unable to acquire prototype archiver lock for {0}, '
                      'discarding: {1}'.format(
                          mlist.fqdn_listname,
                          message.get('message-id', 'n/a')))
        finally:
            lock.unlock(unconditionally=True)
        # Can we return the URL of the archived message?
        return None
Example #16
 def lock(self, timeout=None):
     while True:
         try:
             result = Lock.lock(self, timeout)
         except AlreadyLockedError:
             self._sleep()
         else:
             return result
Example #17
def set_lock(check_lock_file):
    check_lock = None
    if os.name == "posix":
        check_lock = Lock(check_lock_file)
        check_lock.lifetime = timedelta(hours=1)
        frameinfo = getframeinfo(currentframe())
        print("-- {}/{}: waiting to obtain lock --".format(
            frameinfo.filename, frameinfo.lineno))
        check_lock.lock()
        print(">> obtained lock for posix system<<")
    elif os.name == "nt":
        import filelock
        check_lock = filelock.FileLock(check_lock_file)
        check_lock.timeout = 100  # 100s
        check_lock.acquire()
        if check_lock.is_locked:
            print(">> obtained lock for windows system <<")
    else:
        print("Unknown operating system, lock unavailable")
    return check_lock
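A hedged sketch of pairing `set_lock` with the matching release: on POSIX the returned object is a flufl `Lock` (released with `unlock()`), on Windows a `filelock.FileLock` (released with `release()`). The lock path is illustrative.

import os

check_lock = set_lock("/tmp/myapp-setup.lock")  # hypothetical lock file
try:
    pass  # critical section protected on both platforms
finally:
    if check_lock is not None:
        if os.name == "posix":
            check_lock.unlock()
        elif os.name == "nt":
            check_lock.release()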
Example #18
class NFSLock:
    def __init__(self, path):
        # Specify the path to a file that will be used to synchronize the lock.
        # Per the flufl.lock documentation, use a file that does not exist.
        self._lock = Lock(path)

        # Locks have a lifetime (default 15 seconds) which is the period of time that the process expects
        # to keep the lock once it has been acquired. We set the lifetime to be 5 minutes as we expect
        # all operations that require locks to be completed within that time.
        self._lock.lifetime = timedelta(minutes=5)

        # Ensure multiple threads within a process run NFSLock operations one at a time.
        # We must acquire the reentrant lock before acquiring the flufl lock and only release after
        # the flufl lock is released.
        self._r_lock = threading.RLock()

    def acquire(self):
        self._r_lock.acquire()
        try:
            self._lock.lock()
        except AlreadyLockedError:
            # Safe to re-attempt to acquire a lock
            pass

    def release(self):
        try:
            self._lock.unlock()
        except NotLockedError:
            # Safe to re-attempt to release a lock
            pass
        self._r_lock.release()

    def __enter__(self):
        self.acquire()

    def __exit__(self, t, v, tb):
        self.release()

    @property
    def is_locked(self):
        return self._lock.is_locked
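A hedged usage sketch of the `NFSLock` wrapper above, relying on its `__enter__`/`__exit__` methods; the lock path is illustrative and, per the class comment, should point to a file that does not otherwise exist.

lock = NFSLock("/mnt/shared/.cache.lock")  # hypothetical path
with lock:
    pass  # work that requires the NFS-safe lock; released automatically on exit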
Example #19
def acquire_lock_1(force, lock_file=None):
    """Try to acquire the master lock.

    :param force: Flag that controls whether to force acquisition of the lock.
    :type force: bool
    :param lock_file: Path to the lock file, otherwise `config.LOCK_FILE`.
    :type lock_file: str
    :return: The master lock.
    :raises: `TimeOutError` if the lock could not be acquired.
    """
    if lock_file is None:
        lock_file = config.LOCK_FILE
    lock = Lock(lock_file, LOCK_LIFETIME)
    try:
        lock.lock(timedelta(seconds=0.1))
        return lock
    except TimeOutError:
        if not force:
            raise
        # Force removal of lock first.
        lock.disown()
        hostname, pid, tempfile = lock.details
        os.unlink(lock_file)
        return acquire_lock_1(force=False)
Example #20
def acquire_lock_1(force, lock_file=None):
    """Try to acquire the master lock.

    :param force: Flag that controls whether to force acquisition of the lock.
    :type force: bool
    :param lock_file: Path to the lock file, otherwise `config.LOCK_FILE`.
    :type lock_file: str
    :return: The master lock.
    :raises: `TimeOutError` if the lock could not be acquired.
    """
    if lock_file is None:
        lock_file = config.LOCK_FILE
    lock = Lock(lock_file, LOCK_LIFETIME)
    try:
        lock.lock(timedelta(seconds=0.1))
        return lock
    except TimeOutError:
        if not force:
            raise
        # Force removal of lock first.
        lock.disown()
        hostname, pid, tempfile = lock.details
        os.unlink(lock_file)
        return acquire_lock_1(force=False)
Example #21
def dmg_signfile(filename,
                 keychain,
                 signing_identity,
                 code_resources,
                 identifier,
                 subject_ou,
                 lockfile,
                 fake=False,
                 passphrase=None):
    """ Sign a mac .app folder
    """
    from flufl.lock import Lock, TimeOutError, NotLockedError
    from datetime import timedelta
    import pexpect

    basename = os.path.basename(filename)
    dirname = os.path.dirname(filename)
    stdout = tempfile.TemporaryFile()

    sign_command = [
        'codesign', '-s', signing_identity, '-fv', '--keychain', keychain,
        '--resource-rules', code_resources, '--requirement',
        MAC_DESIGNATED_REQUIREMENTS % locals(), basename
    ]

    # pexpect requires a string as input
    unlock_command = 'security unlock-keychain ' + keychain
    lock_command = ['security', 'lock-keychain', keychain]
    try:
        sign_lock = None
        try:
            # Acquire a lock for the signing command, to ensure we don't have a
            # race condition where one process locks the keychain immediately after another
            # unlocks it.
            log.debug("Try to acquire %s", lockfile)
            sign_lock = Lock(lockfile)
            # Put a 30 second timeout on waiting for the lock.
            sign_lock.lock(timedelta(0, 30))

            # Unlock the keychain so that we do not get a user-interaction prompt to use
            # the keychain for signing. This operation requires a password.
            child = pexpect.spawn(unlock_command)
            child.expect('password to unlock .*')
            child.sendline(passphrase)
            # read output until child exits
            child.read()
            child.close()
            if child.exitstatus != 0:
                raise ValueError("keychain unlock failed")

            # Execute the signing command
            check_call(sign_command, cwd=dirname, stdout=stdout, stderr=STDOUT)

        except TimeOutError:
            # timed out acquiring lock, give an error
            log.exception(
                "Timeout acquiring lock %s for codesign, is something broken?",
                lockfile)
            raise
        except:
            # catch any other locking error
            log.exception(
                "Error acquiring  %s for codesign, is something broken?",
                lockfile)
            raise
        finally:
            # Lock the keychain again, no matter what happens
            # This command does not require a password
            check_call(lock_command)

            # Release the lock, if it was acquired
            if sign_lock:
                try:
                    sign_lock.unlock()
                    log.debug("Release %s", lockfile)
                except NotLockedError:
                    log.debug("%s was already unlocked", lockfile)
Example #22
class TaskQueue(object):
    ''' The actual Task Queue object. See Module docs '''

    def __init__(self, config_file):
        ''' initialise the task queue, from the config file '''
        self.config = Config(config_file)
        self.lock = Lock(pathjoin(self.config.get('DIRS', 'db'),
                         'TaskQueue.lock'))
        self.db = DictLiteStore(pathjoin(self.config.get('DIRS', 'db'),
                                'TaskQueue.db'), 'Tasks')

    def __enter__(self):
        ''' start of with TaskQueue(...) as t: block '''
        self.lock.lock()
        self.db.open()
        return self

    def __exit__(self, exptype, value, tb):
        ''' end of with ... block '''
        self.db.close()
        self.lock.unlock()

    def tasks(self, group=None, state=None):

        q = []
        if group:
            q.append(('group', 'LIKE', NoJSON('%"' + group + '"%')))
        if state:
            q.append(('state', '==', state))

        return self.db.get(*q) # pylint: disable=W0142


    def active_groups(self):
        ''' return a list of all groups currently in the task list, and how
            many tasks they each are running '''

        # grouplist looks like:
        #
        # dict[groupname] -> dict[state] -> count
        # so you can do awesome things.

        grouplist = defaultdict(lambda:defaultdict(lambda:0))

        sql = u'SELECT Tasks."group", Tasks."state" From Tasks'

        try:
            rows = self.db.cur.execute(sql).fetchall()
        except OperationalError as err:
            # usually no such column, which means usually no rows.
            rowcount = self.db.cur.execute(u'SELECT Count(id) FROM Tasks')
            if rowcount.fetchone()[0] == 0:
                return grouplist
            else:
                raise err

        for rawgroups, rawstate in rows:

            groups = json.loads(rawgroups)
            state = json.loads(rawstate)

            if isinstance(groups, list):
                for g in groups:
                    grouplist[g][state] += 1
            else:
                grouplist[groups][state] += 1

        return grouplist

        ######################################
        # If for some reason it would be better to return dicts
        # rather than defaultdicts, then this is the code:
        #
        #to_return = {}
        #for groupname in grouplist:
        #    to_return[groupname] = dict(grouplist[groupname])

        #return to_return

    def grouplimit(self, groupname):
        ''' how many tasks can be run at the same time in this group? '''

        return int(self.config.get(groupname, 'limit', 1))


    def _getnexttask(self, group, new_state='running'):
        ''' get the next 'ready' task of this group. This should ONLY be called
        by self.getnexttask, not by end users. getnexttask checks that limits
        haven't been reached, etc. '''
        try:
            task = self.tasks(group, 'ready')[0]
            if new_state:
                task['state'] = new_state
                self.db.update(task, False, ('uid', '==', task['uid']))

            # Now we are going to start the task, import the defaults from
            # the group config:
            if self.config.config.has_section(group):
                for k, v in self.config.config.items(group):
                    if not k in task:
                        task[k] = v

            # and finally load defaults:
            if self.config.config.has_section('task_defaults'):
                for k, v in self.config.config.items('task_defaults'):
                    if not k in task:
                        task[k] = v

            return task
        except IndexError:
            raise NoAvailableTasks()


    def getnexttask(self, group=None, new_state='running'):
        ''' Get one available next task, as long as 'group' isn't overloaded.
            When the task is 'got', sets the state to new_state in the database.
            So this can be used as an atomic action on tasks. '''

        if group:
            running_tasks = self.active_groups()[group]['running']
            group_limit = self.grouplimit(group)

            if running_tasks < group_limit:
                return self._getnexttask(group, new_state)
            else:
                raise TooBusy()

        else: #no group specified.

            all_groups = self.active_groups()

            for groupname, grouptasks in all_groups.items():

                # already at limit:
                if grouptasks['running'] >= self.grouplimit(groupname):
                    continue

                # no ready tasks:
                if grouptasks['ready'] == 0:
                    continue

                # we have a winner! (a group with available tasks)
                return self._getnexttask(groupname, new_state)

            # if there are no ready tasks at all, then raise that exception

            if all((g['ready'] == 0 for g in all_groups.values())):
                raise NoAvailableTasks()

            # otherwise, there are availible tasks, but we're too busy.

            raise TooBusy()


    def save(self, data):
        ''' add needed fields if they're not there, and then save to the
            database.  If the same uuid is already there, then update it. '''

        if 'state' not in data:
            data['state'] = 'ready'

        if not 'uid' in data:
            data['uid'] = uuid1().hex

        if not 'group' in data:
            data['group'] = 'none'

        # If output files are not absolute paths, then place them in the config
        # file specified logfile directory.

        if 'stdout' in data and not data['stdout'] == abspath(data['stdout']):
            data['stdout'] = abspath(pathjoin(self.config.get('DIRS', 'log'),
                                              data['stdout']))

        if 'stderr' in data and not data['stderr'] == abspath(data['stderr']):
            data['stderr'] = abspath(pathjoin(self.config.get('DIRS', 'log'),
                                              data['stderr']))

        # And save it to the database.

        self.db.update(data, True, ('uid', '==', data['uid']))

        return data

    def get(self, uid):
        ''' get a task based of its uuid '''

        return self.db.get(('uid', '==', uid))
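A hedged usage sketch of `TaskQueue`; the config filename and task fields are illustrative, and `TooBusy`/`NoAvailableTasks` are the exceptions referenced inside the class above.

with TaskQueue("taskqueue.ini") as queue:   # hypothetical config file
    queue.save({"group": "render", "cmd": "render.sh", "stdout": "render.log"})
    try:
        task = queue.getnexttask(group="render")
        print("claimed task", task["uid"])
    except (TooBusy, NoAvailableTasks):
        pass  # nothing runnable right now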
Example #23
def dmg_signfile(filename, keychain, signing_identity, code_resources, identifier, subject_ou, lockfile, fake=False, passphrase=None):
    """ Sign a mac .app folder
    """
    from flufl.lock import Lock, TimeOutError, NotLockedError
    from datetime import timedelta
    import pexpect

    basename = os.path.basename(filename)
    dirname = os.path.dirname(filename)
    stdout = tempfile.TemporaryFile()

    sign_command = ['codesign',
                    '-s', signing_identity, '-fv',
                    '--keychain', keychain,
                    '--resource-rules', code_resources,
                    '--requirement', MAC_DESIGNATED_REQUIREMENTS % locals(),
                    basename]

    # pexpect requires a string as input
    unlock_command = 'security unlock-keychain ' + keychain
    lock_command = ['security', 'lock-keychain', keychain]
    try:
        sign_lock = None
        try:
            # Acquire a lock for the signing command, to ensure we don't have a
            # race condition where one process locks the keychain immediately after another
            # unlocks it.
            log.debug("Try to acquire %s", lockfile)
            sign_lock = Lock(lockfile)
            # Put a 30 second timeout on waiting for the lock.
            sign_lock.lock(timedelta(0, 30))

            # Unlock the keychain so that we do not get a user-interaction prompt to use
            # the keychain for signing. This operation requires a password.
            child = pexpect.spawn(unlock_command)
            child.expect('password to unlock .*')
            child.sendline(passphrase)
            # read output until child exits
            child.read()
            child.close()
            if child.exitstatus != 0:
                raise ValueError("keychain unlock failed")

            # Execute the signing command
            check_call(sign_command, cwd=dirname, stdout=stdout, stderr=STDOUT)

        except TimeOutError:
            # timed out acquiring lock, give an error
            log.exception("Timeout acquiring lock %s for codesign, is something broken?", lockfile)
            raise
        except:
            # catch any other locking error
            log.exception("Error acquiring  %s for codesign, is something broken?", lockfile)
            raise
        finally:
            # Lock the keychain again, no matter what happens
            # This command does not require a password
            check_call(lock_command)

            # Release the lock, if it was acquired
            if sign_lock:
                try:
                    sign_lock.unlock()
                    log.debug("Release %s", lockfile)
                except NotLockedError:
                    log.debug("%s was already unlocked", lockfile)
Example #24
class DictListFile:
    def __init__(self, filename, header="report('", footer="');"):
        self.filename = Path(filename)
        self.filename.parent.mkdir(parents=True, exist_ok=True)

        lockfilename = f"{filename}.lock"
        self.lock = Lock(str(lockfilename))

        if isinstance(header, str):
            header = header.encode()
        self.header = header

        if isinstance(footer, str):
            footer = footer.encode()
        self.footer = footer

        self.dictlist = None
        self.is_dirty = None

    @classmethod
    @lru_cache(maxsize=128)
    def cached(cls, filename, **kwargs):
        return cls(filename, **kwargs)

    def __enter__(self):
        self.lock.lock()

        self.dictlist = []
        self.is_dirty = False
        if self.filename.is_file():
            with open(str(self.filename), "rb") as fp:
                bytesfromfile = fp.read()
            try:
                if self.header is not None:
                    bytesfromfile = bytesfromfile[len(self.header):]
                if self.footer is not None:
                    bytesfromfile = bytesfromfile[:-len(self.footer)]
                jsonstr = bytesfromfile.decode()
                jsonstr = jsonstr.replace("\\\n", "")
                self.dictlist = json.loads(jsonstr)
            except json.decoder.JSONDecodeError as e:
                logger.warning("JSONDecodeError %s", e)
        return self

    def __exit__(self, *args):
        if self.is_dirty:
            with open(str(self.filename), "w") as fp:
                fp.write(self.header.decode())
                jsonstr = json.dumps(self.dictlist,
                                     indent=4,
                                     sort_keys=True,
                                     ensure_ascii=False)
                for line in jsonstr.splitlines():
                    fp.write(line)
                    fp.write("\\\n")
                fp.write(self.footer.decode())
        try:
            self.lock.unlock()
        except RuntimeError:
            pass
        self.dictlist = None

    def to_table(self):
        dictlist = [{str(k): str(v)
                     for k, v in indict.items()} for indict in self.dictlist]
        dataframe = pd.DataFrame.from_records(dictlist)
        dataframe = dataframe.replace({np.nan: ""})

        columnsinorder = [
            entity for entity in reversed(entities) if entity in dataframe
        ]
        columnsinorder.extend(
            sorted([column for column in dataframe if column not in entities]))

        dataframe = dataframe[columnsinorder]

        table_str = tabulate(dataframe, headers="keys", showindex=False)

        table_filename = self.filename.parent / f"{self.filename.stem}.txt"
        with open(str(table_filename), "w") as fp:
            fp.write(table_str)
            fp.write("\n")

    def put(self, indict):
        assert self.dictlist is not None

        intags = {k: v for k, v in indict.items() if k in entities}

        matches = False

        for i, curdict in enumerate(self.dictlist):
            curtags = {k: v for k, v in curdict.items() if k in entities}

            if set(intags.items()) == set(curtags.items()):
                if set(indict.keys()) == set(curdict.keys()):
                    if all(_compare(v, curdict[k]) for k, v in indict.items()):
                        return  # update not needed

                matches = True

                break

        self.is_dirty = True  # will need to write out file

        if matches:
            self.dictlist[i].update(indict)
            logger.debug(
                f"Updating {self.filename} entry {curdict} with {indict}")
        else:
            self.dictlist.append(indict)
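A hedged usage sketch of `DictListFile`; the filename and entry keys are illustrative, and whether a key counts as a tag depends on the module-level `entities` list used by the class above.

with DictListFile.cached("reports.js") as report:   # hypothetical report file
    report.put({"subject": "01", "task": "rest", "mean_fd": "0.12"})
    report.to_table()  # writes a tabulated .txt next to the report file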
Example #25
def dmg_signpackage(pkgfile,
                    dstfile,
                    keychain,
                    mac_id,
                    subject_ou,
                    fake=False,
                    passphrase=None):
    """ Sign a mac build, putting results into `dstfile`.
        pkgfile must be a tar, which gets unpacked, signed, and repacked.
    """
    # Keep track of our output in a list here, and we can output everything
    # when we're done. This is to avoid interleaving the output from
    # multiple processes.
    from flufl.lock import Lock, TimeOutError, NotLockedError
    from datetime import timedelta
    import pexpect

    # TODO: Is it even possible to do 'fake' signing?
    logs = []
    logs.append("Repacking %s to %s" % (pkgfile, dstfile))

    # pexpect requires a string as input
    unlock_command = 'security unlock-keychain ' + keychain
    lock_command = ['security', 'lock-keychain', keychain]
    lockfile = os.path.join(os.path.dirname(keychain), '.lock')

    tmpdir = tempfile.mkdtemp()
    try:
        # Unpack it
        logs.append("Unpacking %s to %s" % (pkgfile, tmpdir))
        unpacktar(pkgfile, tmpdir)

        for macdir in os.listdir(tmpdir):
            macdir = os.path.join(tmpdir, macdir)
            log.debug('Checking if we should sign %s', macdir)
            if shouldSign(macdir, 'mac'):
                log.debug('Need to sign %s', macdir)

                try:
                    sign_lock = None
                    # Acquire a lock for the signing command, to ensure we don't have a
                    # race condition where one process locks the keychain immediately after another
                    # unlocks it.
                    log.debug("Try to acquire %s", lockfile)
                    sign_lock = Lock(lockfile)
                    # Put a 30 second timeout on waiting for the lock.
                    sign_lock.lock(timedelta(0, 30))

                    # Unlock the keychain so that we do not get a user-interaction prompt to use
                    # the keychain for signing. This operation requires a password.
                    child = pexpect.spawn(unlock_command)
                    child.expect('password to unlock .*')
                    child.sendline(passphrase)
                    # read output until child exits
                    child.read()
                    child.close()
                    if child.exitstatus != 0:
                        raise ValueError("keychain unlock failed")

                    # Sign the thing!
                    dmg_signfile(macdir, keychain, mac_id, subject_ou, fake)

                except TimeOutError:
                    # timed out acquiring lock, give an error
                    log.exception(
                        "Timeout acquiring lock  %s for codesign, is something broken? ",
                        lockfile)
                    raise
                except:
                    # catch any other locking error
                    log.exception(
                        "Error acquiring  %s for codesign, is something broken?",
                        lockfile)
                    raise
                finally:
                    # Lock the keychain again, no matter what happens
                    # This command does not require a password
                    check_call(lock_command)

                    # Release the lock, if it was acquired
                    if sign_lock:
                        try:
                            sign_lock.unlock()
                            log.debug("Release %s", lockfile)
                        except NotLockedError:
                            log.debug("%s was already unlocked", lockfile)

        # Repack it
        logs.append("Packing %s" % dstfile)
        tar_dir(dstfile, tmpdir)
    except:
        log.exception("Error signing %s", pkgfile)
        raise
    finally:
        # Clean up after ourselves, and output our logs
        shutil.rmtree(tmpdir)
        log.info("\n  ".join(logs))
Example #26
    def setupTrain(self, param, dataset_in):

        # Lock to prevent race condition
        if not os.path.exists(".locks"):
            os.makedirs(".locks")
        lock_file = ".locks/setup.lock"
        if os.name == "posix":

            lock = Lock(lock_file)
            lock.lifetime = timedelta(days=2)
            frameinfo = getframeinfo(currentframe())
            print(">> {}/{}: waiting to obtain lock <<".format(
                frameinfo.filename, frameinfo.lineno))
            lock.lock()
            print(">> obtained lock for posix system <<")
        elif os.name == "nt":
            import filelock
            lock = filelock.FileLock(lock_file)
            lock.acquire()
            if lock.is_locked:
                print(">> obtained lock for windows system <<")
        else:
            print("Unknown operating system, lock unavailable")

        # ---------------------------------------------------------------------
        # Base path
        # for dataset
        # self.dataset = os.getenv('PROJ_DATA_DIR', '')
        # if self.dataset == '':
        #     self.dataset = os.path.expanduser("~/Datasets")
        # self.dataset += "/" + str(dataset_in)
        self.dataset = os.path.join(param.data_dir,
                                    str(dataset_in)).rstrip("/") + "/"
        # for temp
        # self.temp = os.getenv('PROJ_TEMP_DIR', '')
        # if self.temp == '':
        #     self.temp = os.path.expanduser("~/Temp")
        # self.temp += "/" + str(dataset_in)
        self.temp = os.path.join(param.temp_dir,
                                 str(dataset_in)).rstrip("/") + "/"
        # for volatile temp
        # self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '')
        # if self.volatile_temp == '':
        #     self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp"
        # self.volatile_temp += "/" + str(dataset_in)
        self.volatile_temp = os.path.join(param.scratch_dir,
                                          str(dataset_in)).rstrip("/") + "/"
        # self.negdata = os.path.expanduser("~/Datasets/NegData/")  # LEGACY

        # # create dump directory if it does not exist
        # if not os.path.exists(self.temp):
        #     os.makedirs(self.temp)

        # ---------------------------------------------------------------------
        # Path for data loading

        # path for synthetic data generation
        self.train_data = self.dataset + "train/" + self.prefix_dataset(param)
        self.train_mask = (self.dataset + "train/" +
                           self.prefix_dataset(param) + "masks/")
        if not os.path.exists(self.train_data):
            # Check if the un-sorted prefix exists
            unsorted_hash_path = (self.dataset + "train/" +
                                  self.prefix_dataset(param, do_sort=False))
            if os.path.exists(unsorted_hash_path):
                os.symlink(unsorted_hash_path.rstrip("/"),
                           self.train_data.rstrip("/"))
                # shutil.copytree(unsorted_hash_path, self.train_data)
                # shutil.rmtree(unsorted_hash_path)

        # dump folder for dataset selection (sampling and etc)
        self.train_dump = self.temp + "train/" + self.prefix_dataset(param)
        if not os.path.exists(self.train_dump):
            # Check if the un-sorted prefix exists
            unsorted_hash_path = (self.temp + "train/" +
                                  self.prefix_dataset(param, do_sort=False))
            if os.path.exists(unsorted_hash_path):
                os.symlink(unsorted_hash_path.rstrip("/"),
                           self.train_dump.rstrip("/"))
                # shutil.copytree(unsorted_hash_path, self.train_dump)
                # shutil.rmtree(unsorted_hash_path)

        # dump folder for patch extraction (if necessary)
        self.patch_dump = (self.temp + "train/" + self.prefix_dataset(param) +
                           self.prefix_patch(param))
        if not os.path.exists(self.patch_dump):
            # Check if the un-sorted prefix exists
            unsorted_hash_path = (self.temp + "train/" +
                                  self.prefix_dataset(param, do_sort=False) +
                                  self.prefix_patch(param, do_sort=False))
            if os.path.exists(unsorted_hash_path):
                os.symlink(unsorted_hash_path.rstrip("/"),
                           self.patch_dump.rstrip("/"))
                # shutil.copytree(unsorted_hash_path, self.patch_dump)
                # shutil.rmtree(unsorted_hash_path)

        # volatile dump folder for patch extraction (if necessary)
        self.volatile_patch_dump = (self.volatile_temp + "train/" +
                                    self.prefix_dataset(param) +
                                    self.prefix_patch(param))
        # if not os.path.exists(self.volatile_patch_dump):
        #     # Check if the un-sorted prefix exists
        #     unsorted_hash_path = (self.volatile_temp + "train/" +
        #                           self.prefix_dataset(param, do_sort=False) +
        #                           self.prefix_patch(param, do_sort=False))
        #     os.symlink(unsorted_hash_path.rstrip("/"),
        #                self.volatile_patch_dump.rstrip("/"))
        #     # shutil.copytree(unsorted_hash_path, self.volatile_patch_dump)
        #     # shutil.rmtree(unsorted_hash_path)

        # debug info folder
        self.debug = self.dataset + "debug/" + self.prefix_dataset(param)
        if not os.path.exists(self.debug):
            # Check if the un-sorted prefix exists
            unsorted_hash_path = (self.dataset + "debug/" +
                                  self.prefix_dataset(param, do_sort=False))
            if os.path.exists(unsorted_hash_path):
                shutil.copytree(unsorted_hash_path, self.debug)
                shutil.rmtree(unsorted_hash_path)

        # # ---------------------------------------------------------------------
        # # Path for the model learning
        # resdir = os.getenv('PROJ_RES_DIR', '')
        # if resdir == '':
        #     resdir = os.path.expanduser("~/Results")
        # self.result = (resdir + "/" +
        #                self.getResPrefix(param) +
        #                self.prefix_dataset(param) +
        #                self.prefix_patch(param) +
        #                self.prefix_learning(param))
        # if not os.path.exists(self.result):
        #     # Check if the un-sorted prefix exists
        #     unsorted_hash_path = (resdir + "/" +
        #                           self.getResPrefix(param, do_sort=False) +
        #                           self.prefix_dataset(param, do_sort=False) +
        #                           self.prefix_patch(param, do_sort=False) +
        #                           self.prefix_learning(param, do_sort=False))

        #     if os.path.exists(unsorted_hash_path):
        #         shutil.copytree(unsorted_hash_path, self.result)
        #         shutil.rmtree(unsorted_hash_path)

        # # create result directory if it does not exist
        # if not os.path.exists(self.result):
        #     os.makedirs(self.result)

        if os.name == "posix":
            lock.unlock()
        elif os.name == "nt":
            lock.release()
        else:
            pass
Example #27
def dmg_signpackage(pkgfile, dstfile, keychain, mac_id, subject_ou, fake=False, passphrase=None):
    """ Sign a mac build, putting results into `dstfile`.
        pkgfile must be a tar, which gets unpacked, signed, and repacked.
    """
    # Keep track of our output in a list here, and we can output everything
    # when we're done. This is to avoid interleaving the output from
    # multiple processes.
    from flufl.lock import Lock, TimeOutError, NotLockedError
    from datetime import timedelta
    import pexpect

    # TODO: Is it even possible to do 'fake' signing?
    logs = []
    logs.append("Repacking %s to %s" % (pkgfile, dstfile))

    # pexpect requires a string as input
    unlock_command = 'security unlock-keychain ' + keychain
    lock_command = ['security', 'lock-keychain', keychain]
    lockfile = os.path.join(os.path.dirname(keychain), '.lock')

    tmpdir = tempfile.mkdtemp()
    try:
        # Unpack it
        logs.append("Unpacking %s to %s" % (pkgfile, tmpdir))
        unpacktar(pkgfile, tmpdir)


        for macdir in os.listdir(tmpdir):
            macdir = os.path.join(tmpdir, macdir)
            log.debug('Checking if we should sign %s', macdir)
            if shouldSign(macdir, 'mac'):
                log.debug('Need to sign %s', macdir)

                try:
                    sign_lock = None
                    # Acquire a lock for the signing command, to ensure we don't have a
                    # race condition where one process locks the keychain immediately after another
                    # unlocks it.
                    log.debug("Try to acquire %s", lockfile)
                    sign_lock = Lock(lockfile)
                    # Put a 30 second timeout on waiting for the lock.
                    sign_lock.lock(timedelta(0, 30))

                    # Unlock the keychain so that we do not get a user-interaction prompt to use
                    # the keychain for signing. This operation requires a password.
                    child = pexpect.spawn(unlock_command)
                    child.expect('password to unlock .*')
                    child.sendline(passphrase)
                    # read output until child exits
                    child.read()
                    child.close()
                    if child.exitstatus != 0:
                        raise ValueError("keychain unlock failed")

                    # Sign the thing!
                    dmg_signfile(macdir, keychain, mac_id, subject_ou, fake)

                except TimeOutError:
                    # timed out acquiring lock, give an error
                    log.exception("Timeout acquiring lock  %s for codesign, is something broken? ", lockfile)
                    raise
                except:
                    # catch any other locking error
                    log.exception("Error acquiring  %s for codesign, is something broken?", lockfile)
                    raise
                finally:
                    # Lock the keychain again, no matter what happens
                    # This command does not require a password
                    check_call(lock_command)

                    # Release the lock, if it was acquired
                    if sign_lock:
                        try:
                            sign_lock.unlock()
                            log.debug("Release %s", lockfile)
                        except NotLockedError:
                            log.debug("%s was already unlocked", lockfile)


        # Repack it
        logs.append("Packing %s" % dstfile)
        tar_dir(dstfile, tmpdir)
    except:
        log.exception("Error signing %s", pkgfile)
        raise
    finally:
        # Clean up after ourselves, and output our logs
        shutil.rmtree(tmpdir)
        log.info("\n  ".join(logs))
Example #28
    def load_data_for_set(self, pathconf, param, mode):

        # ----------------------------------------------------------------------
        # Train, Validation, and Test
        # mlab = matlab.engine.start_matlab()

        # Read from pathconf
        # Original implementation
        train_data_dir = os.path.normpath(pathconf.dataset)
        dump_data_dir = os.path.normpath(pathconf.train_dump)
        dump_patch_dir = os.path.normpath(pathconf.patch_dump)
        # local (or volatile) copy of the dump data
        tmp_patch_dir = os.path.normpath(pathconf.volatile_patch_dump)

        # print("train_data_dir = {}".format(train_data_dir))
        # print("dump_data_dir = {}".format(dump_data_dir))
        # print("dump_patch_dir = {}".format(dump_patch_dir))
        # print("tmp_patch_dir = {}".format(tmp_patch_dir))

        if not os.path.exists(dump_data_dir):
            os.makedirs(dump_data_dir)
        if not os.path.exists(dump_patch_dir):
            os.makedirs(dump_patch_dir)
        if not os.path.exists(tmp_patch_dir):
            os.makedirs(tmp_patch_dir)

        # Check if we have the big h5 file ready
        big_file_name = dump_patch_dir + mode + "-data-chunked.h5"
        # if os.getenv("MLTEST_DEBUG", default=""):
        #     import pdb
        #     pdb.set_trace()

        # Mutex lock
        #
        # We will create an nfs-safe lock file in a temporary directory to
        # prevent our script from using corrupted, or data that is still being
        # generated. This allows us to launch multiple instances at the same
        # time, and allow only a single instance to generate the big_file.
        if not os.path.exists(".locks"):
            os.makedirs(".locks")
        check_lock_file = ".locks/" + \
            hashlib.md5(big_file_name.encode()).hexdigest()
        if os.name == "posix":
            check_lock = Lock(check_lock_file)
            check_lock.lifetime = timedelta(days=2)
            frameinfo = getframeinfo(currentframe())
            print("-- {}/{}: waiting to obtain lock --".format(
                frameinfo.filename, frameinfo.lineno))
            check_lock.lock()
            print(">> obtained lock for posix system<<")
        elif os.name == "nt":
            import filelock
            check_lock = filelock.FileLock(check_lock_file)
            check_lock.timeout = 2000
            check_lock.acquire()
            if check_lock.is_locked:
                print(">> obtained lock for windows system <<")
        else:
            print("Unknown operating system, lock unavailable")

        # if the large training data file does not exist
        if not os.path.exists(big_file_name):
            print("big data file does not exist...")
            # if the patch-mode-data file does not exist
            if not os.path.exists(
                    os.path.join(dump_patch_dir, mode + "-data.h5")):
                print("{0} does not exist...".format(
                    os.path.join(dump_patch_dir, mode + "-data.h5")))

                # Read scale histogram
                hist_file_path = train_data_dir + "scales-histogram-minsc-" + str(
                    param.dataset.fMinKpSize) + ".h5"
                if not os.path.exists(hist_file_path):
                    print("Hist file does not exist, creating...")
                    get_scale_hist(train_data_dir, param)
                # print("Loading hist file...")
                hist_file = h5py.File(hist_file_path, "r")
                scale_hist = np.asarray(hist_file["histogram_bins"],
                                        dtype=float).flatten()
                # print(scale_hist)
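                # Normalize the bin counts into a probability distribution over scales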
                scale_hist /= np.sum(scale_hist)
                scale_hist_c = np.asarray(
                    hist_file["histogram_centers"]).flatten()

                # Read list of images from split files
                split_name = ""
                split_name += str(param.dataset.nTrainPercent) + "-"
                split_name += str(param.dataset.nValidPercent) + "-"
                split_name += str(param.dataset.nTestPercent) + "-"
                if mode == "train":
                    # split_name += "train-"
                    split_name += "train"
                elif mode == "valid":
                    # split_name += "val-"
                    split_name += "val"
                elif mode == "test":
                    # split_name += "test-"
                    split_name += "test"
                print("split_name: {}".format(split_name))
                # split_file_name = train_data_dir + "split-" \
                #     + split_name + "minsc-" \
                #     + str(param.dataset.fMinKpSize) + ".h.txt"
                # split_file_name = "split-" + split_name + "minsc-" + str(param.dataset.fMinKpSize) + ".h.txt"
                split_file_name = "split-" + split_name + ".txt"
                split_file_name = train_data_dir + split_file_name
                # split_file_name = os.path.join(train_data_dir, split_file_name)
                print("split_file_name: {}".format(split_file_name))

                if not os.path.exists(split_file_name):
                    print("split_file_name does not exist...")
                    list_jpg_file = get_list_of_img(train_data_dir,
                                                    dump_data_dir, param, mode)
                else:
                    print("split_file_name exists...")
                    list_jpg_file = []
                    for file_name in list(
                            np.loadtxt(split_file_name, dtype=bytes)):
                        list_jpg_file += [
                            file_name.decode("utf-8").replace(
                                "-kp-minsc-" + str(param.dataset.fMinKpSize),
                                ".jpg")
                        ]

                # -------------------------------------------------
                # Create dumps in parallel
                # Build the per-image argument tuples in a loop
                pool_arg = [None] * len(list_jpg_file)
                for idx_jpg in six.moves.xrange(len(list_jpg_file)):
                    pool_arg[idx_jpg] = (idx_jpg, list_jpg_file[idx_jpg],
                                         train_data_dir, dump_data_dir,
                                         tmp_patch_dir, scale_hist,
                                         scale_hist_c, self.out_dim, param)

                # If True, process images with a multiprocessing pool; otherwise run single-threaded
                prod = True
                if prod:
                    number_of_process = int(ratio_CPU * mp.cpu_count())
                    pool = mp.Pool(processes=number_of_process)
                    manager = mp.Manager()
                    queue = manager.Queue()
                    for idx_jpg in six.moves.xrange(len(list_jpg_file)):
                        pool_arg[idx_jpg] = pool_arg[idx_jpg] + (queue, )
                    # map async
                    pool_res = pool.map_async(createDump, pool_arg)
                    # pool_res = pool.map_async(createDump, pool_arg, chunksize = int(len(list_jpg_file)/(number_of_process* mp.cpu_count())))
                    # monitor loop
                    while True:
                        if pool_res.ready():
                            print("Pool_res ready?")
                            break
                        else:
                            size = queue.qsize()
                            print("\r -- " + mode +
                                  ": Processing image {}/{}".format(
                                      size, len(list_jpg_file)),
                                  end="")
                            # print(list_jpg_file[size])
                            sys.stdout.flush()
                            time.sleep(1)
                    pool.close()
                    pool.join()
                    print("\r -- " + mode + ": Finished Processing Images!")
                # For debugging: the multiprocessing path is hard to step through,
                # so fall back to a plain loop in a single process
                else:
                    for idx_jpg in six.moves.xrange(len(list_jpg_file)):
                        pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None, )
                    for idx_jpg in six.moves.xrange(len(list_jpg_file)):
                        createDump(pool_arg[idx_jpg])
                        print("\r -- " + mode + ": Processing image "
                              "{}/{}".format(idx_jpg + 1, len(list_jpg_file)),
                              end="")
                        sys.stdout.flush()
                    print("\r -- " + mode + ": Finished Processing Images!")
                # -------------------------------------------------

                # ------------------------------------------------------------------
                # Use only valid indices to keep the train/valid/test splits mutually exclusive
                id_file_name = train_data_dir + "split-"
                id_file_name += str(param.dataset.nTrainPercent) + "-"
                id_file_name += str(param.dataset.nValidPercent) + "-"
                id_file_name += str(param.dataset.nTestPercent) + "-"
                id_file_name += ("minsc-" + str(param.dataset.fMinKpSize) +
                                 ".h5")

                if mode == "train":
                    id_key = "indices_train"
                elif mode == "valid":
                    id_key = "indices_val"
                elif mode == "test":
                    id_key = "indices_test"
                # print(id_file_name)
                try:
                    with h5py.File(id_file_name, "r") as id_file:
                        id_2_keep = np.asarray(id_file[id_key])
                except OSError as err:
                    print(err)
                    print("Creating idx file...")
                    # if "unable to open file" in err:
                    createsplitindexh5file(id_file_name, train_data_dir, param)
                    with h5py.File(id_file_name, "r") as id_file:
                        id_2_keep = np.asarray(id_file[id_key])
                        # print(id_2_keep)
                        print("{0} has {1} sfmid points to keep...".format(
                            id_key, len(id_2_keep)))
                # exit()

                # ind_2_keep = np.in1d(dataset[2], id_2_keep)
                # ind_2_keep += dataset[2] < 0

                # loop through files to figure out how many valid items we have
                # pdb.set_trace()  # for tracking of the dataset

                num_valid = 0
                # print(len(list_jpg_file))
                # exit()
                for idx_jpg in six.moves.xrange(len(list_jpg_file)):

                    jpg_file = list_jpg_file[idx_jpg]

                    print("\r -- " + mode + ": "
                          "Reading dumps to figure out number of valid "
                          "{}/{}".format(idx_jpg + 1, len(list_jpg_file)),
                          end="")
                    sys.stdout.flush()

                    # Load created dump
                    # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5")
                    # print(tmp_patch_dir)
                    # print(jpg_file)
                    final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename(
                        jpg_file)[:-4] + ".h5"
                    # print(final_dump_file_name)
                    # Use loadh5 and turn it back to original cur_data_set
                    try:
                        with h5py.File(final_dump_file_name, "r") as dump_file:
                            # print(list(dump_file.keys()))
                            cur_ids = dump_file["2"][()]  # .value was removed in h5py 3.x
                            # kps = dump_file["valid_keypoints"][()]
                            # cur_ids = np.asarray(kps[:, 4])
                            # print(cur_ids)
                    except OSError as err:
                        # print(err)
                        continue

                    # Find cur valid by looking at id_2_keep
                    cur_valid = np.in1d(cur_ids, id_2_keep)
                    # print(cur_valid)
                    # Add all negative labels as valid (neg data)
                    cur_valid += cur_ids < 0

                    # Sum it up
                    num_valid += np.sum(cur_valid)
                    # print(num_valid)

                print("\n -- " + mode + ": "
                      "Found {} valid data points from {} files"
                      "".format(num_valid, len(list_jpg_file)))

                # Get the first data to simply check the shape
                tmp_dump_file_name = tmp_patch_dir + "\\" + os.path.basename(
                    list_jpg_file[-1])[:-4] + ".h5"
                with h5py.File(tmp_dump_file_name, "r") as dump_file:
                    dataset_shape = []
                    dataset_type = []
                    for _idx in six.moves.xrange(len(dump_file.keys())):
                        dataset_shape += [dump_file[str(_idx)].shape]
                        dataset_type += [dump_file[str(_idx)].dtype]

                # create and save the large dataset chunk
                with h5py.File(big_file_name, "w-") as big_file:
                    big_file["time_stamp"] = np.asarray(time.localtime())
                    name_list = ["x", "y", "ID", "pos", "angle", "coords"]
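                    # name_list[i] is the output name for dataset str(i) in each per-image dump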
                    # create the dataset storage chunk
                    for __i in six.moves.xrange(len(dataset_shape)):
                        big_file.create_dataset(
                            name_list[__i],
                            (num_valid, ) + dataset_shape[__i][1:],
                            chunks=(1, ) + dataset_shape[__i][1:],
                            maxshape=((num_valid, ) + dataset_shape[__i][1:]),
                            dtype=dataset_type[__i])
                    # loop through the file to save to a big chunk
                    save_base = 0
                    for idx_jpg in six.moves.xrange(len(list_jpg_file)):

                        jpg_file = list_jpg_file[idx_jpg]

                        print("\r -- " + mode + ": "
                              "Saving the data to the big dump "
                              "{}/{}".format(idx_jpg + 1, len(list_jpg_file)),
                              end="")
                        sys.stdout.flush()

                        # Load created dump
                        # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5")
                        final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename(
                            jpg_file)[:-4] + ".h5"
                        # print(final_dump_file_name)
                        # Use loadh5 and turn it back to original cur_data_set
                        try:
                            tmpdict = loadh5(final_dump_file_name)
                            cur_data_set = tuple([
                                tmpdict[str(_idx)]
                                for _idx in range(len(tmpdict.keys()))
                            ])
                            # Find cur valid by looking at id_2_keep
                            cur_valid = np.in1d(cur_data_set[2], id_2_keep)
                            # Add all negative labels as valid (neg data)
                            cur_valid += cur_data_set[2] < 0
                            for __i in six.moves.xrange(len(dataset_shape)):
                                big_file[name_list[__i]][
                                    save_base:save_base +
                                    np.sum(cur_valid
                                           )] = cur_data_set[__i][cur_valid]
                            # Move base to the next chunk
                            save_base += np.sum(cur_valid)
                        except OSError as err:
                            # print(err)
                            # print("{0} skipped due to invalidity...".format(final_dump_file_name))
                            # sys.stdout.flush()
                            continue

                    # Assert that we saved all
                    assert save_base == num_valid

                print("\n -- " + mode + ": "
                      "Done saving {} valid data points from {} files"
                      "".format(num_valid, len(list_jpg_file)))

                # --------------------------------------------------
                #  Cleanup dump
                for idx_jpg in six.moves.xrange(len(list_jpg_file)):

                    jpg_file = list_jpg_file[idx_jpg]

                    print("\r -- " + mode + ": "
                          "Removing dump "
                          "{}/{}".format(idx_jpg + 1, len(list_jpg_file)),
                          end="")
                    sys.stdout.flush()

                    # Delete dump
                    # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5")
                    final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename(
                        jpg_file)[:-4] + ".h5"
                    try:
                        os.remove(final_dump_file_name)
                    except FileNotFoundError as err:
                        pass

                print("\r -- " + mode + ": "
                      "Cleaned up dumps! "
                      "Local dump is now clean!")

            else:
                print(" -- Found old file without chunks. "
                      "Copying to new file with chunks...")
                old_big_file_name = dump_patch_dir + mode + "-data.h5"
                with h5py.File(old_big_file_name, "r") as old_big_file, \
                        h5py.File(big_file_name, "w-") as big_file:
                    dataset = []

                    # load old train into array
                    name_list = ["x", "y", "ID", "pos", "angle", "coords"]
                    for __i in six.moves.xrange(len(name_list)):
                        dataset += [np.asarray(old_big_file[name_list[__i]])]

                    # save train
                    big_file["time_stamp"] = np.asarray(time.localtime())

                    # allocate and write
                    for __i in six.moves.xrange(len(name_list)):
                        if name_list[__i] == "x":
                            chunk_shape = (1, ) + dataset[__i].shape[1:]
                        else:
                            chunk_shape = None
                        big_file.create_dataset(
                            name_list[__i],
                            dataset[__i].shape,
                            data=dataset[__i],
                            chunks=chunk_shape,
                            maxshape=dataset[__i].shape,
                        )

                print(" -- Finished creating chunked file, removing old...")
                os.remove(old_big_file_name)

        # ----------------------------------------------------------------------
        # Copy to local tmp if necessary
        if not os.path.exists(tmp_patch_dir + mode + "-data-chunked.h5"):
            print(" -- " + mode + ": "
                  "Local dump does not exist! "
                  "Copying big dump to local drive... ")
            shutil.copy(dump_patch_dir + mode + "-data-chunked.h5",
                        tmp_patch_dir + mode + "-data-chunked.h5")
        else:
            print(" -- " + mode + ": "
                  "Local dump exists. Checking timestamp...")

            # get timestamp from nfs
            with h5py.File(dump_patch_dir + mode + "-data-chunked.h5", "r") \
                    as nfs_file:
                nfs_time = np.asarray(nfs_file["time_stamp"])

            # get timestamp from local
            with h5py.File(tmp_patch_dir + mode + "-data-chunked.h5", "r") \
                    as local_file:
                local_time = np.asarray(local_file["time_stamp"])

            # if the two files have different time stamps
            if any(nfs_time != local_time):
                print(" -- " + mode + ": "
                      "Time stamps are different! "
                      "Copying big dump to local drive... ")
                shutil.copy(dump_patch_dir + mode + "-data-chunked.h5",
                            tmp_patch_dir + mode + "-data-chunked.h5")
            else:
                print(" -- " + mode + ": "
                      "Time stamps are identical! Re-using local dump")

        # Free lock
        if os.name == "posix":
            check_lock.unlock()
            print("-- free lock --")
        elif os.name == "nt":
            check_lock.release()
            print("-- free lock --")
        else:
            pass
        # ----------------------------------------------------------------------
        # Use local copy for faster speed
        print(" -- " + mode + ": Loading from local drive... ")
        big_file_name = tmp_patch_dir + mode + "-data-chunked.h5"

        # Open big_file and don't close it; "x" is returned as an h5py dataset backed by this file
        big_file = h5py.File(big_file_name, "r")

        x = big_file["x"]
        # Workaround to avoid h5py loading everything into memory at once
        read_batch_size = 10000
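        # Number of read batches needed to cover all rows of the big file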
        read_batch_num = int(
            np.ceil(float(big_file["x"].shape[0]) / float(read_batch_size)))

        # Copy the fields manually; the exec-based version below is kept commented out for reference
        # fields = ["y", "ID", "pos", "angle", "coords"]
        # for var_name in fields:
        #     # allocate data
        #     exec("{0} = np.zeros(big_file['{0}'].shape, "
        #          "dtype=big_file['{0}'].dtype)".format(var_name))
        #     # copy data in batches
        #     for idx_batch in six.moves.xrange(read_batch_num):
        #         idx_s = idx_batch * read_batch_size
        #         idx_e = (idx_batch + 1) * read_batch_size
        #         idx_e = np.minimum(idx_e, big_file["x"].shape[0])
        #         exec("{0}[idx_s:idx_e] = np.asarray(big_file['{0}'][idx_s:idx_e])"
        #              "". format(var_name))

        # Allocate
        y = np.zeros(big_file["y"].shape, dtype=big_file["y"].dtype)
        ID = np.zeros(big_file["ID"].shape, dtype=big_file["ID"].dtype)
        pos = np.zeros(big_file["pos"].shape, dtype=big_file["pos"].dtype)
        angle = np.zeros(big_file["angle"].shape,
                         dtype=big_file["angle"].dtype)
        coords = np.zeros(big_file["coords"].shape,
                          dtype=big_file["coords"].dtype)

        # Copy data in batches
        for idx_batch in six.moves.xrange(read_batch_num):
            idx_s = idx_batch * read_batch_size
            idx_e = (idx_batch + 1) * read_batch_size
            idx_e = np.minimum(idx_e, big_file["x"].shape[0])

            y[idx_s:idx_e] = np.asarray(big_file['y'][idx_s:idx_e])
            ID[idx_s:idx_e] = np.asarray(big_file['ID'][idx_s:idx_e])
            pos[idx_s:idx_e] = np.asarray(big_file['pos'][idx_s:idx_e])
            angle[idx_s:idx_e] = np.asarray(big_file['angle'][idx_s:idx_e])
            coords[idx_s:idx_e] = np.asarray(big_file['coords'][idx_s:idx_e])

        #     import pdb
        #     pdb.set_trace()

        # # Make sure data is contiguous
        # y = np.ascontiguousarray(y)
        # ID = np.ascontiguousarray(ID)
        # pos = np.ascontiguousarray(pos)
        # angle = np.ascontiguousarray(angle)
        # coords = np.ascontiguousarray(coords)

        print(" -- " + mode + ": Done... ")

        return x, y, ID, pos, angle, coords
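
The OS-specific locking at the top of load_data_for_set can be factored into a single acquire/release pair. A rough sketch under the same assumptions as the example (flufl.lock on POSIX, the third-party filelock package on Windows); the helper names are illustrative only:

import os
from datetime import timedelta


def acquire_big_file_lock(lock_path):
    """Block until the lock for `lock_path` is held; return the lock object."""
    if os.name == "posix":
        from flufl.lock import Lock
        lock = Lock(lock_path)
        lock.lifetime = timedelta(days=2)  # long lifetime: dump generation is slow
        lock.lock()                        # blocks until the lock is obtained
        return lock
    elif os.name == "nt":
        import filelock
        lock = filelock.FileLock(lock_path)
        lock.acquire(timeout=2000)         # seconds; raises filelock.Timeout on expiry
        return lock
    return None                            # unknown OS: no locking available


def release_big_file_lock(lock):
    """Release a lock returned by acquire_big_file_lock (no-op for None)."""
    if lock is None:
        return
    if os.name == "posix":
        lock.unlock()
    else:
        lock.release()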
Exemplo n.º 29
0
class SafeUpdater(object):
    """
    Container for the safe update functionality.
    """

    def __init__(self, lifetime=LOCK_LIFETIME, timeout=LOCK_TIMEOUT):
        """
        Create a SafeUpdater with the given lock lifetime and timeout
        (see flufl.lock documentation). The defaults are the lifetime
        and timeout found in the config.
        """
        self.lock = Lock(LOCK_FILE, lifetime=lifetime)
        self.timeout = timeout

    def clone_repo(self, repo):
        """
        Clone a repository on the disk.
        The parent directories are created under the cloning directory from
        config. For example, the repository name might be "devs/joe/task1".
        If "devs/joe" was not a directory under the cloning directory, it is
        created.

        Raise an exception on failure.
        """

        # Get the full path for "devs/joe/task1".
        repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo))

        # Sanity check - make sure the path is actually in the cloning
        # directory.
        if not repo_path.startswith(CLONE_DIR):
            raise Exception("Illegal base path for repo: %s" % repo_path)

        # Clone the task into the desired directory.
        # git will create the subdirectories if needed.
        SafeUpdater.run(["git", "clone",
                         "gitolite3@localhost:%s" % repo,
                         repo_path])

    def update_repo(self, repo, allow_clone):
        """
        Update a repository on the disk. If allow_clone is set, and the
        repository doesn't exist on the disk yet, it will be cloned
        with the clone_repo method.

        Raise an exception on failure.
        """

        # Clone if needed. If the repository doesn't exist and we shouldn't
        # clone, raise an error for the caller.
        repo_path = os.path.join(CLONE_DIR, repo)
        if not os.path.isdir(repo_path):
            if allow_clone:
                # We only need to clone in this case.
                # Pulling (below) is not relevant.
                self.clone_repo(repo)
                return
            else:
                raise Exception("Directory doesn't exist, "
                                "and allow_clone is false: %s" % repo_path)

        # Make sure the repository is up to date.
        # We temporarily change the working directory, for git.
        old_working_dir = os.getcwd()
        os.chdir(repo_path)
        try:
            # We want to "git pull" here, except that repositories may have
            # been updated with force, and we want the newest version.
            SafeUpdater.run(["git", "fetch", "origin"])
            SafeUpdater.run(["git", "reset", "--hard", "origin/master"])
        finally:
            os.chdir(old_working_dir)

    def generate_task(self, repo, update, allow_clone, gen_dir=None):
        """
        Generate a task on the disk with TaskSandbox into gen_dir.
        If update is true, we first update the task.
        If both update and allow_clone are true, and the task
        doesn't exist on the disk yet, it will be cloned with
        the clone_repo method.

        If gen_dir is not specified, the default of TaskSandbox is used
        (auto.gen inside the task directory).

        Raise an exception on failure.
        """

        if update:
            self.update_repo(repo, allow_clone)

        repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo))
        if not os.path.isdir(repo_path):
            raise Exception("Task directory not found: %s" % repo_path)

        TaskSandbox.execute(repo_path, gen_dir=gen_dir)

    def update_contest(self, repo, update, generate, add_new_users,
                       update_users, auto_submit, auto_submit_new,
                       auto_submit_all=False):
        """
        Update a contest and its tasks on the database.
        This should be done after generating newly updated tasks
        with TaskSandbox, in order to update CMS.

        If generate is true, tasks are updated and generated
        (cloned if needed).

        The contest repository itself is updated (cloned if needed),
        if update is true.

        If update_users is true, the users repository is updated,
        and the contest's users are updated. Users are never modified
        or deleted (this requires manual action).

        auto_submit is a set/list of task repositories for which
        auto submission will be invoked.

        If auto_submit_new is true, auto submission will also be invoked
        for tasks that were not in the contest before. If auto_submit_all
        is true, it is invoked for every task in the contest.

        Raise an exception on failure.
        """

        # Update/clone contest.
        if update:
            self.update_repo(repo, allow_clone=True)

        # Get contest module.
        repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo))
        module_path = os.path.join(repo_path, "module.yaml")

        # Read contest params.
        with open(module_path) as stream:
            contest_params = yaml.safe_load(stream)

        # Update/clone users, and add them.
        if add_new_users:
            self.add_new_users(contest_params["users_file"], update_users,
                               contest_params["short_name"])

        if generate:
            # Clone and generate tasks.
            for task in contest_params["tasks"]:
                task_repo = task["path"]
                self.generate_task(task_repo, update=True, allow_clone=True)

        # Fetch the tasks that were already in the contest before.
        # If an exception is raised, this contest is not yet in the database.
        contest_name = contest_params["short_name"]
        try:
            existing_tasks = set(get_contest_tasks(contest_name))
        except Exception:
            existing_tasks = set()

        # Note: cmsImportContest drops participations when updating
        # a contest. We can give the --update-contest flag because
        # our cmsImportContest script was modified to ignore
        # participations. See issue #775.
        SafeUpdater.run(["cmsImportContest",
                         "--import-tasks",
                         "--update-tasks",
                         "--update-contest",
                         repo_path])

        # Invoke auto_submit for every task that didn't exist
        # in the contest before, and every task in auto_submit.
        for task in contest_params["tasks"]:
            is_new = task["short_name"] not in existing_tasks
            should_submit = auto_submit_all
            should_submit |= auto_submit_new and is_new
            should_submit |= task["path"] in auto_submit
            if should_submit:
                self.auto_submit(contest_name, task)

    def auto_submit(self, contest_name, task_info):
        """
        Perform auto submission for the given contest and task,
        removing previous auto submissions from the database.
        If the task does not specify any solutions to auto submit,
        do nothing.
        """

        username = "******"
        task_name = task_info["short_name"]
        task_dir = os.path.join(CLONE_DIR, task_info["path"])
        processor = create_processor(task_dir)

        # For each submission, we convert the list of files to a dictionary
        # that maps the submission filename to the path. For example:
        # {"Task.%l": "path/to/sol.cpp"}
        # This relies on the type being batch, with a single file
        # per submission.
        auto_submit_items = []
        for item in processor.get_auto_submit_items():
            file_path = item["files"][0]
            auto_submit_items += [{"Task.%l": file_path}]

        if not auto_submit_items:
            return

        if not remove_submissions(contest_name, task_name, username):
            raise Exception("Auto submission failed: could not remove old "
                            "submissions, they are in progress.")
        add_submissions(contest_name, task_name, username, auto_submit_items)

    def add_new_users(self, users_file, update_repo, contest_name=None):
        """
        Add the users in the given YAML path to the database.
        Users that already exist are ignored.
        This never deletes or modifies existing users.

        If update_repo is true, update/clone the users repository first.

        If contest_name is given and it exists, add participations too.

        Raise an exception on failure.
        """

        # Update the users repository.
        if update_repo:
            self.update_repo("users", allow_clone=True)

        # Get the information from the users file.
        yaml_path = os.path.join(CLONE_DIR, users_file)
        with open(yaml_path) as stream:
            users_info = yaml.safe_load(stream)

        add_users(users_info, contest_name)

    def __enter__(self):
        """
        Lock when starting a "with" block.
        """
        self.lock.lock(timeout=self.timeout)
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        """
        Unlock when finishing a "with" block.
        Any exceptions are raised to the caller.
        """
        self.lock.unlock()
        return False

    @staticmethod
    def run(commands, input_string="", fail_abort=True):
        """
        Run the given command (a list of program arguments) as a subprocess
        and wait for it to finish. input_string, if given, is passed to its
        stdin. If fail_abort is set, a non-zero return code triggers an
        exception.
        Return (return_code, stdout, stderr).
        """
        process = subprocess.Popen(commands,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout, stderr = process.communicate(input=input_string)
        return_code = process.returncode
        if return_code != 0 and fail_abort:
            raise Exception("Command returned non-zero: %s\n"
                            "Return code: %s\n"
                            "Stdout: %s\n"
                            "Stderr: %s\n" %
                            (commands, return_code, stdout, stderr))
        return (return_code, stdout, stderr)
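
SafeUpdater takes the flufl lock in __enter__ and releases it in __exit__, so callers are expected to drive it from a with block. A minimal, hypothetical usage sketch (the repository name is a placeholder, not taken from any real contest):

if __name__ == "__main__":
    # The whole update runs while the global lock is held by SafeUpdater.
    with SafeUpdater() as updater:
        updater.update_contest("contests/demo",
                               update=True,
                               generate=True,
                               add_new_users=True,
                               update_users=True,
                               auto_submit=set(),
                               auto_submit_new=False)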