Пример #1
0
    def load_data(self):
        """Load labels, set sizes and normalization matrices; open samples.

        Reads the pickled labels from ``original_labels_filename``, the
        per-set sample counts (JSON object with "test"/"val"/"train" keys)
        from ``count_samples_filename`` and the pickled mean/rdisp matrices
        from ``matrixes_filename``. Finally opens ``samples_filename`` and
        leaves it open in ``self.file_samples``.

        Raises:
            OSError: a required filename is None or does not exist.
            error.Bug: the number of labels differs from ``total_samples``
                or from the sample count derived from the data file size.
            ValueError: rdisp contains NaN/Inf values, or the matrix shapes
                do not match each other or (sy, sx).
        """
        self.original_labels = []
        # The None test must come first: os.path.exists(None) raises
        # TypeError, which would mask the intended OSError.
        if (self.original_labels_filename is None
                or not os.path.exists(self.original_labels_filename)):
            raise OSError(
                "original_labels_filename %s does not exist or None."
                " Please specify path to file with labels. If you don't have "
                "pickle with labels, generate it with preparation_imagenet.py"
                % self.original_labels_filename)
        if (self.count_samples_filename is None
                or not os.path.exists(self.count_samples_filename)):
            raise OSError(
                "count_samples_filename %s does not exist or None. Please "
                "specify path to file with count of samples. If you don't "
                "have json file with count of samples, generate it with "
                "preparation_imagenet.py" % self.count_samples_filename)
        if (self.samples_filename is None
                or not os.path.exists(self.samples_filename)):
            raise OSError("samples_filename %s does not exist or None. Please "
                          "specify path to file with samples. If you don't "
                          "have dat file with samples, generate it with "
                          "preparation_imagenet.py" % self.samples_filename)
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point these filenames at trusted data.
        with open(self.original_labels_filename, "rb") as fin:
            for lbl in pickle.load(fin):
                self.original_labels.append(lbl)
                self.labels_mapping[int(lbl)] = int(lbl)
        self.info("Labels (min max count): %d %d %d",
                  numpy.min(self.original_labels),
                  numpy.max(self.original_labels), len(self.original_labels))

        # Position of each set inside class_lengths.
        set_type = {"test": 0, "val": 1, "train": 2}
        with open(self.count_samples_filename, "r") as fin:
            for key, value in json.load(fin).items():
                self.class_lengths[set_type[key]] = value
        self.info("Class Lengths: %s", str(self.class_lengths))

        if self.total_samples != len(self.original_labels):
            raise error.Bug(
                "Number of labels missmatches sum of class lengths")

        with open(self.matrixes_filename, "rb") as fin:
            matrixes = pickle.load(fin)

        self.mean.mem = matrixes[0]
        self.rdisp.mem = matrixes[1].astype(
            opencl_types.dtypes[root.common.engine.precision_type])
        if numpy.count_nonzero(numpy.isnan(self.rdisp.mem)):
            raise ValueError("rdisp matrix has NaNs")
        if numpy.count_nonzero(numpy.isinf(self.rdisp.mem)):
            raise ValueError("rdisp matrix has Infs")
        if self.mean.shape != self.rdisp.shape:
            raise ValueError("mean.shape != rdisp.shape")
        if self.mean.shape[0] != self.sy or self.mean.shape[1] != self.sx:
            raise ValueError("mean.shape != (%d, %d)" % (self.sy, self.sx))

        # The samples file intentionally stays open after a successful load.
        self.file_samples = open(self.samples_filename, "rb")
        # seek(0, 2) returns the file size; one sample is sx*sy*channels bytes.
        if (self.file_samples.seek(0, 2) //
                (self.sx * self.sy * self.channels) !=
                len(self.original_labels)):
            # Do not leak the handle when the file is rejected.
            self.file_samples.close()
            raise error.Bug("Wrong data file size")
Пример #2
0
    def load_data(self):
        """Load label names, set sizes and open the raw samples file.

        Reads (text label, integer label) pairs from the pickle at
        ``original_labels_filename``, builds ``labels_mapping`` and its
        inverse ``reversed_labels_mapping``, reads the "test"/"val"/"train"
        sample counts from the JSON at ``count_samples_filename``, counts
        label occurrences past the test and validation sections, and opens
        ``samples_filename`` (kept open in ``self._file_samples_``).

        Raises:
            OSError: a required filename is None or does not exist.
            error.Bug: the number of labels differs from ``total_samples``
                or from the sample count derived from the data file size.
        """
        labels_path = self.original_labels_filename
        if labels_path is None or not os.path.exists(labels_path):
            raise OSError(
                "original_labels_filename %s does not exist or None."
                " Please specify path to file with labels. If you don't have "
                "pickle with labels, generate it with preparation_imagenet.py"
                % labels_path)

        counts_path = self.count_samples_filename
        if counts_path is None or not os.path.exists(counts_path):
            raise OSError(
                "count_samples_filename %s does not exist or None. Please "
                "specify path to file with count of samples. If you don't "
                "have json file with count of samples, generate it with "
                "preparation_imagenet.py" % counts_path)

        samples_path = self.samples_filename
        if samples_path is None or not os.path.exists(samples_path):
            raise OSError("samples_filename %s does not exist or None. Please "
                          "specify path to file with samples. If you don't "
                          "have dat file with samples, generate it with "
                          "preparation_imagenet.py" % samples_path)

        # Each pickled entry is a (text label, integer label) pair.
        with open(labels_path, "rb") as fin:
            for txt_lbl, int_lbl in pickle.load(fin):
                self._original_labels_.append(txt_lbl)
                self.labels_mapping[txt_lbl] = int(int_lbl)

        # Grow the inverse table by one slot per known label, then fill it
        # by integer label.
        for _ in self.labels_mapping:
            self.reversed_labels_mapping.append(None)
        for txt_lbl, int_lbl in self.labels_mapping.items():
            self.reversed_labels_mapping[int_lbl] = txt_lbl

        # Position of each set inside class_lengths.
        set_index = {"test": 0, "val": 1, "train": 2}
        with open(counts_path, "r") as fin:
            counts = json.load(fin)
        for set_name, count in counts.items():
            self.class_lengths[set_index[set_name]] = count
        self.info("Class Lengths: %s", str(self.class_lengths))

        # Labels located after the test and validation sections.
        train_start = self.class_lengths[0] + self.class_lengths[1]
        for lbl in self._original_labels_[train_start:]:
            self._train_different_labels_[lbl] += 1

        if self.total_samples != len(self._original_labels_):
            raise error.Bug(
                "Number of labels missmatches sum of class lengths")

        self._file_samples_ = open(samples_path, "rb")

        # Seeking to the end yields the file size in bytes.
        file_size = self._file_samples_.seek(0, os.SEEK_END)
        number_of_samples = file_size // (self.sx * self.sy * self.channels)

        label_count = len(self._original_labels_)
        if number_of_samples != label_count:
            raise error.Bug(
                "Wrong data file size: %s (original data) != %s (original "
                "labels)" % (number_of_samples, label_count))
Пример #3
0
 def class_index_by_sample_index(self, index):
     """Find the class that contains the sample at ``index``.

     Scans ``effective_class_end_offsets`` in order and picks the first
     end offset that is greater than ``index``.

     Returns:
         A ``(class_index, remaining)`` tuple where ``remaining`` is the
         end offset of that class minus ``index``.

     Raises:
         error.Bug: no end offset is greater than ``index``.
     """
     hit = next(
         ((position, end_offset)
          for position, end_offset in enumerate(
              self.effective_class_end_offsets)
          if index < end_offset),
         None)
     if hit is None:
         raise error.Bug("Could not convert sample index to class index, "
                         "probably due to incorrect class_end_offsets.")
     position, end_offset = hit
     return position, end_offset - index
Пример #4
0
 def class_ended(self):
     """Tell whether ``global_offset`` sits exactly on a class end offset.

     Walks ``effective_class_end_offsets`` in order; the first offset that
     is equal to or greater than ``global_offset`` decides the answer.

     Returns:
         True when ``global_offset`` equals that offset, False when it is
         below it.

     Raises:
         error.Bug: ``global_offset`` is greater than every end offset.
     """
     current = self.global_offset
     for end_offset in self.effective_class_end_offsets:
         if current == end_offset:
             reached_end = True
         elif current < end_offset:
             reached_end = False
         else:
             # Still past this class; look at the next end offset.
             continue
         return reached_end
     raise error.Bug("global_offset %d is out of bounds %s" %
                     (self.global_offset, self.effective_class_end_offsets))
Пример #5
0
 def _gpu_fill(self, nbytes):
     """Run the fill kernel for at least ``nbytes`` bytes of output.

     ``nbytes`` is rounded up with ``roundup`` to a whole number of rounds,
     where one round is ``num_states * 16 * 8`` bytes (presumably sixteen
     64-bit words per state -- TODO confirm against the kernel). The round
     count is stored in ``cl_const[0]`` and passed as kernel argument 1.

     Raises:
         error.Bug: the rounded size exceeds ``self.output.nbytes``.
     """
     round_size = self.num_states * 16 * 8
     padded_size = roundup(nbytes, round_size)
     if padded_size > self.output.nbytes:
         raise error.Bug("nbytes > self.output.nbytes")
     self.unmap_vectors(self.states, self.output)
     n_rounds = padded_size // round_size
     self.cl_const[0] = n_rounds
     self.set_arg(1, self.cl_const)
     self.execute_kernel(self._global_size, self._local_size)
Пример #6
0
 def apply_data_from_slave(self, data, slave):
     """Account for a minibatch that ``slave`` has finished.

     Pops the most recently queued (offset, size) pair for ``slave.id``
     from ``pending_minibatches_`` into ``minibatch_offset`` and
     ``minibatch_size``, calls ``_on_successful_serve()`` and, if
     ``has_data_for_slave`` is falsy, sets it to ``last_minibatch``.
     ``data`` is not used here. A ``slave`` of None (a partial update)
     is ignored.

     Raises:
         error.Bug: ``pending_minibatches_`` has no entry for ``slave.id``.
     """
     if slave is None:
         # Partial update: nothing to account for.
         return
     try:
         (self.minibatch_offset,
          self.minibatch_size) = self.pending_minibatches_[slave.id].pop()
     except KeyError:
         raise error.Bug("pending_minibatches_ does not contain %s" %
                         slave.id)
     else:
         self._on_successful_serve()
         if not self.has_data_for_slave:
             self.has_data_for_slave = self.last_minibatch
Пример #7
0
    def numpy_fill(self, nbytes):
        """Fill ``self.output`` on the host with values from ``_next_rand``.

        ``nbytes`` is rounded up with ``roundup`` to a whole number of
        rounds; one round is ``num_states * 16 * 8`` bytes, i.e. sixteen
        uint64 values per state. Values are interleaved: draw ``k`` of
        state ``i`` is stored at uint64 index ``i + k * num_states``.

        Raises:
            error.Bug: the rounded size exceeds ``self.output.nbytes``.
        """
        round_size = self.num_states * 16 * 8
        nbytes = roundup(nbytes, round_size)
        if nbytes > self.output.nbytes:
            raise error.Bug("nbytes > self.output.nbytes")
        self.states.map_write()
        self.output.map_invalidate()
        # Every round produces 16 values per state.
        draws_per_state = (nbytes // round_size) * 16

        # Scratch operands handed to _next_rand on every call.
        # NOTE(review): the constant looks like the xorshift1024* multiplier
        # -- confirm against _next_rand.
        multiplier = numpy.array([1181783497276652981], dtype=numpy.uint64)
        s0 = numpy.zeros(1, dtype=numpy.uint64)
        s1 = numpy.zeros(1, dtype=numpy.uint64)

        # View the state buffer as rows of sixteen uint64 words.
        state_rows = self.states.mem.view(dtype=numpy.uint64)
        state_rows = state_rows.reshape(state_rows.size // 16, 16)
        out_words = self.output.mem.view(dtype=numpy.uint64)

        stride = self.num_states
        for state_index in range(stride):
            state = state_rows[state_index]
            # Reset the position counter before each state's sequence.
            self.p = 0
            for draw in range(draws_per_state):
                out_words[state_index + draw * stride] = self._next_rand(
                    state, s0, s1, multiplier)
Пример #8
0
 def fill_minibatch(self):
     """Always raise: this method is not expected to be called.

     Raises:
         error.Bug: unconditionally.
     """
     # The minibatch has already been filled in fill_indices, so there is
     # nothing left for fill_minibatch to do.
     message = "Control should not go here"
     raise error.Bug(message)
Пример #9
0
class VelesProtocol(StringLineReceiver, IDLogger):
    """A communication controller from client to server.

    Attributes:
        FSM_DESCRIPTION     The definition of the Finite State Machine of the
                            protocol.
    """
    def onFSMStateChanged(self, e):
        """
        Writes a debug record for a state machine transition.

        Parameters:
            e           The transition event; its event, src and dst
                        attributes are logged.
        """
        transition = (e.event, e.src, e.dst)
        self.debug("state: %s, %s -> %s", *transition)

    # State machine definition: the initial state, the named events with
    # their source and destination states, and the callback invoked on
    # every state change.
    FSM_DESCRIPTION = {
        'initial': 'INIT',
        'events': [
            # Events whose source is '*'.
            {'name': 'disconnect', 'src': '*', 'dst': 'ERROR'},
            {'name': 'close', 'src': '*', 'dst': 'END'},
            {'name': 'reconnect', 'src': '*', 'dst': 'INIT'},
            # Identification.
            {'name': 'request_id', 'src': ['INIT', 'WAIT'], 'dst': 'WAIT'},
            {'name': 'send_id', 'src': 'INIT', 'dst': 'WAIT'},
            # Job life cycle.
            {'name': 'request_job', 'src': ['WAIT', 'POSTPONED'],
             'dst': 'GETTING_JOB'},
            {'name': 'obtain_job', 'src': 'GETTING_JOB', 'dst': 'BUSY'},
            {'name': 'refuse_job', 'src': 'GETTING_JOB', 'dst': 'END'},
            {'name': 'postpone_job', 'src': 'GETTING_JOB',
             'dst': 'POSTPONED'},
            {'name': 'complete_job', 'src': 'BUSY', 'dst': 'WAIT'},
        ],
        'callbacks': {'onchangestate': onFSMStateChanged},
    }

    def __init__(self, addr, host):
        """
        Initializes the protocol.

        Parameters:
            addr        The address of the server (reported by Twisted).
            host        The owner of this protocol; supplies the logger and
                        the state machine instance stored in self.state.
        """
        super(VelesProtocol, self).__init__(logger=host.logger)
        # Connection identity.
        self.host = host
        self.addr = addr
        self.state = host.state
        # Bookkeeping for the job currently being processed.
        self._current_deferred = None
        self._last_update = None
        # The computing power is re-sent once more than this many seconds
        # have passed since the previous upload (see the time.time() check
        # in job_received).
        self._power_upload_threshold = 60
        self._power_upload_time = 0
        self.rand = get_rg()

    def connectionMade(self):
        """
        Starts the handshake once the connection is established.

        Resets disconnect_time, then requests a new ID when this client has
        none; otherwise sends the known ID and fires the send_id event on
        the state machine.
        """
        self.info("Connected in %s state", self.state.current)
        self.disconnect_time = None
        if self.id is not None:
            self.send_id()
            self.state.send_id()
        else:
            self.request_id()

    def connectionLost(self, reason):
        """
        Cancels the pending deferred, if any, when the connection drops.

        Parameters:
            reason      Not used by this method.
        """
        self.debug("Connection was lost")
        pending = self._current_deferred
        if pending is None:
            return
        pending.cancel()

    def lineReceived(self, line):
        """
        Handles one line received from the server.

        The line must be UTF-8 encoded JSON holding an object. Lines that
        cannot be decoded or parsed, or that hold something other than an
        object, are logged and dropped. A message with a truthy "error"
        entry triggers disconnect(). In the WAIT state the message is
        either a reconnection acknowledgement ("reconnect": "ok") or the
        answer to an ID request, which must carry "id", "log_id" and
        "endpoint" and may carry initial "data". Any other state triggers
        disconnect().

        Parameters:
            line        The raw received line (bytes).
        """
        self.debug("lineReceived:  %s", line)
        try:
            msg = json.loads(line.decode("utf-8"))
        except ValueError:
            # Covers both invalid UTF-8 (UnicodeDecodeError) and invalid
            # JSON; fall through to the "could not parse" branch below
            # instead of letting the exception escape the handler.
            msg = None
        if not isinstance(msg, dict):
            self.error("Could not parse the received line, dropping it")
            return
        err = msg.get("error")
        if err:
            self.disconnect("Server returned error: '%s'", err)
            return
        if self.state.current == "WAIT":
            if msg.get("reconnect") == "ok":
                if self.id is None:
                    self.error("Server returned a successful reconnection, "
                               "but my ID is None")
                    self.request_id()
                    return
                self.request_job()
                return
            cid = msg.get("id")
            if cid is None:
                self.error("No ID was received in WAIT state")
                self.request_id()
                return
            self.id = cid
            self.debug("Received ID")
            log_id = msg.get("log_id")
            if log_id is None:
                self.error("No log ID was received in WAIT state")
                self.request_id()
                return
            self.host.on_id_received(self.id, log_id)
            endpoint = msg.get("endpoint")
            if endpoint is None:
                self.error("No endpoint was received")
                self.request_id()
                return
            # The same dealer object is shared with the host.
            self.host.zmq_connection = self.zmq_connection = ZmqDealer(
                cid, self, ZmqEndpoint("connect", endpoint))
            self.info("Connected to ZeroMQ endpoint %s", endpoint)
            data = msg.get('data')
            if data is not None:
                self._set_deferred(
                    self.host.workflow.apply_initial_data_from_master, data)
            self.request_job()
            return
        self.disconnect("disconnect: invalid state %s", self.state.current)

    def job_received(self, job):
        """
        Reacts to the server's answer to a job request.

        A falsy job means refusal: the refuse_job event is fired and the
        launcher is stopped. b"NEED_UPDATE" fires postpone_job and returns
        without running anything. Any other value fires obtain_job and
        schedules workflow.do_job through _set_deferred. In every case but
        a failed obtain_job, request_update() is called first when the
        host's "async" flag is set and a previous update is stored.

        Parameters:
            job         The job payload received from the server.
        """
        if not job:
            # False, None or empty string mean job refusal
            self.info("Job was refused")
            self.state.refuse_job()
        elif job == b"NEED_UPDATE":
            self.debug("Master returned NEED_UPDATE, will repeat the job "
                       "request in update_result_received()")
            self.state.postpone_job()
        else:
            try:
                self.state.obtain_job()
            except fysom.FysomError as e:
                self.warning("Job was received too late or too early: %s", e)
                return
        update = self._last_update
        # "async" is a reserved keyword since Python 3.7, so the attribute
        # cannot be read with dot syntax without a SyntaxError.
        if getattr(self.host, "async") and update is not None:
            self.request_update()
        if job == b"NEED_UPDATE":
            return
        if not job:
            # No jobs are available => terminate itself
            self.host.launcher.stop()
            return
        try:
            # Optional fault injection: crash with the configured probability.
            if self.host.death_probability > 0 and \
                    self.rand.random() < self.host.death_probability:
                raise error.Bug("This slave has randomly crashed (death "
                                "probability was %f)" %
                                self.host.death_probability)
            now = time.time()
            # Re-send the computing power only after the threshold elapsed.
            if now - self._power_upload_time > self._power_upload_threshold:
                self._power_upload_time = now
                self.sendLine({
                    'cmd': 'change_power',
                    'power': self.host.workflow.computing_power
                })
            # workflow.do_job may hang, so launch it in the thread pool
            self._set_deferred(self.host.workflow.do_job, job, update,
                               self.job_finished)
        except:
            # NOTE(review): the bare except forwards every failure,
            # including the injected Bug above, to errback; kept as-is to
            # preserve behavior -- confirm that this breadth is intended.
            errback(Failure())