def classify_in_background(self):
  while True:
    requests = []
    # Fetch first request.
    r = yield self.classification_queue.get()
    requests.append(r)
    # Grab all other waiting requests.
    try:
      while True:
        requests.append(self.classification_queue.get_nowait())
    except QueueEmpty:
      pass

    output_dim = {}
    # Do dataset creation and classification.
    dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
    dataset.init_seq_order()
    batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                       batch_size=self.batch_size,
                                       max_seqs=self.max_seqs)

    with (yield self.lock.acquire()):
      ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
      yield ctt.join()

    try:
      for i in range(dataset.num_seqs):
        requests[i].future.set_result(ctt.result[i])
        self.classification_queue.task_done()
    except Exception as e:
      print('exception', e)
      raise
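# A minimal sketch of the producer side that classify_in_background() consumes,
# assuming Tornado coroutines as used above. ClassificationRequest and the
# classify() coroutine are hypothetical names for illustration; the real
# request wrapper is not shown in this section.
from tornado import gen
from tornado.concurrent import Future


class ClassificationRequest(object):
  def __init__(self, data):
    self.data = data        # dict of numpy arrays, as StaticDataset expects
    self.future = Future()  # resolved by classify_in_background()


@gen.coroutine
def classify(self, data):
  # Would live on the same class that runs classify_in_background().
  request = ClassificationRequest(data)
  yield self.classification_queue.put(request)  # wakes the background loop
  result = yield request.future                 # set via future.set_result()
  raise gen.Return(result)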
def _classify(params):
  ret = {}
  output_dim = {}
  hash = hashlib.new('ripemd160')
  hash.update(json.dumps(params))
  hash = hash.hexdigest()
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and k not in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if 'error' not in ret:
    # Build the dataset inside the try block, so that a bad request yields an
    # error response instead of an unhandled exception.
    try:
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = "invalid data: %s" % params
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent,
                                      batch_size=sys.maxint,
                                      max_seqs=1)
      if hash not in classifiers:
        classifiers[hash] = ClassificationTaskThread(network, devices, data, batches)
        classifiers[hash].json_params = params
        print >> log.v3, "classifier started:", hash
      ret['result'] = {'hash': hash}
  return ret
def _classify(params):
  ret = {}
  output_dim = {}
  hash = hashlib.new('ripemd160')
  hash.update(json.dumps(params).encode('utf-8'))  # hashlib requires bytes in Python 3
  hash = hash.hexdigest()
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and k not in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if 'error' not in ret:
    # Build the dataset inside the try block, so that a bad request yields an
    # error response instead of an unhandled exception.
    try:
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = "invalid data: %s" % params
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent,
                                      batch_size=sys.maxsize,
                                      max_seqs=1)
      if hash not in workers:
        workers[hash] = ClassificationTaskThread(network, devices, data, batches)
        workers[hash].json_params = params
        print("worker started:", hash, file=log.v3)
      ret['result'] = {'hash': hash}
  return ret
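# A small usage sketch of the deduplication scheme above (standalone, not part
# of the server): identical parameter dicts serialize to the same JSON and
# therefore map to the same ripemd160 hash, so a repeated request reuses the
# running ClassificationTaskThread instead of starting a new one.
import hashlib
import json

params = {'data': [[0.1, 0.2], [0.3, 0.4]]}
h1 = hashlib.new('ripemd160', json.dumps(params).encode('utf-8')).hexdigest()
h2 = hashlib.new('ripemd160', json.dumps(params).encode('utf-8')).hexdigest()
assert h1 == h2  # same params, same hash, same cached worker
# Note: json.dumps() without sort_keys=True depends on key insertion order, so
# logically-equal dicts built in different orders could hash differently.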
def post(self, *args, **kwargs):
  # TODO: Make this batch over a specific time period
  params = json.loads(self.request.body)
  output_dim = {}
  ret = {}
  # First get the meta data.
  engine_hash = params['engine_hash']
  print('Received engine hash: ', engine_hash, file=log.v4)
  # Delete unnecessary stuff so that the rest works.
  del params['engine_hash']
  # Load in engine and hash.
  engine = _engines[engine_hash]
  network = engine.network
  devices = _devices[engine_hash]
  hash_engine = hashlib.new('ripemd160')
  hash_engine.update((json.dumps(params) + engine_hash).encode('utf-8'))  # hashlib requires bytes
  hash_temp = hash_engine.hexdigest()
  # Process the data.
  for k in params:
    try:
      params[k] = numpy.asarray(params[k], dtype='float32')
      if k != 'data':
        output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
    except Exception:
      if k != 'data' and k not in network.n_out:
        ret['error'] = 'unknown target: %s' % k
      else:
        ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(params[k]))
      break
  if 'error' not in ret:
    try:
      data = StaticDataset(data=[params], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = 'Dataset server error'
      self.write(ret)
      return  # avoid writing the response a second time below
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent,
                                      batch_size=sys.maxsize,
                                      max_seqs=1)
      if hash_temp not in _classify_cache:
        print('Starting classification', file=log.v3)
        # If we haven't yet processed this exact request, save it in the cache.
        _classify_cache[hash_temp] = yield self.classification_task(network=network,
                                                                    devices=devices,
                                                                    data=data,
                                                                    batches=batches)
      ret = {'result': {k: _classify_cache[hash_temp].result[k].tolist()
                        for k in _classify_cache[hash_temp].result}}
  print("Finished processing classification with ID: ", hash_temp, file=log.v4)
  self.write(ret)
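# A hedged client-side example for the JSON handler above, using the requests
# library. The host, port, and '/classify' route are assumptions made for
# illustration; engine_hash must be a hash previously registered in _engines.
import json
import requests

payload = {
  'engine_hash': '0123456789abcdef0123456789abcdef01234567',  # hypothetical
  'data': [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
}
response = requests.post('http://localhost:8080/classify', data=json.dumps(payload))
print(response.json())  # {'result': {...}} on success, {'error': ...} otherwise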
def post(self, *args, **kwargs):
  # TODO: Make this batch over a specific time period
  # TODO: Write formal documentation
  url_params = self.request.arguments
  output_dim = {}
  ret = {}
  data = {}
  data_format = ''
  data_type = ''
  engine_hash = ''
  data_shape = ''

  # First get meta data from the URL parameters. Tornado provides each
  # argument as a list of bytes.
  try:
    engine_hash = url_params['engine_hash'][0].decode('utf-8')
    if 'data_format' in url_params:
      data_format = url_params['data_format'][0].decode('utf-8')
    if 'data_type' in url_params:
      # Possible options: https://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html
      data_type = url_params['data_type'][0].decode('utf-8')
    if 'data_shape' in url_params:
      data_shape = url_params['data_shape'][0].decode('utf-8')  # either '' or 'dim1,dim2'
  except Exception as e:
    print('Parameter formatting exception: ' + str(e), file=log.v4)

  # Apply defaults, in case we didn't get them through the URL parameters.
  if data_format == '':
    data_format = 'json'
  if data_type == '':
    data_type = 'float32'
  print('Received engine hash: ' + engine_hash + ', data format: ' + data_format +
        ', data type: ' + data_type + ', data shape: ' + data_shape, file=log.v5)

  # Load in engine and compute a hash of this request.
  engine = _engines[engine_hash]
  network = engine.network
  devices = _devices[engine_hash]
  hash_engine = hashlib.new('ripemd160')
  hash_engine.update(self.request.body + engine_hash.encode('utf-8'))  # hashlib requires bytes
  hash_temp = hash_engine.hexdigest()

  # Pre-process the data.
  if data_format == 'json':
    data = json.loads(self.request.body)
    for k in data:
      try:
        data[k] = np.asarray(data[k], dtype=data_type)
        if k != 'data':
          output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
      except Exception:
        if k != 'data' and k not in network.n_out:
          ret['error'] = 'unknown target: %s' % k
        else:
          ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(data[k]))
        break
  if data_format == 'binary':
    try:
      float_array = array(self._get_type_code(data_type))
      float_array.frombytes(self.request.body)  # fromstring() is deprecated in Python 3
      data['data'] = np.asarray(float_array.tolist(), dtype=data_type)
      data_shape_arr = data_shape.split(",")
      shape = (int(data_shape_arr[0]), int(data_shape_arr[1]))
      data['data'] = np.reshape(data['data'], shape)
    except Exception as e:
      print('Binary data error: ' + str(e), file=log.v4)
      ret['error'] = 'Error during binary data conversion: ' + str(e)

  # Do dataset creation and classification.
  if 'error' not in ret:
    try:
      data = StaticDataset(data=[data], output_dim=output_dim)
      data.init_seq_order()
    except Exception:
      ret['error'] = 'Dataset server error'
      self.write(ret)
      return  # avoid writing the response a second time below
    else:
      batches = data.generate_batches(recurrent_net=network.recurrent,
                                      batch_size=sys.maxsize,
                                      max_seqs=1)
      if hash_temp not in _classify_cache:
        print('Starting classification', file=log.v3)
        # If we haven't yet processed this exact request, save it in the cache.
        _classify_cache[hash_temp] = yield self.classification_task(network=network,
                                                                    devices=devices,
                                                                    data=data,
                                                                    batches=batches)
      ret = {'result': {k: _classify_cache[hash_temp].result[k].tolist()
                        for k in _classify_cache[hash_temp].result}}
      # Update engine usage for performance optimization.
      _engine_usage[engine_hash] = datetime.datetime.now()
  print("Finished processing classification with ID: ", hash_temp, file=log.v3)
  self.write(ret)
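# A sketch of how a client might construct the binary variant the handler
# above parses: raw float32 bytes in the body, with type and shape passed as
# URL parameters. Host, port, and route are assumptions for illustration.
import numpy as np
import requests

arr = np.arange(6, dtype='float32').reshape(2, 3)
response = requests.post(
  'http://localhost:8080/classify',  # hypothetical route
  params={'engine_hash': '0123456789abcdef0123456789abcdef01234567',  # hypothetical
          'data_format': 'binary',
          'data_type': 'float32',
          'data_shape': '2,3'},
  data=arr.tobytes())  # server-side array.frombytes() restores the float32 values
print(response.json())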
def post(self, *args, **kwargs):
  # TODO: Write formal documentation
  """
  Method for handling classification via HTTP POST request. The engine hash
  (which selects the engine to use) must be given as the URL parameter
  engine_hash, and the data itself goes in the request body. If using binary
  data, the URL parameters data_format='binary' and data_shape='<dim1>,<dim2>'
  must also be supplied. If using a specific data type, you can supply it as
  the URL parameter data_type.

  :param args:
  :param kwargs:
  :return: Either JSON with an error or a JSON list of generated outputs.
  """
  url_params = self.request.arguments
  output_dim = {}
  ret = {}
  data = {}
  data_format = ''
  data_type = ''
  engine_hash = ''
  data_shape = ''

  # First get meta data from the URL parameters. Tornado provides each
  # argument as a list of bytes.
  engine_hash = url_params['engine_hash'][0].decode('utf-8')
  if 'data_format' in url_params:
    data_format = url_params['data_format'][0].decode('utf-8')
  if 'data_type' in url_params:
    # Possible options: https://docs.scipy.org/doc/numpy-1.10.1/user/basics.types.html
    data_type = url_params['data_type'][0].decode('utf-8')
  if 'data_shape' in url_params:
    data_shape = url_params['data_shape'][0].decode('utf-8')  # either '' or 'dim1,dim2'

  # Apply defaults, in case we didn't get them through the URL parameters.
  if data_format == '':
    data_format = 'json'
  if data_type == '':
    data_type = 'float32'
  print('Received engine hash: %s, data format: %s, data type: %s, data shape: %s'
        % (engine_hash, data_format, data_type, data_shape), file=log.v5)

  # Load in engine and compute a hash of this request.
  engine = _engines[engine_hash]
  network = engine.network
  devices = _devices[engine_hash]
  hash_engine = hashlib.new('ripemd160')
  hash_engine.update(self.request.body + engine_hash.encode('utf-8'))  # hashlib requires bytes
  hash_temp = hash_engine.hexdigest()

  # Pre-process the data.
  if data_format == 'json':
    data = json.loads(self.request.body)
    for k in data:
      try:
        data[k] = np.asarray(data[k], dtype=data_type)
        if k != 'data':
          output_dim[k] = network.n_out[k]  # = [network.n_in,2] if k == 'data' else network.n_out[k]
      except Exception:
        if k != 'data' and k not in network.n_out:
          ret['error'] = 'unknown target: %s' % k
        else:
          ret['error'] = 'unable to convert %s to an array from value %s' % (k, str(data[k]))
        break
  if data_format == 'binary':
    # Keep the whole conversion inside the try block, so that a parse failure
    # does not fall through to asarray()/reshape() on incomplete data.
    try:
      float_array = array(self._get_type_code(data_type))
      float_array.frombytes(self.request.body)  # fromstring() is deprecated in Python 3
      data['data'] = np.asarray(float_array.tolist(), dtype=data_type)
      data_shape_arr = data_shape.split(",")
      shape = (int(data_shape_arr[0]), int(data_shape_arr[1]))
      data['data'] = np.reshape(data['data'], shape)
    except Exception as e:
      print('Binary data error: %s' % str(e), file=log.v4)
      ret['error'] = 'Error during binary data conversion: ' + str(e)

  # Do dataset creation and classification.
  if 'error' not in ret:
    data = StaticDataset(data=[data], output_dim=output_dim)
    data.init_seq_order()
    batches = data.generate_batches(recurrent_net=network.recurrent,
                                    batch_size=sys.maxsize,
                                    max_seqs=1)
    if hash_temp not in _classify_cache:
      print('Starting classification', file=log.v3)
      # If we haven't yet processed this exact request, save it in the cache.
      _classify_cache[hash_temp] = yield self._classification_task(network=network,
                                                                   devices=devices,
                                                                   data=data,
                                                                   batches=batches)
    ret = {'result': {k: _classify_cache[hash_temp].result[k].tolist()
                      for k in _classify_cache[hash_temp].result}}
    # Update engine usage for performance optimization.
    _engine_usage[engine_hash] = datetime.datetime.now()
  print("Finished processing classification with ID: ", hash_temp, file=log.v3)
  self.write(ret)
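# The handler records datetime.datetime.now() in _engine_usage per request.
# A plausible use of those timestamps (an assumption; the eviction policy is
# not shown in this section) is unloading engines that have sat idle too long:
import datetime

def _evict_idle_engines(max_idle_minutes=30):
  """Hypothetical helper: unload engines unused for max_idle_minutes."""
  now = datetime.datetime.now()
  for engine_hash, last_used in list(_engine_usage.items()):
    if (now - last_used) > datetime.timedelta(minutes=max_idle_minutes):
      _engines.pop(engine_hash, None)
      _devices.pop(engine_hash, None)
      del _engine_usage[engine_hash]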