Example #1
import numpy as np

from ddls.feeder.record_io import RecordIO, BinaryWrite
from ddls.hpps.tensor import Tensor

# load_mnist is assumed to be provided elsewhere and to return
# (images, labels) as numpy arrays.


def gen_sample(type, partition):
    images, labels = load_mnist('./', type)
    print(labels.shape)
    print(images.shape)

    # Create one binary RecordIO writer per partition; samples are then
    # distributed round-robin across the writers.
    record_ios = []
    for x in xrange(partition):
        record_io = RecordIO('/tmp/mnist_%s_%d' % (type, x), BinaryWrite)
        record_io.write_header(name=['label', 'image'],
                               type=[np.float32, np.float32])
        record_ios.append(record_io)

    for index in xrange(labels.shape[0]):
        label = labels[index:index + 1]
        image = images[index:index + 1]

        label_tensor = Tensor([1, 1], np.float32)
        image_tensor = Tensor([1, images.shape[1]], np.float32)

        label_tensor.load_numpy(label.astype(np.float32))
        image_tensor.load_numpy(np.array(image[0], np.float32))
        '''
        print(label_tensor.asnumpy())
        print(image_tensor.asnumpy())
        '''

        record_ios[index % partition].write_sample({
            'label': label_tensor,
            'image': image_tensor
        })

    for x in xrange(partition):
        record_ios[x].write_finalize()
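
A hedged usage sketch for gen_sample: splitting one dataset type into four RecordIO partitions. The 'train' type string is an assumption about what load_mnist accepts, not something shown in the original example.

# Hypothetical call: writes /tmp/mnist_train_0 .. /tmp/mnist_train_3,
# assuming load_mnist('./', 'train') returns the training split.
gen_sample('train', 4)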
Example #2
def write_sample(comm_feature, group_feature, group_label, record_io):
    # Assumes these module-level globals: meta (dict of column name -> numpy
    # dtype), feaWidth (feature name -> column width), user_feature (names of
    # user-level features shared by the whole group), plus curr_index and
    # worker_number (round-robin state over the record_io writer list).
    global curr_index

    tensor_map = {}
    for name, type in meta.iteritems():
        if name == 'label_1':
            value = np.zeros([len(group_label), 1], type)
            for index in xrange(len(group_label)):
                value[index] = [group_label[index][0]]
            label_tensor = Tensor(value.shape, type)
            label_tensor.load_numpy(value)
            tensor_map[name] = label_tensor
        elif name == 'label_2':
            value = np.zeros([len(group_label), 1], type)
            for index in xrange(len(group_label)):
                value[index] = [group_label[index][1]]
            label_tensor = Tensor(value.shape, type)
            label_tensor.load_numpy(value)
            tensor_map[name] = label_tensor
        elif name == 'indices':
            value = np.zeros([1], type)
            value[0] = len(group_feature)
            indices_tensor = Tensor(value.shape, type)
            indices_tensor.load_numpy(value)
            tensor_map[name] = indices_tensor
        else:
            # real feature
            splits = name.split('.')
            pos = 0 if splits[1] == 'ids' else 1
            feature_name = splits[0]
            width = feaWidth[feature_name]
            if feature_name in user_feature:
                value = np.zeros([1, width], type)
                if comm_feature.has_key(feature_name):
                    data = comm_feature[feature_name]
                    for index in xrange(len(data)):
                        value[0][index] = data[index][pos]
            else:
                value = np.zeros([len(group_feature), width], type)
                for row in xrange(len(group_feature)):
                    if group_feature[row].has_key(feature_name):
                        data = group_feature[row][feature_name]
                        for index in xrange(len(data)):
                            value[row][index] = data[index][pos]

            feature_tensor = Tensor(value.shape, type)
            feature_tensor.load_numpy(value)
            tensor_map[name] = feature_tensor

    record_io[curr_index].write_sample(tensor_map)
    curr_index = (curr_index + 1) % worker_number
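
write_sample relies on module-level state that this example does not show. A minimal hedged setup sketch follows; the concrete feature names, dtypes, and widths are hypothetical, and only the shape of each structure is implied by how write_sample uses it. record_io is expected to be a list of worker_number RecordIO writers indexed by curr_index.

import numpy as np

# Hypothetical module-level configuration (illustrative values only).
meta = {
    'label_1': np.float32,           # per-row label column 1
    'label_2': np.float32,           # per-row label column 2
    'indices': np.int32,             # number of rows in the group
    'user_click.ids': np.int64,      # user-level feature ids
    'user_click.values': np.float32  # user-level feature values
}
feaWidth = {'user_click': 30}        # max number of (id, value) pairs
user_feature = set(['user_click'])   # features stored once per group
worker_number = 4                    # number of RecordIO partitions
curr_index = 0                       # round-robin cursor over record_io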
Example #3
class ModelParamManager(object):
    """ The neural network model param manager, which is used for managing and
    synchronizing the variables in NN model more easily
    """
    def __init__(self, model, kwargs={}, solver='adam'):
        """ The constructor of ModelParamManager

        The constructor associates the model parameters with an hpps ArrayTable;
        the ArrayTable is initialized with the same values as the model's
        parameters. If different processes hold different parameter values,
        their average is used as the initial value.
        """
        self.model = model

        self.arg_offsets = []
        self.arg_shapes = []
        self.aux_offsets = []
        self.aux_shapes = []

        self.arg_size = 0
        for value in self.get_all_arg_list():
            assert (np.dtype("float32") == value.dtype)
            self.arg_offsets.append(self.arg_size)
            self.arg_size += value.size
            self.arg_shapes.append(value.shape)

        self.aux_size = 0
        for value in self.get_all_aux_list():
            assert (np.dtype("float32") == value.dtype)
            self.aux_offsets.append(self.aux_size)
            self.aux_size += value.size
            self.aux_shapes.append(value.shape)

        self.arg_tensor = None
        self.arg_grad_tensor = None

        self.arg_array_table = create_array_table(size=self.arg_size,
                                                  type=np.float32,
                                                  kwargs=kwargs,
                                                  solver=solver)
        if zoo_is_worker():
            self.arg_tensor = Tensor([self.arg_size], np.float32)
            self.arg_grad_tensor = Tensor([self.arg_size], np.float32)

        self.aux_tensor = None
        if self.aux_size > 0:
            self.aux_array_table = create_array_table(size=self.aux_size,
                                                      type=np.float32,
                                                      kwargs=kwargs,
                                                      solver="avg")
            if zoo_is_worker():
                self.aux_tensor = Tensor([self.aux_size], np.float32)

        zoo_barrier()

        # Pull argument from Parameter Server
        if zoo_is_worker():
            self.arg_array_table.get(self.arg_tensor)
            self.set_all_arg_to_model()
            if self.aux_size > 0:
                self.aux_array_table.get(self.aux_tensor)
                self.set_all_aux_to_model()

    def get_all_arg_list(self):
        """ Get all args list of specific model

        Parameters
        ----------
          None

        Return
        ------
          A list of numpy arrays.
        """
        raise NotImplementedError()

    def get_all_aux_list(self):
        """ Get all auxs list of specific model

        Parameters
        ----------
          None

        Return
        ------
          A list of numpy arrays.
        """
        raise NotImplementedError()

    def get_all_arg_grad_list(self):
        """ Get all arg grad list of specific model

        Parameters
        ----------
          None

        Return
        ------
          A list of numpy arrays.
        """
        raise NotImplementedError()

    def zero_grad(self):
        """ zero all grad

        Parameters
        ----------
          None

        Return
        ------
          None
        """
        raise NotImplementedError()

    def set_all_arg_to_model(self):
        """ Set all args to specific model

        Parameters
        ----------
          None

        Return
        ------
          None
        """
        raise NotImplementedError()

    def set_all_aux_to_model(self):
        """ Set all aux to specific model

        Parameters
        ----------
          None

        Return
        ------
          None
        """
        raise NotImplementedError()

    def sync_all_param(self, option={}):
        """ Sync all params

        Parameters
        ----------
          None

        Return
        ------
          None
        """
        if not zoo_is_worker():
            return

        # copy grad from backend engine
        all_arg_grad_list = self.get_all_arg_grad_list()
        for index in xrange(len(all_arg_grad_list)):
            self.arg_grad_tensor.load_numpy(all_arg_grad_list[index],
                                            self.arg_offsets[index])

        # push grad and pull arg
        self.arg_array_table.add(self.arg_grad_tensor, option)
        self.arg_array_table.get(self.arg_tensor)

        # deploy new arg to backend engine
        self.set_all_arg_to_model()

        if self.aux_size > 0:
            # copy aux from backend engine
            all_aux_list = self.get_all_aux_list()
            for index in xrange(len(all_aux_list)):
                self.aux_tensor.load_numpy(all_aux_list[index],
                                           self.aux_offsets[index])

            # push and pull aux
            self.aux_array_table.add(self.aux_tensor)
            self.aux_array_table.get(self.aux_tensor)

            # deploy new aux to backend engine
            self.set_all_aux_to_model()

    def save_model(self, path, dummy_input, inames, onames):
        """ Save model

        Parameters
        ----------
          path: The file path of model
          dummy_input: The dummy input
          inames: The input names
          onames: The output names

        Return
        ------
          None
        """
        raise NotImplementedError()
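
ModelParamManager leaves the backend-specific hooks abstract. The sketch below is a hedged illustration of a subclass, not part of the original source: the dict-of-numpy-arrays "model" and the key names (weight, bias, weight_grad, bias_grad) are hypothetical, and the arrays must be float32 because the base constructor asserts that dtype.

class DummyParamManager(ModelParamManager):
    """ Toy subclass whose "model" is just a dict of float32 numpy arrays. """

    def get_all_arg_list(self):
        return [self.model['weight'], self.model['bias']]

    def get_all_arg_grad_list(self):
        return [self.model['weight_grad'], self.model['bias_grad']]

    def get_all_aux_list(self):
        # No auxiliary state in this toy backend.
        return []

    def zero_grad(self):
        for grad in self.get_all_arg_grad_list():
            grad.fill(0.0)

    def set_all_arg_to_model(self):
        # Copy the flat arg_tensor pulled from the ArrayTable back into the
        # per-parameter arrays, using the offsets and shapes recorded by the
        # base constructor.
        flat = self.arg_tensor.asnumpy()
        for arg, offset, shape in zip(self.get_all_arg_list(),
                                      self.arg_offsets, self.arg_shapes):
            arg[...] = flat[offset:offset + arg.size].reshape(shape)

    def set_all_aux_to_model(self):
        pass

A worker would construct it as DummyParamManager({'weight': w, 'bias': b, 'weight_grad': gw, 'bias_grad': gb}) and call sync_all_param once per step, after the backend has computed gradients.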
Example #4
from ddls.hpps.zoo import *
from ddls.hpps.array_table import create_array_table
from ddls.hpps.kv_table import create_kv_table
from ddls.hpps.tensor import Tensor
import numpy as np

zoo_start()

array_table = create_array_table(size=100, type=np.float32, solver="default")
kv_table = create_kv_table(capacity=1000000,
                           value_len=2,
                           key_type=np.int64,
                           value_type=np.float32)
zoo_barrier()

data = Tensor(shape=[100], type=np.float32)
array_table.get(value=data)
print data.asnumpy()

grad = Tensor(shape=[100], type=np.float32)
grad.load_numpy(np.ones([100], np.float32))
array_table.add(grad=grad)

array_table.get(value=data)
print data.asnumpy()

id = Tensor(shape=[100], type=np.int64)
id.load_numpy(np.zeros([100], np.int64))
value = Tensor(shape=[1], type=np.float32)
kv_table.get(key=id, value=value)
print value.asnumpy()

id.load_numpy(np.ones([100], np.int64))
wait_value = Tensor(shape=[1], type=np.float32)
wait_id = kv_table.get_async(key=id, value=wait_value)
kv_table.get_async(wait_id=wait_id, key=id, value=wait_value)
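
The example stops right after issuing the second asynchronous lookup. A short hedged continuation, using only calls that already appear in these examples, would inspect the fetched value and shut the process group down.

# Inspect the value fetched by the waited-on request, then stop the zoo
# (zoo_stop() is used the same way at the end of Example #6).
print wait_value.asnumpy()
zoo_stop()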
Example #5
from ddls.feeder.plan_maker import PlanMaker
from ddls.feeder.record_io import RecordIO, BinaryWrite
from ddls.hpps.tensor import Tensor
from ddls.feeder.feeder import Feeder

import numpy as np

################################################################

record_io = RecordIO('/tmp/sample1', BinaryWrite)
record_io.write_header(name=['ad', 'user'], type=[np.float32, np.float32])

ad_tensor = Tensor([300], np.float32)
ad_tensor.load_numpy(np.random.rand(300).astype(np.float32))
print(ad_tensor.asnumpy())
user_tensor = Tensor([400], np.float32)
user_tensor.load_numpy(np.random.rand(400).astype(np.float32))

for x in xrange(1, 1000):  # writes the same sample 999 times
    record_io.write_sample({'ad': ad_tensor, 'user': user_tensor})
record_io.write_finalize()

##################################################################
Example #6
from ddls.hpps.zoo import *
from ddls.hpps.array_table import ArrayTable, create_array_table
from ddls.hpps.kv_table import KVTable, create_kv_table
from ddls.hpps.tensor import Tensor
from ddls.topi.embedding import Embedding
import numpy as np

zoo_start()

embedding = Embedding(capacity=200000, value_len=24)

key = Tensor([20000], np.int64)
key_numpy = np.arange(1, 20001, dtype=np.int64)  # keys 1..20000
key.load_numpy(key_numpy)

grad = np.ones([20000], np.float32)

for x in xrange(100):
    key.load_numpy(np.ones([1000], np.int64))
    value = embedding.get(key, key)
    print value

    embedding.add(key, grad)

zoo_stop()
Example #7
import numpy as np

from ddls.hpps.kv_table import create_kv_table
from ddls.hpps.tensor import Tensor


class Embedding(object):
    """ The embedding model param manager, which is used for managing and
    synchronizing the variables in sparse embedding.
    """
    def __init__(self, capacity, value_len, kwargs={}, solver='adam'):
        """ The constructor of Embedding
        """
        self.kv_table = create_kv_table(capacity=capacity,
                                        value_len=value_len,
                                        key_type=np.int64,
                                        value_type=np.float32,
                                        kwargs=kwargs,
                                        solver=solver)
        self.wait_get_id = None
        self.wait_add_id = None

        self.value = Tensor([1], np.float32)
        self.grad = Tensor([1], np.float32)

    def get(self, key, next_key):
        """ get current key's value and pre-get the next key.

        Parameters
        ----------
          key: The current iteration id key, which is Tensor instance
          next_key: The next iteration id key

        Returns
        -------
          The value, as a numpy array.
        """
        assert isinstance(key, Tensor)
        assert isinstance(next_key, Tensor)

        if self.wait_get_id is None:
            self.wait_get_id = self.kv_table.get_async(key=key,
                                                       value=self.value)

        self.kv_table.get_async(key=key,
                                value=self.value,
                                wait_id=self.wait_get_id)
        self.wait_get_id = self.kv_table.get_async(key=next_key,
                                                   value=self.value)
        return self.value.asnumpy()

    def add(self, key, grad, option={}):
        """ add grad

        Parameter
        ---------
          key: The current iteration id key, which is Tensor instance
          grad: The current iteration id grad, which is numpy instance
        """
        assert isinstance(key, Tensor)

        if self.wait_add_id is not None:
            self.kv_table.wait(self.wait_add_id)

        self.grad.reshape(grad.shape)
        self.grad.load_numpy(grad)

        self.wait_add_id = self.kv_table.add_async(key=key,
                                                   grad=self.grad,
                                                   option=option)