Example #1
0
class TrainModel(models.Model):
    """Trainable model asset: encrypted name plus IPFS address of its code."""

    # Both fields are encrypted and cannot change after creation.
    name = fields.EncryptedCharField(immutable=True)
    code_ipfs = fields.EncryptedCharField(immutable=True)

    @classmethod
    def upload_and_create(cls, code_path, **kwargs):
        """Push the model code at *code_path* to IPFS, then create the asset.

        The resulting multihash is stored in ``code_ipfs``; remaining
        keyword arguments are forwarded to ``create`` unchanged.
        """
        multihash = IPFS().add_file(code_path).multihash
        return cls.create(code_ipfs=multihash, **kwargs)
Example #2
0
File: train.py  Project: makar21/core
class TrainData(models.Model):
    """Per-worker training payload, shared by the producer with workers."""

    # owner only producer, share data with workers
    data_index = fields.IntegerField(immutable=True)

    # this data may be encrypted for different workers
    model_code_ipfs = fields.EncryptedCharField()
    train_chunks_ipfs = fields.EncryptedJsonField()

    # data for evaluation
    test_chunks_ipfs = fields.EncryptedJsonField()

    task_assignment_id = fields.CharField(null=True, initial=None)

    @cached_property
    def task_assignment(self):
        """Lazily resolve the owning TaskAssignment asset."""
        return TaskAssignment.get(
            asset_id=self.task_assignment_id,
            db=self.db,
            encryption=self.encryption,
        )

    @cached_property
    def current_iteration(self):
        """Current iteration number, proxied from the task declaration."""
        declaration = self.task_assignment.task_declaration
        return declaration.current_iteration

    @cached_property
    def weights_ipfs(self):
        """IPFS address of the current weights, proxied from the declaration."""
        declaration = self.task_assignment.task_declaration
        return declaration.weights_ipfs

    @cached_property
    def epochs(self):
        """Number of epochs to run in the current iteration."""
        declaration = self.task_assignment.task_declaration
        return declaration.epochs_in_current_iteration

    @cached_property
    def batch_size(self):
        """Training batch size, proxied from the task declaration."""
        declaration = self.task_assignment.task_declaration
        return declaration.batch_size
Example #3
0
class EstimationData(models.Model):
    """Payload handed to an estimator: one data chunk plus the model code."""

    # this data may be encrypted for different estimators
    chunk_ipfs = fields.EncryptedCharField()
    model_code_ipfs = fields.EncryptedCharField()

    estimation_assignment_id = fields.CharField(null=True, initial=None)

    @cached_property
    def estimation_assignment(self):
        """Lazily resolve the owning EstimationAssignment asset."""
        return EstimationAssignment.get(
            self.estimation_assignment_id,
            db=self.db,
            encryption=self.encryption,
        )

    @cached_property
    def weights_ipfs(self):
        """IPFS address of the weights, proxied from the task declaration."""
        declaration = self.estimation_assignment.task_declaration
        return declaration.weights_ipfs

    @cached_property
    def batch_size(self):
        """Batch size, proxied from the task declaration."""
        declaration = self.estimation_assignment.task_declaration
        return declaration.batch_size
Example #4
0
class VerificationData(models.Model):
    """Payload handed to a verifier: test data, model code and train results."""

    # owner only producer, share data with verifier
    test_dir_ipfs = fields.EncryptedCharField(immutable=True)
    model_code_ipfs = fields.EncryptedCharField(immutable=True)

    verification_assignment_id = fields.CharField()
    train_results = fields.EncryptedJsonField()

    @cached_property
    def verification_assignment(self):
        """Lazily resolve the owning VerificationAssignment asset."""
        return VerificationAssignment.get(
            asset_id=self.verification_assignment_id,
            db=self.db,
            encryption=self.encryption,
        )

    @cached_property
    def current_iteration(self):
        """Current iteration number, proxied from the task declaration."""
        declaration = self.verification_assignment.task_declaration
        return declaration.current_iteration

    @cached_property
    def current_iteration_retry(self):
        """Retry counter of the current iteration, from the declaration."""
        declaration = self.verification_assignment.task_declaration
        return declaration.current_iteration_retry
Example #5
0
class EstimationResult(models.Model):
    # owner only estimator, share data with producer

    # Lifecycle states for an estimation run.
    class State:
        INITIAL = 'initial'
        IN_PROGRESS = 'in progress'
        FINISHED = 'finished'

    # Asset id of the EstimationAssignment this result belongs to; fixed at creation.
    estimation_assignment_id = fields.CharField(immutable=True)

    # Current lifecycle state (one of State.*).
    state = fields.CharField(initial=State.INITIAL)
    # Compute spent and progress so far, reported by the estimator.
    tflops = fields.FloatField(initial=0.0)
    progress = fields.FloatField(initial=0.0)
    # Encrypted error text — presumably set when estimation fails; null otherwise (confirm against writers).
    error = fields.EncryptedCharField(null=True, initial=None)
Example #6
0
class VerificationResult(models.Model):
    """Verifier's per-iteration output, shared with the producer."""

    # owner only verifier, share data with producer
    class State:
        INITIAL = 'initial'
        IN_PROGRESS = 'in progress'
        VERIFICATION_FINISHED = 'verification is finished'
        FINISHED = 'finished'

    verification_assignment_id = fields.CharField(immutable=True)

    state = fields.CharField(initial=State.INITIAL)
    progress = fields.FloatField(initial=0.0)
    tflops = fields.FloatField(initial=0.0)
    current_iteration = fields.IntegerField(initial=0)
    current_iteration_retry = fields.IntegerField(initial=0)

    # results should be public
    result = fields.JsonField(required=False)

    weights_ipfs = fields.CharField(required=False)
    loss = fields.FloatField(required=False)
    accuracy = fields.FloatField(required=False)

    error = fields.EncryptedCharField(required=False)

    def clean(self):
        """Reset per-iteration output before the next verification round."""
        # remove from ipfs storage summarized weights_ipfs from prev iteration
        previous_weights = self.weights_ipfs
        if previous_weights is not None:
            IPFS().remove_from_storage(previous_weights)

        self.progress = 0.0
        self.tflops = 0.0
        self.result = None
        self.weights_ipfs = None
        self.loss = 0.0
        self.accuracy = 0.0

    @cached_property
    def verification_assignment(self):
        """Lazily resolve the owning VerificationAssignment asset."""
        return VerificationAssignment.get(
            self.verification_assignment_id,
            db=self.db,
            encryption=self.encryption,
        )
Example #7
0
File: train.py  Project: makar21/core
class TrainResult(models.Model):
    """Worker's per-iteration training output, shared with the producer."""

    # owner only worker, share data with producer
    class State:
        INITIAL = 'initial'
        IN_PROGRESS = 'in progress'
        FINISHED = 'finished'

    task_assignment_id = fields.CharField(immutable=True)
    state = fields.CharField(initial=State.INITIAL)

    progress = fields.FloatField(initial=0.0)
    tflops = fields.FloatField(initial=0.0)
    current_iteration = fields.IntegerField(initial=0)

    weights_ipfs = fields.CharField(required=False)
    error = fields.EncryptedCharField(required=False)

    loss = fields.FloatField(required=False)
    accuracy = fields.FloatField(required=False)
    train_history = fields.JsonField(required=False)
    eval_results = fields.JsonField(required=False)

    @cached_property
    def task_assignment(self):
        """Lazily resolve the owning TaskAssignment asset."""
        return TaskAssignment.get(
            self.task_assignment_id,
            db=self.db,
            encryption=self.encryption,
        )

    def clean(self):
        """Reset per-iteration training output before the next iteration."""
        # remove from ipfs storage weights_ipfs from prev iteration
        previous_weights = self.weights_ipfs
        if previous_weights is not None:
            IPFS().remove_from_storage(previous_weights)

        self.progress = 0.0
        self.tflops = 0.0
        self.weights_ipfs = None
        self.error = None
        self.loss = 0.0
        self.accuracy = 0.0
        self.train_history = None
Example #8
0
class Dataset(models.Model):
    """Encrypted dataset asset: train and test directories stored in IPFS."""

    name = fields.EncryptedCharField(immutable=True)
    train_dir_ipfs = fields.EncryptedCharField(immutable=True)
    test_dir_ipfs = fields.EncryptedCharField(immutable=True)

    @classmethod
    def upload_and_create(cls, train_dir, test_dir, **kwargs):
        """Upload existing train/test directories to IPFS and create the asset."""
        logger.info('Creating dataset')
        ipfs = IPFS()

        kwargs['test_dir_ipfs'] = ipfs.add_dir(test_dir).multihash
        kwargs['train_dir_ipfs'] = ipfs.add_dir(train_dir).multihash

        return cls.create(**kwargs)

    @staticmethod
    def _download_to_dir(csv_text, target_dir, train_part=True):
        """Download every (x_url, y_url) pair listed in *csv_text* into *target_dir*.

        Each CSV row is expected to hold two columns: the x-file URL and the
        y-file URL. Files are named ``x_train_NN``/``y_train_NN`` (or
        ``_test_`` when *train_part* is false), zero-padded so lexicographic
        order matches CSV row order.
        """
        urls = []
        for row in csv.reader(StringIO(csv_text), delimiter=',',
                              quotechar='"'):
            urls.append({'x_url': row[0], 'y_url': row[1]})

        # Zero-pad the index to one digit more than needed so listing order
        # equals numeric order.
        part = '_train_' if train_part else '_test_'
        name_format = part + '{{:0{}d}}'.format(len(str(len(urls))) + 1)

        download_list = []
        for index, u in enumerate(urls):
            download_list += [
                FileDownloader.Params(url=u['x_url'],
                                      target_path=os.path.join(
                                          target_dir,
                                          'x' + name_format.format(index))),
                FileDownloader.Params(url=u['y_url'],
                                      target_path=os.path.join(
                                          target_dir,
                                          'y' + name_format.format(index)))
            ]

        FileDownloader.download_all(download_list)

    @staticmethod
    def parse_csv_and_upload_to_ipfs(csv_text, train_part):
        """Download the files listed in *csv_text* into a temp dir, upload the
        dir to IPFS, and return its multihash. The temp dir is always removed.
        """
        target_dir = tempfile.mkdtemp()
        try:
            Dataset._download_to_dir(csv_text, target_dir, train_part)
            ipfs = IPFS()
            return ipfs.add_dir(target_dir).multihash
        finally:
            shutil.rmtree(target_dir)

    @classmethod
    def create_from_csv(cls, train_csv_text, test_csv_text, **kwargs):
        """Build the train and test parts from CSV url lists, upload both to
        IPFS, and create the asset.

        Fixes: the ``train_part`` flags were swapped (train csv was uploaded
        with ``train_part=False`` and vice versa), mislabeling the downloaded
        files; also removed two temp dirs that were created but never used —
        ``parse_csv_and_upload_to_ipfs`` manages its own temp dir.
        """
        kwargs['train_dir_ipfs'] = Dataset.parse_csv_and_upload_to_ipfs(
            train_csv_text, train_part=True)
        logger.info('Train part is uploaded: {}'.format(
            kwargs['train_dir_ipfs']))

        kwargs['test_dir_ipfs'] = Dataset.parse_csv_and_upload_to_ipfs(
            test_csv_text, train_part=False)
        logger.info('Test part is uploaded: {}'.format(
            kwargs['test_dir_ipfs']))

        return cls.create(**kwargs)

    @classmethod
    def _split_files(cls, x_path, y_path, minibatch_size, target_dir):
        """Split the numpy arrays at *x_path*/*y_path* into ``chunk_NN``
        subdirectories of *minibatch_size* samples each.

        A trailing partial batch is dropped (integer division).
        """
        x_data = np.load(x_path)
        y_data = np.load(y_path)
        batches = int(len(x_data) / minibatch_size)
        logger.info('Split dataset to {} batches'.format(batches))
        name_format = '{{:0{}d}}'.format(len(str(batches)) + 1)

        for batch_idx in range(batches):
            start_idx = batch_idx * minibatch_size
            end_idx = start_idx + minibatch_size

            chunk_dir = os.path.join(target_dir,
                                     'chunk_' + name_format.format(batch_idx))
            os.mkdir(chunk_dir)

            # Fix: do not shadow the x_path/y_path parameters inside the loop.
            np.save(os.path.join(chunk_dir, 'x'), x_data[start_idx:end_idx])
            np.save(os.path.join(chunk_dir, 'y'), y_data[start_idx:end_idx])

    @classmethod
    def download_and_create(cls, x_train_url, y_train_url, x_test_url,
                            y_test_url, minibatch_size, **kwargs):
        """Download full x/y train and test arrays, split them into
        minibatch-sized chunks, upload both chunk trees to IPFS, and create
        the asset. All temp dirs are removed even on failure.
        """
        logger.info('Creating dataset')

        train_download_target_dir = tempfile.mkdtemp()
        test_target_dir = tempfile.mkdtemp()
        train_dir = tempfile.mkdtemp()
        test_dir = tempfile.mkdtemp()
        try:
            x_train_path = os.path.join(train_download_target_dir, 'x_train')
            y_train_path = os.path.join(train_download_target_dir, 'y_train')
            x_test_path = os.path.join(test_target_dir, 'x_test')
            y_test_path = os.path.join(test_target_dir, 'y_test')

            download_list = [
                FileDownloader.Params(url=x_train_url,
                                      target_path=x_train_path),
                FileDownloader.Params(url=y_train_url,
                                      target_path=y_train_path),
                FileDownloader.Params(url=x_test_url, target_path=x_test_path),
                FileDownloader.Params(url=y_test_url, target_path=y_test_path),
            ]
            FileDownloader.download_all(download_list)

            ipfs = IPFS()

            cls._split_files(x_path=x_test_path,
                             y_path=y_test_path,
                             minibatch_size=minibatch_size,
                             target_dir=test_dir)
            kwargs['test_dir_ipfs'] = ipfs.add_dir(test_dir,
                                                   recursive=True).multihash
            logger.info('Test part is uploaded: {}'.format(
                kwargs['test_dir_ipfs']))

            cls._split_files(x_path=x_train_path,
                             y_path=y_train_path,
                             minibatch_size=minibatch_size,
                             target_dir=train_dir)

            kwargs['train_dir_ipfs'] = ipfs.add_dir(train_dir,
                                                    recursive=True).multihash
            logger.info('Train part is uploaded: {}'.format(
                kwargs['train_dir_ipfs']))
            return cls.create(**kwargs)
        finally:
            shutil.rmtree(test_target_dir)
            shutil.rmtree(train_download_target_dir)
            shutil.rmtree(train_dir)
            shutil.rmtree(test_dir)