Пример #1
0
 def upload_data(self, force=False):
     """Upload every dataset entry that has a local source to its S3 bucket.

     Entries in ``self.data`` without a 'source' key are skipped. An upload
     happens only when *force* is set or the key is absent from the bucket.
     """
     for key, entry in self.data.items():
         if 'source' not in entry:
             continue
         bucket = S3Bucket(entry['bucket'])
         # Skip keys that already exist unless the caller forces a re-upload.
         if force or not bucket.exists(key):
             logging.info('Uploading {} to {}'.format(key, entry['bucket']))
             bucket.upload(entry['source'], key)
Пример #2
0
def iterate_projects():
    """Yield ``(bucket_name, status)`` for every doppel project bucket.

    Scans all S3 buckets whose name starts with 'doppel-' and, for those
    that carry a 'doppel.status' object, yields the bucket name together
    with the parsed status document.
    """
    for entry in S3Client().list_buckets():
        name = entry['Name']
        if not name.startswith('doppel-'):
            continue
        project_bucket = S3Bucket(name)
        if project_bucket.exists('doppel.status'):
            yield name, project_bucket.load_json('doppel.status')
Пример #3
0
 def save_pickle(self, obj, doppel_path, local_path=None, zip=False):
     """Pickle *obj* to S3 (when running under doppel) or to a local file.

     Parameters
     ----------
     obj : object to serialize with the highest pickle protocol.
     doppel_path : key to write under when ``self.is_doppel`` is true.
     local_path : destination file when not running under doppel; when
         falsy in that case, the call is a no-op.
     zip : when true, wrap the pickle in a ZIP archive (DEFLATE) under the
         entry name 'object.pkl'.

     BUGFIX: the original non-zip branch rebound ``buffer`` from a BytesIO
     to the raw ``bytes`` returned by ``pickle.dumps`` and then called
     ``buffer.getvalue()``, raising AttributeError. The bytes payload is
     now written directly. The ``with ... as zip`` handle also shadowed
     the ``zip`` parameter; the handle is renamed (the parameter name is
     kept for caller compatibility).
     """
     if self.is_doppel:
         S3Bucket(self.doppel_arn).save_pickle(obj, doppel_path, zip=zip)
     elif local_path:
         payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
         if zip:
             buffer = io.BytesIO()
             with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
                 archive.writestr('object.pkl', payload)
             payload = buffer.getvalue()
         with open(local_path, 'wb') as file:
             file.write(payload)
Пример #4
0
    def load(cls, name):
        """Reconstruct a project instance from the state stored in its S3 bucket.

        The bucket is resolved from *name* (normalized via ``_format_name``);
        ``doppel.config`` supplies the constructor arguments, ``doppel.status``
        restores the start time and termination flag, and ``key.pem`` — when
        present — restores the key material.

        Raises:
            ValueError: if no bucket exists for the resolved ARN.
        """
        name = cls._format_name(name)
        arn = cls._get_arn(name)
        if not S3Client().bucket_exists(arn):
            # NOTE(review): message has a grammar slip ("does not exists");
            # left unchanged here because it is a runtime string.
            raise ValueError('Project {} does not exists.'.format(name))

        bucket = S3Bucket(arn)
        config = bucket.load_json('doppel.config')
        # The stored context is a plain dict; rehydrate it before constructing.
        config['context'] = DoppelContext(config['context'])
        project = cls(**config)

        status = bucket.load_json('doppel.status')
        if status['start_time'] is not None:
            project.start_time = datetime.fromisoformat(status['start_time'])
        # NOTE(review): 'termminated' looks like a typo for 'terminated' —
        # confirm against the code that writes doppel.status before fixing,
        # since the comparison must match whatever value is actually stored.
        project.terminated = (status['status'] == 'termminated')

        if bucket.exists('key.pem'):
            project.key_material = bucket.load('key.pem')
        return project
Пример #5
0
 def _init_aws_clients(self):
     """Create the EC2, S3 and IAM clients this instance works with."""
     self.ec2 = Ec2Client()
     self.iam = IamClient()
     # Block public access on the project bucket as soon as it is wrapped.
     project_bucket = S3Bucket(self.arn)
     project_bucket.block_public_access()
     self.bucket = project_bucket
Пример #6
0
 def __init__(self, bucket, activated=True):
     """Wrap *bucket* in an S3Bucket handle and store the activation flag."""
     self.activated = activated
     self.bucket = S3Bucket(bucket)
Пример #7
0
 def __init__(self, bucket_name, log_path=r'C:\data\logs', temp=r'C:\data\aikit\tmp.json'):
     """Hold an S3 bucket handle plus local log/temp paths and an empty cache."""
     self._cache = {}
     self.temp = temp
     self.base_folder = log_path
     self.bucket = S3Bucket(bucket_name)
Пример #8
0
    # Load the raw data and preprocess the question/lecture tables.
    loader = DataLoader(CONTEXT.data_path())
    train, questions, lectures = loader.load()
    questions = preprocess_questions(questions)
    lectures = preprocess_lectures(lectures)

    # Merge the held-out test batch into the training set, then release it.
    test = loader.load_tests('tests_0.pkl')
    train = merge_test(train, test)
    del test

    # Raise embedding workers to 32 for this run — presumably parallel
    # training workers; confirm against the embedding implementation.
    PARAMS['question_embedding']['workers'] = 32
    PARAMS['answers_embedding']['workers'] = 32
    model = RiiidModel(questions, lectures, params=PARAMS)
    X, y, train, valid = model.fit_transform(train)

    # Bucket named after the model; holds all artifacts for this run.
    bucket = S3Bucket(model.get_normalized_name())

    # Persist features, targets and the train/valid splits as pickles.
    logging.info('Saving data')
    for data, name in [(X, 'X'), (y, 'y'), (train, 'train'), (valid, 'valid')]:
        bucket.save_pickle_multiparts(data, name + '.pkl')

    model.fit_lgbm(X[train], y[train], X[valid], y[valid])
    model.fit_catboost(X[train], y[train], X[valid], y[valid])

    # Save the fitted model (with its source code) under the model name.
    logging.info('Saving model')
    bucket.save_multiparts(model.save_with_source(), model.get_name())

except Exception as e:
    logging.info('Unexpected exception: ' + str(e))

finally:
Пример #9
0
import logging
import numpy as np

from doppel import terminate
from doppel.aws.s3 import S3Bucket

from riiid.config import PARAMS
from riiid.core.neural import NeuralModel
from riiid.aws.config import CONTEXT

CONTEXT.get_logger()

try:
    # Pull the previously saved features, targets and splits from S3.
    logging.info('Loading data')
    bucket = S3Bucket('model-20201219-093629')
    X, y, train, valid = (
        bucket.load_pickle(part + '.pkl')
        for part in ('X', 'y', 'train', 'valid')
    )

    # Train the neural model on the train split, validating on the rest.
    nn = NeuralModel(PARAMS['mlp_params'])
    nn.fit(X[train], y[train], X[valid], y[valid])

    bucket.save_pickle_multiparts(nn.save())

except Exception as e:
    logging.info('Unexpected exception: ' + str(e))

finally:
    terminate(CONTEXT)
Пример #10
0
import os
import json
from doppel.aws.s3 import S3Bucket

from riiid.config import TUNE_PATH

# Mirror every tuning-result JSON from the bucket into the local TUNE_PATH.
bucket = S3Bucket('doppel-riiid-tune')
_, files = bucket.listdir('results')
for name in files:
    data = bucket.load_json(os.path.join('results', name))
    # FIX: the original reused 'file' for both the loop variable and the
    # open file handle; distinct names remove the shadowing.
    with open(os.path.join(TUNE_PATH, name), 'w') as out:
        json.dump(data, out)
Пример #11
0
 def save_json(self, obj, doppel_path, local_path=None):
     """Persist *obj* as JSON: to S3 when running under doppel, otherwise
     to *local_path* (no-op when *local_path* is falsy)."""
     if not self.is_doppel:
         if local_path:
             with open(local_path, 'w') as out:
                 json.dump(obj, out, indent=4)
         return
     S3Bucket(self.doppel_arn).save_json(obj, doppel_path)
Пример #12
0
 def save(self, obj, doppel_path, local_path=None):
     """Persist a binary buffer: to S3 under doppel, otherwise write its
     contents to *local_path* (no-op when *local_path* is falsy)."""
     if self.is_doppel:
         S3Bucket(self.doppel_arn).save(obj, doppel_path)
     elif local_path:
         payload = obj.getvalue()
         with open(local_path, 'wb') as out:
             out.write(payload)