def upload_data(self, force=False):
    """Upload each configured data file to its S3 bucket.

    Iterates ``self.data`` (mapping of S3 key -> entry dict). Entries
    without a ``'source'`` are skipped. An existing key is left
    untouched unless *force* is True.
    """
    for key, entry in self.data.items():
        if 'source' not in entry:
            continue
        bucket = S3Bucket(entry['bucket'])
        # Skip keys that already exist, unless the caller forces a re-upload.
        if not force and bucket.exists(key):
            continue
        logging.info('Uploading {} to {}'.format(key, entry['bucket']))
        bucket.upload(entry['source'], key)
def iterate_projects():
    """Yield ``(bucket_name, status)`` for every doppel project bucket.

    A project bucket is any S3 bucket whose name starts with
    ``'doppel-'`` and that contains a ``'doppel.status'`` object; the
    status is loaded from that object as JSON.
    """
    client = S3Client()
    for entry in client.list_buckets():
        name = entry['Name']
        if not name.startswith('doppel-'):
            continue
        project_bucket = S3Bucket(name)
        if project_bucket.exists('doppel.status'):
            yield name, project_bucket.load_json('doppel.status')
def save_pickle(self, obj, doppel_path, local_path=None, zip=False):
    """Pickle *obj* to the doppel bucket, or to a local file.

    When ``self.is_doppel`` is true the object is delegated to
    ``S3Bucket(self.doppel_arn).save_pickle``. Otherwise, if
    *local_path* is given, the pickle is written there — raw, or
    wrapped in a ZIP archive (entry ``'object.pkl'``) when *zip* is
    true. If neither applies, the call is a no-op.

    Bug fixed: the original non-zip branch rebound ``buffer`` to the
    raw ``bytes`` from ``pickle.dumps`` and then called
    ``buffer.getvalue()``, which raised AttributeError — the plain
    local save path could never succeed. (``zip`` also shadows the
    builtin, but the name is kept for caller compatibility.)
    """
    if self.is_doppel:
        S3Bucket(self.doppel_arn).save_pickle(obj, doppel_path, zip=zip)
    elif local_path:
        payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
        if zip:
            buffer = io.BytesIO()
            # 'archive' (not 'zip') so the parameter is not shadowed.
            with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
                archive.writestr('object.pkl', payload)
            payload = buffer.getvalue()
        with open(local_path, 'wb') as file:
            file.write(payload)
def load(cls, name):
    """Rebuild a project instance from the state stored in its S3 bucket.

    Loads ``doppel.config`` to construct the project, then applies
    ``doppel.status`` (start time, terminated flag) and, when present,
    the ``key.pem`` key material.

    Raises:
        ValueError: if no bucket exists for the formatted project name.

    Fixes: grammar in the error message ('does not exists' ->
    'does not exist'), and the terminated check — the original compared
    against the misspelling 'termminated' only, so a correctly spelled
    status value never marked the project terminated. Both spellings
    are accepted for backward compatibility with already-written
    status files.
    """
    name = cls._format_name(name)
    arn = cls._get_arn(name)
    if not S3Client().bucket_exists(arn):
        raise ValueError('Project {} does not exist.'.format(name))
    bucket = S3Bucket(arn)
    config = bucket.load_json('doppel.config')
    config['context'] = DoppelContext(config['context'])
    project = cls(**config)
    status = bucket.load_json('doppel.status')
    if status['start_time'] is not None:
        project.start_time = datetime.fromisoformat(status['start_time'])
    # Accept the historical misspelling as well as the correct one.
    project.terminated = status['status'] in ('terminated', 'termminated')
    if bucket.exists('key.pem'):
        project.key_material = bucket.load('key.pem')
    return project
def _init_aws_clients(self):
    """Create the EC2, S3 and IAM clients this project works with.

    The project bucket (at ``self.arn``) has public access blocked as
    soon as it is opened.
    """
    self.ec2 = Ec2Client()
    self.bucket = S3Bucket(self.arn)
    # Lock the bucket down immediately — nothing in it should be public.
    self.bucket.block_public_access()
    self.iam = IamClient()
def __init__(self, bucket, activated=True):
    """Open the S3 bucket named *bucket* and store the activation flag.

    The meaning of *activated* is defined by the methods that read it.
    """
    self.activated = activated
    self.bucket = S3Bucket(bucket)
def __init__(self, bucket_name, log_path=r'C:\data\logs', temp=r'C:\data\aikit\tmp.json'):
    """Bind this logger-like helper to an S3 bucket and local paths.

    *log_path* is the local base folder for logs; *temp* is a scratch
    JSON file. An empty in-memory cache is created up front.
    """
    self._cache = {}
    self.temp = temp
    self.base_folder = log_path
    self.bucket = S3Bucket(bucket_name)
loader = DataLoader(CONTEXT.data_path()) train, questions, lectures = loader.load() questions = preprocess_questions(questions) lectures = preprocess_lectures(lectures) test = loader.load_tests('tests_0.pkl') train = merge_test(train, test) del test PARAMS['question_embedding']['workers'] = 32 PARAMS['answers_embedding']['workers'] = 32 model = RiiidModel(questions, lectures, params=PARAMS) X, y, train, valid = model.fit_transform(train) bucket = S3Bucket(model.get_normalized_name()) logging.info('Saving data') for data, name in [(X, 'X'), (y, 'y'), (train, 'train'), (valid, 'valid')]: bucket.save_pickle_multiparts(data, name + '.pkl') model.fit_lgbm(X[train], y[train], X[valid], y[valid]) model.fit_catboost(X[train], y[train], X[valid], y[valid]) logging.info('Saving model') bucket.save_multiparts(model.save_with_source(), model.get_name()) except Exception as e: logging.info('Unexpected exception: ' + str(e)) finally:
import logging

import numpy as np

from doppel import terminate
from doppel.aws.s3 import S3Bucket
from riiid.config import PARAMS
from riiid.core.neural import NeuralModel
from riiid.aws.config import CONTEXT

# Train the neural model on features previously produced by the
# feature-building run, then persist it back to the same bucket.
# The instance is terminated whatever happens (finally clause).
CONTEXT.get_logger()

try:
    logging.info('Loading data')
    bucket = S3Bucket('model-20201219-093629')
    X = bucket.load_pickle('X.pkl')
    y = bucket.load_pickle('y.pkl')
    train = bucket.load_pickle('train.pkl')
    valid = bucket.load_pickle('valid.pkl')

    nn = NeuralModel(PARAMS['mlp_params'])
    nn.fit(X[train], y[train], X[valid], y[valid])

    # NOTE(review): the sibling training script calls
    # save_pickle_multiparts(data, name) with an explicit key; here only
    # one argument is passed — confirm the method has a default key.
    bucket.save_pickle_multiparts(nn.save())
except Exception:
    # logging.exception records the full traceback; the original
    # logging.info('Unexpected exception: ' + str(e)) discarded it,
    # which makes remote failures very hard to diagnose.
    logging.exception('Unexpected exception')
finally:
    terminate(CONTEXT)
import os
import json

from doppel.aws.s3 import S3Bucket
from riiid.config import TUNE_PATH

# Download every tuning-result JSON from the 'results/' prefix of the
# tune bucket into the local TUNE_PATH folder.
bucket = S3Bucket('doppel-riiid-tune')

_, files = bucket.listdir('results')
for name in files:
    # S3 keys always use '/' as the delimiter; the original used
    # os.path.join, which produces '\' on Windows (this project runs on
    # Windows, cf. its C:\data defaults) and would build a wrong key.
    data = bucket.load_json('results/' + name)
    # 'name' instead of 'file': the original reused 'file' as both the
    # loop variable and the open() handle.
    with open(os.path.join(TUNE_PATH, name), 'w') as file:
        json.dump(data, file)
def save_json(self, obj, doppel_path, local_path=None):
    """Serialize *obj* as JSON to the doppel bucket or to a local file.

    On a doppel instance the object goes to
    ``S3Bucket(self.doppel_arn)`` under *doppel_path*; otherwise it is
    written to *local_path* (indent=4) when one is given. With neither,
    the call does nothing.
    """
    if self.is_doppel:
        S3Bucket(self.doppel_arn).save_json(obj, doppel_path)
        return
    if local_path:
        with open(local_path, 'w') as file:
            json.dump(obj, file, indent=4)
def save(self, obj, doppel_path, local_path=None):
    """Persist the raw bytes of *obj* (must expose ``getvalue()``,
    e.g. an ``io.BytesIO``) to the doppel bucket or to a local file.

    On a doppel instance the buffer is handed to
    ``S3Bucket(self.doppel_arn)``; otherwise its contents are written
    to *local_path* when one is given. With neither, nothing happens.
    """
    if self.is_doppel:
        S3Bucket(self.doppel_arn).save(obj, doppel_path)
        return
    if local_path:
        with open(local_path, 'wb') as file:
            file.write(obj.getvalue())