def get_client_and_resource(aws_access_key_id=None, aws_secret_access_key=None,
                            aws_session_token=None, region_name=None, s3_endpoint=None):
    """Build a boto3 S3 client and resource pair.

    Credentials fall back to the standard AWS environment variables when not
    passed explicitly; the region falls back to the S3_region variable, then
    to 'us-east-1'. A custom endpoint (e.g. MinIO) comes from S3_ENDPOINT.

    :param aws_access_key_id: AWS access key id, or None to read AWS_ACCESS_KEY_ID
    :param aws_secret_access_key: AWS secret key, or None to read AWS_SECRET_ACCESS_KEY
    :param aws_session_token: AWS session token, or None to read AWS_SESSION_TOKEN
    :param region_name: AWS region, or None to read S3_region (default 'us-east-1')
    :param s3_endpoint: custom S3 endpoint URL, or None to read S3_ENDPOINT
    :return: tuple (client, resource)
    """
    aws_access_key_id = aws_access_key_id or get_env('AWS_ACCESS_KEY_ID')
    aws_secret_access_key = aws_secret_access_key or get_env('AWS_SECRET_ACCESS_KEY')
    aws_session_token = aws_session_token or get_env('AWS_SESSION_TOKEN')

    def _mask(secret):
        # Log only a short prefix of a credential so full secrets never reach the logs
        return secret[:4] + '...' if secret else None

    logger.debug(
        f'Create boto3 session with '
        f'access key id={aws_access_key_id}, '
        f'secret key={_mask(aws_secret_access_key)}, '
        # Fix: the session token was previously logged in full — a credential leak
        f'session token={_mask(aws_session_token)}')
    session = boto3.Session(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        aws_session_token=aws_session_token)
    settings = {
        # NOTE(review): env var name 'S3_region' is mixed-case — looks deliberate, confirm
        'region_name': region_name or get_env('S3_region') or 'us-east-1'
    }
    s3_endpoint = s3_endpoint or get_env('S3_ENDPOINT')
    if s3_endpoint:
        settings['endpoint_url'] = s3_endpoint
    client = session.client(
        's3', config=boto3.session.Config(signature_version='s3v4'), **settings)
    resource = session.resource(
        's3', config=boto3.session.Config(signature_version='s3v4'), **settings)
    return client, resource
def get_data(self, key):
    """Load one task from a local JSON file, or return a blob URL reference.

    When ``use_blob_urls`` is enabled, the file is not parsed at all: the
    task data points at the local-files serving endpoint instead.

    :param key: path of the file to import
    :return: dict with a single task
    :raises ValueError: when the file is not valid JSON or not a dict
    """
    file_path = Path(key)
    if self.use_blob_urls:
        # include self-hosted links pointed to local resources via
        # {settings.HOSTNAME}/data/local-files?d=<path/to/local/dir>
        document_root = Path(get_env('LOCAL_FILES_DOCUMENT_ROOT', default='/'))
        relative_path = str(file_path.relative_to(document_root))
        return {
            settings.DATA_UNDEFINED_NAME:
                f'{settings.HOSTNAME}/data/local-files/?d={relative_path}'
        }

    try:
        with open(file_path, encoding='utf8') as f:
            value = json.load(f)
    except (UnicodeDecodeError, json.decoder.JSONDecodeError):
        raise ValueError(
            f"Can\'t import JSON-formatted tasks from {key}. If you're trying to import binary objects, "
            f"perhaps you've forgot to enable \"Treat every bucket object as a source file\" option?"
        )

    if not isinstance(value, dict):
        raise ValueError(
            f"Error on key {key}: For {self.__class__.__name__} your JSON file must be a dictionary with one task."
        )  # noqa
    return value
def localfiles_data(request):
    """Serving files for LocalFilesImportStorage"""
    requested = request.GET.get('d')
    if settings.LOCAL_FILES_SERVING_ENABLED is False:
        return HttpResponseForbidden(
            "Serving local files can be dangerous, so it's disabled by default. "
            'You can enable it with LOCAL_FILES_SERVING_ENABLED environment variable'
        )
    document_root = get_env('LOCAL_FILES_DOCUMENT_ROOT', default='/')
    if not requested or not request.user.is_authenticated:
        return HttpResponseForbidden()
    # Normalize and strip leading slashes before joining so the resolved
    # path cannot escape the configured document root
    normalized = posixpath.normpath(requested).lstrip('/')
    resolved = Path(safe_join(document_root, normalized))
    link = LocalFilesImportStorageLink.objects.filter(key=str(resolved)).first()
    if not (link and link.has_permission(request.user) and os.path.exists(resolved)):
        return HttpResponseNotFound()
    content_type, encoding = mimetypes.guess_type(str(resolved))
    return RangedFileResponse(
        request, open(resolved, mode='rb'), content_type or 'application/octet-stream')
def localfiles_data(request):
    """Serving files for LocalFilesImportStorage"""
    requested = request.GET.get('d')
    if settings.LOCAL_FILES_SERVING_ENABLED is False:
        return HttpResponseForbidden(
            "Serving local files can be dangerous, so it's disabled by default. "
            'You can enable it with LOCAL_FILES_SERVING_ENABLED environment variable')
    document_root = get_env('LOCAL_FILES_DOCUMENT_ROOT', default='/')
    # Only authenticated users with an explicit path get a file back
    if requested and request.user.is_authenticated:
        return serve(request, requested, document_root=document_root)
    return HttpResponseForbidden()
def storage(self):
    """Return the import storage this task is linked to.

    Prefers an explicit storage link; otherwise falls back to the global
    default S3 storage when USE_DEFAULT_S3_STORAGE is enabled. Implicitly
    returns None when neither applies.
    """
    # maybe task has storage link
    link = self.get_storage_link()
    if link:
        return link.storage
    # or try global storage settings (only s3 for now)
    if get_env('USE_DEFAULT_S3_STORAGE', default=False, is_bool=True):
        # TODO: this is used to access global environment storage settings.
        # We may use more than one and non-default S3 storage (like GCS, Azure)
        from io_storages.s3.models import S3ImportStorage
        return S3ImportStorage()
def _get_task_storage(self): # maybe task has storage link storage_link = self.get_storage_link() if storage_link: return storage_link.storage # or try global storage settings (only s3 for now) # negated it as a hack, elif ~get_env('get_env', default=False, is_bool=True): logger.info("got the value") # TODO: this is used to access global environment storage settings. # We may use more than one and non-default S3 storage (like GCS, Azure) from io_storages.gcs.models import GCSImportStorage return GCSImportStorage()
def _migrate_uploaded_files(project, project_path):
    """Migrate files uploaded by user"""
    src_dir = project_path / 'upload'
    if not src_dir.exists():
        return
    base_dir = pathlib.Path(get_env('LABEL_STUDIO_BASE_DATA_DIR', get_data_dir()))
    dst_dir = base_dir / 'upload'
    if not dst_dir.exists():
        os.makedirs(str(dst_dir), exist_ok=True)
    # Re-register every uploaded file as a FileUpload record for the project owner
    for file_name in os.listdir(str(src_dir)):
        source_file = os.path.join(str(src_dir), file_name)
        with open(source_file, 'rb') as fobj:
            FileUpload.objects.create(
                user=project.created_by,
                project=project,
                file=File(fobj, name=file_name),
            )
def get_data(self, key):
    """Load one task from a file under this storage's path, or return a blob URL.

    When ``use_blob_urls`` is enabled the file is not parsed: the task data
    points at the local-files serving endpoint instead.

    :param key: file name relative to ``self.path``
    :return: dict with a single task
    :raises ValueError: when the parsed JSON is not a dict
    """
    path = Path(self.path) / key
    if self.use_blob_urls:
        # include self-hosted links pointed to local resources via
        # /data/filename?d=<path/to/local/dir>
        document_root = Path(get_env('LOCAL_FILES_DOCUMENT_ROOT', default='/'))
        relative_path = str(path.relative_to(document_root))
        return {
            settings.DATA_UNDEFINED_NAME: f'/data/local-files/?d={relative_path}'
        }
    # Fix: open with an explicit encoding so parsing does not depend on the
    # platform's default locale (consistent with the sibling get_data variant)
    with open(path, encoding='utf8') as f:
        value = json.load(f)
    if not isinstance(value, dict):
        raise ValueError(
            f"Error on key {key}: For {self.__class__.__name__} your JSON file must be a dictionary with one task."
        )  # noqa
    return value
Django Base settings for Label Studio. For more information on this file, see https://docs.djangoproject.com/en/3.1/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/3.1/ref/settings/ """ import os import re from core.utils.io import get_data_dir from core.utils.params import get_bool_env, get_env # Hostname is used for proper path generation to the resources, pages, etc HOSTNAME = get_env('HOST', '') if HOSTNAME: if not HOSTNAME.startswith('http://') and not HOSTNAME.startswith('https://'): print("! HOST variable found in environment, but it must start with http:// or https://, ignore it:", HOSTNAME) HOSTNAME = '' else: print("=> Hostname correctly is set to:", HOSTNAME) if HOSTNAME.endswith('/'): HOSTNAME = HOSTNAME[0:-1] # for django url resolver if HOSTNAME: # http[s]://domain.com:8080/script_name => /script_name pattern = re.compile(r'^http[s]?:\/\/([^:\/\s]+(:\d*)?)(.*)?') match = pattern.match(HOSTNAME) FORCE_SCRIPT_NAME = match.group(3)
def get_account_key(self):
    """Return the Azure Blob account key, falling back to the environment."""
    if self.account_key:
        return str(self.account_key)
    return get_env('AZURE_BLOB_ACCOUNT_KEY')
def get_account_name(self):
    """Return the Azure Blob account name, falling back to the environment."""
    if self.account_name:
        return str(self.account_name)
    return get_env('AZURE_BLOB_ACCOUNT_NAME')