def download_experiment(request, experiment_id, comptype):
    """Stream all datafiles of an experiment as a single archive.

    ``comptype`` selects the compression method; currently implemented:
    "zip" and "tar".  Any other value yields a 404 error response.
    """
    # TODO: intelligent selection of temp file versus in-memory buffering.
    datafiles = Dataset_File.objects.filter(
        dataset__experiments__id=experiment_id)
    rootdir = str(experiment_id)

    # Enforce configured size limits before starting to stream anything.
    msg = _check_download_limits(rootdir, datafiles, comptype)
    if msg:
        return render_error_message(request,
                                    'Requested download is too large: %s'
                                    % msg,
                                    status=403)

    # Select the archive writer and content type for the requested format.
    if comptype == "tar":
        writer = _write_tar_func(rootdir, datafiles)
        content_type = 'application/x-tar'
    elif comptype == "zip":
        writer = _write_zip_func(rootdir, datafiles)
        content_type = 'application/zip'
    else:
        return render_error_message(request,
                                    'Unsupported download format: %s'
                                    % comptype,
                                    status=404)

    stream = StreamingFile(writer, asynchronous_file_creation=True)
    response = HttpResponse(FileWrapper(stream), mimetype=content_type)
    response['Content-Disposition'] = \
        'attachment; filename="experiment' + rootdir \
        + '-complete.' + comptype + '"'
    return response
def download_experiment(request, experiment_id, comptype,
                        organization='classic'):
    """Stream all datafiles of an experiment as a single archive.

    ``comptype`` selects the compression method; currently implemented:
    "zip" and "tar".  ``organization`` selects the filename mapper used to
    lay out entries inside the archive (unknown values yield a 400).

    Improvement over the previous version: the tar and zip branches
    duplicated the whole response construction; the format now only
    selects the writer and content type, and the response is built once.
    """
    # TODO: intelligent selection of temp file versus in-memory buffering.
    datafiles = Dataset_File.objects.filter(
        dataset__experiments__id=experiment_id)
    rootdir = str(experiment_id)

    mapper = _make_mapper(organization, rootdir)
    if not mapper:
        return render_error_message(
            request, 'Unknown download organization: %s' % organization,
            status=400)

    msg = _check_download_limits(mapper, datafiles, comptype)
    if msg:
        return render_error_message(
            request, 'Cannot download: %s' % msg, status=400)

    try:
        # Only the writer function and content type differ per format.
        if comptype == "tar":
            write_func = _write_tar_func(mapper, datafiles)
            content_type = 'application/x-tar'
        elif comptype == "zip":
            write_func = _write_zip_func(mapper, datafiles)
            content_type = 'application/zip'
        else:
            return render_error_message(
                request,
                'Unsupported download format: %s' % comptype,
                status=404)
        reader = StreamingFile(write_func, asynchronous_file_creation=True)
        response = StreamingHttpResponse(FileWrapper(reader),
                                         mimetype=content_type)
        response['Content-Disposition'] = \
            'attachment; filename="experiment' + rootdir \
            + '-complete.' + comptype + '"'
    except ValueError:  # raised when replica not verified TODO: custom excptn
        # Unverified replicas cannot be streamed yet; bounce the user back
        # to where they came from with an explanatory fragment message.
        redirect = request.META.get('HTTP_REFERER',
                                    'http://%s/' %
                                    request.META.get('HTTP_HOST'))
        message = """The experiment you are trying to access has not yet been
                     verified completely.
                     Verification is an automated background process.
                     Please try again later or contact the system
                     administrator if the issue persists."""
        message = ' '.join(message.split())  # collapse whitespace runs
        redirect = redirect + '#error:' + message
        return HttpResponseRedirect(redirect)
    return response
def _streaming_downloader(request, datafiles, rootdir, filename,
                          comptype='tgz', organization='deep-storage'):
    """Build a streaming archive download response for *datafiles*.

    Private helper called by the public download view wrappers; the
    archive is gzipped for every ``comptype`` except plain ``'tar'``.
    Unknown ``organization`` values yield a 400 error response.
    """
    mapper = _make_mapper(organization, rootdir)
    if not mapper:
        return render_error_message(
            request, 'Unknown download organization: %s' % organization,
            status=400)

    gzip_output = comptype != 'tar'
    try:
        archive_entries = _get_datafile_details_for_archive(mapper, datafiles)
        tar_stream = UncachedTarStream(archive_entries,
                                       filename=filename,
                                       do_gzip=gzip_output)
        return tar_stream.get_response()
    except ValueError:  # raised when replica not verified TODO: custom excptn
        # Unverified replicas cannot be streamed yet; redirect back to the
        # referrer (or site root) with an explanatory fragment message.
        fallback = 'http://%s/' % request.META.get('HTTP_HOST')
        redirect = request.META.get('HTTP_REFERER', fallback)
        message = ('The experiment you are trying to access has not yet '
                   'been verified completely. Verification is an automated '
                   'background process. Please try again later or contact '
                   'the system administrator if the issue persists.')
        return HttpResponseRedirect(redirect + '#error:' + message)
def download_datafiles(request):
    """Download a selection of datafiles as a single archive.

    Takes string parameter "comptype" for compression method; currently
    implemented: "zip" and "tar".  The datafiles to be downloaded are
    selected using "datafile", "dataset" or "url" POST parameters.  An
    "expid" parameter may be supplied for use in the download archive
    name.  If "url" is used, the "expid" parameter is also used to limit
    the datafiles to be downloaded to a given experiment.
    """
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.
    logger.error('In download_datafiles !!')

    comptype = "zip"
    organization = "classic"
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']
    if 'organization' in request.POST:
        organization = request.POST['organization']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        # BUG FIX: the test was ((len(a) > 0 or len(b)) > 0) -- the closing
        # paren was misplaced, and the code only worked because True > 0.
        if (len(request.POST.getlist('datafile')) > 0 or
                len(request.POST.getlist('dataset')) > 0):
            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce accessible datafiles from a dataset id
            def get_dataset_datafiles(dsid):
                for datafile in Dataset_File.objects.filter(dataset=dsid):
                    if has_datafile_download_access(
                            request=request, dataset_file_id=datafile.id):
                        yield datafile

            # Generator to produce a datafile from a datafile id
            def get_datafile(dfid):
                datafile = Dataset_File.objects.get(pk=dfid)
                if has_datafile_download_access(
                        request=request, dataset_file_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(chain(
                chain.from_iterable(map(get_dataset_datafiles, datasets)),
                chain.from_iterable(map(get_datafile, datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
    elif 'url' in request.POST:
        # BUG FIX: the emptiness test was inverted ("if not len(...) == 0"),
        # rejecting every non-empty selection and letting an empty one fall
        # through to a NameError on df_set below.
        if len(request.POST.getlist('url')) == 0:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
        df_set = set()
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            experiment_id = request.POST['expid']
            # NOTE(review): this lookup uses dataset__experiment__id while
            # other queries in this file use dataset__experiments__id --
            # confirm against the model definition.
            datafile = Dataset_File.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)[0]
            if has_datafile_download_access(request=request,
                                            dataset_file_id=datafile.id):
                # BUG FIX: previously each iteration replaced df_set with a
                # one-element set, so only the last accessible URL survived.
                df_set.add(datafile)
    else:
        return render_error_message(
            request,
            'No Datasets or Datafiles were selected for downloaded',
            status=404)

    logger.info('Files for archive command: %s' % df_set)
    if len(df_set) == 0:
        return render_error_message(
            request,
            'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    rootdir = 'datasets'
    mapper = _make_mapper(organization, rootdir)
    if not mapper:
        return render_error_message(
            request, 'Unknown download organization: %s' % organization,
            status=400)

    msg = _check_download_limits(mapper, df_set, comptype)
    if msg:
        return render_error_message(
            request, 'Cannot download: %s' % msg, status=400)

    # Handle missing experiment ID - only need it for naming
    try:
        expid = request.POST['expid']
    except KeyError:
        expid = iter(df_set).next().dataset.get_first_experiment().id

    if comptype == "tar":
        reader = StreamingFile(_write_tar_func(mapper, df_set),
                               asynchronous_file_creation=True)
        response = HttpResponse(FileWrapper(reader),
                                mimetype='application/x-tar')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.tar"' % expid
    elif comptype == "zip":
        reader = StreamingFile(_write_zip_func(mapper, df_set),
                               asynchronous_file_creation=True)
        response = HttpResponse(FileWrapper(reader),
                                mimetype='application/zip')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.zip"' % expid
    else:
        response = render_error_message(
            request,
            'Unsupported download format: %s' % comptype,
            status=404)
    return response
def download_datafiles(request):
    """Download a selection of datafiles as a single archive.

    Takes string parameter "comptype" for compression method; currently
    implemented: "zip" and "tar".  The datafiles to be downloaded are
    selected using "datafile", "dataset" or "url" POST parameters.  An
    "expid" parameter may be supplied for use in the download archive
    name.  If "url" is used, the "expid" parameter is also used to limit
    the datafiles to be downloaded to a given experiment.
    """
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.
    logger.error('In download_datafiles !!')

    comptype = "zip"
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        # BUG FIX: the test was ((len(a) > 0 or len(b)) > 0) -- the closing
        # paren was misplaced, and the code only worked because True > 0.
        if (len(request.POST.getlist('datafile')) > 0 or
                len(request.POST.getlist('dataset')) > 0):
            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce accessible datafiles from a dataset id
            def get_dataset_datafiles(dsid):
                for datafile in Dataset_File.objects.filter(dataset=dsid):
                    if has_datafile_download_access(
                            request=request, dataset_file_id=datafile.id):
                        yield datafile

            # Generator to produce a datafile from a datafile id
            def get_datafile(dfid):
                datafile = Dataset_File.objects.get(pk=dfid)
                if has_datafile_download_access(
                        request=request, dataset_file_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(chain(
                chain.from_iterable(map(get_dataset_datafiles, datasets)),
                chain.from_iterable(map(get_datafile, datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
    elif 'url' in request.POST:
        # BUG FIX: the emptiness test was inverted ("if not len(...) == 0"),
        # rejecting every non-empty selection and letting an empty one fall
        # through to a NameError on df_set below.
        if len(request.POST.getlist('url')) == 0:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
        df_set = set()
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            experiment_id = request.POST['expid']
            # NOTE(review): this lookup uses dataset__experiment__id while
            # other queries in this file use dataset__experiments__id --
            # confirm against the model definition.
            datafile = Dataset_File.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)[0]
            if has_datafile_download_access(request=request,
                                            dataset_file_id=datafile.id):
                # BUG FIX: previously each iteration replaced df_set with a
                # one-element set, so only the last accessible URL survived.
                df_set.add(datafile)
    else:
        return render_error_message(
            request,
            'No Datasets or Datafiles were selected for downloaded',
            status=404)

    logger.info('Files for archive command: %s' % df_set)
    if len(df_set) == 0:
        return render_error_message(
            request,
            'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    rootdir = 'datasets'
    msg = _check_download_limits(rootdir, df_set, comptype)
    if msg:
        return render_error_message(request,
                                    'Requested download is too large: %s'
                                    % msg,
                                    status=403)

    # Handle missing experiment ID - only need it for naming
    try:
        expid = request.POST['expid']
    except KeyError:
        expid = iter(df_set).next().dataset.get_first_experiment().id

    if comptype == "tar":
        reader = StreamingFile(_write_tar_func(rootdir, df_set),
                               asynchronous_file_creation=True)
        response = HttpResponse(FileWrapper(reader),
                                mimetype='application/x-tar')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.tar"' % expid
    elif comptype == "zip":
        reader = StreamingFile(_write_zip_func(rootdir, df_set),
                               asynchronous_file_creation=True)
        response = HttpResponse(FileWrapper(reader),
                                mimetype='application/zip')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.zip"' % expid
    else:
        response = render_error_message(request,
                                        'Unsupported download format: %s'
                                        % comptype,
                                        status=404)
    return response
def streaming_download_datafiles(request):
    """Download a selection of datafiles as a streamed tar/tgz archive.

    Takes string parameter "comptype" for compression method; currently
    implemented: "tgz" and "tar".  The datafiles to be downloaded are
    selected using "datafile", "dataset" or "url" POST parameters.  An
    "expid" parameter may be supplied for use in the download archive
    name.  If "url" is used, the "expid" parameter is also used to limit
    the datafiles to be downloaded to a given experiment.
    """
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.
    logger.error('In download_datafiles !!')

    # Fall back to the configured defaults when the request does not
    # specify a format or organization.
    comptype = getattr(settings, 'DEFAULT_ARCHIVE_FORMATS', ['tar'])[0]
    organization = getattr(settings, 'DEFAULT_ARCHIVE_ORGANIZATION',
                           'classic')
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']
    if 'organization' in request.POST:
        organization = request.POST['organization']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        # BUG FIX: the test was ((len(a) > 0 or len(b)) > 0) -- the closing
        # paren was misplaced, and the code only worked because True > 0.
        if (len(request.POST.getlist('datafile')) > 0 or
                len(request.POST.getlist('dataset')) > 0):
            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce accessible datafiles from a dataset id
            def get_dataset_datafiles(dsid):
                for datafile in Dataset_File.objects.filter(dataset=dsid):
                    if has_datafile_download_access(
                            request=request, dataset_file_id=datafile.id):
                        yield datafile

            # Generator to produce a datafile from a datafile id
            def get_datafile(dfid):
                datafile = Dataset_File.objects.get(pk=dfid)
                if has_datafile_download_access(
                        request=request, dataset_file_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(chain(
                chain.from_iterable(map(get_dataset_datafiles, datasets)),
                chain.from_iterable(map(get_datafile, datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
    elif 'url' in request.POST:
        # BUG FIX: the emptiness test was inverted ("if not len(...) == 0"),
        # rejecting every non-empty selection and letting an empty one fall
        # through to a NameError on df_set below.
        if len(request.POST.getlist('url')) == 0:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)
        df_set = set()
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            experiment_id = request.POST['expid']
            # NOTE(review): this lookup uses dataset__experiment__id while
            # other queries in this file use dataset__experiments__id --
            # confirm against the model definition.
            datafile = Dataset_File.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)[0]
            if has_datafile_download_access(request=request,
                                            dataset_file_id=datafile.id):
                # BUG FIX: previously each iteration replaced df_set with a
                # one-element set, so only the last accessible URL survived.
                df_set.add(datafile)
    else:
        return render_error_message(
            request,
            'No Datasets or Datafiles were selected for downloaded',
            status=404)

    logger.info('Files for archive command: %s' % df_set)
    if len(df_set) == 0:
        return render_error_message(
            request,
            'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    # Resolve the experiment only to derive archive and root dir names.
    try:
        expid = request.POST['expid']
        experiment = Experiment.objects.get(id=expid)
    except (KeyError, Experiment.DoesNotExist):
        experiment = iter(df_set).next().dataset.get_first_experiment()

    filename = '%s-selection.tar' % experiment.title.replace(' ', '_')
    rootdir = '%s-selection' % experiment.title.replace(' ', '_')
    return _streaming_downloader(request, df_set, rootdir, filename,
                                 comptype, organization)