def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    # TODO: hit upload server to check for liveness before moving on

    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow the user to change the directory used for the compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")
    floyd_logger.debug("Creating tarfile with contents of current directory: %s", tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # Fetch auth token for the upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from FloydHub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
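# NOTE: `create_tarfile` is a helper defined elsewhere in the package; its
# implementation is not shown above. The sketch below is a minimal, assumed
# version built only on the standard-library `tarfile` module; the real helper
# may filter files or report progress, so treat this as illustrative rather
# than the actual implementation.
import os
import tarfile


def create_tarfile(source_dir, filename):
    """Compress the contents of source_dir into a gzip tarball at filename."""
    # arcname="." keeps entries relative to source_dir so the archive
    # extracts in place instead of nesting under an absolute path.
    with tarfile.open(filename, "w:gz") as tar:
        tar.add(source_dir, arcname=".")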
def create(self, data):
    """
    Create a new data source by compressing the current directory and
    uploading it. The tarball is written to a temporary directory that is
    removed at the end of the operation.
    """
    with tempfile.TemporaryDirectory() as temp_directory:
        floyd_logger.info("Compressing data ...")
        compressed_file_path = os.path.join(temp_directory, "data.tar.gz")

        # Create tarfile
        floyd_logger.debug("Creating tarfile with contents of current directory: {}".format(compressed_file_path))
        create_tarfile(source_dir='.', filename=compressed_file_path)

        total_file_size = os.path.getsize(compressed_file_path)
        floyd_logger.info("Creating data source. Total upload size: {}".format(sizeof_fmt(total_file_size)))
        floyd_logger.info("Uploading compressed data ...")

        # Add request data
        request_data = []
        request_data.append(("data", ('data.tar', open(compressed_file_path, 'rb'), 'text/plain')))
        request_data.append(("json", json.dumps(data.to_dict())))

        multipart_encoder = MultipartEncoder(fields=request_data)

        # Attach progress bar
        progress_callback = create_progress_callback(multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

        response = self.request("POST",
                                self.url,
                                data=multipart_encoder_monitor,
                                headers={"Content-Type": multipart_encoder.content_type},
                                timeout=3600)

        floyd_logger.info("Done")
        return response.json().get("id")
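# NOTE: `create_progress_callback` is another helper whose body is not shown
# here. The sketch below is a plausible minimal version, assuming only the
# documented `requests_toolbelt` behaviour (MultipartEncoderMonitor calls the
# callback with itself after each read, and MultipartEncoder.len gives the
# total payload size); the real CLI may use a dedicated progress-bar library
# instead of writing to stdout directly.
import sys


def create_progress_callback(encoder):
    """Return a callback that prints upload progress for a MultipartEncoder."""
    total = encoder.len  # total payload size is known up front

    def callback(monitor):
        # monitor.bytes_read is the number of bytes sent so far
        percent = monitor.bytes_read * 100 // total
        sys.stdout.write("\rUploading... {}%".format(percent))
        sys.stdout.flush()

    return callback

# Example wiring, mirroring the call in create() above:
#   encoder = MultipartEncoder(fields=request_data)
#   monitor = MultipartEncoderMonitor(encoder, create_progress_callback(encoder))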