Example #1
    def create(self, module):
        try:
            upload_files, total_file_size = get_files_in_current_directory(
                file_type='code')
        except OSError:
            sys.exit(
                "Directory contains too many files to upload. If you have data files in the current directory, "
                "please upload them separately using \"floyd data\" command and remove them from here.\n"
                "See http://docs.floydhub.com/faqs/job/#i-get-too-many-open-files-error-when-i-run-my-project "
                "for more details on how to fix this.")

        if total_file_size > self.MAX_UPLOAD_SIZE:
            sys.exit(
                ("Code size too large to sync, please keep it under %s.\n"
                 "If you have data files in the current directory, please upload them "
                 "separately using \"floyd data\" command and remove them from here.\n"
                 "You may find the following documentation useful:\n\n"
                 "\thttps://docs.floydhub.com/guides/create_and_upload_dataset/\n"
                 "\thttps://docs.floydhub.com/guides/data/mounting_data/\n"
                 "\thttps://docs.floydhub.com/guides/floyd_ignore/")
                % sizeof_fmt(self.MAX_UPLOAD_SIZE))

        floyd_logger.info("Creating project run. Total upload size: %s",
                          sizeof_fmt(total_file_size))
        floyd_logger.debug("Creating module. Uploading: %s files",
                           len(upload_files))
        floyd_logger.info("Syncing code ...")

        # Add request data
        upload_files.append(("json", json.dumps(module.to_dict())))
        multipart_encoder = MultipartEncoder(fields=upload_files)

        # Attach progress bar
        progress_callback, bar = create_progress_callback(multipart_encoder)
        multipart_encoder_monitor = MultipartEncoderMonitor(
            multipart_encoder, progress_callback)

        try:
            response = self.request(
                "POST",
                self.url,
                data=multipart_encoder_monitor,
                headers={"Content-Type": multipart_encoder.content_type},
                timeout=3600)
        finally:
            # always make sure we clear the console
            bar.done()
        return response.json().get("id")
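The examples rely on create_progress_callback, which is not shown on this page. A minimal sketch of what it might look like, assuming the clint progress bar (floyd-cli already depends on clint, see clint_STREAM in Example #3) and the callback protocol that requests_toolbelt's MultipartEncoderMonitor expects, i.e. a callable that receives the monitor:

# Hedged sketch, not the actual floyd-cli helper.
from clint.textui.progress import Bar as ProgressBar

def create_progress_callback(encoder):
    # encoder.len is the total size of the multipart body in bytes
    bar = ProgressBar(expected_size=encoder.len, filled_char='=')

    def callback(monitor):
        # monitor.bytes_read counts how much of the body has been sent
        bar.show(monitor.bytes_read)

    return callback, bar

Returning the bar alongside the callback lets the caller clear it with bar.done() once the request finishes, as the try/finally in Example #1 does.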
Example #2
    def create(self, data):
        """
        Create a temporary directory for the tar file that will be removed at the
        end of the operation.
        """
        with tempfile.TemporaryDirectory() as temp_directory:
            floyd_logger.info("Compressing data ...")
            compressed_file_path = os.path.join(temp_directory, "data.tar.gz")

            # Create tarfile
            floyd_logger.debug("Creating tarfile with contents of current directory: {}".format(compressed_file_path))
            create_tarfile(source_dir='.', filename=compressed_file_path)

            total_file_size = os.path.getsize(compressed_file_path)
            floyd_logger.info("Creating data source. Total upload size: {}".format(sizeof_fmt(total_file_size)))
            floyd_logger.info("Uploading compressed data ...")

            # Add request data; the file handle stays open so the encoder
            # can stream the tarball during the upload
            request_data = [
                ("data", ("data.tar", open(compressed_file_path, 'rb'), "text/plain")),
                ("json", json.dumps(data.to_dict())),
            ]

            multipart_encoder = MultipartEncoder(fields=request_data)

            # Attach progress bar; create_progress_callback returns the
            # callback and the bar (see Example #1)
            progress_callback, bar = create_progress_callback(multipart_encoder)
            multipart_encoder_monitor = MultipartEncoderMonitor(multipart_encoder, progress_callback)

            try:
                response = self.request("POST",
                                        self.url,
                                        data=multipart_encoder_monitor,
                                        headers={"Content-Type": multipart_encoder.content_type},
                                        timeout=3600)
            finally:
                # always make sure we clear the console
                bar.done()

            floyd_logger.info("Done")
            return response.json().get("id")
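create_tarfile is another helper defined elsewhere in floyd-cli. A plausible stand-in using only the standard library (the real implementation may differ, for example in how it honors .floydignore rules):

import tarfile

def create_tarfile(source_dir, filename):
    # Write a gzip-compressed tar of source_dir; arcname='.' keeps the
    # archived paths relative instead of embedding the absolute path.
    with tarfile.open(filename, "w:gz") as tar:
        tar.add(source_dir, arcname='.')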
Example #3
def complete_upload(data_config):
    data_endpoint = data_config.data_endpoint
    data_id = data_config.data_id
    tarball_path = data_config.tarball_path

    if not data_id:
        floyd_logger.error("Corrupted upload state, please start a new one.")
        sys.exit(1)

    # check for tarball upload, upload to server if not done
    if not data_config.resource_id and (tarball_path and data_endpoint):
        floyd_logger.debug("Getting fresh upload credentials")
        creds = DataClient().new_tus_credentials(data_id)
        if not creds:
            sys.exit(1)

        file_size = os.path.getsize(tarball_path)
        # check against the upload size limit
        if file_size > MAX_UPLOAD_SIZE:
            try:
                floyd_logger.info("Removing compressed data...")
                rmtree(os.path.dirname(tarball_path))
            except (OSError, TypeError):
                pass

            sys.exit(("Data size too large to upload, please keep it under %s.\n") %
                     (sizeof_fmt(MAX_UPLOAD_SIZE)))

        floyd_logger.info("Uploading compressed data. Total upload size: %s",
                          sizeof_fmt(file_size))
        tus_client = TusDataClient()
        if not tus_client.resume_upload(tarball_path, data_endpoint, auth=creds):
            floyd_logger.error("Failed to finish upload!")
            return

        try:
            floyd_logger.info("Removing compressed data...")
            rmtree(os.path.dirname(tarball_path))
        except (OSError, TypeError):
            pass

        floyd_logger.debug("Created data with id : %s", data_id)
        floyd_logger.info("Upload finished.")

        # Update data config
        data_config.set_tarball_path(None)
        data_config.set_data_endpoint(None)
        data_source = DataClient().get(data_id)
        data_config.set_resource_id(data_source.resource_id)
        DataConfigManager.set_config(data_config)

    # data tarball uploaded, check for server untar
    if data_config.resource_id:
        floyd_logger.info(
            "Waiting for server to unpack data.\n"
            "You can exit at any time and come back to check the status with:\n"
            "\tfloyd data upload -r")
        try:
            for _ in dots(ResourceWaitIter(data_config.resource_id),
                          label='Waiting for unpack...'):
                pass
        except WaitTimeoutException:
            clint_STREAM.write('\n')
            clint_STREAM.flush()
            floyd_logger.info(
                "Looks like it is going to take longer for FloydHub to unpack "
                "your data. Please check back later.")
            sys.exit(1)
        else:
            data_config.set_resource_id(None)
            data_config.set_tarball_path(None)
            data_config.set_data_endpoint(None)
            data_config.set_data_id(None)
            DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["NAME"],
                    [normalize_data_name(data_config.data_name)]]
    floyd_logger.info('')
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
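The unpack wait in Example #3 drives dots() over ResourceWaitIter, printing a dot per polling step. A rough sketch of the iterator's contract follows; get_resource_state, the 'valid' state name, and the timeout/interval defaults are all hypothetical stand-ins for the real API call floyd-cli makes:

import time

class WaitTimeoutException(Exception):
    # floyd-cli defines its own version of this exception
    pass

class ResourceWaitIter(object):
    """Yield once per poll until the resource is unpacked, or time out."""

    def __init__(self, resource_id, timeout=600, interval=5):
        self.resource_id = resource_id
        self.deadline = time.time() + timeout
        self.interval = interval

    def __iter__(self):
        # get_resource_state is hypothetical; the real client asks the
        # server whether the uploaded tarball has been unpacked yet
        while get_resource_state(self.resource_id) != 'valid':
            if time.time() > self.deadline:
                raise WaitTimeoutException()
            yield
            time.sleep(self.interval)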