Example #1
def abort_previous_upload(data_config):
    if data_config.tarball_path and os.path.exists(data_config.tarball_path):
        rmtree(os.path.dirname(data_config.tarball_path))

    data_config.set_tarball_path("")
    data_config.set_data_endpoint("")
    DataConfigManager.set_config(data_config)
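Note: abort_previous_upload removes the tarball's parent directory rather than just the file. That is safe because Example #8 creates the tarball inside a dedicated temp directory, so the parent holds nothing else. A small self-contained illustration of that invariant (paths here are hypothetical):

import os
import tempfile

# the tarball is created inside its own temp dir (as in Example #8), so
# removing os.path.dirname(tarball_path) removes only that directory
temp_dir = tempfile.mkdtemp()
tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")
assert os.path.dirname(tarball_path) == temp_dir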
Example #2
def upload():
    """
    Upload data in the current directory to Floyd.
    """
    data_config = DataConfigManager.get_config()
    access_token = AuthConfigManager.get_access_token()
    version = data_config.version

    # Create data object
    data_name = "{}/{}:{}".format(access_token.username, data_config.name,
                                  version)
    data = DataRequest(name=data_name, description=version, version=version)
    data_id = DataClient().create(data)
    floyd_logger.debug("Created data with id : {}".format(data_id))
    floyd_logger.info("Upload finished")

    # Update the data config, including the predecessor
    data_config.increment_version()
    data_config.set_data_predecessor(data_id)
    DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["DATA ID", "NAME", "VERSION"],
                    [data_id, data_name, version]]
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
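For reference, the final tabulate call renders a small aligned table. A standalone sketch with made-up placeholder values:

from tabulate import tabulate

# placeholders standing in for data_id, data_name and version
table_output = [["DATA ID", "NAME", "VERSION"],
                ["aBcD1234", "alice/mnist:3", "3"]]
print(tabulate(table_output, headers="firstrow"))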
Example #3
def init(dataset_name):
    """
    Initialize a new dataset in the current directory.
    After init, ensure that your data files are in this directory.
    Then you can upload them to Floyd. Example:

        floyd data upload
    """
    dataset_obj = DatasetClient().get_by_name(dataset_name)
    if not dataset_obj:
        create_dataset_base_url = "{}/datasets/create".format(
            floyd.floyd_web_host)
        create_dataset_url = "{}?name={}".format(create_dataset_base_url,
                                                 dataset_name)
        floyd_logger.error(
            ("Dataset name does not match your list of datasets. "
             "Create your new dataset in the web dashboard:\n\t%s"),
            create_dataset_base_url)
        webbrowser.open(create_dataset_url)
        return

    data_config = DataConfig(name=dataset_name, family_id=dataset_obj.id)
    DataConfigManager.set_config(data_config)
    floyd_logger.info(
        "Data source \"{}\" initialized in current directory".format(
            dataset_name))
    floyd_logger.info("""
    You can now upload your data to Floyd by:
        floyd data upload
    """)
Example #4
def init(dataset_name):
    """
    Initialize a new dataset in the current directory.

    Then run the upload command to copy all the files in this
    directory to FloydHub.

        floyd data upload
    """
    dataset_obj = DatasetClient().get_by_name(dataset_name)

    if not dataset_obj:
        namespace, name = get_namespace_from_name(dataset_name)
        create_dataset_base_url = "{}/datasets/create".format(
            floyd.floyd_web_host)
        create_dataset_url = "{}?name={}&namespace={}".format(
            create_dataset_base_url, name, namespace)
        floyd_logger.info(
            ("Dataset name does not match your list of datasets. "
             "Create your new dataset in the web dashboard:\n\t%s"),
            create_dataset_base_url)
        webbrowser.open(create_dataset_url)

        name = click.prompt(
            'Press ENTER to use dataset name "%s" or enter a different name' %
            dataset_name,
            default=dataset_name,
            show_default=False)

        dataset_name = name.strip() or dataset_name
        dataset_obj = DatasetClient().get_by_name(dataset_name)

        if not dataset_obj:
            raise FloydException(
                'Dataset "%s" does not exist on floydhub.com. Ensure it exists before continuing.'
                % dataset_name)

    namespace, name = get_namespace_from_name(dataset_name)
    data_config = DataConfig(name=name,
                             namespace=namespace,
                             family_id=dataset_obj.id)
    DataConfigManager.set_config(data_config)
    floyd_logger.info(
        "Data source \"{}\" initialized in current directory".format(
            dataset_name))
    floyd_logger.info("""
    You can now upload your data to Floyd by:
        floyd data upload
    """)
Example #5
def init(name):
    """
    Initialize a new data upload.
    After init, ensure that your data files are in this directory.
    Then you can upload them to Floyd. Example:

        floyd data upload
    """
    data_config = DataConfig(name=name, family_id=generate_uuid())
    DataConfigManager.set_config(data_config)
    floyd_logger.info("Data source \"{}\" initialized in current directory".format(name))
    floyd_logger.info("""
    You can now upload your data to Floyd by:
        floyd data upload
    """)
Example #6
    def add_data(self, source):
        data_config = DataConfigManager.get_config()
        dataset_id = data_config.family_id
        if not dataset_id:
            sys.exit(
                "Please initialize current directory with 'floyd data init DATASET_NAME' first."
            )
        response = self.request('POST',
                                '%s/%s' % (self.url, dataset_id),
                                json={'source': source})
        return response.json()
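add_data attaches an external source to the dataset identified by the locally stored family_id. A hedged usage sketch, assuming the method lives on DatasetClient; the source URI is hypothetical:

client = DatasetClient()
result = client.add_data('s3://my-bucket/images')  # hypothetical source URI
print(result)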
Example #7
def upload(resume, tar_file, message):
    """
    Upload data in the current directory to Floyd.
    """
    data_config = DataConfigManager.get_config()

    if not upload_is_resumable(data_config) or not opt_to_resume(resume):
        abort_previous_upload(data_config)
        access_token = AuthConfigManager.get_access_token()
        initialize_new_upload(data_config, access_token, tar_file, message)

    complete_upload(data_config)
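upload_is_resumable and opt_to_resume are not shown. Plausible sketches, assuming the config fields used in Examples #8 and #10 and click for the interactive prompt:

import click

def upload_is_resumable(data_config):
    # resumable if the server is already unpacking (resource_id set) or we
    # still have a local tarball plus an upload endpoint to push it to
    return bool(data_config.resource_id or
                (data_config.tarball_path and data_config.data_endpoint))

def opt_to_resume(resume_flag):
    # the resume flag skips the prompt; otherwise ask interactively
    if resume_flag:
        return True
    return click.confirm('An unfinished upload exists. Resume it?')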
Example #8
def initialize_new_upload(data_config, access_token, description=None, source_dir='.'):
    # TODO: hit upload server to check for liveness before moving on
    data_config.set_tarball_path(None)
    data_config.set_data_endpoint(None)
    data_config.set_resource_id(None)

    namespace = data_config.namespace or access_token.username
    data_name = "{}/{}".format(namespace, data_config.name)

    # Create tarball of the data using the ID returned from the API
    # TODO: allow users to change the directory used for compression
    temp_dir = tempfile.mkdtemp()
    tarball_path = os.path.join(temp_dir, "floydhub_data.tar.gz")

    floyd_logger.debug("Creating tarfile with contents of current directory: %s",
                       tarball_path)
    floyd_logger.info("Compressing data...")

    # TODO: purge tarball on Ctrl-C
    create_tarfile(source_dir=source_dir, filename=tarball_path)

    # If starting a new upload fails for some reason down the line, we don't
    # want to re-tar, so save off the tarball path now
    data_config.set_tarball_path(tarball_path)
    DataConfigManager.set_config(data_config)

    # Create data object using API
    data = DataRequest(name=data_name,
                       description=description,
                       family_id=data_config.family_id,
                       data_type='gzip')
    data_info = DataClient().create(data)
    if not data_info:
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_id(data_info['id'])
    data_config.set_data_name(data_info['name'])
    DataConfigManager.set_config(data_config)

    # fetch auth token for upload server
    creds = DataClient().new_tus_credentials(data_info['id'])
    if not creds:
        # TODO: delete module from server?
        rmtree(temp_dir)
        sys.exit(1)

    data_resource_id = creds[0]
    data_endpoint = TusDataClient().initialize_upload(
        tarball_path,
        metadata={"filename": data_resource_id},
        auth=creds)
    if not data_endpoint:
        # TODO: delete module from server?
        floyd_logger.error("Failed to get upload URL from Floydhub!")
        rmtree(temp_dir)
        sys.exit(1)

    data_config.set_data_endpoint(data_endpoint)
    DataConfigManager.set_config(data_config)
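create_tarfile is not shown either; a minimal standard-library equivalent that compresses a directory into a .tar.gz would be:

import tarfile

def create_tarfile(source_dir, filename):
    # arcname='.' stores paths relative to source_dir instead of
    # embedding the absolute path in the archive
    with tarfile.open(filename, "w:gz") as tar:
        tar.add(source_dir, arcname='.')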
Example #9
    def get_all(self):
        try:
            data_config = DataConfigManager.get_config()

            response = self.request("GET",
                                    self.url,
                                    params={
                                        "module_type": "data",
                                        "family_id": data_config.family_id
                                    })
            data_dict = response.json()
            return [Data.from_dict(data) for data in data_dict]
        except FloydException as e:
            floyd_logger.error("Error while retrieving data: %s", e.message)
            return []
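Data.from_dict follows the usual deserialize-from-JSON pattern; a minimal version under that assumption:

class Data(object):
    def __init__(self, **fields):
        # keep all API fields as attributes (id, name, resource_id, ...)
        self.__dict__.update(fields)

    @classmethod
    def from_dict(cls, d):
        return cls(**d)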
Example #10
def complete_upload(data_config):
    data_endpoint = data_config.data_endpoint
    data_id = data_config.data_id
    tarball_path = data_config.tarball_path

    if not data_id:
        floyd_logger.error("Corrupted upload state, please start a new one.")
        sys.exit(1)

    # check for tarball upload, upload to server if not done
    if not data_config.resource_id and (tarball_path and data_endpoint):
        floyd_logger.debug("Getting fresh upload credentials")
        creds = DataClient().new_tus_credentials(data_id)
        if not creds:
            sys.exit(1)

        file_size = os.path.getsize(tarball_path)
        # check against the upload size limit
        if file_size > MAX_UPLOAD_SIZE:
            try:
                floyd_logger.info("Removing compressed data...")
                rmtree(os.path.dirname(tarball_path))
            except (OSError, TypeError):
                pass

            sys.exit(("Data size too large to upload, please keep it under %s.\n") %
                     (sizeof_fmt(MAX_UPLOAD_SIZE)))

        floyd_logger.info("Uploading compressed data. Total upload size: %s",
                          sizeof_fmt(file_size))
        tus_client = TusDataClient()
        if not tus_client.resume_upload(tarball_path, data_endpoint, auth=creds):
            floyd_logger.error("Failed to finish upload!")
            return

        try:
            floyd_logger.info("Removing compressed data...")
            rmtree(os.path.dirname(tarball_path))
        except (OSError, TypeError):
            pass

        floyd_logger.debug("Created data with id : %s", data_id)
        floyd_logger.info("Upload finished.")

        # Update data config
        data_config.set_tarball_path(None)
        data_config.set_data_endpoint(None)
        data_source = DataClient().get(data_id)
        data_config.set_resource_id(data_source.resource_id)
        DataConfigManager.set_config(data_config)

    # data tarball uploaded, check for server untar
    if data_config.resource_id:
        floyd_logger.info(
            "Waiting for server to unpack data.\n"
            "You can exit at any time and come back to check the status with:\n"
            "\tfloyd data upload -r")
        try:
            for i in dots(ResourceWaitIter(data_config.resource_id),
                          label='Waiting for unpack...'):
                pass
        except WaitTimeoutException:
            clint_STREAM.write('\n')
            clint_STREAM.flush()
            floyd_logger.info(
                "Looks like it is going to take longer for Floydhub to unpack "
                "your data. Please check back later.")
            sys.exit(1)
        else:
            data_config.set_resource_id(None)
            data_config.set_tarball_path(None)
            data_config.set_data_endpoint(None)
            data_config.set_data_id(None)
            DataConfigManager.set_config(data_config)

    # Print output
    table_output = [["NAME"],
                    [normalize_data_name(data_config.data_name)]]
    floyd_logger.info('')
    floyd_logger.info(tabulate(table_output, headers="firstrow"))
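sizeof_fmt is not defined in these examples; it presumably matches the widely used human-readable-size helper:

def sizeof_fmt(num, suffix='B'):
    # e.g. 1536 -> '1.5KB' (binary multiples)
    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Y', suffix)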
Example #11
def current_dataset_namespace():
    return DataConfigManager.get_config().namespace or current_username()
Example #12
def current_dataset_name():
    return DataConfigManager.get_config().name
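Hedged usage note: Examples #11 and #12 together recover the fully qualified dataset name for the current directory; current_qualified_name below is an illustration, not part of the CLI:

def current_qualified_name():
    # e.g. 'alice/mnist' once `floyd data init` has run here
    return "{}/{}".format(current_dataset_namespace(), current_dataset_name())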