Exemplo n.º 1
0
def test_upload_record_exists(mock_cli):
    """Uploading over a subject that already has a search entry must raise
    exc.RecordExists rather than silently overwriting the record."""
    http_url = mock_cli.get_globus_http_url('my_folder/test_file_zero_length.txt')
    subject = mock_cli.get_subject_url('my_folder')
    metadata = scrape_metadata(EMPTY_TEST_FILE, http_url, mock_cli.profile, 'foo')
    existing_entry = {'content': [metadata], 'subject': subject}
    # Make the client report the pre-existing record.
    mock_cli.list_entries = Mock(return_value=[existing_entry])
    with pytest.raises(exc.RecordExists):
        mock_cli.upload(SMALL_TEST_FILE, 'my_folder')
Exemplo n.º 2
0
def test_no_update_needed(mock_cli, mock_transfer_log, mock_search_client):
    """Re-uploading an unchanged file with update=True should be a no-op:
    no search ingest and no transfer-log entry."""
    filename = os.path.basename(EMPTY_TEST_FILE)
    http_url = mock_cli.get_globus_http_url(filename)
    metadata = scrape_metadata(EMPTY_TEST_FILE, http_url, mock_cli.profile,
                               'foo-project')
    search_entry = {
        'content': [metadata],
        'subject': mock_cli.get_subject_url(filename),
    }
    mock_cli.list_entries = Mock(return_value=[search_entry])

    mock_cli.upload(EMPTY_TEST_FILE, '/', update=True)

    # Neither the search client nor the transfer log should have been touched.
    assert not mock_search_client.ingest.called
    assert not mock_transfer_log.called
Exemplo n.º 3
0
def test_upload_record_exists(mock_cli, mock_search_results):
    """The `upload` CLI command must exit with RECORD_EXISTS when a search
    entry for the file's subject already exists and -u was not given."""
    filename = os.path.basename(EMPTY_TEST_FILE)
    http_url = mock_cli.get_globus_http_url(filename)
    subject = mock_cli.get_subject_url(filename)
    metadata = scrape_metadata(EMPTY_TEST_FILE, http_url, mock_cli.profile,
                               'foo-project')
    # Inject a matching record into the canned search results.
    first_hit = mock_search_results['gmeta'][0]
    first_hit['content'][0] = metadata
    first_hit['subject'] = subject
    mock_cli.list_entries = Mock(return_value=mock_search_results['gmeta'])

    result = CliRunner().invoke(upload, [EMPTY_TEST_FILE, '/', '--no-gcp'])
    assert result.exit_code == ExitCodes.RECORD_EXISTS
Exemplo n.º 4
0
def test_no_update_needed(mock_cli, mock_search_results):
    """With -u and an unchanged file, the `upload` CLI command succeeds and
    reports that no update is necessary."""
    filename = os.path.basename(EMPTY_TEST_FILE)
    http_url = mock_cli.get_globus_http_url(filename)
    subject = mock_cli.get_subject_url(filename)
    metadata = scrape_metadata(EMPTY_TEST_FILE, http_url, mock_cli.profile,
                               'foo-project')
    # Make the canned search results contain an exact match for the file.
    first_hit = mock_search_results['gmeta'][0]
    first_hit['content'][0] = metadata
    first_hit['subject'] = subject
    mock_cli.list_entries = Mock(return_value=mock_search_results['gmeta'])

    cli_args = [EMPTY_TEST_FILE, '/', '--no-gcp', '-u']
    result = CliRunner().invoke(upload, cli_args)
    assert result.exit_code == 0
    assert 'Files and search entry are an exact match.' in result.output
Exemplo n.º 5
0
def upload(dataframe, destination, metadata, gcp, update, test, dry_run,
           verbose, no_analyze):
    """
    Create a search entry and upload this file to the GCS Endpoint.

    High-level flow: check login -> validate the destination directory on
    the endpoint -> merge scraped/previous/user metadata -> ingest the
    record into Globus Search -> transfer the file, either via a local
    Globus Connect Personal endpoint (gcp) or a direct HTTP upload.

    Parameters (CLI options; presumably wired up by a click decorator
    outside this view — confirm against the command definition):
      dataframe  -- local path of the file to upload
      destination -- remote directory on the endpoint
      metadata   -- optional path to a JSON file of user-supplied metadata
      gcp        -- if truthy, transfer via Globus Connect Personal
      update     -- allow overwriting an existing search record
      test       -- use the project's test location/index
      dry_run    -- show what would happen without ingesting/transferring
      verbose    -- print the full metadata to be ingested (dry runs only)
      no_analyze -- passed through to scrape_metadata to skip analysis

    Returns None on success/early exit, 1 on the error paths below.

    # TODO: Fault tolerance for interrupted or failed file uploads (rollback)
    """
    # All endpoint/search operations go through the shared pilot client.
    pc = pilot.commands.get_pilot_client()
    if not pc.is_logged_in():
        click.echo('You are not logged in.')
        return

    if test:
        click.secho('Using test location: {}'.format(pc.TESTING_DIR),
                    fg='yellow')
        click.secho('Using test index for Globus Search', fg='yellow')

    # Without a destination, list the available directories and bail out
    # so the user can pick one and re-run.
    if not destination:
        path = pc.get_path('', '', test)
        dirs = pc.ls('', '', test)
        click.echo('No Destination Provided. Please select one from the '
                   'directory "{}":\n{}'.format(path, '\t '.join(dirs)))
        return

    # Probe the destination; a NotFound error means the remote directory
    # does not exist and must be created by the user first.
    try:
        pc.ls(dataframe, destination, test)
    except globus_sdk.exc.TransferAPIError as tapie:
        if tapie.code == 'ClientError.NotFound':
            url = pc.get_globus_app_url('', test)
            # NOTE(review): bg='red' sets the *background* color; fg='red'
            # may have been intended (other error output uses fg) — confirm.
            click.secho('Directory does not exist: "{}"\nPlease create it at: '
                        '{}'.format(destination, url),
                        err=True,
                        bg='red')
            return 1
        else:
            click.secho(tapie.message, err=True, bg='red')
            return 1

    # Optional user-supplied metadata from a JSON file.
    if metadata is not None:
        with open(metadata) as mf_fh:
            user_metadata = json.load(mf_fh)
    else:
        user_metadata = {}

    filename = os.path.basename(dataframe)
    # Previous search entry for this file, if any (None-like when absent —
    # confirm get_search_entry's return contract).
    prev_metadata = pc.get_search_entry(filename, destination, test)

    url = pc.get_globus_http_url(filename, destination, test)
    new_metadata = scrape_metadata(dataframe, url, no_analyze, test)

    # Merge scraped, previous, and user metadata, then build the GMeta
    # ingest document; both steps can reject invalid/incomplete metadata.
    try:
        new_metadata = update_metadata(new_metadata, prev_metadata,
                                       user_metadata)
        subject = pc.get_subject_url(filename, destination, test)
        gmeta = gen_gmeta(subject, pc.GROUP, new_metadata)
    except (RequiredUploadFields, ValidationError) as e:
        click.secho('Error Validating Metadata: {}'.format(e), fg='red')
        return 1

    # Deep-compare old vs. new metadata via their JSON serializations;
    # relies on both dicts serializing with the same key order — TODO
    # confirm this is stable across the structures involved.
    if json.dumps(new_metadata) == json.dumps(prev_metadata):
        click.secho(
            'Files and search entry are an exact match. No update '
            'necessary.',
            fg='green')
        # NOTE(review): returning 1 here despite the green "success"
        # message looks inconsistent with the other error paths — confirm
        # whether callers treat this as an error.
        return 1

    # A record already exists and -u/--update was not given: refuse.
    if prev_metadata and not update:
        last_updated = prev_metadata['dc']['dates'][-1]['date']
        dt = datetime.datetime.strptime(last_updated, '%Y-%m-%dT%H:%M:%S.%fZ')
        click.echo('Existing record found for {}, specify -u to update.\n'
                   'Last updated: {: %A, %b %d, %Y}'
                   ''.format(filename, dt))
        return 1

    # Dry run: report what would be ingested, make no changes.
    if dry_run:
        click.echo('Success! (Dry Run -- No changes made.)')
        click.echo(
            'Pre-existing record: {}'.format('yes' if prev_metadata else 'no'))
        click.echo('Version: {}'.format(new_metadata['dc']['version']))
        click.echo('Search Subject: {}\nURL: {}'.format(subject, url))
        if verbose:
            click.echo('Ingesting the following data:')
            click.echo(json.dumps(new_metadata, indent=2))
        return

    # Ingest the search record first; the file transfer happens afterwards
    # (see the TODO in the docstring about rollback on transfer failure).
    click.echo('Ingesting record into search...')
    pc.ingest_entry(gmeta, test)
    click.echo('Success!')

    # If the file content itself is unchanged, the metadata update above
    # was sufficient — skip the transfer entirely.
    if prev_metadata and not files_modified(new_metadata['files'],
                                            prev_metadata['files']):
        click.echo('Metadata updated, dataframe is already up to date.')
        return
    if gcp:
        # Transfer via the locally installed Globus Connect Personal
        # endpoint; requires GCP to be running on this machine.
        local_ep = globus_sdk.LocalGlobusConnectPersonal().endpoint_id
        if not local_ep:
            raise Exception('No local GCP client found')
        auth = pc.get_authorizers()['transfer.api.globus.org']
        tc = globus_sdk.TransferClient(authorizer=auth)
        tdata = globus_sdk.TransferData(tc,
                                        local_ep,
                                        pc.ENDPOINT,
                                        label='{} Transfer'.format(
                                            pc.APP_NAME),
                                        notify_on_succeeded=False,
                                        sync_level='checksum',
                                        encrypt_data=True)
        path = pc.get_path(filename, destination, test)
        tdata.add_item(dataframe, path)
        click.echo('Starting Transfer...')
        transfer_result = tc.submit_transfer(tdata)
        short_path = os.path.join(destination, filename)
        # Record the async transfer so its status can be checked later.
        pilot.config.config.add_transfer_log(transfer_result, short_path)
        click.echo('{}. You can check the status below: \n'
                   'https://app.globus.org/activity/{}/overview\n'
                   'URL will be: {}'.format(transfer_result['message'],
                                            transfer_result['task_id'], url))
    else:
        # Direct (synchronous) HTTP upload through the pilot client.
        click.echo('Uploading data...')
        response = pc.upload(dataframe, destination, test)
        if response.status_code == 200:
            click.echo('Upload Successful! URL is \n{}'.format(url))
        else:
            click.echo('Failed with status code: {}'.format(
                response.status_code))