Example 1
import json
import os

from mdf_connect_client import MDFConnectClient

# TEST_SUBMISSIONS_DIR, DATA_SOURCES, and UPDATE_FLAG are module-level
# constants assumed to be defined elsewhere in this test suite.
def generate_base_submission():
    file_name = "base_submission.json"
    path = os.path.join(TEST_SUBMISSIONS_DIR, file_name)
    mdfcc = MDFConnectClient()  # service_instance is irrelevant
    mdfcc.create_dc_block(title="Base Deploy Testing Dataset",
                          authors="jgaff",
                          affiliations="UChicago")
    mdfcc.add_data_source(DATA_SOURCES)
    mdfcc.set_test(True)
    mdfcc.update = UPDATE_FLAG
    submission = mdfcc.get_submission()
    with open(path, 'w') as f:
        json.dump(submission, f)
    return {"success": True}
Example 2
def mdf_check_status(source_id):
    """Check status of MDF Connect submission.

    Arguments:
        source_id (str): The source_id of the dataset to check. The source_id is returned
                from the submission call.
    """
    from mdf_connect_client import MDFConnectClient

    mdfcc = MDFConnectClient()
    status = mdfcc.check_status(source_id, raw=True)
    if not status["success"]:
        raise RuntimeError(status["error"])
    else:
        print(status["status"]["status_message"])
        return status["status"]["status_message"]
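A quick usage sketch for the helper above; the source_id below is a made-up placeholder, since real values come back from the submission call:

# Hypothetical source_id, for illustration only
message = mdf_check_status("_test_example_dataset_v1.1")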
Example 3
# NOTE: this snippet arrived truncated; the function name, signature, and
# opening condition are reconstructed, and an unreachable duplicate
# return from the original has been dropped.
def carbon_source(sample):
    """Return the first non-empty carbon source among the growing steps."""
    if sample.growing_steps:
        list_of_sources = list(
            filter(None,
                   map(lambda step: step.carbon_source,
                       sample.growing_steps)))
        if list_of_sources:
            return list_of_sources[0]
    return ""


import os

import pandas as pd
from boxsdk import DevelopmentClient
from mdf_connect_client import MDFConnectClient

# Get a handle to the MDF Connect client
mdfcc = MDFConnectClient(test=True, service_instance="dev")

# This will prompt for a developer Token from Box
# See https://developer.box.com/docs/authenticate-with-developer-token
client = DevelopmentClient()

# Fetch a specific, hardcoded folder from Box for now
mdf_folder = client.folder("50410951565").get()

staging_folder = client.folder("59557678760").get()

filepath = "../data"
var_map = pd.read_csv(os.path.join(filepath, 'varmap2.csv')).to_dict()
data = pd.read_csv(os.path.join(filepath,
                                'recipe_2018_11_26.csv')).iloc[:-1, :]
Example 4
import json
import os
from copy import deepcopy

from mdf_connect_client import MDFConnectClient

# TEST_SUBMISSIONS_DIR and validate_status() are assumed to be defined
# elsewhere in this test suite.
def submit_test_submissions(service_instance,
                            submission_dir=TEST_SUBMISSIONS_DIR,
                            verbose=True):
    """Submit the test submissions to Connect.

    Arguments:
        service_instance (str): The instance of Connect to test (dev or prod).
        submission_dir (str): Path to a dir containing the files with submissions.
                Default TEST_SUBMISSIONS_DIR.
        verbose (bool): Should intermediate status messages be printed? Default True.

    Returns:
        dict: The testing results.
            success (bool): True iff all the tests succeeded.
            passed (list of str): The source_ids that passed testing.
            failed (list of dicts): The testing failures, with details.

    Note:
        If the source_name of a submission contains a keyword flag,
        it will be processed differently. Keyword flags include:
            "fail": The test is a success if the submission fails, and vice versa.
    """
    mdfcc = MDFConnectClient(service_instance=service_instance)
    source_ids = []
    successes = []
    failures = []
    curated = []
    for file_name in os.listdir(submission_dir):
        path = os.path.join(submission_dir, file_name)
        if verbose:
            print("Submitting", file_name)
        with open(path) as f:
            submission = json.load(f)
        submit_res = mdfcc.submit_dataset(submission=submission)
        if not submit_res["success"]:
            if verbose:
                print("Error:", submit_res["error"])
            failures.append(submit_res)
            continue
        source_ids.append(submit_res["source_id"])
        if verbose:
            print(submit_res["source_id"], "submitted")
    if verbose:
        print("All tests submitted.")
    # Check statuses until finished
    while len(source_ids) > 0:
        sid = source_ids.pop(0)

        # If submission requires curation, curate it
        # Don't double-curate - this can cause issues
        if (mdfcc.get_curation_task(sid, raw=True)["success"]
                and sid not in curated):
            # Reject "fail" submissions
            if "fail" in sid:
                curation_res = mdfcc.reject_curation_submission(
                    sid, reason="Intentional failure", prompt=False, raw=True)
            # Accept all other submissions
            else:
                curation_res = mdfcc.accept_curation_submission(
                    sid, reason="Testing curation", prompt=False, raw=True)
            if not curation_res["success"]:
                if verbose:
                    print("Could not curate curation submission {}: {}".format(
                        sid, curation_res["error"]))
                failures.append(deepcopy(curation_res))
                # Skip status check - test has failed
                continue
            else:
                curated.append(sid)

        # Now check the current status
        status = mdfcc.check_status(sid, raw=True)
        if not status["success"]:
            if verbose:
                print("Could not fetch status for", sid, status["error"])
            # Re-queue the source_id.
            # Note: if the service is down, this can loop forever;
            # run with verbose=True to spot repeated status failures.
            source_ids.append(sid)
            continue
        status_res = validate_status(status["status"])
        # If failed or succeeded, put result in appropriate list
        if status_res["result"] == "failed":
            failures.append(deepcopy(status_res))
            if verbose:
                print(sid, "failed")
        elif status_res["result"] == "success":
            successes.append(sid)
            if verbose:
                print(sid, "passed")
        # Otherwise, is not finished and should be re-checked
        else:
            source_ids.append(sid)
    return {
        "success": (len(failures) == 0),
        "passed": successes,
        "failed": failures
    }
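A minimal driver sketch for the test harness above (the "dev" instance is an assumption for illustration):

if __name__ == "__main__":
    results = submit_test_submissions("dev")
    if results["success"]:
        print("All tests passed:", results["passed"])
    else:
        print("Failures:", results["failed"])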
Example 5
import json
import os

from mdf_connect_client import MDFConnectClient

def generate_base_failure():
    file_name = "base_failure.json"
    path = os.path.join(TEST_SUBMISSIONS_DIR, file_name)
    mdfcc = MDFConnectClient()  # service_instance is irrelevant
    mdfcc.create_dc_block(title="Base Deploy Fail Dataset",
                          authors="jgaff",
                          affiliations="UChicago")
    mdfcc.add_data_source(DATA_SOURCES)
    # Failure point: Submission not in nanomfg organization,
    # but attempting to write to nanomfg project block
    mdfcc.set_project_block("nanomfg", {"catalyst": "bad data testing"})
    mdfcc.set_test(True)
    mdfcc.update = UPDATE_FLAG
    submission = mdfcc.get_submission()
    with open(path, 'w') as f:
        json.dump(submission, f)
    return {"success": True}
Example 6
"""
Little script to force MDF to authenticate with Globus. This can be used
to generate a ~/.mdf/credentials folder on demand
"""
from mdf_connect_client import MDFConnectClient
mdfcc = MDFConnectClient(test=True, service_instance="prod")
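Instantiating the client is what triggers the Globus login flow, so no further calls are needed. A quick follow-up check that the token cache was written (path taken from the docstring above):

import os
# The credentials folder should now exist.
assert os.path.isdir(os.path.expanduser("~/.mdf/credentials"))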
Example 7
    def __init__(self):
        self.mdfcc = MDFConnectClient(test=True, service_instance="prod")
Example 8
import uuid
from datetime import datetime

from mdf_connect_client import MDFConnectClient

# MDFException is assumed to be defined elsewhere in this project.
class MDFAdaptor:
    def __init__(self):
        self.mdfcc = MDFConnectClient(test=True, service_instance="prod")

    def upload_recipe(self, recipe, box_file):
        experiment_date = datetime.now()

        self.mdfcc.create_dc_block(
            title="Graphene Synthesis Sample " + "TBD",
            authors=[
                "%s, %s" % (auth["last_name"], auth["first_name"])
                for auth in recipe.authors
            ],
            affiliations=[auth["institution"] for auth in recipe.authors],
            publication_year=recipe.experiment_year,
        )
        self.mdfcc.add_data_source(
            box_file.get_shared_link_download_url(access="open"))

        # Don't publish specific recipes. Later on, we will bundle datasets
        # and publish an omnibus dataset.
        # mdfcc.add_service("globus_publish")
        self.mdfcc.set_source_name(str(uuid.uuid4()))

        submission = self.mdfcc.get_submission()

        submission["projects"] = {}
        submission["projects"]["nanomfg"] = {
            "catalyst": recipe.catalyst,
            "max_temperature": recipe.max_temp(),
            "carbon_source": recipe.carbon_source(),
            "base_pressure": recipe.base_pressure
            # "sample_surface_area": recipe.sample_surface_area,
            # "sample_thickness": recipe.thickness,
            # "orientation": "",
            # "grain_size": ""
        }

        print("\n\n\n\n------>", submission)

        try:
            mdf_result = self.mdfcc.submit_dataset(submission=submission)
        except Exception as e:
            print("Exception submitting dataset to mdf ", str(e))
            raise MDFException(e)

        if not mdf_result["success"]:
            self.mdfcc.reset_submission()
            print("\n\n\n--->Error-----> " + mdf_result["error"])
            raise MDFException(mdf_result["error"])

        print("Submitted to MDF -----> " + str(mdf_result))
        self.mdfcc.reset_submission()
        return mdf_result["source_id"]

    def upload_raman_analysis(self, recipe, recipe_dataset_id, raman_set,
                              raman_box_file):

        self.mdfcc.create_dc_block(
            title="Graphene Synthesis Raman Analysis",
            authors=[
                "%s, %s" % (auth["last_name"], auth["first_name"])
                for auth in recipe.authors
            ],
            affiliations=[auth["institution"] for auth in recipe.authors],
            publication_year=recipe.experiment_year,
            related_dois=recipe_dataset_id,
        )
        self.mdfcc.add_data_source(
            raman_box_file.get_shared_link_download_url(access="open"))

        self.mdfcc.set_source_name(str(uuid.uuid4()))

        submission = self.mdfcc.get_submission()

        submission["projects"] = {}
        submission["projects"]["nanomfg"] = {
            "d_to_g": raman_set["d_to_g"],
            "gp_to_g": raman_set["gp_to_g"],
            "d_peak_shift": raman_set["d_peak_shift"],
            "d_peak_amplitude": raman_set["d_peak_amplitude"],
            "d_fwhm": raman_set["d_fwhm"],
            "g_peak_shift": raman_set["g_peak_shift"],
            "g_peak_amplitude": raman_set["g_peak_amplitude"],
            "g_fwhm": raman_set["g_fwhm"],
            "g_prime_peak_shift": raman_set["g_prime_peak_shift"],
            "g_prime_peak_amplitude": raman_set["g_prime_peak_amplitude"],
            "g_prime_fwhm": raman_set["g_prime_fwhm"],
        }

        print("\n\n\n\n------>", submission)

        try:
            mdf_result = self.mdfcc.submit_dataset(submission=submission)
        except Exception as e:
            print("Exception submitting raman analysis dataset to mdf ",
                  str(e))
            raise MDFException(e)

        if not mdf_result["success"]:
            self.mdfcc.reset_submission()
            print("\n\n\n--->Error-----> " + mdf_result["error"])
            raise MDFException(mdf_result["error"])

        print("Submitted raman analysis to MDF -----> " + str(mdf_result))
        self.mdfcc.reset_submission()
        return mdf_result["source_id"]
Example 9
def mdf_submit(local_json_path,
               globus_uri,
               test=True,
               with_doi=True,
               update=False):
    """Submit dataset to MDF Connect.

    Arguments:
        local_json_path (str): The path to the local JSON metadata file.
        globus_uri (str): The URI to the Globus Endpoint and path, in the form:
                "globus://[endpoint id]/[path to data directory]"
        test (bool): Is this a test submission (test submissions generate test DOIs
                and populate the Search index "mdf-test" instead of "mdf")?
                Default True.
        with_doi (bool): Should a DOI be minted? (Includes test DOI.)
                Default True.
        update (bool): Has this submission been made before? If so, an update will be made
                to the previous submission. Test submissions and non-test submissions
                are separate. Default False.
    """
    import json
    import os
    from mdf_connect_client import MDFConnectClient

    mapping = {
        "custom.dynamic_mean_window_size": "xpcs.dynamic_mean_window_size",
        "custom.lld": "xpcs.lld",
        "custom.sigma": "xpcs.sigma",
        "custom.snophi": "xpcs.snophi",
        "custom.snoq": "xpcs.snoq"
    }
    mdfcc = MDFConnectClient()
    with open(local_json_path) as f:
        md = json.load(f)
    # DC block (title, authors, publisher, pub year, subjects)
    mdfcc.create_dc_block(
        title=os.path.basename(local_json_path).replace(".json", ""),
        authors=[creator["creatorName"] for creator in md["creators"]],
        publisher=md.get("publisher"),
        publication_year=md.get("publication_year"),
        subjects=[subject["subject"] for subject in md.get("subjects", [])])
    # Add data
    mdfcc.add_data_source(globus_uri)
    # Add JSON mapping
    mdfcc.add_index("json", mapping)
    # Set test flag
    mdfcc.set_test(test)
    # Add XPCS as organization
    mdfcc.add_organization("XPCS 8-ID")
    # Set group-by-dir flag
    mdfcc.set_conversion_config({"group_by_dir": True})
    # Add MDF Publish service
    if with_doi:
        mdfcc.add_service("mdf_publish")

    # Submit dataset
    sub_res = mdfcc.submit_dataset(update=update)
    if not sub_res["success"]:
        raise RuntimeError(sub_res["error"])
    else:
        print("Submission '{}' started".format(sub_res["source_id"]))
        return "Submission '{}' started".format(sub_res["source_id"])