Exemplo n.º 1
0
def get_subject_dats_material(cache, p_subject, gh_subject, var_lookup):
    """Create the DATS "Material" describing a single GTEx subject.

    Args:
        cache: object cache; handed to the ``util`` helpers and used through
            ``cache.get_obj_or_ref`` to register the resulting Material.
        p_subject: mapping of GTEx subject variables. The ``SUBJID``, ``SEX``,
            ``AGE`` and ``DTHHRDY`` entries are read via ``['mapped_value']``.
        gh_subject: optional entry from the GTEx id dump. When it is not
            ``None`` its ``['Destination URL']['raw_value']`` is used as the
            Material identifier instead of the subject id.
        var_lookup: mapping from variable name to an entry whose ``'dim'``
            object supports ``.get("identifier").getIdRef()``.

    Returns:
        The result of ``cache.get_obj_or_ref`` for the key
        ``"Material:<subject id>"`` (presumably the object itself or a
        reference to it -- confirm against the cache implementation).
    """
    subject_id = p_subject['SUBJID']['mapped_value']

    def build_dimension(label, description, var_name):
        # The annotation is resolved first, then the id reference of the
        # Dimension registered for the "all subjects" consent group version
        # of the variable, and finally the subject's mapped value.
        annotation = util.get_value_annotation(label, cache)
        id_ref = var_lookup[var_name]['dim'].get("identifier").getIdRef()
        return DatsObj("Dimension", [
            ("name", annotation),
            ("description", description),
            ("identifier", id_ref),
            ("values", [p_subject[var_name]['mapped_value']]),
        ])

    # characteristics of the human experimental subject/patient
    characteristics = [
        build_dimension("Gender", "Gender of the subject", "SEX"),
        build_dimension("Age range", "Age range of the subject", "AGE"),
        build_dimension("Hardy scale",
                        "Hardy scale death classification for the subject",
                        "DTHHRDY"),
    ]

    # prefer the URI from the GTEx id dump when one was supplied
    if gh_subject is None:
        id_value = subject_id
    else:
        id_value = gh_subject['Destination URL']['raw_value']

    # human experimental subject/patient
    material = DatsObj("Material", [
        ("name", subject_id),
        ("identifier", DatsObj("Identifier", [("identifier", id_value)])),
        ("description", "GTEx subject " + subject_id),
        ("characteristics", characteristics),
        ("taxonomy", [util.get_taxon_human(cache)]),
        ("roles", util.get_donor_roles(cache)),
    ])

    # register with the cache; the Material above is built unconditionally
    # and only handed over through the factory callback
    cache_key = ":".join(["Material", subject_id])
    return cache.get_obj_or_ref(cache_key, lambda: material)
Exemplo n.º 2
0
def get_single_sample_json(sample, dats_obj_cache):
    """Build the DATS Material chain for one GTEx sample.

    The chain is subject -> tissue specimen -> RNA extract. Anatomical parts
    and subject Materials are stored in and reused from ``dats_obj_cache``
    under the keys ``"AnatomicalPart:<name>"`` and ``"Material:<subject id>"``.

    Args:
        sample: mapping of sample variables. ``SAMPID``, ``SUBJID``,
            ``SMUBRID`` and ``SMTSD`` are read via ``['mapped_value']``;
            ``sample['subject']`` holds the subject's ``SEX``, ``AGE`` and
            ``DTHHRDY`` entries in the same form.
        dats_obj_cache: cache supporting ``in``, item get and item set; it is
            also passed to the ``util`` helpers.

    Returns:
        The "Material" DatsObj for the RNA extracted from the sample.

    Exits the process with status 1 when the sample has no anatomy id.
    """
    sample_id = sample['SAMPID']['mapped_value']
    subject_id = sample['SUBJID']['mapped_value']
    subject_vars = sample['subject']

    # Uberon id (or EFO id, contrary to the documentation)
    ontology_id = sample['SMUBRID']['mapped_value']
    if ontology_id is None:
        print("No Uberon/anatomy ID specified for sample " + sample_id)
        sys.exit(1)

    # TODO - query anatomy term from UBERON/EFO instead?
    tissue_name = sample['SMTSD']['mapped_value']

    # Pick the ontology from the id's shape: EFO ids carry an "EFO_" prefix,
    # anything else is treated as a bare Uberon accession.
    if re.match(r'^EFO_\d+', ontology_id):
        id_source = "EFO"
        primary_id = ontology_id
        term_url = ("https://www.ebi.ac.uk/ols/ontologies/efo/terms?short_form="
                    + str(ontology_id))
    else:
        id_source = "UBERON"
        primary_id = "UBERON:" + str(ontology_id)
        term_url = "http://purl.obolibrary.org/obo/UBERON_" + str(ontology_id)

    anatomy_identifier = OrderedDict([("identifier", primary_id),
                                      ("identifierSource", id_source)])
    anatomy_alt_ids = [OrderedDict([("identifier", term_url),
                                    ("identifierSource", id_source)])]

    # anatomical part, shared between samples of the same tissue
    part_key = ":".join(["AnatomicalPart", tissue_name])
    if part_key not in dats_obj_cache:
        anatomical_part = DatsObj("AnatomicalPart", [
            ("name", tissue_name),
            ("identifier", anatomy_identifier),
            ("alternateIdentifiers", anatomy_alt_ids),
        ])
        dats_obj_cache[part_key] = anatomical_part
    else:
        anatomical_part = dats_obj_cache[part_key]

    def gtex_dimension(label, description, var_name):
        # One subject characteristic, identified by its GTEx variable name.
        return DatsObj("Dimension", [
            ("name", DatsObj("Annotation", [("value", label)])),
            ("description", description),
            ("identifier", DatsObj("Identifier", [
                ("identifier", var_name),
                ("identifierSource", "GTEx"),
            ])),
            ("values", [subject_vars[var_name]['mapped_value']]),
        ])

    # Built on every call, even when the subject Material is already cached.
    subject_characteristics = [
        gtex_dimension("Gender", "Gender of the subject", "SEX"),
        gtex_dimension("Age range", "Age range of the subject", "AGE"),
        gtex_dimension("Hardy scale",
                       "Hardy scale death classification for the subject",
                       "DTHHRDY"),
    ]

    # human experimental subject/patient, shared between the subject's samples
    subject_key = ":".join(["Material", subject_id])
    if subject_key not in dats_obj_cache:
        subject_material = DatsObj("Material", [
            ("name", subject_id),
            ("identifier", {"identifier": subject_id}),
            ("description", "GTEx subject " + subject_id),
            ("characteristics", subject_characteristics),
            ("taxonomy", [util.get_taxon_human(dats_obj_cache)]),
            ("roles", util.get_donor_roles(dats_obj_cache)),
        ])
        dats_obj_cache[subject_key] = subject_material
    else:
        subject_material = dats_obj_cache[subject_key]

    specimen_role = util.get_annotation("specimen", dats_obj_cache)
    rna_extract_role = util.get_annotation("RNA extract", dats_obj_cache)

    # biological/tissue sample
    tissue_sample = DatsObj("Material", [
        ("name", sample_id),
        ("identifier", {"identifier": sample_id}),
        ("description",
         tissue_name + " specimen collected from subject " + subject_id),
        ("taxonomy", [util.get_taxon_human(dats_obj_cache)]),
        ("roles", [specimen_role]),
        ("derivesFrom", [subject_material, anatomical_part]),
    ])

    # RNA extracted from tissue sample
    return DatsObj("Material", [
        ("name", "RNA from " + sample_id),
        ("description", "total RNA extracted from " + tissue_name +
         " specimen collected from subject " + subject_id),
        ("taxonomy", [util.get_taxon_human(dats_obj_cache)]),
        ("roles", [rna_extract_role]),
        ("derivesFrom", [tissue_sample]),
    ])
Exemplo n.º 3
0
def get_subject_dats_material(cache, study, study_md, subj_var_values):
    """Create the DATS "Material" for one dbGaP study subject.

    A handful of well-known variables (sex, age, visit year, blood pressure,
    hypertension) are harmonized into dedicated characteristics / diseases;
    every variable is additionally attached as a "raw" Dimension.

    Args:
        cache: object cache; passed to the ``util`` helpers and used through
            ``cache.get_obj_or_ref`` to register the resulting Material.
        study: object whose ``get("title")`` supplies the study title.
        study_md: not used by this function.
        subj_var_values: mapping of variable name to a dict holding at least
            ``'value'`` and optionally ``'var'`` (whose ``'dim'`` supports
            ``.get("identifier").getIdRef()``). Must contain ``SUBJECT_ID``
            and ``dbGaP_Subject_ID``.

    Returns:
        The result of ``cache.get_obj_or_ref`` for the key
        ``"Material:<subject id>"``.
    """
    # extract subject attributes
    gender = age = visit_year = sys_bp = dias_bp = None
    hypertension_status = "unknown"

    for var_name in subj_var_values:
        key = var_name.upper()
        if key in ("GENDER", "SEX"):
            gender = subj_var_values[var_name]['value'].lower()
        elif key in ("VISIT_AGE", "AGE", "AGE_ENROLL"):
            # need to confirm that these all mean the same thing
            age = subj_var_values[var_name]['value']
        elif key == "VISIT_YEAR":
            visit_year = subj_var_values[var_name]['value']
        elif key == "SYSBP":
            sys_bp = subj_var_values[var_name]['value']
        elif key == "DIASBP":
            dias_bp = subj_var_values[var_name]['value']
        elif key in ("HYPERTENSION", "HIGHBLOODPRES"):
            answer = subj_var_values[var_name]['value']
            if answer.lower() == "yes" or answer == '1':
                hypertension_status = "yes"
            elif re.match(r'\S', answer):
                # any other value that starts with a non-blank character
                # counts as "no"; blank values leave the status unknown
                hypertension_status = "no"

    # harmonized/standardized characteristics, emitted only when present
    harmonized = [
        ("Gender", "Gender of the subject", gender),
        ("Age", "Age of the subject", age),
        ("Visit year", "Year of visit, to use for longitudinal analysis",
         visit_year),
        ("Systolic blood pressure",
         "Systolic blood pressure of subject, measured in mmHg", sys_bp),
        ("Diastolic blood pressure",
         "Diastolic blood pressure of subject, measured in mmHg", dias_bp),
    ]
    subject_characteristics = [
        DatsObj("Dimension", [
            ("name", DatsObj("Annotation", [("value", label)])),
            ("description", description),
            ("values", [observed]),
        ])
        for label, description, observed in harmonized
        if observed is not None
    ]

    subject_bearerOfDisease = []
    if hypertension_status != "unknown":
        disease_id = "10763"
        disease_identifier = OrderedDict([
            ("identifier", "DOID:" + str(disease_id)),
            ("identifierSource", "Disease Ontology"),
        ])
        disease_alt_ids = [OrderedDict([
            ("identifier",
             "http://purl.obolibrary.org/obo/DOID_" + str(disease_id)),
            ("identifierSource", "Disease Ontology"),
        ])]
        subject_bearerOfDisease.append(DatsObj("Disease", [
            ("name", "Hypertension"),
            ("identifier", disease_identifier),
            ("alternateIdentifiers", disease_alt_ids),
            ("diseaseStatus", DatsObj("Annotation", [
                ("value", hypertension_status),
                ("valueIRI", ""),
            ])),
        ]))

    def raw_dimension(var_name, var_value):
        # create a DATS Dimension from a dbGaP variable value
        dimension = DatsObj("Dimension", [
            ("name", DatsObj("Annotation", [("value", var_name)])),
            ("values", [var_value["value"]]),
        ])
        # link to the existing identifier of the matching Dataset Dimension
        if "var" in var_value:
            dataset_dim = var_value["var"]["dim"]
            dimension.setProperty(
                "identifier", dataset_dim.get("identifier").getIdRef())
        return dimension

    # "raw" characteristics from dbGaP metadata, in sorted variable order
    for var_name in sorted(subj_var_values):
        subject_characteristics.append(
            raw_dimension(var_name, subj_var_values[var_name]))

    human_taxon = util.get_taxon_human(cache)
    subject_id = subj_var_values['SUBJECT_ID']['value']
    dbgap_subject_id = subj_var_values['dbGaP_Subject_ID']['value']
    study_title = study.get("title")

    # human experimental subject/patient
    material = DatsObj("Material", [
        ("name", subject_id),
        ("identifier", {"identifier": subject_id}),
        ("alternateIdentifiers", [util.get_alt_id(dbgap_subject_id, "dbGaP")]),
        ("description", study_title + " subject " + subject_id),
        ("characteristics", subject_characteristics),
        ("bearerOfDisease", subject_bearerOfDisease),
        ("taxonomy", [human_taxon]),
        ("roles", util.get_donor_roles(cache)),
    ])

    # add to the cache
    cache_key = ":".join(["Material", subject_id])
    return cache.get_obj_or_ref(cache_key, lambda: material)
Exemplo n.º 4
0
def get_single_dna_extract_json(cache, study, study_md, subj_var_values,
                                samp_var_values):
    """Build the DATS Material chain for one TOPMed DNA sample.

    The chain is subject -> specimen (optionally tied to an anatomical part)
    -> DNA extract.

    Args:
        cache: object cache passed to ``util.get_taxon_human`` and
            ``util.get_donor_roles``.
        study: object whose ``get("title")`` supplies the study title.
        study_md: mapping whose ``'dbgap_vars'`` entry is indexed by dbGaP
            variable id and yields objects supporting
            ``.get("identifier").getIdRef()``.
        subj_var_values: mapping of subject variable name to a dict holding
            ``'value'`` and optionally ``'var'`` (with an ``'id'``). Must
            contain ``SUBJECT_ID`` and ``dbGaP_Subject_ID``.
        samp_var_values: same shape for sample variables. Must contain
            ``SAMPLE_ID`` and ``dbGaP_Sample_ID``; the body site is looked up
            under ``BODY_SITE``, ``Body_Site`` or ``Body Site``.

    Returns:
        The "Material" DatsObj for the DNA extracted from the sample.

    Logs a fatal message and exits the process with status 1 when a body
    site is present but is neither a blood nor a saliva sample.
    """
    # Almost all samples in TOPMed WGS phase are blood samples, named "Blood",
    # "Peripheral Blood"... Few samples are saliva samples probably due to
    # sample collection issues.
    body_site_key = None
    for candidate in ('BODY_SITE', 'Body_Site', 'Body Site'):
        if candidate in samp_var_values:
            body_site_key = candidate
            break

    anat_id = None
    anatomy_name = None

    if body_site_key is not None:
        body_site = samp_var_values[body_site_key]['value']
        if "blood" in body_site.lower():
            anatomy_name = "blood"
            anat_id = "0000178"
        elif body_site.lower() == "saliva":
            anatomy_name = "saliva"
            anat_id = "0001836"
        else:
            # Report the value read under the key spelling that was actually
            # found: indexing 'BODY_SITE' unconditionally raised KeyError for
            # the 'Body_Site' / 'Body Site' spellings and hid the real error.
            logging.fatal(
                "encountered BODY_SITE other than 'Blood' and 'Saliva' in TOPMed sample metadata - "
                + body_site)
            sys.exit(1)

    if anat_id is not None:
        anatomy_identifier = OrderedDict([
            ("identifier", "UBERON:" + str(anat_id)),
            ("identifierSource", "UBERON"),
        ])
        anatomy_alt_ids = [
            OrderedDict([
                ("identifier",
                 "http://purl.obolibrary.org/obo/UBERON_" + str(anat_id)),
                ("identifierSource", "UBERON"),
            ])
        ]

    # extract subject attributes
    gender = None
    age = None
    visit_year = None
    sys_bp = None
    dias_bp = None
    disease = {}
    disease['hypertension'] = "unknown"

    for var_name in subj_var_values:
        name_upper = var_name.upper()
        if name_upper in ("GENDER", "SEX"):
            gender = subj_var_values[var_name]['value'].lower()
        elif name_upper in ("VISIT_AGE", "AGE", "AGE_ENROLL"):
            # need to confirm that these all mean the same thing
            age = subj_var_values[var_name]['value']
        elif name_upper == "VISIT_YEAR":
            visit_year = subj_var_values[var_name]['value']
        elif name_upper == "SYSBP":
            sys_bp = subj_var_values[var_name]['value']
        elif name_upper == "DIASBP":
            dias_bp = subj_var_values[var_name]['value']
        elif name_upper in ("HYPERTENSION", "HIGHBLOODPRES"):
            answer = subj_var_values[var_name]['value']
            if answer.lower() == "yes" or answer == '1':
                disease['hypertension'] = "yes"
            elif re.match(r'\S', answer):
                # any other value starting with a non-blank character is
                # recorded as "no"; blank values leave the status unknown
                disease['hypertension'] = "no"

    # anatomical part (only when a recognised body site was given)
    anatomical_part = None
    if anatomy_name is not None:
        anatomical_part = DatsObj("AnatomicalPart",
                                  [("name", anatomy_name),
                                   ("identifier", anatomy_identifier),
                                   ("alternateIdentifiers", anatomy_alt_ids)])

    subject_characteristics = []
    subject_bearerOfDisease = []

    # harmonized/standardized characteristics
    if gender is not None:
        subject_sex = DatsObj(
            "Dimension",
            [("name", DatsObj("Annotation", [("value", "Gender")])),
             ("description", "Gender of the subject"), ("values", [gender])])
        subject_characteristics.append(subject_sex)

    if age is not None:
        subject_age = DatsObj(
            "Dimension", [("name", DatsObj("Annotation", [("value", "Age")])),
                          ("description", "Age of the subject"),
                          ("values", [age])])
        subject_characteristics.append(subject_age)

    if visit_year is not None:
        subject_visitYear = DatsObj("Dimension", [
            ("name", DatsObj("Annotation", [("value", "Visit year")])),
            ("description", "Year of visit, to use for longitudinal analysis"),
            ("values", [visit_year])
        ])
        subject_characteristics.append(subject_visitYear)

    if sys_bp is not None:
        subject_sysBP = DatsObj(
            "Dimension",
            [("name",
              DatsObj("Annotation", [("value", "Systolic blood pressure")])),
             ("description",
              "Systolic blood pressure of subject, measured in mmHg"),
             ("values", [sys_bp])])
        subject_characteristics.append(subject_sysBP)

    if dias_bp is not None:
        subject_diasBP = DatsObj(
            "Dimension",
            [("name",
              DatsObj("Annotation", [("value", "Diastolic blood pressure")])),
             ("description",
              "Diastolic blood pressure of subject, measured in mmHg"),
             ("values", [dias_bp])])
        subject_characteristics.append(subject_diasBP)

    if disease['hypertension'] != "unknown":
        disease_id = "10763"
        disease_identifier = OrderedDict([
            ("identifier", "DOID:" + str(disease_id)),
            ("identifierSource", "Disease Ontology")
        ])
        disease_alt_ids = [
            OrderedDict([
                ("identifier",
                 "http://purl.obolibrary.org/obo/DOID_" + str(disease_id)),
                ("identifierSource", "Disease Ontology")
            ])
        ]
        subject_hypertension = DatsObj("Disease", [
            ("name", "Hypertension"),
            ("identifier", disease_identifier),
            ("alternateIdentifiers", disease_alt_ids),
            ("diseaseStatus",
             DatsObj("Annotation", [("value", disease['hypertension']),
                                    ("valueIRI", "")])),
        ])
        subject_bearerOfDisease.append(subject_hypertension)

    # create a DATS Dimension from a dbGaP variable value
    def make_var_dimension(name, var_value):
        value = var_value["value"]

        dim = DatsObj("Dimension",
                      [("name", DatsObj("Annotation", [("value", name)])),
                       ("values", [value])])

        # find existing DATS identifier for the corresponding Dataset Dimension
        if "var" in var_value:
            var_id = var_value["var"]["id"]
            dbgap_var_dim = study_md['dbgap_vars'][var_id]
            dim.setProperty("identifier",
                            dbgap_var_dim.get("identifier").getIdRef())

        return dim

    # create DATS Dimensions for dbGaP subject metadata
    subject_dimensions = [
        make_var_dimension(vname, subj_var_values[vname])
        for vname in sorted(subj_var_values)
    ]

    # create DATS Dimensions for dbGaP sample metadata
    sample_dimensions = [
        make_var_dimension(vname, samp_var_values[vname])
        for vname in sorted(samp_var_values)
    ]

    # "raw" characteristics from dbGaP metadata
    subject_characteristics.extend(subject_dimensions)
    sample_characteristics = sample_dimensions

    human_t = util.get_taxon_human(cache)
    subj_id = subj_var_values['SUBJECT_ID']['value']
    dbgap_subj_id = subj_var_values['dbGaP_Subject_ID']['value']
    samp_id = samp_var_values['SAMPLE_ID']['value']
    dbgap_samp_id = samp_var_values['dbGaP_Sample_ID']['value']

    study_title = study.get("title")

    # human experimental subject/patient
    subject_material = DatsObj(
        "Material",
        [("name", subj_id), ("identifier", {
            "identifier": subj_id
        }),
         ("alternateIdentifiers", [util.get_alt_id(dbgap_subj_id, "dbGaP")]),
         ("description", study_title + " subject " + subj_id),
         ("characteristics", subject_characteristics),
         ("bearerOfDisease", subject_bearerOfDisease), ("taxonomy", [human_t]),
         ("roles", util.get_donor_roles(cache))])

    # TODO - use DatsObjCache
    specimen_annot = util.get_annotation("specimen")
    dna_extract_annot = util.get_annotation("DNA extract")

    # biological/tissue sample
    sample_name = samp_id
    sample_derives_from = [subject_material]
    sample_descr = "specimen collected from subject " + subj_id
    if anatomical_part is not None:
        sample_derives_from.append(anatomical_part)
        sample_descr = anatomy_name + " " + sample_descr

    biological_sample_material = DatsObj(
        "Material",
        [("name", sample_name), ("identifier", {
            "identifier": samp_id
        }),
         ("alternateIdentifiers", [util.get_alt_id(dbgap_samp_id, "dbGaP")]),
         ("description", sample_descr),
         ("characteristics", sample_characteristics), ("taxonomy", [human_t]),
         ("roles", [specimen_annot]), ("derivesFrom", sample_derives_from)])

    # DNA extracted from tissue sample
    dna_descr = "DNA extracted from specimen collected from subject " + subj_id
    if anatomical_part is not None:
        dna_descr = "DNA extracted from " + anatomy_name + " specimen collected from subject " + subj_id

    dna_material = DatsObj("Material",
                           [("name", "DNA from " + sample_name),
                            ("description", dna_descr),
                            ("taxonomy", [human_t]),
                            ("roles", [dna_extract_annot]),
                            ("derivesFrom", [biological_sample_material])])

    return dna_material
Exemplo n.º 5
0
def get_single_dna_extract_json(study, subj_var_values, samp_var_values):
    """Build the DATS Material chain for one TOPMed blood DNA sample.

    The chain is subject -> blood specimen -> DNA extract. The original
    dbGaP subject and sample metadata are attached as ``extraProperties``.

    Args:
        study: object whose ``get("title")`` supplies the study title.
        subj_var_values: mapping of subject variable name to value. Must
            contain ``SUBJECT_ID`` and ``dbGaP_Subject_ID``; ``GENDER`` and
            ``VISIT_AGE`` are used when present.
        samp_var_values: mapping of sample variable name to value. Must
            contain ``BODY_SITE``, ``SAMPLE_ID`` and ``dbGaP_Sample_ID``.

    Returns:
        The "Material" DatsObj for the DNA extracted from the sample.

    Logs a fatal message and exits the process with status 1 when
    ``BODY_SITE`` is anything other than ``'Blood'``.
    """
    # all samples in TOPMed WGS phase are blood samples
    body_site = samp_var_values['BODY_SITE']
    if body_site != 'Blood':
        logging.fatal(
            "encountered BODY_SITE other than 'Blood' in TOPMed sample metadata - "
            + samp_var_values['BODY_SITE'])
        sys.exit(1)

    anatomy_name = "blood"
    uberon_accession = "0000178"

    anatomy_identifier = OrderedDict([
        ("identifier", "UBERON:" + str(uberon_accession)),
        ("identifierSource", "UBERON"),
    ])
    anatomy_alt_ids = [OrderedDict([
        ("identifier",
         "http://purl.obolibrary.org/obo/UBERON_" + str(uberon_accession)),
        ("identifierSource", "UBERON"),
    ])]

    # extract subject attributes
    gender = None
    if "GENDER" in subj_var_values:
        gender = subj_var_values["GENDER"].lower()
    age = None
    if "VISIT_AGE" in subj_var_values:
        age = subj_var_values["VISIT_AGE"]
    # TODO - determine what other subject attributes can be mapped directly to core DATS objects

    def as_extra_properties(var_values):
        # one CategoryValuesPair per variable, in sorted variable-name order
        return [
            DatsObj("CategoryValuesPair", [
                ("category", var_name),
                ("values", [var_values[var_name]]),
            ])
            for var_name in sorted(var_values)
        ]

    # place original dbGaP subject metadata into extraProperties
    # TODO - consider alternative of doing this only for un-harmonized metadata
    subj_extra_props = as_extra_properties(subj_var_values)

    # TODO - determine which DATS objects (e.g., biological sample, DNA prep,
    # sequence data) the sample's SEQUENCING_CENTER property should attach to
    # TODO - determine what other subject attributes can be mapped directly to core DATS objects
    # e.g., IS_TUMOR -> bearerOfDisease ("the pathology affecting the material...")

    # place original dbGaP sample metadata into extraProperties
    samp_extra_props = as_extra_properties(samp_var_values)

    # anatomical part
    anatomical_part = DatsObj("AnatomicalPart", [
        ("name", anatomy_name),
        ("identifier", anatomy_identifier),
        ("alternateIdentifiers", anatomy_alt_ids),
    ])

    # Both characteristics are always emitted; their value is None when the
    # corresponding variable was absent.
    subject_characteristics = [
        DatsObj("Dimension", [
            ("name", {"value": "Gender"}),
            ("description", "Gender of the subject"),
            ("values", [gender]),
        ]),
        DatsObj("Dimension", [
            ("name", {"value": "Age"}),
            ("description", "Age of the subject"),
            ("values", [age]),
        ]),
    ]

    human_taxon = util.get_taxon_human()
    subject_id = subj_var_values['SUBJECT_ID']
    dbgap_subject_id = subj_var_values['dbGaP_Subject_ID']
    sample_id = samp_var_values['SAMPLE_ID']
    dbgap_sample_id = samp_var_values['dbGaP_Sample_ID']

    study_title = study.get("title")

    # human experimental subject/patient
    subject_material = DatsObj("Material", [
        ("name", subject_id),
        ("identifier", {"identifier": subject_id}),
        ("alternateIdentifiers", [util.get_alt_id(dbgap_subject_id, "dbGaP")]),
        ("description", study_title + " subject " + subject_id),
        ("characteristics", subject_characteristics),
        ("taxonomy", human_taxon),
        ("roles", util.get_donor_roles()),
        ("extraProperties", subj_extra_props),
    ])

    # biological/tissue sample
    specimen_material = DatsObj("Material", [
        ("name", sample_id),
        ("identifier", {"identifier": sample_id}),
        ("alternateIdentifiers", [util.get_alt_id(dbgap_sample_id, "dbGaP")]),
        ("description",
         anatomy_name + " specimen collected from subject " + subject_id),
        ("taxonomy", human_taxon),
        ("roles", [OrderedDict([("value", "specimen"), ("valueIRI", "")])]),
        ("derivesFrom", [subject_material, anatomical_part]),
        ("extraProperties", samp_extra_props),
    ])

    # DNA extracted from tissue sample
    return DatsObj("Material", [
        ("name", "DNA from " + sample_id),
        ("description", "DNA extracted from " +
         anatomy_name + " specimen collected from subject " + subject_id),
        ("taxonomy", human_taxon),
        ("roles", [OrderedDict([("value", "DNA extract"),
                                ("valueIRI", "")])]),
        ("derivesFrom", [specimen_material]),
    ])