Пример #1
0
def language_string(s, lang="en"):
    """
    Wrap a literal in triple quotes and tag it with a language code.

    Parameters
    ----------
    s : string
        literal text to encode
    lang : string
        language code appended after the "@" sign, default="en"

    Returns
    -------
    tagged : string
        triple quoted Turtle literal with language encoding

    Example
    -------
    >>> print(language_string("Canada goose"))
    \"""Canada goose\"""@en
    """
    # return_string receives ['"'] as its replace list and ["'"] as its
    # replace_with list -- presumably so double quotes inside s cannot close
    # the triple-quoted literal early (confirm against return_string).
    cleaned = return_string(s, ['"'], ["'"])
    return "\"\"\"{0}\"\"\"@{1}".format(cleaned, lang)
Пример #2
0
def create_label(input_string):
    """
    Clean up a string and create a corresponding (shortened) label.

    Parameters
    ----------
    input_string : string
        arbitrary, non-empty string

    Returns
    -------
    output_string : string
        input_string after return_string() has been applied with double
        quotes and newlines replaced by empty strings
    label_string : string
        result of convert_string_to_label(output_string); both return
        values are '' when the cleaned string is empty

    Raises
    ------
    ValueError
        if input_string is None or an empty string
    TypeError
        if input_string is not a string

    """
    # Validate first so bad input fails fast with an accurate message
    # (previously every falsy value was reported as "None").  ValueError and
    # TypeError both derive from Exception, so callers that catch Exception
    # keep working.
    if input_string is None:
        raise ValueError('input_string is None!')
    if not isinstance(input_string, str):
        raise TypeError('input_string is not a string!')
    if not input_string:
        raise ValueError('input_string is empty!')

    from mhdb.spreadsheet_io import return_string
    from mhdb.spreadsheet_io import convert_string_to_label

    output_string = return_string(input_string,
                                  replace=['"', '\n'],
                                  replace_with=['', ''])
    if not output_string:
        # Nothing left after clean-up: no label can be derived.
        return '', ''

    return output_string, convert_string_to_label(output_string)
Пример #3
0
def build_rdf(uri_stem,
              rdf_type,
              label,
              comment=None,
              index=None,
              worksheet=None,
              worksheet2=None,
              equivalent_class_uri=None,
              subclassof_uri=None,
              property_domain=None,
              property_range=None,
              exclude=None,
              conceptualizations=None):
    """
    Build a generic RDF text document for a single subject.

    A trailing double quote in the label is backslash-escaped so that it
    does not merge with the closing triple quote of the literal.

    Parameters
    ----------
    uri_stem : string
        class URI stem; used as-is when it contains ":", otherwise it is
        prefixed with ":"
    rdf_type : string
        rdf:type, such as:
        ':Disorder',
        'owl:Class',
        'owl:ObjectProperty',
        'owl:DatatypeProperty',
        'owl:FunctionalProperty'
    label : string
        label
    comment : string
        comment (taken from the worksheet definition when in exclude)
    index : integer
        index to row of worksheet
    worksheet : pandas dataframe
        spreadsheet worksheet containing properties
    worksheet2 : pandas dataframe
        second worksheet containing references
    equivalent_class_uri : string
        equivalentClass URI (override worksheet)
    subclassof_uri : string
        subClassOf URI (override worksheet)
    property_domain : string
        property domain (override worksheet)
    property_range : string
        property range (override worksheet)
    exclude : list
        exclusions: an argument whose value is in this list is replaced by
        the worksheet value, and a value that is still in this list
        afterwards is left out of the output; default is an empty list
    conceptualizations : dictionary
        conceptualization scheme (i.e., OWL or SKOS) for a given prefix;
        default is an empty dictionary

    Returns
    -------
    rdf_string : string
        RDF triples

    """
    from mhdb.spreadsheet_io import return_string, get_cells

    # Create fresh containers per call: mutable default arguments are shared
    # between calls, and both objects are handed on to other helpers.
    if exclude is None:
        exclude = []
    if conceptualizations is None:
        conceptualizations = {}

    # Get worksheet contents:
    (class_uri, subclass_uri, prop_domain, prop_range,
     definition, definition_ref, definition_uri) = get_cells(
        worksheet, index, worksheet2, exclude, True)

    # If arguments not provided, get from worksheet:
    if comment in exclude:
        comment = definition
    if equivalent_class_uri in exclude:
        equivalent_class_uri = class_uri
    if subclassof_uri in exclude:
        subclassof_uri = subclass_uri
    if property_domain in exclude:
        property_domain = prop_domain
    if property_range in exclude:
        property_range = prop_range

    l_con = owl_or_skos(uri_stem, conceptualizations)

    # A stem that already contains ":" is written verbatim; otherwise it is
    # placed in the default (":") namespace.
    if ":" in uri_stem:
        subject = uri_stem
    else:
        subject = ":{0}".format(uri_stem)
    rdf_string = "\n### {0}\n{1} rdf:type {2} ".format(
        label, subject, rdf_type)

    if label not in exclude:
        # endswith() is safe for an empty label, where label[-1] would raise
        # IndexError.
        if label.endswith("\""):
            escaped_label = label[:-1] + "\\\""
        else:
            escaped_label = label
        rdf_string += (
            ";\n    rdfs:label \"\"\"{0}\"\"\"^^rdfs:Literal ".format(
                escaped_label))

    if comment not in exclude:
        if definition_ref in exclude:
            refstring = ""
        else:
            refstring = " [from: {0}]".format(
                return_string(definition_ref, ['"'], ["'"]))
        rdf_string += (
            ";\n    rdfs:comment \"\"\"{0}{1}\"\"\"^^rdfs:Literal ".format(
                return_string(comment, ['"'], ["'"]), refstring))

    if definition_uri not in exclude:
        rdf_string += (
            ";\n    rdfs:isDefinedBy \"{0}\"^^rdfs:Literal ".format(
                return_string(definition_uri)))

    if equivalent_class_uri not in exclude:
        # Called for every rdf_type (as before), although the result is only
        # used for non-ObjectProperty subjects.
        rel = owl_or_skos_prop(l_con, equivalent_class_uri,
                               conceptualizations, "equivalence")
        if rdf_type == 'owl:ObjectProperty':
            # The deeper indent of this line is kept so that the generated
            # text stays identical to earlier output.
            rdf_string += ";\n        owl:equivalentProperty {0} ".format(
                return_string(equivalent_class_uri))
        else:
            rdf_string += ";\n    {0} {1} ".format(
                rel, return_string(equivalent_class_uri))

    if subclassof_uri not in exclude:
        rel = owl_or_skos_prop(l_con, subclassof_uri, conceptualizations,
                               "subtype")
        # NOTE(review): for full URIs this only passes the value through
        # return_string(); wrapping in angle brackets may have been
        # intended -- confirm before changing the output.
        if not subclassof_uri.startswith(':') and "//" in subclassof_uri:
            subclassof_uri = "{0}".format(return_string(subclassof_uri))
        if rdf_type == 'owl:ObjectProperty':
            rdf_string += ";\n    rdfs:subPropertyOf {0} ".format(
                return_string(subclassof_uri))
        else:
            rdf_string += ";\n    {0} {1} ".format(
                rel, return_string(subclassof_uri))

    if property_domain not in exclude:
        rdf_string += ";\n    rdfs:domain :{0} ".format(
            return_string(property_domain))

    if property_range not in exclude:
        rdf_string += ";\n    rdfs:range :{0} ".format(
            return_string(property_range))

    # Terminate the statement.
    rdf_string += ".\n"

    return rdf_string