示例#1
0
    def __init__(
        self,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        citation_clearing: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Initialize the control statement parser.

        :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
        :param annotation_to_pattern: A dictionary of {annotation: regular expression pattern}
        :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        # Fall back to a fresh empty dictionary when a lookup is not supplied.
        self.annotation_to_term = annotation_to_term or {}
        self.annotation_to_pattern = annotation_to_pattern or {}
        self.annotation_to_local = annotation_to_local or {}

        # Parser state; everything starts out unset/empty.
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        # Calling a pyparsing element with no arguments returns a copy, so the
        # handlers are attached to per-instance copies. Setting the parse
        # action on the shared stubs directly would rebind them to the most
        # recently constructed parser and break any earlier instance.
        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub().setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub().setParseAction(
            self.handle_set_evidence)

        # `<key> =` prefix shared by the single-value and list-value forms.
        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        # addParseAction keeps the key handler attached above and runs the
        # unset handler in addition to it.
        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        # Copy for the same reason as the SET stubs above.
        self.unset_all = unset_all().setParseAction(self.handle_unset_all)

        # MatchFirst tries the alternatives in order, so the reserved forms
        # are listed before the generic annotation commands.
        self.set_statements = set_tag + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])

        self.unset_statements = unset_tag + MatchFirst([
            self.unset_all,
            self.unset_citation,
            self.unset_evidence,
            self.unset_statement_group,
            self.unset_command,
            self.unset_list,
        ])

        self.language = self.set_statements | self.unset_statements

        super(ControlParser, self).__init__(self.language)
示例#2
0
    def __init__(self,
                 manager,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 default_namespace=None,
                 allow_redefinition=False):
        """Build the metadata parser and its pyparsing grammar.

        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: Pre-loaded, enumerated namespaces as
                                {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: Pre-loaded, enumerated annotations as
                                {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: Pre-loaded, regular expression namespaces as
                                {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: Pre-loaded, regular expression annotations as
                                {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        :param bool allow_redefinition: Negated and stored as ``disallow_redefinition``
        """
        #: The definition cache manager used by this metadata parser
        self.manager = manager

        self.disallow_redefinition = not allow_redefinition

        #: Cached {namespace keyword: {name: encoding}}
        self.namespace_dict = namespace_dict if namespace_dict is not None else {}
        #: Cached {annotation keyword: set of values}
        self.annotation_dict = annotation_dict if annotation_dict is not None else {}
        #: {namespace keyword: regular expression string}
        self.namespace_regex = namespace_regex if namespace_regex is not None else {}
        #: Names usable without a namespace (copied into a new set), or None
        self.default_namespace = None if default_namespace is None else set(default_namespace)
        #: {annotation keyword: regular expression string}
        self.annotation_regex = annotation_regex if annotation_regex is not None else {}

        #: URLs of namespaces that can't be cached
        self.uncachable_namespaces = set()

        #: The document metadata
        self.document_metadata = {}

        #: {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: Annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        # Document metadata line: key/value pair after the document keyword.
        document_parts = [
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ]
        self.document = And(document_parts)

        # Common prefix of every namespace definition.
        namespace_prefix = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'),
            as_tag,
        ])
        self.namespace_url = And([namespace_prefix, url_tag, quote('url')])
        self.namespace_owl = And([
            namespace_prefix,
            owl_tag,
            Optional(function_tags('functions')),
            quote('url'),
        ])
        self.namespace_pattern = And([
            namespace_prefix,
            Suppress(BEL_KEYWORD_PATTERN),
            quote('value'),
        ])

        # Common prefix of every annotation definition.
        annotation_prefix = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'),
            as_tag,
        ])
        self.annotation_url = And([annotation_prefix, url_tag, quote('url')])
        self.annotation_owl = And([annotation_prefix, owl_tag, quote('url')])
        self.annotation_list = And([
            annotation_prefix,
            list_tag,
            delimited_quoted_list('values'),
        ])
        self.annotation_pattern = And([
            annotation_prefix,
            Suppress(BEL_KEYWORD_PATTERN),
            quote('value'),
        ])

        # Attach one handler to each grammar element.
        for element, handler in (
            (self.document, self.handle_document),
            (self.namespace_url, self.handle_namespace_url),
            (self.namespace_owl, self.handle_namespace_owl),
            (self.namespace_pattern, self.handle_namespace_pattern),
            (self.annotation_url, self.handle_annotations_url),
            (self.annotation_owl, self.handle_annotation_owl),
            (self.annotation_list, self.handle_annotation_list),
            (self.annotation_pattern, self.handle_annotation_pattern),
        ):
            element.setParseAction(handler)

        # Alternatives are tried in this order.
        alternatives = [
            self.document,
            self.namespace_url,
            self.namespace_owl,
            self.annotation_url,
            self.annotation_list,
            self.annotation_owl,
            self.annotation_pattern,
            self.namespace_pattern,
        ]
        self.language = MatchFirst(alternatives).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)
示例#3
0
# -*- coding: utf-8 -*-

"""Parse the TSV template format with PyParsing then make a new parser."""

from pyparsing import Group, MatchFirst, Optional, Suppress, delimitedList, nestedExpr, pyparsing_common as ppc

from table_validator import parse_tsv

# A single ``name=value`` pair; the ``=`` is dropped from the results.
keyword = ppc.identifier + Suppress('=') + ppc.identifier
# A delimited list of keyword pairs wrapped in nestedExpr's default delimiters.
te_keywords = nestedExpr(content=delimitedList(keyword))
# An identifier, optionally followed by its keyword arguments.
te_content = ppc.identifier + Optional(te_keywords)
# A template command written between curly braces.
template_command = nestedExpr(opener='{', closer='}', content=te_content)

# MatchFirst commits to the first alternative that matches, so the
# "command followed by text" form has to come before the bare command.
# With the bare command first, the combined form could never be selected and
# any text after a command was silently left unparsed.
cell = MatchFirst([
    Group(template_command)('command') + ppc.identifier('text'),
    Group(template_command)('command'),
    ppc.identifier('text'),
])

if __name__ == '__main__':
    with open('../../tests/repeat_template.tsv') as file:
        t = [
            [
                cell.parseString(col)
                for col in row
            ]
            for row in parse_tsv(file)
        ]

    for i, row in enumerate(t):
        for j, col in enumerate(row):
示例#4
0
    def __init__(
        self,
        manager,
        namespace_to_term_to_encoding: Optional[
            NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_redefinition: bool = False,
        skip_validation: bool = False,
        upgrade_urls: bool = False,
    ) -> None:
        """Build a metadata parser and its pyparsing grammar.

        :param manager: A cache manager
        :param namespace_to_term_to_encoding:
          An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}}
        :param namespace_to_pattern:
          A regular expression namespace mapping from {namespace keyword: regex string}
        :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values}
        :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string}
        :param annotation_to_local: Annotation mapping from {annotation keyword: set of values}
        :param default_namespace: A set of strings that can be used without a namespace
        :param allow_redefinition: Negated and stored as ``disallow_redefinition``
        :param skip_validation: If true, don't download and cache namespaces/annotations
        :param upgrade_urls: Stored as-is on the instance
        """
        #: The definition cache manager used by this metadata parser
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition
        self.skip_validation = skip_validation
        self.upgrade_urls = upgrade_urls

        #: Cached {namespace keyword: {(identifier, name): encoding}}
        self.namespace_to_term_to_encoding = (
            namespace_to_term_to_encoding if namespace_to_term_to_encoding else {}
        )
        #: URLs of namespaces that can't be cached
        self.uncachable_namespaces = set()
        #: {namespace keyword: regular expression string}
        self.namespace_to_pattern = namespace_to_pattern if namespace_to_pattern else {}
        #: Names usable without a namespace (copied into a new set), or None
        self.default_namespace = None if default_namespace is None else set(default_namespace)

        #: Cached {annotation keyword: set of values}
        self.annotation_to_term = annotation_to_term if annotation_to_term else {}
        #: {annotation keyword: regular expression string}
        self.annotation_to_pattern = annotation_to_pattern if annotation_to_pattern else {}
        #: Cached {annotation keyword: set of values}
        self.annotation_to_local = annotation_to_local if annotation_to_local else {}

        #: The document metadata
        self.document_metadata = {}

        #: {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}

        # Document metadata line: key/value pair after the document keyword.
        document_parts = [
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ]
        self.document = And(document_parts)

        # Common prefix of every namespace definition.
        namespace_prefix = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'),
            as_tag,
        ])
        self.namespace_url = And([namespace_prefix, url_tag, quote('url')])
        self.namespace_pattern = And([
            namespace_prefix,
            Suppress(BEL_KEYWORD_PATTERN),
            quote('value'),
        ])

        # Common prefix of every annotation definition.
        annotation_prefix = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'),
            as_tag,
        ])
        self.annotation_url = And([annotation_prefix, url_tag, quote('url')])
        self.annotation_list = And([
            annotation_prefix,
            list_tag,
            delimited_quoted_list('values'),
        ])
        self.annotation_pattern = And([
            annotation_prefix,
            Suppress(BEL_KEYWORD_PATTERN),
            quote('value'),
        ])

        # Attach one handler to each grammar element.
        for element, handler in (
            (self.document, self.handle_document),
            (self.namespace_url, self.handle_namespace_url),
            (self.namespace_pattern, self.handle_namespace_pattern),
            (self.annotation_url, self.handle_annotations_url),
            (self.annotation_list, self.handle_annotation_list),
            (self.annotation_pattern, self.handle_annotation_pattern),
        ):
            element.setParseAction(handler)

        # Alternatives are tried in this order.
        alternatives = [
            self.document,
            self.namespace_url,
            self.annotation_url,
            self.annotation_list,
            self.annotation_pattern,
            self.namespace_pattern,
        ]
        self.language = MatchFirst(alternatives).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)
示例#5
0
    def __init__(self,
                 annotation_dict=None,
                 annotation_regex=None,
                 citation_clearing=True,
                 required_annotations=None):
        """Initialize the control statement parser.

        :param annotation_dict: A dictionary of {annotation: set of valid values} for parsing
        :type annotation_dict: Optional[dict[str,set[str]]]
        :param annotation_regex: A dictionary of {annotation: regular expression string}
        :type annotation_regex: Optional[dict[str,str]]
        :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param Optional[list[str]] required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        self._annotation_dict = {} if annotation_dict is None else annotation_dict
        self._annotation_regex = {} if annotation_regex is None else annotation_regex
        # NOTE(review): this reads ``self.annotation_regex`` (no underscore),
        # presumably an accessor for ``_annotation_regex`` defined elsewhere on
        # the class -- confirm. Each pattern string is compiled once up front.
        self._annotation_regex_compiled = {
            keyword: re.compile(value)
            for keyword, value in self.annotation_regex.items()
        }

        # Parser state; everything starts out unset/empty.
        self.statement_group = None
        self.citation = {}
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        # Calling a pyparsing element with no arguments returns a copy, so the
        # handlers are attached to per-instance copies rather than to the
        # shared stubs.
        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub().setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub().setParseAction(
            self.handle_set_evidence)

        # `<key> =` prefix shared by the single-value and list-value forms.
        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        # addParseAction keeps the key handler attached above and runs the
        # unset handler in addition to it.
        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        # Copy before attaching the handler, as for the SET stubs above.
        # Setting the parse action on the shared element directly would rebind
        # it to the most recently constructed parser.
        self.unset_all = unset_all().setParseAction(self.handle_unset_all)

        # MatchFirst tries the alternatives in order, so the reserved forms
        # are listed before the generic annotation commands.
        self.set_statements = set_tag + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])

        self.unset_statements = unset_tag + MatchFirst([
            self.unset_all, self.unset_citation, self.unset_evidence,
            self.unset_statement_group, self.unset_command, self.unset_list
        ])

        self.language = self.set_statements | self.unset_statements

        super(ControlParser, self).__init__(self.language)