Python Base примеры, vertnet.parsers.base.Base Python примеры использования

Пример #1

0

Показать файл

Файл: forearm_length.py Проект: rafelafrance/traiter_vertnet

# Forearm-length parser: a `Base` named after this module, built from a list
# of vocabulary rules (terms, parts, groupers) followed by producers.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
FOREARM_LENGTH = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Units are in the key, like: ForearmLengthInMillimeters
        VOCAB.term(
            "key_with_units",
            r"""( forearm \s* )? \s* ( length | len ) \s* in \s*
                    (?P<units> millimeters | mm )
            """,
        ),
        # Standard keywords that indicate a forearm length follows
        VOCAB.term(
            "key",
            r"""
                forearm ( \s* ( length | len | l ) )?
                | fore? \s? [.]? \s? a
                | fa
            """,
        ),
        # Some patterns require a separator
        VOCAB.part("sep", r" [;,] | $ ", capture=False),
        VOCAB.grouper("noise", " word dash ".split()),
        # Handle fractional values like: forearm 9/16"
        VOCAB.producer(
            fraction,
            [
                "key len_fraction units",  # E.g.: forearm = 9/16 inches
                "key len_fraction",  # E.g.: forearm = 9/16
            ],
        ),
        # A typical forearm notation
        VOCAB.producer(
            simple,
            [
                "key_with_units len_range",  # E.g.: forearmLengthInMM=9-10
                "key noise? len_range units ",  # E.g.: forearmLength=9-10 mm
                "key noise? len_range",  # Missing units like: forearm 9-10
                "key dash number units?",  # Key, a dash, then a single number
                "number key units?",  # The number comes before the key
            ],
        ),
        # Shorthand notations, handled by `shorthand_length` with the
        # measurement name fixed to "shorthand_fa"
        VOCAB.producer(
            partial(shorthand_length, measurement="shorthand_fa"),
            [
                "shorthand",
                "shorthand_bats",
            ],
        ),
    ],
)

Пример #2

0

Показать файл

# Nipple-count parser: a `Base` named after this module. The rules list
# registers vocabulary entries and groupers first, then the producers that
# hand matches to `typed` or `convert`.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
NIPPLE_COUNT = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Digits-dash-digits strings get their own "id" term
        VOCAB.term("id", r" \d+-\d+ "),
        # Location adjectives and their abbreviations
        VOCAB.term("adj", r""" inguinal ing pectoral pec pr """.split()),
        VOCAB.part("number", r" number | no | [#] "),
        VOCAB.part("eq", r" is | eq | equals? | [=] "),
        # Skip arbitrary words
        VOCAB["word"],
        VOCAB["sep"],
        # A count is an integer or "none" that is not followed by a side
        VOCAB.grouper("count", " (?: integer | none )(?! side ) "),
        VOCAB.grouper("modifier", "adj visible".split()),
        # Sequences like "number = 5" are grouped under the name "skip"
        VOCAB.grouper("skip", " number eq? integer "),
        # Two counts, each followed by a modifier, then the nipple key
        VOCAB.producer(
            typed,
            """ (?P<notation>
                    (?P<value1> count) modifier
                    (?P<value2> count) modifier
                ) nipple
            """,
        ),
        # The nipple key comes first, presumably like: mammae 1:2 = 6
        VOCAB.producer(
            convert,
            """ nipple op?
                (?P<notation> count modifier?
                    op? count modifier?
                    (eq (?P<value> count))? )
            """,
        ),
        # Eg: 1:2 = 6 mammae
        VOCAB.producer(
            convert,
            """ (?P<notation> count modifier? op? count modifier?
                (eq (?P<value> count))? ) nipple """,
        ),
        # Eg: 6 mammae
        VOCAB.producer(convert, """ (?P<value> count ) modifier? nipple """),
        # Eg: nipples 5
        VOCAB.producer(convert, """ nipple (?P<value> count ) """),
    ],
)

Пример #3

0

Показать файл

Файл: nipples_enlarged.py Проект: rafelafrance/traiter_vertnet

def convert(token):
    """Build a Trait stating whether the matched text reports enlarged nipples.

    The trait value is "enlarged" when the token's ``pos`` group is truthy
    and "not enlarged" otherwise. The trait's start and end are copied from
    the token.
    """
    if token.group.get("pos"):
        state = "enlarged"
    else:
        state = "not enlarged"
    return Trait(value=state, start=token.start, end=token.end)


# Nipples-enlarged parser: a `Base` named after this module. Every producer
# hands its match to `convert`, which reports "enlarged" when the `pos` group
# matched and "not enlarged" otherwise.
NIPPLES_ENLARGED = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["conj"],
        VOCAB.part("separator", r' [;"?/,] '),
        # Three-letter abbreviations: o or c, then e (enlarged) or s (not
        # enlarged), then l or n
        VOCAB.term("enlarged_abbrev", r"[oc]e[ln]"),
        VOCAB.term("not_enlarged_abbrev", r"[oc]s[ln]"),
        VOCAB.term("false", """ false """),

        # Positive patterns: captured in the `pos` group
        VOCAB.producer(convert, """ (?P<pos> nipple enlarged ) """),
        VOCAB.producer(convert, """ (?P<pos> enlarged nipple ) """),
        VOCAB.producer(convert, """ (?P<pos> enlarged_abbrev ) """),

        # Negative patterns: captured in the `neg` group
        VOCAB.producer(convert, """ (?P<neg> none nipple ) """),
        VOCAB.producer(convert, """ (?P<neg> nipple none ) """),
        VOCAB.producer(convert, """ (?P<neg> nipple not_enlarged ) """),
        VOCAB.producer(convert, """ (?P<neg> not_enlarged false? nipple ) """),
        VOCAB.producer(convert, """ (?P<neg> not_enlarged_abbrev ) """),
    ],
)

Пример #4

0

Показать файл

Файл: life_stage.py Проект: rafelafrance/traiter_vertnet

# Life-stage parser: a `Base` named after this module. Vocabulary entries
# come first, then producers running from the most specific keyed patterns
# down to a bare intrinsic word or time expression.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
LIFE_STAGE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # JSON keys for life stage
        VOCAB.term(
            "json_key",
            [
                r" life \s* stage \s* (remarks?)? ",
                r" age \s* class ",
                # TIME_OPTIONS is spliced into the regex as the units group
                r" age \s* in \s* (?P<time_units> {}) ".format(TIME_OPTIONS),
                r" age ",
            ],
        ),
        # These words are life stages without a keyword indicator
        VOCAB.term(
            "intrinsic",
            [
                r" yolk \s? sac ",
                r" young [\s-]? of [\s-]? the [\s-]? year ",
                r" adult \s* young ",
                r" young \s* adult ",
            ] + """
                ads? adulte?s?
                chicks?
                fledgelings? fleglings? fry
                hatched hatchlings?
                imagos? imms? immatures?
                jeunes? juvs? juveniles? juvéniles?
                larvae? larvals? larves? leptocephales? leptocephalus
                matures? metamorphs?
                neonates? nestlings? nulliparous
                premetamorphs?
                sub-adults? subads? subadulte?s?
                tadpoles? têtard
                yearlings? yg ygs young
            """.split(),  # Each whitespace-separated word becomes one pattern
        ),
        # This indicates that the following words are NOT a life stage
        VOCAB.term("skip", r" determin \w* "),
        # Compound words separated by dashes or slashes
        # E.g. adult/juvenile or over-winter
        VOCAB.part("joiner", r" \s* [/-] \s* "),
        # Use this to find the end of a life stage pattern
        VOCAB.part("separator", r' [;,"?] | $ '),
        # For life stages with numbers as words in them
        VOCAB["ordinals"],
        VOCAB["time_units"],
        VOCAB.part("after", "after"),
        VOCAB.part("hatching", "hatching"),
        # Match any word
        VOCAB.part("word", r" \b \w [\w?.-]* (?! [./-] ) "),
        # A time expression: optional "after", an ordinal or "hatching",
        # then a time unit
        VOCAB.grouper("as_time", " after? (ordinals | hatching) time_units"),
        # E.g.: life stage juvenile/yearling
        VOCAB.producer(
            convert,
            "json_key (?P<value> ( intrinsic | word ) joiner intrinsic )"),
        # E.g.: life stage young adult
        VOCAB.producer(convert,
                       "json_key (?P<value> ( intrinsic | word ) intrinsic )"),
        # E.g.: life stage yearling
        VOCAB.producer(convert, "json_key (?P<value> intrinsic )"),
        # A sequence of words bracketed by a keyword and a separator
        # E.g.: LifeStage Remarks: 5-6 wks;
        VOCAB.producer(
            convert,
            """ json_key (?P<value> ( intrinsic | word | joiner ){1,5} )
            separator """,
        ),
        # E.g.: LifeStage = 1st month
        VOCAB.producer(convert, "json_key (?P<value> as_time )"),
        # E.g.: Juvenile
        VOCAB.producer(convert, "(?P<value> intrinsic )"),
        # E.g.: 1st year
        VOCAB.producer(convert, "(?P<value> as_time )"),
    ],
)

Пример #5

0

Показать файл

Файл: lactation_state.py Проект: rafelafrance/traiter_vertnet

    return trait


# Lactation-state parser: a `Base` named after this module. Producers capture
# a match in either a `pos` or a `neg` group before handing it to `convert`.
LACTATION_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # The lactation keyword and a list of observed spellings, each of
        # which must end at a word boundary.
        # NOTE(review): `lactacting` is listed twice in the alternation; the
        # repeat looks redundant.
        VOCAB.part(
            "lactating",
            r""" (
                lactating | lactation | lactated | lactate | lact
                | lactaing | lactacting | lactataing | lactational
                | oelact | celact | lactati | lactacting | lactatin
                | lactatting | lactatng
                | nursing | suckling
                ) \b
            """,
        ),
        # Three-letter abbreviations: o or c, then e/s/m, then l (lactating)
        # or n (not lactating)
        VOCAB.term("lactating_abbrev", r"[oc][esm]l"),
        VOCAB.term("not_lactating_abbrev", r"[oc][esm]n"),
        VOCAB.term("post", r""" post | finished """),

        # Separates measurements
        VOCAB.part("separator", r' [;"/] '),
        # Positive patterns: captured in the `pos` group
        VOCAB.producer(convert, """ (?P<pos> lactating ) """),
        VOCAB.producer(convert, """ (?P<pos> lactating_abbrev ) """),
        # Negative patterns: "none" or "post" on either side of the keyword,
        # or the negative abbreviation
        VOCAB.producer(convert, """ (?P<neg> (none | post) lactating ) """),
        VOCAB.producer(convert, """ (?P<neg> lactating (none | post) ) """),
        VOCAB.producer(convert, """ (?P<neg> not_lactating_abbrev ) """),
    ],
)

Пример #6

0

Показать файл

Файл: vagina_state.py Проект: rafelafrance/traiter_vertnet

"""Parse vagina state notations."""

from traiter.old.vocabulary import Vocabulary

import vertnet.pylib.shared_reproductive_patterns as patterns
from vertnet.parsers.base import Base, convert

# Vocabulary for this parser, seeded from the shared reproductive patterns.
VOCAB = Vocabulary(patterns.VOCAB)

# Vagina-state parser: a `Base` named after this module. Every producer
# captures the matched notation in a `value` group and hands it to `convert`.
#
# The anatomical term is spelled out in full here. A starred-out spelling is
# not a valid regular expression (it contains a repeated `*` quantifier) and
# cannot be used as a rule name inside the producer patterns.
VAGINA_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # The organ keyword. The negative lookbehind rejects a match that
        # directly follows "sal", presumably so "salvage" does not hit `vag`.
        VOCAB.part("vagina", r""" (?<! sal ) ( vagina | vag | vulva ) """),
        # Abbreviated notations; each whitespace-separated item is a pattern
        VOCAB.term("abbrev", r""" ov cv [oc][sme][ln] vc vo """.split()),
        # Words for a closed state
        VOCAB.part(
            "closed",
            r"""
                closed | imperforated | imperf | cerrada | non [-\s] perforated
                | unperforate | non  [-\s] perf | clsd | imp
            """,
        ),
        # Words for an open state
        VOCAB.part("open", r""" open | perforated? | perf | abrir """),
        # Other recorded states
        VOCAB.part("other", r""" swollen | plugged | plug | sealed """),
        # Any of the three state families counts as a state
        VOCAB.grouper("state", """ closed | open | other """),
        # Keyword first, then an optional "partially", then the state
        VOCAB.producer(convert, """ (?P<value> vagina partially? state ) """),
        # State first, then the keyword, optionally followed by another state
        VOCAB.producer(convert, """ (?P<value> state vagina state? ) """),
        # A bare state or abbreviation, optionally followed by the keyword
        VOCAB.producer(convert,
                       """ (?P<value> ( state | abbrev )  vagina? ) """),
    ],
)

Пример #7

0

Показать файл

Файл: total_length.py Проект: rafelafrance/traiter_vertnet

# Total-length parser: a `Base` named after this module, with a `fix_up`
# callable and a rules list of vocabulary entries, groupers, then producers.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
TOTAL_LENGTH = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    fix_up=fix_up,
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Units are in the key, like: TotalLengthInMillimeters
        VOCAB.term(
            "key_with_units",
            r"""
                ( total | snout \s* vent | head \s* body | fork ) \s*
                ( length | len )? \s* in \s* (?P<units> millimeters | mm )
            """,
        ),
        # Various total length keys
        VOCAB.part(
            "len_key",
            r"""
                t \s* [o.]? \s* l [._]? (?! [a-z] )
                | total  [\s-]* length [\s-]* in
                | ( total | max | standard ) [\s-]* lengths? \b
                | meas [\s*:]? \s* length [\s(]* [l] [)\s:]*
                | meas ( [a-z]* )? \.? : \s* l (?! [a-z.] )
                | s \.? \s? l \.? (?! [a-z.] )
                | label [\s.]* lengths? \b
                | ( fork | mean | body ) [\s-]* lengths? \b
                | s \.? \s? v \.? \s? l \.? (?! [a-z.] )
                | snout [\s-]* vent [\s-]* lengths? \b
            """,
        ),
        # Words that indicate we don't have a total length
        VOCAB.term("skip", " horns? tag ".split()),
        # The word length on its own. Make sure it isn't preceded by a letter.
        # NOTE(review): the lookbehind is variable-width (`\s*`), which the
        # standard `re` module rejects -- presumably compiled with the
        # third-party `regex` module; confirm.
        VOCAB.part(
            "ambiguous",
            r""" (?<! [a-z] \s* ) (?P<ambiguous_key> lengths? ) """,
        ),
        # We don't know if this is a length until we see the units
        VOCAB.part("key_units_req", "measurements? body total".split()),
        # The abbreviation key, just: l, and only when it is followed by one
        # of : = -. This can be a problem.
        VOCAB.part("char_key", r" \b (?P<ambiguous_key> l ) (?= [:=-] ) "),
        # Some patterns require a separator
        VOCAB["semicolon"],
        VOCAB["comma"],
        # Any of the key forms, optionally followed by an equals sign or dash
        VOCAB.grouper(
            "key",
            """ ( key_with_units | len_key | ambiguous | char_key ) ( eq | dash )? """,
        ),
        # A range, or a number with optional length units that is not
        # followed by mass units
        VOCAB.grouper(
            "value",
            """ len_range | number (?P<units> len_units )? (?! mass_units ) """,
        ),
        # Like "value" but a lone number must carry length units
        VOCAB.grouper(
            "value_units",
            """ len_range | number (?P<units> len_units ) """,
        ),
        # E.g.: 10 to 11 inches TL
        VOCAB.producer(simple, "value (?P<units> len_units ) key"),
        # Key, then value, optionally followed by another key
        VOCAB.producer(simple, """ key value key? """),
        # Units appear between the key and the value
        VOCAB.producer(simple, """ key (?P<units> len_units ) value """),
        # Keys that are only accepted when the value has units
        VOCAB.producer(
            simple,
            """ key_units_req ( value_units | triple_key ) """,
        ),
        # E.g.: total length 4 feet 7 inches
        VOCAB.producer(compound, " key? compound_len "),
        # Handle fractional values like: total length 9/16"
        # E.g.: total = 9/16 inches
        VOCAB.producer(fraction,
                       "key_units_req len_fraction (?P<units> len_units )"),
        # E.g.: svl 9/16 inches
        VOCAB.producer(fraction, "key len_fraction (?P<units> len_units )"),
        # E.g.: len 9/16 in
        VOCAB.producer(
            fraction,
            """ (?P<ambiguous_key> ambiguous) len_fraction (?P<units> len_units ) """,
        ),
        # E.g.: total length: 10-29-39 10-11
        VOCAB.producer(
            simple,
            """ ( key | key_units_req ) shorthand_triple? len_range """,
        ),
        # E.g.: L 12.4 cm
        VOCAB.producer(
            simple,
            """
            char_key value (?P<units> len_units )? (?! mass_units ) """,
        ),
        # Shorthand notation, handled by `numeric.shorthand_length` with the
        # measurement name fixed to "shorthand_tl"
        VOCAB.producer(
            partial(numeric.shorthand_length, measurement="shorthand_tl"),
            # First pattern: with a key. Second pattern: without a key.
            ["( key | key_units_req ) shorthand", "shorthand"],
        ),
        # Handle a truncated shorthand notation
        # NOTE(review): "key shorthand" and "shorthand" repeat patterns that
        # the previous producer already covers.
        VOCAB.producer(
            partial(numeric.shorthand_length, measurement="shorthand_tl"),
            [
                "key shorthand",
                "shorthand",
                "key shorthand_bats",
                "shorthand_bats",
                """ ( key | key_units_req ) shorthand_triple
                    (?! shorthand | len_range )
                """,
            ],
        ),
    ],
)

Пример #8

0

Показать файл

Файл: ovaries_state.py Проект: rafelafrance/traiter_vertnet

# Ovaries-state parser: a `Base` named after this module. Groupers build up
# "ovaries" and "values" phrases; producers then pair them in either order.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
OVARIES_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB.term("other", """ sev somewhat few """.split()),
        # Skip words
        VOCAB.term("skip", " womb nullip ".split()),
        # VOCAB['comma'],
        VOCAB.part("sep", r" [;\(] "),
        # E.g.: ovaries and uterine horns
        # Or:   ovaries and fallopian tubes
        VOCAB.grouper(
            "ovaries",
            r"""
                ovary ( ( and? uterus horns? ) | and? fallopian )?
            """,
        ),
        # E.g.: covered in copious fat
        VOCAB.grouper("coverage", " covered word{0,2} fat "),
        # E.g.: +corpus luteum
        VOCAB.grouper("luteum", " sign? corpus? (alb | lut) "),
        # Any single rule name that can describe the ovaries.
        # NOTE(review): `luteum` appears twice in this list; the repeat
        # looks redundant.
        VOCAB.grouper(
            "value_words",
            """
                size mature coverage luteum color corpus other active destroyed alb
                visible developed cyst texture fallopian luteum
            """.split(),
        ),
        # One value word, optionally preceded by "non" or by another value
        # word joined with "and" or a comma
        VOCAB.grouper(
            "values",
            """
            ( value_words ( and | comma ) | non )?
            value_words """,
        ),
        # Ovaries first, up to five filler tokens, then the values
        VOCAB.producer(
            convert,
            """
                side? ovaries side? ( word | number | comma ){0,5}
                (?P<value> values+ )
            """,
        ),
        # Values first, up to five filler tokens, then the ovaries; neither
        # the side nor the ovaries may directly follow a comma
        VOCAB.producer(
            convert,
            """
            (?P<value> values+ ) ( word | number | comma ){0,5}
               ( (?<! comma ) side )? (?<! comma ) ovaries """,
        ),
        # Get left and right side measurements
        # E.g.: ovaries: R 2 c. alb, L sev c. alb
        # NOTE(review): the `side` and `value` group names are each used
        # twice in one pattern, which the standard `re` module rejects --
        # presumably compiled with the third-party `regex` module; confirm.
        VOCAB.producer(
            double,
            r"""
                ovaries
                    (?P<side> side) number? (?P<value> word? values+ )
                    ( and | comma )?
                    (?P<side> side) number? (?P<value> word? values+ )
            """,
        ),
    ],
)

Пример #9

0

Показать файл

Файл: scrotal_state.py Проект: rafelafrance/traiter_vertnet

# Scrotal-state parser: a `Base` named after this module. Producers capture
# a match in either a `pos` or a `neg` group before handing it to `convert`.
SCROTAL_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # Abbreviations; each whitespace-separated item is one pattern
        VOCAB.term("testes_abbrev", "tes ts tnd td tns ta t".split()),
        VOCAB.term("scrotal_abbrev_pos", "sc".split()),
        VOCAB.term("scrotal_abbrev_neg", "ns ".split()),

        # If possible exclude length. Ex: reproductive data=testes: 11x7 mm
        VOCAB.grouper("length", "cross len_units?"),
        # Positive patterns: captured in the `pos` group
        VOCAB.producer(convert, """ (?P<pos> scrotal_pos ) """),
        VOCAB.producer(
            convert,
            """ (?P<pos> (testes | testes_abbrev | label) scrotal_abbrev_pos ) """
        ),
        VOCAB.producer(
            convert,
            """ (?P<pos> scrotal_abbrev_pos (testes | testes_abbrev) ) """),
        # Negative patterns: captured in the `neg` group
        VOCAB.producer(convert, """ (?P<neg> scrotal_neg ) """),
        VOCAB.producer(convert, """ (?P<neg> scrotal_pos none ) """),
        VOCAB.producer(convert, """ (?P<neg> none scrotal_pos ) """),
        VOCAB.producer(
            convert,
            """ (?P<neg> (testes | testes_abbrev | label) scrotal_abbrev_neg ) """
        ),
        # NOTE(review): unlike the matching `pos` pattern above, the `neg`
        # group closes before the testes term here, so only the abbreviation
        # is captured in the group.
        VOCAB.producer(
            convert,
            """ (?P<neg> scrotal_abbrev_neg ) (testes | testes_abbrev) """),
    ],
)

Пример #10

0

Показать файл

Файл: ovaries_size.py Проект: rafelafrance/traiter_vertnet

# Ovary-size parser: a `Base` named after this module. Vocabulary entries
# and groupers come first, then `double` producers for two-sided
# measurements and `convert` producers for single measurements.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
OVARY_SIZE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # A key with units, like: gonadLengthInMM
        VOCAB.term(
            "key_with_units",
            r"""
                (?P<ambiguous_key> gonad ) \s*
                    (?P<dim> length | len | width ) \s* in \s*
                    (?P<len_units> millimeters | mm )
            """,
        ),
        # A cross measurement, or a number with optional length units
        VOCAB.grouper("value", " cross | number len_units? "),
        # E.g.: active, Or: immature
        VOCAB.grouper("state",
                      "active mature destroyed visible developed".split()),
        # Male or female ambiguous, like: gonadLength1
        VOCAB.grouper(
            "ambiguous",
            """
                ambiguous_key dim_side
                | side ambiguous_key dimension
                | ambiguous_key dimension
            """,
        ),
        # These patterns contain measurements to both left & right ovaries
        # E.g.: reproductive data: ovaries left 10x5 mm, right 10x6 mm
        VOCAB.producer(double, """ label ovary side_cross """),
        # As above but without the ovaries marker:
        # E.g.: reproductive data: left 10x5 mm, right 10x6 mm
        VOCAB.producer(double, """label side_cross"""),
        # Has the ovaries marker but is lacking the label
        # E.g.: ovaries left 10x5 mm, right 10x6 mm
        VOCAB.producer(double, """ ovary side_cross """),
        # A typical ovary size notation
        # E.g.: reproductive data: ovaries 10x5 mm
        VOCAB.producer(convert, " label ovary value "),
        # E.g.: reproductive data: left ovaries 10x5 mm
        VOCAB.producer(convert, " label side ovary value "),
        # E.g.: left ovaries 10x5 mm
        VOCAB.producer(convert, " side ovary value "),
        # May have a few words between the label and the measurement
        VOCAB.producer(
            convert,
            """
                label ( ovary | state | word | sep ){0,3}
                ( ovary | state ) value
            """,
        ),
        # Handles: gonadLengthInMM 4x3
        # And:     gonadLength 4x3
        VOCAB.producer(convert, "( ambiguous | key_with_units ) value"),
        # E.g.: gonadLengthInMM 6 x 8
        VOCAB.producer(
            convert,
            """
                ( key_with_units | ambiguous )
                ( ovary | state | word | sep ){0,3}
                ( ovary | state ) value
            """,
        ),
        # Anchored by ovaries but with words between
        VOCAB.producer(convert,
                       "ovary ( state | word | sep ){0,3} state value"),
        # Anchored by ovaries but with only one word in between
        # E.g.: ovaries 9mm
        VOCAB.producer(convert, "side? ovary ( state | word ) value"),
        # E.g.: Ovaries 5 x 3
        VOCAB.producer(convert, "side? ovary value"),
    ],
)

Пример #11

0

Показать файл

Файл: pregnancy_state.py Проект: rafelafrance/traiter_vertnet

from vertnet.parsers.base import Base
from vertnet.pylib.trait import Trait

# Vocabulary for this parser, seeded from `patterns.VOCAB`.
VOCAB = Vocabulary(patterns.VOCAB)


def convert(token):
    """Build a Trait stating whether the matched text reports a pregnancy.

    The trait value is "pregnant" when the token's ``pos`` group is truthy
    and "not pregnant" otherwise. The trait's start and end are copied from
    the token.
    """
    labels = {True: "pregnant", False: "not pregnant"}
    is_positive = bool(token.group.get("pos"))
    return Trait(
        value=labels[is_positive],
        start=token.start,
        end=token.end,
    )


# Pregnancy-state parser: a `Base` named after this module. The two negated
# patterns are listed before the bare positive one; `convert` reports
# "pregnant" only when the `pos` group matched.
PREGNANCY_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # Spellings of the pregnancy keyword; each whitespace-separated item
        # is one pattern
        VOCAB.term(
            "pregnant",
            r""" prega?n?ant pregnan preg pregnancy pregnancies gravid """.
            split(),
        ),
        VOCAB.part("separator", r' [;,"] '),
        # Negative patterns: "none" on either side of the keyword
        VOCAB.producer(convert, """ (?P<neg> pregnant none) """),
        VOCAB.producer(convert, """ (?P<neg> none pregnant ) """),
        # Positive pattern: the keyword on its own
        VOCAB.producer(convert, """ (?P<pos> pregnant ) """),
    ],
)

Пример #12

0

Показать файл

# Pregnancy-state parser: a `Base` named after this module. Both producers
# capture the whole matched phrase in a `value` group for `convert`.
PREGNANCY_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        # Pregnancy and parity keywords; each whitespace-separated item is
        # one pattern
        VOCAB.term(
            "pregnant",
            r"""
                prega?n?ant pregnan preg pregnancy pregnancies gravid
                post[\s\-]?parous multiparous nulliparous parous primiparous
            """.split(),
        ),
        VOCAB.term("joiner", r""" of were """.split()),
        # Words placing the pregnancy in the past
        VOCAB.term(
            "recent",
            r""" recently recent was previously prev """.split(),
        ),
        # Words qualifying how certain or visible the pregnancy is
        VOCAB.term(
            "probably",
            r"""
                probably prob possibly possible
                appears? very
                visible visibly
                evidence evident
            """.split(),
        ),
        VOCAB.term("stage", r" early late mid ".split()),
        VOCAB.part("separator", r' [;,"] '),
        # E.g.: pregnancy visible
        VOCAB.producer(
            convert, """ (?P<value> pregnant joiner? none? probably quest? ) """
        ),
        # E.g.: Probably early pregnancy
        VOCAB.producer(
            convert,
            """ (?P<value> none? (recent | probably)?
                stage? (none | joiner)? pregnant quest? )
            """,
        ),
    ],
)

Пример #13

0

Показать файл

# Nipple-state parser: a `Base` named after this module. Groupers define
# which tokens may end or sit inside a state phrase; the three producers
# cover the orderings of the nipple key, an optional "non", and the state.
NIPPLE_STATE = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB.term("false", """ false """),
        VOCAB.term("much", """ much """),
        # A lactation word preceded by previous/post/prior, optionally
        # introduced by "indicate"
        VOCAB.term(
            "lactation",
            r"""
                (indicate \s+)?
                (( previous | post | prior ) [\s-] )
                (lactation | lactating | lac )
            """,
        ),
        VOCAB.term(
            "other",
            """
                protuberant prominent showing worn distended
            """.split(),
        ),
        # Separates measurements
        VOCAB.part("separator", r' [;"?/,] '),
        # Skip arbitrary words
        VOCAB["word"],
        # Rule names that can stand as a state on their own
        VOCAB.grouper(
            "state_end",
            """
                ( size | fully | partially | other | lactation | color | false
                    | visible | tissue | present | active | developed )
            """,
        ),
        # Rule names allowed between state words
        VOCAB.grouper("state_mid", """ ( uterus | and ) """),
        # State words first, then the nipple key
        VOCAB.producer(
            convert,
            """(?P<value> non?
                (state_end | much) (state_mid | state_end){0,2} nipple)
            """,
        ),
        # Optional "non", the nipple key, then the state words
        VOCAB.producer(
            convert,
            """(?P<value> non? nipple
                (state_end | much) (state_mid | state_end){0,2} )
            """,
        ),
        # The nipple key, an optional "non", then the state words
        VOCAB.producer(
            convert,
            """(?P<value> nipple non?
                (state_end | much) (state_mid | state_end){0,2} )
            """,
        ),
    ],
)

Пример #14

0

Показать файл

Файл: body_mass.py Проект: rafelafrance/traiter_vertnet

# Body-mass parser: a `Base` named after this module. Vocabulary entries
# and the key groupers come first, then the compound, shorthand and
# simple-mass producers.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
BODY_MASS = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Looking for keys like: MassInGrams
        VOCAB.term(
            "key_with_units",
            r"""
                ( weight | mass) [\s-]* in [\s-]*
                (?P<mass_units> grams | g | lbs )
            """,
        ),
        # These words indicate a body mass follows
        VOCAB.part("key_leader", "full observed total".split()),
        # Words for weight
        VOCAB.part("weight", "weights? weigh(ed|ing|s)?".split()),
        # Keys like: w.t.
        VOCAB.part("key_with_dots", r" \b w \.? \s? t s? \.? "),
        # Common prefixes that indicate a body mass
        VOCAB.part("mass", "mass"),
        VOCAB.part("body", "body"),
        # These indicate that the mass is NOT a body mass
        VOCAB.term(
            "other_wt",
            """
                femur baculum bacu bac spleen thymus kidney
                testes testis ovaries epididymis epid
            """.split(),
        ),
        # Separators
        VOCAB["word"],
        VOCAB["semicolon"],
        VOCAB["comma"],
        # Any key not preceded by "other_wt" is considered a weight key
        VOCAB.grouper(
            "wt_key",
            """
                (?<! other_wt )
                ( key_leader weight | key_leader mass
                    | body weight | body mass | body
                    | weight | mass | key_with_dots )
            """,
        ),
        # "key" is another name for "wt_key"
        VOCAB.grouper("key", " wt_key ".split()),
        # An optional key followed by a compound weight
        VOCAB.producer(compound, " key? compound_wt "),
        # Shorthand notation like: on tag: 11-22-33-44=99g
        VOCAB.producer(
            shorthand,
            [
                "key shorthand",
                "shorthand",
                "key shorthand_bats",
                "shorthand_bats",
            ],
        ),
        # Units come before the number, and the number is not followed by
        # length units
        VOCAB.producer(simple_mass,
                       " wt_key mass_units number (?! len_units ) "),
        # A weight key followed directly by a mass range
        VOCAB.producer(simple_mass, " wt_key mass_range "),
        # A key or triple key, a mass range, then explicit mass units
        VOCAB.producer(simple_mass,
                       " ( key | triple_key ) mass_range mass_units "),
        # The units are part of the key itself
        VOCAB.producer(simple_mass, " (?P<key> key_with_units ) mass_range "),
    ],
)

Пример #15

0

Показать файл

# Hind-foot-length parser: a `Base` named after this module, built from a
# list of vocabulary rules (terms, parts, groupers) followed by producers.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
HIND_FOOT_LENGTH = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Units are in the key, like: HindFootLengthInMillimeters
        VOCAB.term(
            "key_with_units",
            r"""( hind \s* )? foot \s* ( length | len ) \s* in \s*
                    (?P<units> millimeters | mm )
            """,
        ),
        # Standard keywords that indicate a hind foot length follows
        VOCAB.term(
            "key",
            [
                # Records in the `includes` group that the claw is included
                r"hind \s* foot \s* with \s* (?P<includes> claw )",
                r"hind \s* foot ( \s* ( length | len ) )?",
                "hfl | hf",
            ],
        ),
        # Some patterns require a separator
        VOCAB.part("sep", r" [;,] | $ ", capture=False),
        VOCAB.grouper("noise", " word dash ".split()),
        # Handle fractional values like: hindFoot 9/16"
        VOCAB.producer(
            fraction,
            [
                "key len_fraction units",  # E.g.: hindFoot = 9/16 inches
                "key len_fraction",  # E.g.: hindFoot = 9/16
            ],
        ),
        # A typical hind-foot notation
        VOCAB.producer(
            simple,
            [
                "key_with_units len_range",  # E.g.: hindFootLengthInMM=9-10
                "key noise? len_range units ",  # E.g.: hindFootLength=9-10 mm
                "key noise? len_range",  # Missing units like: hindFootLength 9-10
                "key dash number units",  # Key, a dash, a number, then units
            ],
        ),
        # Shorthand notations, handled by `shorthand_length` with the
        # measurement name fixed to "shorthand_hfl"
        VOCAB.producer(
            partial(shorthand_length, measurement="shorthand_hfl"),
            [
                "shorthand",
                "key shorthand_bats",
                "shorthand_bats",
                # Handle a truncated shorthand notation
                "triple_key shorthand_triple (?! shorthand | len_range )",
            ],
        ),
    ],
)

Пример #16

0

Показать файл

Файл: placental_scar_count.py Проект: rafelafrance/traiter_vertnet

# Placental-scar-count parser: a `Base` named after this module. After the
# vocabulary entries, a series of `convert_count` producers covers the
# orderings of counts, sides and the scar key; a final `convert_state`
# producer matches the scar key on its own.
# NOTE(review): rule order presumably matters to `Base` -- confirm before
# reordering entries.
PLACENTAL_SCAR_COUNT = Base(
    name=__name__.split(".")[-1],  # Last component of the dotted module name
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        VOCAB["shorthand"],
        # Adjectives to placental scars
        VOCAB.term(
            "adj",
            r"""
            faint prominent recent old possible """.split(),
        ),
        # Skip arbitrary words
        VOCAB["word"],
        VOCAB.part("sep", r" [;/] "),
        # A count is an integer or one of several "none" phrasings
        VOCAB.grouper(
            "count",
            """
                none embryo conj | none visible | integer | none
            """,
        ),
        # Two counts joined by an operator, an optional total, then the key
        VOCAB.producer(
            convert_count,
            """(?P<count1> count ) op (?P<count2> count )
                ( eq (?P<value> count ) )? plac_scar
            """,
        ),
        # Key first, then one or two count-and-side pairs
        VOCAB.producer(
            convert_count,
            """plac_scar op?
                  (?P<count1> count ) prep? (?P<side1> side )
                ( (?P<count2> count ) prep? (?P<side2> side ) )?
            """,
        ),
        # Count and side, the key, then an optional second count and side
        VOCAB.producer(
            convert_count,
            """ (?P<count1> count ) prep? (?P<side1> side ) plac_scar
                ( (?P<count2> count ) prep? (?P<side2> side )
                    (plac_scar)? )?
            """,
        ),
        # Side before count, then the key; optionally repeated
        VOCAB.producer(
            convert_count,
            """ (?P<side1> side ) (?P<count1> count )
                    (visible | op)? plac_scar
                ( (?P<side2> side ) (?P<count2> count )
                    (visible)? (visible | op)? plac_scar? )? """,
        ),
        # One or two count-and-side pairs before the key; the match may not
        # directly follow a "lut" token
        VOCAB.producer(
            convert_count,
            """ (?<! lut )
                (?P<count1> count ) prep? (?P<side1> side )
                ( (?P<count2> count ) prep? (?P<side2> side ) )?
                plac_scar
            """,
        ),
        # Count, the key, then the side; optionally repeated
        VOCAB.producer(
            convert_count,
            """ (?P<count1> count ) plac_scar (?P<side1> side )
                ( (?P<count2> count ) plac_scar? (?P<side2> side ) )?
            """,
        ),
        # Key, side, then count; optionally repeated
        VOCAB.producer(
            convert_count,
            """ plac_scar (?P<side1> side ) (?P<count1> count )
                ( plac_scar (?P<side2> side ) (?P<count2> count ) )?
            """,
        ),
        # Key first, two counts joined by an operator, then an optional total
        VOCAB.producer(
            convert_count,
            """ plac_scar
                (?P<count1> count )
                  op (?P<count2> count )
                ( eq (?P<value> count ) )?
            """,
        ),
        # A total before the key, optionally followed by a per-side breakdown
        VOCAB.producer(
            convert_count,
            """ (?P<value> count ) adj? plac_scar op?
                (
                    (?P<count1> count ) (?P<side1> side )
                    op?
                    (?P<count2> count ) (?P<side2> side )
                )?
            """,
        ),
        # A total before the key that is not followed by another count
        VOCAB.producer(
            convert_count, """ (?P<value> count ) embryo? plac_scar (?! count ) """
        ),
        # Key, an optional equals sign, then a single count with its side
        VOCAB.producer(
            convert_count, """ plac_scar eq? (?P<count1> count ) (?P<side1> side ) """
        ),
        # Key, an optional equals sign, then a total
        VOCAB.producer(convert_count, """ plac_scar eq? (?P<value> count ) """),
        # The key on its own goes to `convert_state` instead
        VOCAB.producer(convert_state, """ plac_scar """),
    ],
)

Пример #17

0

Показать файл

# Parser definition for ear-length measurements.  The rule list runs from
# low-level tokens (terms and parts), through a grouper that merges the key
# variants, to producers that pair a token pattern with a handler function.
# NOTE(review): the regex comments below assume the patterns are compiled in
# verbose mode (whitespace inside them is insignificant) -- confirm.
EAR_LENGTH = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    # Handler defined elsewhere in this module.
    fix_up=fix_up,
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Units are in the key, like: EarLengthInMillimeters
        # The unit text is captured in the "len_units" group.
        VOCAB.term(
            "key_with_units",
            r"""
                ear \s* ( length | len ) \s* in \s*
                (?P<len_units> millimeters | mm )
            """,
        ),
        # Abbreviation containing the measured from notation, like: e/n or e/c
        # Must not follow a letter (directly or across one whitespace
        # character) and must not be followed by an optional period plus a
        # letter.  The "e" is captured as "ambiguous_key" and the n/c as
        # "measured_from1".
        VOCAB.part(
            "char_measured_from",
            r"""
                (?<! [a-z] ) (?<! [a-z] \s )
                (?P<ambiguous_key> e ) /? (?P<measured_from1> n | c ) [-]?
                (?! \.? [a-z] )
            """,
        ),
        # The abbreviation key, just: e. This can be a problem.
        # To limit false hits the "e" may not follow a word character
        # (directly or across one whitespace character) and may not be
        # followed by an optional period, optional whitespace, and then a
        # letter or an opening parenthesis.
        VOCAB.part(
            "char_key",
            r"""
                (?<! \w ) (?<! \w \s )
                (?P<ambiguous_key> e )
                (?! \.? \s? [a-z\(] )
            """,
        ),
        # Standard keywords that indicate an ear length follows
        VOCAB.term(
            "keyword",
            [
                # "ear from notch" / "ear from crown"
                r" ear \s* from \s* (?P<measured_from1> notch | crown )",
                # "ear length" / "ear len"
                r" ear \s* ( length | len )",
                # A bare "ear", unless it is followed by "tag"
                r" ear (?! \s* tag )",
                # "efn" / "efc", with an optional trailing dash
                r" ef (?P<measured_from2> n | c ) [-]?",
            ],
        ),
        # Generic word token taken from the shared vocabulary
        VOCAB["word"],
        # Some patterns require a separator
        VOCAB.part("sep", " [;,] "),
        # Consider any of the following as just a key
        VOCAB.grouper("key", "keyword char_key char_measured_from".split()),
        # Handle fractional values like: ear 9/16"
        VOCAB.producer(fraction, "key len_fraction (?P<units> len_units )?"),
        # E.g.: earLengthInMM 9-10
        VOCAB.producer(simple_len, "(?P<key> key_with_units ) len_range"),
        # E.g.: ear 9-10 mm
        VOCAB.producer(simple_len, "key len_range (?P<units> len_units )?"),
        # Shorthand notation like: on tag: 11-22-33-44=99g
        # shorthand_length is bound to the "shorthand_el" measurement.
        VOCAB.producer(
            partial(shorthand_length, measurement="shorthand_el"),
            [
                "shorthand",
                "shorthand_bats",
            ],
        ),
    ],
)

Пример #18

0

Показать файл

Файл: testes_size.py Проект: rafelafrance/traiter_vertnet

# Parser definition for testes-size measurements.  Rules are listed from
# low-level tokens, through groupers (value, state, ambiguous), to producers.
# Producers using `double` target notations that carry both sides at once;
# producers using `convert` target a single measurement.
# NOTE(review): the order of the producers is presumably significant to the
# matching engine -- confirm before reordering.
TESTES_SIZE = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Note: abbrev differs from the one in the testes_state_trait
        VOCAB.term("abbrev", "tes ts tnd td tns ta".split()),
        # The abbreviation key, just: t. This can be a problem.
        # Only a "t" at a word boundary that is not followed by a letter.
        VOCAB.part("char_key", r" \b t (?! [a-z] )"),
        # A key with units, like: gonadLengthInMM
        # "gonad" is captured as "ambiguous_key", the dimension word as
        # "dim" and the unit text as "len_units".
        VOCAB.term(
            "key_with_units",
            r"""
                (?P<ambiguous_key> gonad ) \s*
                    (?P<dim> length | len | width ) \s* in \s*
                    (?P<len_units> millimeters | mm )
            """,
        ),
        # A measurement value: either a cross, or a number with optional
        # length units that is not followed by mass units.
        VOCAB.grouper(
            "value",
            """ cross | number len_units? (?! mass_units ) """,
        ),
        # Descriptive state words that may sit between a key and its value:
        # an optionally qualified "descended", or one of scrotal, abdominal,
        # size, other.
        VOCAB.grouper(
            "state",
            ["""(non | partially | fully )? descended """]
            + """ scrotal abdominal size other """.split(),
        ),
        # Male or female ambiguous, like: gonadLength1
        VOCAB.grouper(
            "ambiguous",
            """
                ambiguous_key dim_side
                | side ambiguous_key dimension
                | ambiguous_key dimension
            """,
        ),
        # These patterns contain measurements to both left & right testes
        # E.g.: reproductive data: testes left 10x5 mm, right 10x6 mm
        VOCAB.producer(double, """label ( testes | abbrev | char_key ) side_cross """),
        # As above but without the testes marker:
        # E.g.: reproductive data: left 10x5 mm, right 10x6 mm
        VOCAB.producer(double, """ label side_cross """),
        # Has the testes marker but is lacking the label
        # E.g.: testes left 10x5 mm, right 10x6 mm
        VOCAB.producer(
            double,
            """
                ( testes | abbrev | char_key ) side_cross
            """,
        ),
        # Label followed by two side/number pairs, each with optional units.
        # Unlike the patterns above these are single numbers, not crosses.
        # Presumably matches text like: reproductive data: left 10 mm right 9 mm
        VOCAB.producer(
            double,
            """
                label
                    (?P<side_1> side ) (?P<value_1> number )
                        (?P<units_1> len_units )?
                    (?P<side_2> side ) (?P<value_2> number )
                        (?P<units_2> len_units )?
            """,
        ),
        # Testes marker followed by two numbers joined by a dash, each with
        # optional units.  Presumably matches text like: testes 10-5 mm
        VOCAB.producer(
            convert,
            """
                ( testes | abbrev | char_key )
                    (?P<value_1> number ) (?P<units_1> len_units )?
                    dash
                    (?P<value_2> number ) (?P<units_2> len_units )?
            """,
        ),
        # A typical testes size notation
        # E.g.: reproductive data: testes 10x5 mm
        # NOTE(review): apart from whitespace this is the same token pattern
        # as the first `double` producer above (it uses side_cross, not
        # value) -- confirm that is intended.
        VOCAB.producer(
            convert,
            """ label ( testes | abbrev | char_key ) side_cross """,
        ),
        # E.g.: reproductive data: left testes 10x5 mm
        VOCAB.producer(
            convert,
            """ label side ( testes | abbrev | char_key ) (dash | comma)? value""",
        ),
        # E.g.: reproductive data=T: L-2x4mm
        VOCAB.producer(
            convert,
            """ label ( testes | abbrev | char_key ) side dash? value """,
        ),
        # E.g.: reproductive data: left 10x5 mm
        VOCAB.producer(convert, "label side dash? value len_units?"),
        # E.g.: reproductive data: 10x5 mm
        VOCAB.producer(convert, "label value len_units?"),
        # Has the testes marker but is lacking the label.  The lone "t"
        # (char_key) is deliberately not accepted here; it is handled by the
        # last producer in this list.
        # E.g.: testes 10x5 mm
        VOCAB.producer(convert, """( testes | abbrev ) value """),
        # May have a few words between the label and the measurement
        # E.g.: reproductive data=testes not descended - 6 mm
        # Up to three filler tokens are allowed, and the token right before
        # the value must be a testes marker or a state.
        VOCAB.producer(
            convert,
            """ label ( testes | abbrev | state | word | sep | char_key){0,3}
                ( testes | abbrev | state | char_key )
                ( dash | comma )? value
            """,
        ),
        # Handles: gonadLengthInMM 4x3
        # And:     gonadLength 4x3
        VOCAB.producer(convert, "( ambiguous | key_with_units ) value"),
        # As above, but with up to three filler tokens after the key and a
        # testes marker or state immediately before the value.
        VOCAB.producer(
            convert,
            """ ( key_with_units | ambiguous )
                ( testes | abbrev | state | word | sep | char_key ){0,3}
                ( testes | abbrev | state | char_key ) value
            """,
        ),
        # Anchored by testes but with words between
        # E.g.: testes scrotal; T = 9mm
        VOCAB.producer(
            convert,
            """ testes ( abbrev | state | word | sep | char_key ){0,3}
                ( abbrev | state | char_key ) value
            """,
        ),
        # Anchored by testes but with only one word in between
        # E.g.: testes scrotal 9mm
        VOCAB.producer(
            convert,
            """ testes ( abbrev | state | word | char_key ) ( comma | dash )? value """,
        ),
        # E.g.: Testes 5 x 3
        VOCAB.producer(
            convert,
            """ ( testes | state | abbrev ) (comma | dash | x )? value """,
        ),
        # E.g.: T 5 x 4
        # The lone "t" key is captured as "ambiguous_char".
        VOCAB.producer(convert, " (?P<ambiguous_char> char_key ) value "),
    ],
)

Пример #19

0

Показать файл

Файл: lactation_state_old.py Проект: rafelafrance/traiter_vertnet

# Parser definition for lactation state.  A single producer captures an
# optional prefix (not / post / pre), a lactation word and an optional
# `quest` token as the trait value.
LACTATION_STATE = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        # Lactation words, including a number of misspellings and truncated
        # forms, plus "nursing" and "suckling".  The match must end at a
        # word boundary.
        # NOTE(review): "lactacting" is listed twice in the alternation; the
        # second occurrence is redundant.
        VOCAB.part(
            "lactating",
            r""" (
                lactating | lactation | lactated | lactate | lact
                | lactaing | lactacting | lactataing | lactational
                | oelact | celact | lactati | lactacting | lactatin
                | lactatting | lactatng
                | nursing | suckling
                ) \b
            """,
        ),
        # Negation words; must start at a word boundary.
        VOCAB.part("not", r" \b ( not | non | no ) "),
        # Words indicating lactation is over or not current, e.g. "post",
        # "recently finished", "had".
        # NOTE(review): this alternation also contains "pre", which overlaps
        # with the separate "pre" part defined next -- confirm which one is
        # meant to win.
        VOCAB.part(
            "post",
            r""" \b (
                (( just | recently ) \s+ )? finished
                | post | recently | recent | had | pre
            ) """,
        ),
        # "pre" followed by an optional whitespace character or dash.
        VOCAB.part("pre", r" \b pre [\s\-]? "),
        # Separates measurements
        VOCAB.part("separator", r' [;"/] '),
        # Generic word token taken from the shared vocabulary
        VOCAB["word"],
        # Any of the three qualifiers may act as the prefix.
        VOCAB.grouper("prefix", "not post pre".split()),
        # The whole match (prefix, lactation word, optional quest token) is
        # captured as "value".
        VOCAB.producer(convert, """ (?P<value> prefix? lactating quest? ) """),
    ],
)

Пример #20

0

Показать файл

# Module-level vocabulary built from the shared patterns.VOCAB definitions.
# The rules in this module are created and looked up through it.
VOCAB = Vocabulary(patterns.VOCAB)


def convert(token):
    """Build a trait from a matched token.

    The trait's value is the lower-cased text of the token's ``value`` group
    and its span is the token's ``start``/``end`` offsets.
    """
    normalized = token.group["value"].lower()
    span = {"start": token.start, "end": token.end}
    result = Trait(value=normalized, **span)
    # Let the trait inspect the token for the "ambiguous_key" flag.
    result.is_flag_in_token(token, "ambiguous_key")
    return result


# Parser definition for the scrotal state of the testes.  Each producer
# captures the matched state text in the "value" group, which `convert`
# lower-cases into the trait value.
SCROTAL_STATE = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        # Abbreviations for "testes", including the single letter "t".
        VOCAB.term("testes_abbrev", "tes ts tnd td tns ta t".split()),
        # Abbreviations for the scrotal state itself.
        VOCAB.term("scrotal_abbrev", "ns sc".split()),
        # If possible exclude length. Ex: reproductive data=testes: 11x7 mm
        # NOTE(review): no producer below references "length"; presumably the
        # grouper exists only to absorb measurement tokens -- confirm.
        VOCAB.grouper("length", "cross len_units?"),
        # Testes marker, optional "non", then a scrotal word or abbreviation.
        VOCAB.producer(
            convert,
            """ (?P<value>
                ( testes | testes_abbrev ) non? ( scrotal | scrotal_abbrev ) )
            """,
        ),
        # A scrotal word on its own, optionally negated.
        VOCAB.producer(convert, """ (?P<value> non? scrotal ) """),
        # A bare scrotal abbreviation is only accepted right after a label;
        # the label itself is outside the captured value.
        VOCAB.producer(convert, """ label (?P<value> scrotal_abbrev )  """),
    ],
)

Пример #21

0

Показать файл

Файл: embryo_count.py Проект: rafelafrance/traiter_vertnet

# Parser definition for embryo counts.  Producers capture an overall count
# in the "total" group and per-side (or per-sex) counts in repeated
# "sub" / "subcount" groups.  The many producers cover the different
# orderings of the embryo keyword, total, sides and counts.
# NOTE(review): the order of the producers is presumably significant to the
# matching engine -- confirm before reordering.
EMBRYO_COUNT = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        VOCAB["shorthand"],
        VOCAB["metric_mass"],
        # Sex words, or a lone "m" / "f" that is not part of a longer word.
        VOCAB.part(
            "sex",
            r""" males? | females? | (?<! [a-z] ) [mf] (?! [a-z] ) """,
        ),
        # The "reproductive data" key.
        VOCAB.term("repo_key", r""" reproductive \s data """),
        # "near term", "near-term" or "nearterm".
        VOCAB.term("near_term", r" near[\s-]?term"),
        VOCAB.term("each_side", r" each \s side "),
        # "w" and "wt" tokens.
        # NOTE(review): presumably weight abbreviations, tokenized so they are
        # not read as words -- confirm.
        VOCAB.term("skip", r" w  wt ".split()),
        VOCAB.part("sep", r" [;] "),
        VOCAB.part("bang", r" [!] "),
        # Anything that can stand for a count: "none <word|plac_scar> conj",
        # an integer, "none", a number word, or an exclamation mark.
        VOCAB.grouper(
            "count",
            """ none (word | plac_scar) conj | integer | none | num_words | bang """,
        ),
        # Indicates embryos are present without giving a number.
        VOCAB.grouper("present", " found | near_term "),
        VOCAB.grouper("numeric", " integer | real "),
        # A length measurement to skip over: a number with metric length
        # units (optionally preceded by x), or x followed by a number with
        # optional units.
        VOCAB.grouper("skip_len",
                      " ( x? numeric metric_len ) | (x numeric metric_len?) "),
        # Filler tokens that may be skipped between meaningful tokens.
        VOCAB.grouper("skip_words", " word | numeric | metric_len | eq "),
        # Tokens that may link the two side/count pairs.
        VOCAB.grouper("side_link", " x | conj | word "),
        # What may appear between two side/count pairs: an optional link, or
        # up to four skip words.
        VOCAB.grouper("between", "side_link? | skip_words{,4}"),
        # embryo [eq] total [length], then two (side, count) pairs.
        VOCAB.producer(
            convert,
            """ embryo eq? (?P<total> count ) skip_len?
                (?P<sub> side ) (?P<subcount> count ) between
                (?P<sub> side ) (?P<subcount> count )
            """,
        ),
        # embryo [eq], then two (side, count) pairs with no total; the
        # embryo keyword may repeat before the second pair and at the end.
        VOCAB.producer(
            convert,
            """ embryo eq? (?P<sub> side ) (?P<subcount> count ) between
                embryo?    (?P<sub> side ) (?P<subcount> count ) embryo?
            """,
        ),
        # embryo [eq] total, up to four skip words, then two (count, side)
        # pairs.
        VOCAB.producer(
            convert,
            """ embryo eq? (?P<total> count ) skip_words{,4}
                    (?P<subcount> count ) (?P<sub> side ) between
                    (?P<subcount> count ) (?P<sub> side )
            """,
        ),
        # embryo [eq], two (count, side) pairs, then eq and the total.
        VOCAB.producer(
            convert,
            """ embryo eq?
                (?P<subcount> count ) (?P<sub> side ) between
                (?P<subcount> count ) (?P<sub> side ) eq
                (?P<total> count )
            """,
        ),
        # embryo [eq], two (count, side) pairs, no total.
        VOCAB.producer(
            convert,
            """ embryo eq? (?P<subcount> count ) (?P<sub> side ) between
                           (?P<subcount> count ) (?P<sub> side )
            """,
        ),
        # embryo [eq], a single count, a length to skip, then one side.
        VOCAB.producer(
            convert,
            """ embryo eq? (?P<subcount> count ) skip_len (?P<sub> side ) """,
        ),
        # Embryo keyword with a side but no count, handled by `found`.
        # Rejected when a plac_scar token follows.
        VOCAB.producer(found,
                       """ embryo word? (?P<sub> side ) (?! plac_scar ) """),
        # Embryo keyword next to a presence word, in either order.
        VOCAB.producer(found, """ embryo present | present embryo """),
        # total [near_term] embryo, not followed by plac_scar.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) near_term? embryo  (?! plac_scar ) """,
        ),
        # As above, followed by an optional length and two (count, side or
        # sex) pairs.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) near_term? embryo (?! plac_scar ) skip_len?
                (?P<subcount> count ) (?P<sub> side | sex ) side_link?
                (?P<subcount> count ) (?P<sub> side | sex )
            """,
        ),
        # total, an optional size or word token, embryo; not followed by
        # plac_scar.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) ( size | word )? embryo (?! plac_scar ) """,
        ),
        # As above, followed by two (count, side) pairs.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) ( size | word )? embryo (?! plac_scar )
                (?P<subcount> count ) (?P<sub> side ) side_link?
                (?P<subcount> count ) (?P<sub> side )
            """,
        ),
        # total [length] embryo, followed by two (count, side) pairs.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) skip_len? embryo (?! plac_scar )
                (?P<subcount> count ) (?P<sub> side ) side_link?
                (?P<subcount> count ) (?P<sub> side )
            """,
        ),
        # total, a required length, embryo; not followed by plac_scar.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) skip_len embryo (?! plac_scar ) """,
        ),
        # Two (side [eq] count) pairs followed by the embryo keyword.
        VOCAB.producer(
            convert,
            """ (?P<sub> side ) eq? (?P<subcount> count ) eq? side_link?
                (?P<sub> side ) eq? (?P<subcount> count ) eq? numeric? embryo
            """,
        ),
        # Two (count, side) pairs followed by the embryo keyword.
        VOCAB.producer(
            convert,
            """ (?P<subcount> count ) (?P<sub> side ) side_link?
                (?P<subcount> count ) (?P<sub> side ) embryo
            """,
        ),
        # count embryo [word] side, then a second count [word] side.
        VOCAB.producer(
            convert,
            """ (?P<subcount> count ) embryo word? (?P<sub> side ) side_link?
                (?P<subcount> count ) word? (?P<sub> side )
            """,
        ),
        # Anchored by the "reproductive data" key instead of the embryo
        # keyword: up to two eq/word tokens, then two (count, side) pairs.
        VOCAB.producer(
            convert,
            """ repo_key ( eq | word ){,2}
                    (?P<subcount> count ) (?P<sub> side ) side_link?
                    (?P<subcount> count ) (?P<sub> side )
            """,
        ),
        # total embryo, then two (side [eq] count) pairs.
        VOCAB.producer(
            convert,
            """ (?P<total> count ) embryo
                    (?P<sub> side ) eq? (?P<subcount> count ) side_link?
                    (?P<sub> side ) eq? (?P<subcount> count )
            """,
        ),
        # side count embryo [length], then side count [embryo].
        VOCAB.producer(
            convert,
            """ (?P<sub> side ) (?P<subcount> count ) embryo skip_len?
                (?P<sub> side ) (?P<subcount> count ) embryo?
            """,
        ),
        # Two (count, side) pairs each followed by an x token, then
        # [eq] [length] embryo.
        VOCAB.producer(
            convert,
            """ (?P<subcount> count ) (?P<sub> side ) x
                (?P<subcount> count ) (?P<sub> side ) x
                eq? skip_len? embryo
            """,
        ),
        # Side first with up to four skip words before each count; a length
        # must separate the two pairs.
        VOCAB.producer(
            convert,
            """ (?P<sub> side ) skip_words{,4} (?P<subcount> count ) embryo? skip_len
                (?P<sub> side ) skip_words{,4} (?P<subcount> count )
            """,
        ),
        # A single count, the embryo keyword, and one side.
        VOCAB.producer(convert,
                       """ (?P<subcount> count ) embryo (?P<sub> side )"""),
        # The simplest form: embryo [eq] total.
        VOCAB.producer(convert, """ embryo eq? (?P<total> count )"""),
        # count side, up to three skip words, then the embryo keyword.
        VOCAB.producer(
            convert,
            """ (?P<subcount> count ) (?P<sub> side ) skip_words{,3} embryo """,
        ),
        # A count given as "each side", handled by `each_side`.
        VOCAB.producer(each_side,
                       """ (?P<subcount> count ) embryo each_side """),
    ],
)

Пример #22

0

Показать файл

Файл: sex.py Проект: rafelafrance/traiter_vertnet

import vertnet.pylib.patterns as patterns
from vertnet.parsers.base import Base, convert

# Module-level vocabulary built from the shared patterns.VOCAB definitions.
VOCAB = Vocabulary(patterns.VOCAB)

# Parser definition for the sex of a specimen.  Every producer captures the
# sex text (with an optional `quest` token) in the "value" group.
SEX = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        # JSON keys for sex
        VOCAB.term("sex_key", "sex"),
        # The sexes
        VOCAB.term("sex_vocab", "females? males?".split()),
        # These are words that indicate that "sex" is not a key
        VOCAB.term("not_sex", "and is was".split()),
        # Allow arbitrary words in some cases
        # A word starts with a letter at a word boundary and runs until a
        # semicolon, comma, double quote, equals sign, colon or whitespace.
        VOCAB.part("word", r' \b [a-z] [^;,"=:\s]* '),
        # Some patterns need a terminator
        # Either a semicolon, comma or double quote, or the end of the text.
        VOCAB.part("separator", ' [;,"] | $ '),
        # E.g.: sex might be female;
        # One or two sex terms or words after the key, closed by a separator.
        VOCAB.producer(
            convert,
            """ sex_key (?P<value> ( sex_vocab | word ){1,2} quest? ) separator """,
        ),
        # E.g.: sex=female?, Or: sex=unknown
        # Exactly one sex term or word after the key; no separator required.
        VOCAB.producer(convert,
                       " sex_key (?P<value> ( sex_vocab | word ) quest? ) "),
        # E.g.: male, Or: male?
        # No key at all, so only a known sex term is accepted.
        VOCAB.producer(convert, " (?P<value> sex_vocab quest? ) "),
    ],
)

Пример #23

0

Показать файл

# Parser definition for embryo-length (crown-rump) measurements.
# NOTE(review): the regex comments below assume the patterns are compiled in
# verbose mode (whitespace inside them is insignificant) -- confirm.
EMBRYO_LENGTH = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    # Handler defined elsewhere in this module.
    fix_up=fix_up,
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        VOCAB["shorthand"],
        # Crown-rump keys: "crown rump" / "cr rump" (optionally joined by an
        # underscore, whitespace, dash or "to"), "crl", or "c.r." with
        # optional periods.  The key is rejected when it directly follows
        # "collector" or "reg" plus one of whitespace, "=", ":" or ".".
        VOCAB.part(
            "embryo_len_key",
            r"""
            (?<! collector [\s=:.] ) (?<! reg [\s=:.] ) (
                ( crown | cr ) ( [_\s\-] | \s+ to \s+ )? rump
                | (?<! [a-z] ) crl (?! [a-z] )
                | (?<! [a-z] ) c \.? r \.? (?! [a-z] )
            )""",
        ),
        # "length" or "len", not followed by a letter.
        VOCAB.part("len", r" (length | len) (?! [a-z] ) "),
        # A parenthesized "<digits> <word>" group, e.g. "(3 left)".
        VOCAB.part("other", r" \( \s* \d+ \s* \w+ \s* \) "),
        VOCAB.part("separator", r' [;"/.] '),
        # A measurement value: either a cross, or a number with optional
        # length units that is not followed by a sex token.
        VOCAB.grouper("value", """ cross | number len_units? (?! sex ) """),
        # The full key: crown-rump key, optional "length", optional eq/colon.
        VOCAB.grouper("key", """ embryo_len_key len? ( eq | colon )? """),
        # Embryo-count notations that may precede the lengths: two
        # number/side pairs, or "number plus number" with an optional
        # "eq number".
        VOCAB.grouper(
            "count",
            """
            number side number side eq?
            | number plus number ( eq number )?
            """,
        ),
        # Token sequences that must not follow a key-less value.
        VOCAB.grouper("skip", " prep word cross | other | side "),
        # [embryo] key value
        VOCAB.producer(convert, """ embryo? key value quest? """),
        # [embryo] [x] value key
        VOCAB.producer(convert, """ embryo? x? value key quest? """),
        # embryo [count] followed by two or more values, handled by
        # `convert_many`; rejected when a skip sequence follows.
        VOCAB.producer(convert_many,
                       """ embryo count? value{2,} (?! skip ) quest? """),
        # [embryo] key [x] value
        VOCAB.producer(convert, """ embryo? key x? value quest? """),
        # NOTE(review): this pattern is identical to the second producer
        # above (" embryo? x? value key quest? "); the repetition looks
        # redundant -- confirm before removing.
        VOCAB.producer(convert, """ embryo? x? value key quest? """),
        # embryo [x] value without any key; rejected when a skip sequence
        # follows.
        VOCAB.producer(convert, """ embryo x? value (?! skip ) quest? """),
        # embryo [colon] [count] value with explicit length units, handled
        # by `isolate`.
        VOCAB.producer(isolate,
                       """ embryo colon? count? value len_units quest? """),
    ],
)

Пример #24

0

Показать файл

# Parser definition for tail-length measurements.  Rules run from low-level
# tokens, through a grouper that merges the key variants, to producers for
# fractional values, plain ranges and shorthand notations.
TAIL_LENGTH = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    # Handler defined elsewhere in this module.
    fix_up=fix_up,
    rules=[
        VOCAB["uuid"],  # UUIDs cause problems with numbers
        # Looking for keys like: tailLengthInMM
        # The unit text is captured in the "units" group.
        VOCAB.term(
            "key_with_units",
            r"""
                tail \s* ( length | len ) \s* in \s*
                (?P<units> millimeters | mm )
            """,
        ),
        # The abbreviation key, just: t. This can be a problem.
        # To limit false hits the "t" must start at a word boundary and may
        # be followed neither by a letter nor by an underscore plus a
        # non-digit.  It is captured as "ambiguous_key".
        VOCAB.part(
            "char_key",
            r"""
                \b (?P<ambiguous_key> t ) (?! [a-z] ) (?! _ \D )
            """,
        ),
        # Standard keywords that indicate a tail length follows
        VOCAB.term("keyword",
                   [r" tail \s* length ", r" tail \s* len ", "tail", "tal"]),
        # Some patterns require a separator
        # A semicolon or comma, or the end of the text.
        VOCAB.part("sep", r" [;,] | $ ", capture=False),
        # Consider all of these tokens a key
        VOCAB.grouper("key", "keyword char_key".split()),
        # Handle fractional values like: tailLength 9/16"
        VOCAB.producer(
            fraction,
            [
                # E.g.: tail = 9/16 in
                "key len_fraction (?P<units> len_units )",
                "key len_fraction",  # Without units, like: tail = 9/16
            ],
        ),
        # Plain values and ranges, with units in the key, after the value,
        # or missing.
        VOCAB.producer(
            simple,
            [
                "key_with_units len_range",  # E.g.: tailLengthInMM=9-10
                "key len_range (?P<units> len_units )",  # E.g.: tailLength=9-10 mm
                "key len_range",  # Missing units like: tailLength 9-10
            ],
        ),
        # Shorthand notations; shorthand_length is bound to the
        # "shorthand_tal" measurement.
        VOCAB.producer(
            partial(shorthand_length, measurement="shorthand_tal"),
            [
                "shorthand",
                "key shorthand_bats",
                "shorthand_bats",
                # Handle a truncated shorthand notation
                # Rejected when a shorthand or len_range token follows.
                "triple_key shorthand_triple (?! shorthand | len_range )",
            ],
        ),
    ],
)

Пример #25

0

Показать файл

Файл: testes_state.py Проект: rafelafrance/traiter_vertnet

# Parser definition for the state of the testes (descended, abdominal, ...).
# Each producer captures the matched text in the "value" group.
TESTES_STATE = Base(
    # The parser name is the last dotted component of this module's name.
    name=__name__.split(".")[-1],
    rules=[
        # Abbreviations for "testes"
        VOCAB.term("abbrev", "tes ts tnd td tns ta t".split()),
        VOCAB["uterus"],
        # Multi-token state phrases built around "descended".
        # NOTE(review): longer phrases are listed before the bare
        # "descended"; presumably so they are tried first -- confirm.
        VOCAB.grouper(
            "state",
            [
                "non fully descended",
                "abdominal non descended",
                "abdominal descended",
                "non descended",
                "fully descended",
                "partially descended",
                "size non descended",
                "size descended",
                "descended",
            ],
        ),
        # Simplify the testes length so it can be skipped easily
        VOCAB.grouper("length", "cross len_units?"),
        # Testes marker, an optional length to skip, a state (or a bare
        # abdominal/size token), and optionally a second state or size
        # joined by an optional conjunction.
        VOCAB.producer(
            convert,
            r""" (?P<value>
                ( testes | abbrev | ambiguous_key ) length?
                    ( state | abdominal | size )
                    ( conj? ( state | size ) )?
            ) """,
        ),
        # A leading "non" before the testes marker, with an optional state.
        VOCAB.producer(
            convert,
            r""" (?P<value> non ( testes | abbrev | ambiguous_key ) ( state )? ) """,
        ),
        # After a label the testes marker is optional: an optional length, a
        # size token, and an optional state.  The label itself is outside
        # the captured value.
        VOCAB.producer(
            convert,
            """ label
                (?P<value> ( testes | abbrev )? length? size ( conj? state )? )
            """,
        ),
    ],
)

Python Base примеры использования