Exemplo n.º 1
0
def temporalTest(tok, include_relative=True):
    """Return True when the token looks like a temporal expression.

    The token is rejected outright if it contains '#', '$' or '%'.
    Otherwise it is accepted as soon as one of the following matches, in
    this order: a numeric date shape, a 4-8 digit run that the ``tt``
    date/time helpers accept, an hh:mm:ss time, or any of the ``tt``
    vocabulary predicates.

    Args:
        tok: The token text to inspect.
        include_relative: When false, a positive ``tt.hasTempText`` result
            is ignored (the helper is still called).

    Returns:
        True if any check matches, otherwise False.
    """
    # Tokens carrying a hash, dollar or percent sign are never temporal.
    if re.search('[#$%]', tok):
        return False

    # Separator-delimited numeric dates: first the three-part shapes
    # (e.g. mm/dd/yyyy, yyyy-mm-dd), then the two-part ones (e.g. mm/dd).
    numeric_date_shapes = (
        '([0-9]{1,4}[-/][0-9]{1,2}[-/][0-9]{1,4})',
        '([0-9]{1,2}[-/][0-9]{2,4})',
    )
    for shape in numeric_date_shapes:
        if re.search(shape, tok):
            return True

    # A bare run of 4 to 8 digits (e.g. 19980304, 03041998, 980304) only
    # counts when one of the tt date/time helpers accepts it.
    digit_run = re.search('([0-9]{4,8})', tok)
    if digit_run:
        digits = digit_run.group(0)
        if tt.has24HourTime(digits) or tt.hasDateOrTime(digits):
            return True

    # Clock time written as hh:mm:ss.
    if re.search('([0-9]{2}:[0-9]{2}:[0-9]{2})', tok):
        return True

    # Vocabulary predicates consulted before the relative-text check.
    # Names are resolved lazily so each helper is only looked up when reached.
    before_relative = (
        'hasTextMonth',
        'hasDayOfWeek',
        'hasPeriodInterval',
        'hasAMPM',
        'hasPartOfWeek',
        'hasSeasonOfYear',
        'hasPartOfDay',
        'hasTimeZone',
    )
    if any(getattr(tt, helper)(tok) for helper in before_relative):
        return True

    # The relative-text match only counts when the caller opted in.
    if tt.hasTempText(tok) and include_relative:
        return True

    # Remaining predicates; if none of them match the token is not temporal.
    after_relative = ('hasModifierText', 'hasClinAbr')
    return any(getattr(tt, helper)(tok) for helper in after_relative)
Exemplo n.º 2
0
def temporalTest(tok):
    """Classify a token as temporal and report which check matched.

    Only the checks kept for frequency extraction are active: day of week,
    period/interval, AM/PM, part of day and dose duration. The numeric
    date, 4-8 digit run, hh:mm:ss, text-month, part-of-week, season,
    time-zone and temporal-text checks are intentionally disabled here.
    Text-month matching in particular produced false positives such as
    "DOCTOR", which contains "OCT".

    Args:
        tok: The token text to inspect.

    Returns:
        A ``(is_temporal, code)`` tuple. ``code`` is 3 for day of week,
        4 for period/interval, 5 for AM/PM and 8 for part of day. It is -1
        for a dose-duration match and for every non-temporal token.
    """
    not_temporal = (False, -1)

    # Tokens carrying a hash, dollar or percent sign are never temporal.
    if re.search('[#$%]', tok):
        return not_temporal

    # Ordered (tt predicate name, result code) pairs; the first predicate
    # that accepts the token decides the code. Names are resolved lazily so
    # a helper is only looked up when it is actually reached.
    coded_checks = (
        ('hasDayOfWeek', 3),
        ('hasPeriodInterval', 4),
        ('hasAMPM', 5),
        ('hasPartOfDay', 8),
        ('hasDoseDuration', -1),
    )
    for helper, code in coded_checks:
        if getattr(tt, helper)(tok):
            return True, code

    return not_temporal