Пример #1
0
def temporalTest(tok, include_relative=True):
    """Decide whether a token looks like a temporal expression.

    The checks run in a fixed order and stop at the first decision:

    1. A token containing ``#``, ``$`` or ``%`` is rejected outright.
    2. Numeric dates with two ``-``/``/`` separators (e.g. mm/dd/yyyy,
       yyyy-mm-dd) or a single one (e.g. mm/dd, mm/yyyy) are accepted.
    3. The first run of 4-8 digits is accepted if ``tt.has24HourTime`` or
       ``tt.hasDateOrTime`` recognises it (e.g. 19980304, 03041998, 980304).
    4. Clock times written hh:mm:ss are accepted.
    5. Otherwise the keyword helpers in ``tt`` are consulted in turn.

    Args:
        tok: Token text to inspect.
        include_relative: When false, a hit from ``tt.hasTempText`` alone
            does not make the token temporal.

    Returns:
        ``True`` if any check accepted the token, ``False`` otherwise.
    """
    # Currency, percentage and hash-marked tokens are never temporal.
    if re.search('[#$%]', tok):
        return False

    # Separator-delimited numeric dates: two separators first, then one.
    for date_pattern in (
        '([0-9]{1,4}[-/][0-9]{1,2}[-/][0-9]{1,4})',
        '([0-9]{1,2}[-/][0-9]{2,4})',
    ):
        if re.search(date_pattern, tok):
            return True

    # Unseparated digit run that may encode a date or a 24-hour time.
    digit_run = re.search('([0-9]{4,8})', tok)
    if digit_run:
        candidate = digit_run.group(0)
        if tt.has24HourTime(candidate) or tt.hasDateOrTime(candidate):
            return True

    # Clock time hh:mm:ss.
    if re.search('([0-9]{2}:[0-9]{2}:[0-9]{2})', tok):
        return True

    # Keyword-based helpers; ``or`` evaluates them lazily, left to right,
    # so later helpers are only called when the earlier ones found nothing.
    return bool(
        tt.hasTextMonth(tok)
        or tt.hasDayOfWeek(tok)
        or tt.hasPeriodInterval(tok)
        or tt.hasAMPM(tok)
        or tt.hasPartOfWeek(tok)
        or tt.hasSeasonOfYear(tok)
        or tt.hasPartOfDay(tok)
        or tt.hasTimeZone(tok)
        or (tt.hasTempText(tok) and include_relative)
        or tt.hasModifierText(tok)
        or tt.hasClinAbr(tok)
    )
Пример #2
0
def temporalTest(tok):
    """Classify a token as temporal or not and report which check fired.

    The checks run in a fixed order and the first one that fires decides
    the result.

    Args:
        tok: Token text to inspect.

    Returns:
        A ``(is_temporal, code)`` tuple. ``code`` identifies the check:

        * ``12`` - numeric date with two ``-``/``/`` separators, or a
          4-8 digit run accepted by ``tt.hasDateOrTime``
        * ``0`` - 4-8 digit run accepted by ``tt.has24HourTime``
        * ``1`` - clock time written hh:mm:ss
        * ``3``..``9`` - ``tt.hasDayOfWeek``, ``tt.hasPeriodInterval``,
          ``tt.hasAMPM``, ``tt.hasPartOfWeek``, ``tt.hasSeasonOfYear``,
          ``tt.hasPartOfDay``, ``tt.hasTimeZone`` respectively
        * ``-1`` - ``tt.hasDoseDuration`` hit (with ``True``), or no hit
          at all / token contains ``#``, ``$`` or ``%`` (with ``False``)
    """
    # Currency, percentage and hash-marked tokens are never temporal.
    if re.search('[#$%]', tok):
        return False, -1

    # Numeric dates: mm[/-]dd[/-]yyyy, mm[/-]dd[/-]yy, yyyy[/-]mm[/-]dd, ...
    if re.search('([0-9]{1,4}[-/][0-9]{1,2}[-/][0-9]{1,4})', tok):
        return True, 12

    # Unseparated digit run such as 19980304, 03041998 or 980304.
    digit_run = re.search('([0-9]{4,8})', tok)
    if digit_run:
        candidate = digit_run.group(0)
        if tt.has24HourTime(candidate):
            return True, 0
        if tt.hasDateOrTime(candidate):
            return True, 12

    # Clock time hh:mm:ss.
    if re.search('([0-9]{2}:[0-9]{2}:[0-9]{2})', tok):
        return True, 1

    # NOTE: tt.hasTextMonth (code 2) is deliberately not consulted: it
    # returns True for strings such as "DOCTOR" because they contain "OCT".
    # tt.hasTempText (code 10) and tt.hasFor are likewise disabled.
    keyword_codes = (
        (tt.hasDayOfWeek, 3),
        (tt.hasPeriodInterval, 4),
        (tt.hasAMPM, 5),
        (tt.hasPartOfWeek, 6),
        (tt.hasSeasonOfYear, 7),
        (tt.hasPartOfDay, 8),
        (tt.hasTimeZone, 9),
    )
    for detector, code in keyword_codes:
        if detector(tok):
            return True, code

    # Dose durations count as temporal but share the -1 code with "no hit".
    return bool(tt.hasDoseDuration(tok)), -1