Пример #1
0
def buildChronoList(
    TimePhraseList,
    chrono_id,
    ref_list,
    PIclassifier,
    PIfeatures,
):
    """Collect the entities built by ``DoseDuration`` for every phrase.

    Each phrase in ``TimePhraseList`` is handed to
    ``DoseDuration.buildDoseDuration`` together with the running
    ``chrono_id`` counter, and the entities it returns are gathered in
    phrase order.

    Args:
        TimePhraseList: Iterable of phrase objects to parse.
        chrono_id: Running entity id counter. It is threaded through the
            builder, which returns the updated value.
        ref_list: Reference token list. It is passed through
            ``referenceToken.replacePunctuation`` and then
            ``referenceToken.lowercase`` before being given to the builder.
        PIclassifier: Forwarded unchanged to the builder.
        PIfeatures: Forwarded unchanged to the builder.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple: the flat list of entities
        produced for all phrases and the counter value returned by the last
        builder call (or the input value when there are no phrases).
    """
    chrono_list = []

    ## Do some further pre-processing on the ref token list:
    ## replace all punctuation with spaces, then convert to lowercase.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    for s in TimePhraseList:
        ## The builder is always started with an empty per-phrase list so the
        ## entities it returns belong to this phrase only.
        chrono_tmp_list, chrono_id = DoseDuration.buildDoseDuration(
            s, chrono_id, [], ref_list, PIclassifier, PIfeatures)

        ## Extend in place: re-creating the accumulated list with ``+`` for
        ## every phrase copies all earlier entities each time.
        chrono_list.extend(chrono_tmp_list)

    return chrono_list, chrono_id
Пример #2
0
def buildChronoListML(TimePhraseList, chrono_id, ref_list, X, classifier):
    """Build frequency entities for the phrases, using a classifier.

    The work is done in two passes:

    1. Every phrase for which ``hasSingular(s.getItems())`` is true becomes a
       ``chrono.ChronoFrequencyEntity`` labelled ``"Frequency"`` directly and
       is removed from ``TimePhraseList``.
    2. Each remaining phrase is paired with its row of ``X`` and passed to
       ``buildFrequencyML``.

    Args:
        TimePhraseList: Mutable list of phrase objects. NOTE: it is modified
            in place -- phrases handled in the first pass are removed from
            the caller's list, exactly as the original implementation did.
        chrono_id: Running entity id counter; incremented for every entity
            created here and threaded through ``buildFrequencyML``.
        ref_list: Reference token list. It is normalised through
            ``referenceToken.replacePunctuation`` and
            ``referenceToken.lowercase``, but the result is not used further
            by this function.
        X: Sized iterable with one entry per phrase that is left after the
            first pass.
        classifier: Forwarded unchanged to ``buildFrequencyML``.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple with all entities (first-pass
        entities first, then the classifier-built ones) and the updated
        counter.

    Raises:
        SystemExit: With exit status 1 (after printing a message) when the
            number of remaining phrases differs from ``len(X)``.
    """
    chrono_list = []

    ## Do some further pre-processing on the ref token list:
    ## replace all punctuation with spaces, then convert to lowercase.
    ## NOTE(review): the normalised list is not consumed below; the calls are
    ## kept in case the helpers have effects callers rely on -- confirm.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    ## First pass: split the phrases in a single sweep instead of popping
    ## from the list while indexing into it.
    remaining = []
    for s in TimePhraseList:
        if hasSingular(s.getItems()):
            chrono_list.append(
                chrono.ChronoFrequencyEntity(id=str(chrono_id) + "entity",
                                             label="Frequency",
                                             span=s.getSpan(),
                                             text=s.getText()))
            chrono_id += 1
        else:
            remaining.append(s)
    ## Slice assignment keeps the in-place removal visible to the caller.
    TimePhraseList[:] = remaining

    if len(TimePhraseList) != len(X):
        print("FATAL ERROR: LEN(PHRASE FEATURES)!=LEN(PHRASES)")
        ## ``exit`` is an interactive helper injected by the ``site`` module
        ## and may be missing; raising SystemExit gives the same exit status.
        raise SystemExit(1)

    ## Second pass: one feature row per remaining phrase.
    for s, x in zip(TimePhraseList, X):
        chrono_tmp_list, chrono_id = buildFrequencyML(s, chrono_id, [], x,
                                                      classifier)
        chrono_list.extend(chrono_tmp_list)

    return chrono_list, chrono_id
Пример #3
0
def buildChronoList(TimePhraseList, chrono_id, ref_list, PIclassifier, PIfeatures, dct=None):
    """Run every entity builder over each phrase and collect the results.

    For each phrase the builders are applied in a fixed order. Every builder
    receives the phrase, the running ``chrono_id`` counter and the entities
    found so far for that phrase, and returns updated versions of the latter
    two. Some builders additionally receive and return ``chrono_time_flags``.
    The per-phrase entities are finally passed to ``buildSubIntervals`` and
    its result is appended to the overall list.

    Args:
        TimePhraseList: Iterable of phrase objects to parse. Each phrase is
            printed to stdout before it is processed.
        chrono_id: Running entity id counter, threaded through every builder.
        ref_list: Reference token list. It is passed through
            ``referenceToken.replacePunctuation`` and
            ``referenceToken.lowercase`` before use.
        PIclassifier: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        PIfeatures: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        dct: Forwarded to ``TextMonthAndDay.buildTextMonthAndDay`` and
            ``buildSubIntervals`` (presumably the document creation time --
            TODO confirm). Defaults to ``None``.

    Returns:
        A ``(chrono_list, chrono_id)`` tuple: the flat list of entities for
        all phrases and the final counter value.
    """
    chrono_list = []

    ## Do some further pre-processing on the ref token list:
    ## replace all punctuation with spaces, then convert to lowercase.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    for s in TimePhraseList:
        print(s)
        chrono_tmp_list = []

        ## Per-phrase flags shared between the builders "so we don't
        ## duplicate effort" (original note). This is a dict keyed by
        ## component name; the builders that take it also return it.
        chrono_time_flags = {
            "loneDigitYear": False,
            "month": False,
            "day": False,
            "hour": False,
            "minute": False,
            "second": False,
            "fourdigityear": False,
            "twodigityear": False,
        }

        ## Year, two-digit year, month-of-year, day-of-month, hour-of-day,
        ## minute-of-hour and second-of-minute -- in that order.
        for build in (
            MonthYear.buildYear,
            MonthYear.build2DigitYear,
            MonthYear.buildMonthOfYear,
            DayOfMonth.buildDayOfMonth,
            HourOfDay.buildHourOfDay,
            MinuteOfHour.buildMinuteOfHour,
            SecondOfMinute.buildSecondOfMinute,
        ):
            chrono_tmp_list, chrono_id, chrono_time_flags = build(
                s, chrono_id, chrono_tmp_list, chrono_time_flags)

        ## Parse modifier text
        chrono_tmp_list, chrono_id = Modifier.buildModifierText(s, chrono_id, chrono_tmp_list)

        ## Non-standard formatting of temporal phrases
        for build in (
            NumericDate.buildNumericDate,
            TwentyFourHourTime.build24HourTime,
        ):
            chrono_tmp_list, chrono_id, chrono_time_flags = build(
                s, chrono_id, chrono_tmp_list, chrono_time_flags)

        chrono_tmp_list, chrono_id = DayOfWeek.buildDayOfWeek(s, chrono_id, chrono_tmp_list)
        chrono_tmp_list, chrono_id, chrono_time_flags = TextMonthAndDay.buildTextMonthAndDay(
            s, chrono_id, chrono_tmp_list, chrono_time_flags, dct, ref_list)
        ## AMPM reads the flags but does not return them.
        chrono_tmp_list, chrono_id = AMPM.buildAMPM(s, chrono_id, chrono_tmp_list, chrono_time_flags)

        for build in (PartOfDay.buildPartOfDay, PartOfWeek.buildPartOfWeek):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        chrono_tmp_list, chrono_id = Season.buildSeasonOfYear(s, chrono_id, chrono_tmp_list, ref_list)
        chrono_tmp_list, chrono_id = PeriodInterval.buildPeriodInterval(
            s, chrono_id, chrono_tmp_list, ref_list, PIclassifier, PIfeatures)

        for build in (
            TextYear.buildTextYear,
            This.buildThis,
            BeforeAfter.buildBeforeAfter,
        ):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        chrono_tmp_list, chrono_id = NthFromStart.buildNthFromStart(s, chrono_id, chrono_tmp_list, ref_list)

        for build in (TimeZone.buildTimeZone, Last.buildLast):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        tmplist, chrono_id = buildSubIntervals(chrono_tmp_list, chrono_id, dct, ref_list)
        ## Extend in place rather than re-creating the accumulated list with
        ## ``+`` for every phrase.
        chrono_list.extend(tmplist)
        ## TODO: buildDuration and buildSet are planned for future builds.

    return chrono_list, chrono_id
Пример #4
0
def buildChronoList(TimePhraseList,
                    chrono_id,
                    ref_list,
                    PIclassifier,
                    PIfeatures,
                    dct=None):
    """Run every entity builder over each phrase and collect the results.

    For each phrase the builders are applied in a fixed order. Every builder
    receives the phrase, the running ``chrono_id`` counter and the entities
    found so far for that phrase, and returns updated versions of the latter
    two. Some builders additionally receive and return ``chrono_time_flags``.
    The per-phrase entities are then passed to ``buildSubIntervals``. When
    that yields at least one entity, ``s.getISO`` is called with them, the
    phrase is reported on stdout and it is recorded in the returned phrase
    list.

    Args:
        TimePhraseList: Iterable of phrase objects to parse. Progress
            messages for each phrase are printed to stdout.
        chrono_id: Running entity id counter, threaded through every builder.
        ref_list: Reference token list. It is passed through
            ``referenceToken.replacePunctuation`` and
            ``referenceToken.lowercase`` before use.
        PIclassifier: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        PIfeatures: Forwarded to ``PeriodInterval.buildPeriodInterval``.
        dct: Forwarded to ``TextMonthAndDay.buildTextMonthAndDay`` and
            ``buildSubIntervals`` (presumably the document creation time --
            TODO confirm). Defaults to ``None``.

    Returns:
        A ``(chrono_list, chrono_id, timex_list)`` tuple: the flat list of
        entities for all phrases (phrase information is not kept in it), the
        final counter value, and the phrases for which ``buildSubIntervals``
        returned at least one entity.
    """
    chrono_list = []

    ## Do some further pre-processing on the ref token list:
    ## replace all punctuation with spaces, then convert to lowercase.
    ref_list = referenceToken.replacePunctuation(ref_list)
    ref_list = referenceToken.lowercase(ref_list)

    ## Only the phrases that produced at least one entity end up here.
    timex_list = []

    for s in TimePhraseList:
        print("\nNOW PARSING PHRASE: " + s.getText() + "\n")
        chrono_tmp_list = []

        ## Per-phrase flags shared between the builders "so we don't
        ## duplicate effort" (original note). This is a dict keyed by
        ## component name; the builders that take it also return it.
        chrono_time_flags = {
            "loneDigitYear": False,
            "month": False,
            "day": False,
            "hour": False,
            "minute": False,
            "second": False,
            "fourdigityear": False,
            "twodigityear": False,
        }

        ## Year, two-digit year, month-of-year and day-of-month.
        for build in (
                MonthYear.buildYear,
                MonthYear.build2DigitYear,
                MonthYear.buildMonthOfYear,
                DayOfMonth.buildDayOfMonth,
        ):
            chrono_tmp_list, chrono_id, chrono_time_flags = build(
                s, chrono_id, chrono_tmp_list, chrono_time_flags)

        ## Parse AMPM before Hour of Day. AMPM reads the flags but does not
        ## return them.
        chrono_tmp_list, chrono_id = AMPM.buildAMPM(s, chrono_id,
                                                    chrono_tmp_list,
                                                    chrono_time_flags)

        ## Hour-of-day, minute-of-hour and second-of-minute.
        for build in (
                HourOfDay.buildHourOfDay,
                MinuteOfHour.buildMinuteOfHour,
                SecondOfMinute.buildSecondOfMinute,
        ):
            chrono_tmp_list, chrono_id, chrono_time_flags = build(
                s, chrono_id, chrono_tmp_list, chrono_time_flags)

        ## Parse modifier text
        chrono_tmp_list, chrono_id = Modifier.buildModifierText(
            s, chrono_id, chrono_tmp_list)

        ## Non-standard formatting of temporal phrases
        for build in (
                NumericDate.buildNumericDate,
                TwentyFourHourTime.build24HourTime,
        ):
            chrono_tmp_list, chrono_id, chrono_time_flags = build(
                s, chrono_id, chrono_tmp_list, chrono_time_flags)

        chrono_tmp_list, chrono_id = DayOfWeek.buildDayOfWeek(
            s, chrono_id, chrono_tmp_list)
        chrono_tmp_list, chrono_id, chrono_time_flags = TextMonthAndDay.buildTextMonthAndDay(
            s, chrono_id, chrono_tmp_list, chrono_time_flags, dct, ref_list)

        for build in (PartOfDay.buildPartOfDay, PartOfWeek.buildPartOfWeek):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        chrono_tmp_list, chrono_id = Season.buildSeasonOfYear(
            s, chrono_id, chrono_tmp_list, ref_list)
        chrono_tmp_list, chrono_id = PeriodInterval.buildPeriodInterval(
            s, chrono_id, chrono_tmp_list, ref_list, PIclassifier, PIfeatures)

        for build in (
                TextYear.buildTextYear,
                This.buildThis,
                BeforeAfter.buildBeforeAfter,
        ):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        chrono_tmp_list, chrono_id = NthFromStart.buildNthFromStart(
            s, chrono_id, chrono_tmp_list, ref_list)

        for build in (
                TimeZone.buildTimeZone,
                Last.buildLast,
                Frequency.buildFrequency,
        ):
            chrono_tmp_list, chrono_id = build(s, chrono_id, chrono_tmp_list)

        ## NOTE(review): looks like a leftover debug marker; kept so the
        ## stdout output stays unchanged.
        print("XXXXXXXXX")

        ## tmplist holds the entities for this single phrase and may be empty.
        tmplist, chrono_id = buildSubIntervals(chrono_tmp_list, chrono_id, dct,
                                               ref_list)

        if tmplist:
            print("Converting phrase to ISO: " + str(s))
            ## The return value is not used; the phrase is printed again
            ## afterwards, so getISO presumably updates it in place.
            s.getISO(tmplist)
            print("ISO Value: " + str(s))
            print("TIMEX3 String: " + s.i2b2format())
            timex_list.append(s)

        ## Extend in place rather than re-creating the accumulated list with
        ## ``+`` for every phrase.
        chrono_list.extend(tmplist)
        ## TODO: buildDuration and buildSet are planned for future builds.

    return chrono_list, chrono_id, timex_list