Exemplo n.º 1
0
def process_files(files, opts):
    """ Process each file: build a BILOU tier for each requested tier, then save the result
        @param files the file(s) to process
        @param opts the options; the attributes read here are keep_tiers,
            tiers_names, out_tier_format, base_time, radius, labels,
            bilu_format, o_label and out_file_format
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()
    # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription, Tier  #, TimePoint, TimeInterval, Label, Annotation
    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))

        # Prepare the output Transcription
        destTrs = trs
        destAppendProcessed = False
        # default/'all' => work directly on trs
        if opts.keep_tiers.startswith('process'):
            # empty copy of trs, to which the processed tiers are appended
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())
            destAppendProcessed = True
        elif opts.keep_tiers in ('any', 'bilou'):
            # empty copy of trs, only the BILOU tiers are appended
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())

        # Look for the tier to process
        for tier_name in opts.tiers_names:
            tier = sppas_tools.tierFind(trs, tier_name)
            if tier is None:
                # report the name that was actually searched (the loop
                # variable), not an attribute of opts
                print("[%s] Any tier with name similar to '%s' ;-(" %
                      (f, tier_name))
                print("[%s] Tiers are : %s" % (f, ''.join([
                    "{}[{}] '{}'".format("\n   " if (i % 4) == 0 else ", ", i,
                                         t.GetName())
                    for i, t in enumerate(trs)
                ])))
                # stop looking for the remaining tiers of this file;
                # the file is still saved below with what was built so far
                break
            print("[%s] Searched tier '%s' has %d annotations" %
                  (f, tier.GetName(), tier.GetSize()))
            if destAppendProcessed:
                destTrs.Append(tier)
            # Create the BILOU tier
            bilouName = opts.out_tier_format.format(tier_name, opts.base_time)
            bilouTier = Tier(bilouName)
            splitIntervals(bilouTier, opts.base_time, trs.GetMaxTime(),
                           trs.GetMinTime(), opts.radius)
            bilouTags(tier, bilouTier, opts.labels, opts.bilu_format,
                      opts.o_label)
            print("[%s] BILOU tier '%s' has %d annotations" %
                  (f, bilouTier.GetName(), bilouTier.GetSize()))
            destTrs.Append(bilouTier)
        # Saving file: the output name is built from the input name without
        # its extension and the '+'-joined tier names, keeping the extension
        (root, ext) = os.path.splitext(f)
        of = opts.out_file_format.format(root, "+".join(
            opts.tiers_names)) + ext
        print("[%s] Saving annotations into %s" % (f, of))
        annotationdata.aio.write(of, destTrs)
Exemplo n.º 2
0
def getTier(trs, tierName=None, errorMsg=None):
    """
    Return the tier designated by the arguments
    @param trs: either a Tier (returned unchanged) or an object in which
        the tier is searched by name
    @param tierName: the name of the tier to search
    @param errorMsg:    (optional) message (format) of the ValueError raised when no tier is found;
        it is formatted with the local variables (trs, tierName, errorMsg, tier);
        if falsy, no ValueError is raised (=> None is returned)
    @return:    the tier found or None
    """
    from annotationdata import Tier
    if not isinstance(trs, Tier):
        # local names are kept stable: they are exposed to errorMsg through locals()
        tier = sppas_tools.tierFind(trs, tierName)
        if errorMsg and tier is None:
            raise ValueError(errorMsg.format(**locals()))
        return tier
    # already a tier
    #TODO? check tierName
    return trs
Exemplo n.º 3
0
def process_files(files, opts):
    """ Process each file: save the tier named opts.tier_name alone into a '.csv' file
        @param files the file(s) to process
        @param opts the options; only opts.tier_name is read here
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()  # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription
    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file, put content in a Transcription object.
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))
        tier = sppas_tools.tierFind(trs, opts.tier_name)
        if tier is None:
            print("[%s] Any tier with name similar to '%s' ;-(" % (f, opts.tier_name))
            print("[%s] Tiers are : %s" % (f, ''.join([
                "{}[{}] '{}'".format("\n   " if (i % 4) == 0 else ", ", i, t.GetName())
                for i, t in enumerate(trs)
            ])))
            # NOTE(review): this leaves the loop, so the remaining files are
            # not processed -- confirm this is intended (vs. 'continue')
            break
        print("[%s] Searched tier '%s' has %d annotations" % (f, tier.GetName(), tier.GetSize()))
        csv = Transcription()
        csv.Append(tier)
        # Output name: the trailing ".ext" (if any) is replaced by "-<tier_name>.csv".
        # The suffix is always appended, so a file name without extension can
        # no longer yield an output name equal to the input name, and the tier
        # name is taken literally (not as a regex replacement template).
        ext_match = re.search(r"\.\w+$", f)
        root = f[:ext_match.start()] if ext_match else f
        of = root + "-" + opts.tier_name + ".csv"
        print("[%s] Saving tier into %s" % (f, of))
        annotationdata.aio.write(of, csv)
Exemplo n.º 4
0
def process_feedback_per_phases(trs,
                                fb_tierName='P-Feedback',
                                phases_tierName='Script',
                                most_common=False):
    """
    Process relation between feedbacks and 'phases' (Script, eye's directions, ...)

    For each phase label, prints statistics on the phase annotations and on
    the feedbacks related to them.
    @param trs: the object in which both tiers are searched
    @param fb_tierName: name of the feedbacks tier
    @param phases_tierName: name of the phases tier
    @param most_common: if true, the phases are processed by decreasing number
        of occurrences, else in their order of first appearance
    @return: the 'res' object on which the tiers, the phases and the per-phase
        results are stored; None if one of the tiers is not found
    """
    from annotationdata import Filter, SingleFilter, Sel, RelationFilter, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, OrPredicates

    #TODO? parameters
    fb_phases_min_overlap = 1.  # minimum overlap time for overlaps/overlappedby relations

    # NOTE: the namedtuple class itself is used as a plain attribute holder
    # (it is never instantiated), the fields are set as class attributes
    res = namedtuple(
        'res',
        "phases_tier, phases, phases_counter, phases_radius, perphases" +
        ", fb_tier, fb_durations, fb_duration_stats, fb_radius"
    )  # list of fields

    # looking for phases labels
    not_found = 0
    res.phases_tier = sppas_tools.tierFind(trs, phases_tierName)
    # (a) Phases
    if res.phases_tier is None:
        print("\t[{phases_tierName}] No phases tier found ;-(".format(
            **locals()))
        not_found += 1
    # (b) 'P-Feedback'
    res.fb_tier = sppas_tools.tierFind(trs, fb_tierName)
    if res.fb_tier is None:
        print("[{fb_tierName}] No feedbacks tier found ;-(".format(**locals()))
        not_found += 1
    if not_found:
        # the placeholders are now filled (same "[fb/phases]" prefix as the
        # summary line below)
        print("[%s] %d unfound tier(s) => skip this file" %
              ("%s/%s" % (fb_tierName, phases_tierName), not_found))
        return

    # Look for the various phases
    res.phases = []
    res.perphases = dict()
    res.phases_counter = Counter()
    for ph_ann in res.phases_tier:
        phase = ph_ann.GetLabel().GetValue()
        res.phases_counter[phase] += 1
        if res.phases_counter[phase] == 1:  # first occurrence => init the phase
            res.phases.append(phase)
            res.perphases[phase] = namedtuple(
                'perph', "phase, count, tier, durations, sumduration")
            res.perphases[phase].phase = phase
    # sum of annotations/durations of phases_tier
    ptSize = len(res.phases_tier)
    ptSumDurations = sum(durations(res.phases_tier))
    print(
        "\t[{fb_tierName}/{phases_tierName}] {nbphases} phases (#annots:{ptSize}, sum_durations={ptSumDurations:.3f}), {fbsize} feedbacks"
        .format(nbphases=len(res.phases), fbsize=len(res.fb_tier), **locals()))
    if not len(res.phases):
        # no phase at all
        return res

    # sort phases by occurrences
    if most_common:
        res.phases = [
            phase for (phase, count) in res.phases_counter.most_common()
        ]

    # phases_tier filter
    ptFilter = Filter(res.phases_tier)
    # split feedback tier by phases
    fbFilter = Filter(res.fb_tier)
    phRel = OrPredicates(
        Rel('during')  # fb during the phase
        ,
        Rel('starts')  # fb starts with the phase (and is shorter)
        ,
        Rel('finishes')  # fb ends with the phase (and is shorter)
        ,
        Rel(
            overlappedby=fb_phases_min_overlap
        )  # fb overlapped by the phase (i.e. starts during the phase but ends after)
        #?, Rel('startedby') #? fb starts with the phase and is longer
    )
    #for phase, perph in res.perphases.items():
    for phase in res.phases:
        # (the names 'phase' and 'perph' are read by the format strings through locals())
        perph = res.perphases[phase]
        perph.count = res.phases_counter[phase]
        # annotations of the phases tier whose label is exactly this phase
        phaseFilter = SingleFilter(Sel(exact=phase), ptFilter)
        perph.tier = phaseFilter.Filter()
        perph.durations = durations(perph.tier)
        perph.sum_durations = sum(perph.durations)
        print(
            "\t  Phase:'{phase}' => #annot={perph.count} ({pannot:.0%}), sum_durations={perph.sum_durations} ({psdur:.0%}) (mean={s.Mean:.3f}, min={s.Min:.3f}, max={s.Max:.3f})"
            .format(s=stats(perph.durations),
                    pannot=float(perph.count) / ptSize,
                    psdur=perph.sum_durations / ptSumDurations,
                    **locals()))
        # feedbacks related (phRel) to an annotation of this phase
        rf = RelationFilter(phRel, fbFilter, phaseFilter)
        perph.fb_tier = rf.Filter()
        perph.fb_count = len(perph.fb_tier)
        perph.fb_durations = durations(perph.fb_tier)
        perph.fb_sum_durations = sum(perph.fb_durations)
        perph.fb_per_sec = perph.fb_count / perph.sum_durations if perph.fb_count else 0
        perph.sec_per_fb = perph.sum_durations / perph.fb_count if perph.fb_count else 0
        print(
            "\t   #feedback={perph.fb_count}, freq={perph.fb_per_sec:.3f}/s (every {perph.sec_per_fb:.3f}s), sum_durations={perph.fb_sum_durations:.3f} ({sdurpercent:.0%})"
            .format(sdurpercent=perph.fb_sum_durations / perph.sum_durations,
                    **locals()))
        if perph.fb_count:
            print(
                "\t    durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f}, {s.Max:.3f}]"
                .format(s=stats(perph.fb_durations), **locals()))
            statsPerLabel(perph.fb_tier, "\t\t", normLabelWithSep
                          #TODO(pb repeated phases)# , intraDelays
                          )
    return res
Exemplo n.º 5
0
def process_feedback_eyes(trs, fb_tierName='Feedback', eyes_tierName='Regard'):
    """
    Process relation between feedbacks and eyes direction

    Links each feedback (X) with the eyes-direction intervals (Y) it meets,
    classifies the feedbacks by number of eyes-direction changes (none, one,
    various) and prints the eyes-direction labels and transitions of each group.
    @param trs: the object in which both tiers are searched
    @param fb_tierName: name of the feedbacks tier
    @param eyes_tierName: name of the eyes-direction tier
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, OrPredicates

    #TODO? parameters
    fb_eyes_min_overlap = 1.  # minimum overlap time for overlaps/overlappedby relations

    # NOTE: the namedtuple class itself is used as a plain attribute holder
    res = namedtuple('res', "eyes_tier, eyes_radius" +
                     ", fb_tier, fb_durations, fb_duration_stats, fb_radius"
                     )  # list of fields
    # Search the eyes-direction and feedback tiers
    not_found = 0

    # (a) eyes direction
    res.eyes_tier = sppas_tools.tierFind(trs, eyes_tierName)
    if res.eyes_tier is None:
        print("\t[{eyes_tierName}] No eyes direction tier found ;-(".format(
            **locals()))
        not_found += 1
    # (b) feedbacks
    res.fb_tier = sppas_tools.tierFind(trs, fb_tierName)
    if res.fb_tier is None:
        print("[{fb_tierName}] No feedbacks tier found ;-(".format(**locals()))
        not_found += 1
    if not_found:
        # the placeholders are now filled (same "[fb|eyes]" prefix as the
        # summary line below)
        print("[%s] %d unfound tier(s) => skip this file" %
              ("%s|%s" % (fb_tierName, eyes_tierName), not_found))
        return

    # Combine the 2 tiers
    # - create the predicates
    # [1] 'convergent' a combination of Allen relations for 2 intervals that (partly) overlaps
    pStable = IntervalsDelay.create(
        'start_start_after_min', 0, 'end_end_min', 0, name='stable'
    )  # X during Y <=> X starts after Y and X ends before Y <=> (Xs>Ys) and (Xe<Ye)
    pBefore = IntervalsDelay.create(
        'start_start_after_min',
        0,
        'start_end_min',
        0,
        'end_end_after_min',
        0,
        name='before'
    )  # Y ends during X (Y before X) <=> (Xs>Ys) and (Xs<Ye) and (Xe>Ye)
    pInside = IntervalsDelay.create(
        'start_start_min', 0, 'end_end_after_min', 0, name='inside'
    )  # Y inside X <=> X starts before Y and X ends after Y <=> (Xs<Ys) and (Xe>Ye)
    pAfter = IntervalsDelay.create(
        'start_start_min',
        0,
        'end_start_after_min',
        0,
        'end_end_min',
        0,
        name='after'
    )  # Y starts during X (Y after X) <=> (Xs<Ys) and (Xe>Ys) and (Xe<Ye)
    #print("pStable={pStable:r}, str()={pStable:s}; pBefore={pBefore:r}, str()={pBefore:s}; pAfter={pAfter:r}, str()={pAfter:s}; pInside={pInside:r}, str()={pInside:s};".format(**locals()))
    #-- pConv = Rel('convergent')
    #-- pConv = OrPredicates( Rel('during') # X during Y => no eyes direction change during the feedback
    #        # Y starts before  Y
    #        , Rel(overlappedby=fb_eyes_min_overlap) # X overlappedby Y (Y starts before X) => eyes direction changes during the feedback
    #        , Rel('finishes')  # X finishes Y (Y starts before X) => eyes direction changes just after the feedback
    #        , IntervalsDelay.create(start_end=(-fb_eyes_min_overlap, 0)) # add -max < start_end < 0
    #        # Y inside X
    #        , Rel('startedby')  # X startedby Y (same start, X longer) => eyes direction changes when the feedback start, and change anoter time ~ contains
    #        , Rel('contains')   # X contains Y => at least 2 changes during the feedback
    #        , Rel('finishedby') # X finishedby Y (same end, X longer) => eyes direction changes during AND just after the feedback
    #        # Y ends after X
    #        , Rel('starts') # X starts Y (same start, Y longer) => eyes direction changes when the feedback start (an is maintain)
    #        , Rel(overlaps=fb_eyes_min_overlap) # X overlaps Y (Y start during X) => eyes direction changes during the feedback
    #        , IntervalsDelay.create(end_start=(0, fb_eyes_min_overlap)) # add 0 < end_start < max
    #--      )
    # nota: OrPredicates orders
    #    if X=Y, all predicates are true
    #    if X finishes Y, pBefore and pStable are true
    #    if X started by Y, pAfter and pStable are true
    pConv = OrPredicates(pInside, pBefore, pAfter, pStable)

    def sbia(rel):
        """
        Classify relation in 'stable', 'before'/'inside'/'after'
        (based on str(rel); returns None for an unknown relation)
        """
        if str(rel) in ['stable', 'before', 'inside', 'after']:
            return str(rel)
        elif str(rel) == 'during':
            return 'stable'  # X during Y => stable Y before/inside/after
        elif str(rel) in ['overlappedby', 'finishes']:
            return 'before'  # X rel Y => Y starts before X
        elif str(rel) in ['startedby', 'contains', 'finishedby']:
            return 'inside'  # X rel Y => Y inside X
        elif str(rel) in ['starts', 'overlaps']:
            return 'after'  # X rel Y => Y ends after X
        elif str(rel).startswith('start_end'):
            return 'before'  # -max < X start_end Y < 0 => Y ends just before X
        elif str(rel).startswith('end_start'):
            return 'after'  # 0 < X end_start Y < max => Y starts just after X
        return
        #ERROR

    fEyes = Filter(res.eyes_tier)
    fFb = Filter(res.fb_tier)
    rf = RelationFilter(pConv, fFb, fEyes)
    rConv = [(x, rel, y) for (x, rel, y) in rf]
    print(
        "\t[{fb_tierName}|{eyes_tierName}] {tier_len} feedbacks-eyes links (for {fb_len} feedbacks and {eyes_len} eyes)"
        .format(tier_len=len(rConv),
                fb_len=len(res.fb_tier),
                eyes_len=len(res.eyes_tier),
                **locals()))
    if len(rConv) == 0:
        return  # no feedback linked to an eyes direction
    # group relation by 1st interval
    xrels = {}
    for (x, rel, y) in rf:
        if x not in xrels: xrels[x] = []  # init
        xrels[x].append((x, rel, y))
    # organize the relations associated to x
    xgroups = {}
    groups = {}
    # groups
    xtransitions = {}  # eye's direction transitions
    for (x, lst) in xrels.items():
        # sort based on y order (each item is (x, rel, y));
        # 'key=' replaces the Python-2-only 'cmp=' argument
        lst.sort(key=lambda item: item[2])
        xtransitions[x] = []
        xgroups[x] = {}
        # 3 cases:
        # (a) no eyes direction change <=> X during Y  ('stable' group)
        # (b) only one change 'during' X => one Y 'before' (overlappedby; finishes; ~start_end<0) and one Y 'after' (overlaps; ~end_start>0; starts)
        # (c) various changes 'during' X => one Y 'before' (overlappedby; ~start_end<0), one Y 'after' (overlaps; ~end_start>0), and others Ys 'inside' (contains, startedby, finishedby)
        lastY = None
        for (xi, rel, y) in lst:
            # - transition
            if lastY is not None:
                xtransitions[x].append(' -> '.join([
                    str(lastY.GetLabel().GetValue()),
                    str(y.GetLabel().GetValue())
                ]))
            lastY = y
            # - group
            ygr = sbia(rel)
            if ygr not in xgroups[x]: xgroups[x][ygr] = []
            xgroups[x][ygr].append((x, rel, y))
        # In which case are we ?
        if 'stable' in xgroups[x]:  # case (a)
            xgr = 'any'
        elif 'inside' in xgroups[x]:  # case (c)
            xgr = 'various'
        else:  # case (b)
            xgr = 'one'
        if xgr not in groups: groups[xgr] = []
        groups[xgr].append(x)
    allTransitionsCnt = Counter()
    allGrRels = {'stable': [], 'before': [], 'inside': [], 'after': []}
    for y1 in ['before', 'inside', 'after']:
        allGrRels["(%s + stable)" % y1] = []
    yLabels = {}
    yLabelsCnt = {}
    for nb in ['any', 'one', 'various']:
        if nb not in groups: continue
        xAnnots = groups[nb]
        # (the names 'lst_len' and 'percent' are read by the format strings through locals())
        lst_len = len(xAnnots)
        percent = float(lst_len) / len(res.fb_tier)
        # groups
        ygroups = []
        if nb == 'any':  # 0 change, only 1 interval <=> X during Y
            print(
                "\t  No change : {lst_len} feedbacks-eyes links ({percent:.1%})"
                .format(**locals()))
            ygroups = ['stable']
        elif nb == 'one':  # 1 change, 2 intervals => before/after (i.e. X overlapped by Y[0] and X overlaps Y[1])
            print(
                "\t  One change : {lst_len} feedbacks-eyes links ({percent:.1%})"
                .format(**locals()))
            ygroups = ['before', 'after']
        else:  # nb changes, nb+1 intervals => before/contains+/after  (i.e. X overlapped by Y[0] and X contains Y[1:-1] and X overlaps Y[-1])
            print(
                "\t  Various changes : {lst_len} feedbacks-eyes links ({percent:.1%})"
                .format(**locals()))
            ygroups = ['before', 'inside', 'after']
        grRels = {}
        yLabels = {}
        yLabelsCnt = {}
        for ygr in ygroups:
            grRels[ygr] = []
            for x in xAnnots:
                if ygr in xgroups[x]:
                    grRels[ygr] += xgroups[x][ygr]
                #TODO   else: print warning
            allGrRels[ygr] += grRels[ygr]
            if ygr == 'stable':
                for y1 in ['before', 'inside', 'after']:
                    allGrRels["(%s + stable)" % y1] += grRels[ygr]
            else:
                allGrRels["(%s + stable)" % ygr] += grRels[ygr]
            yLabels[ygr] = [
                str(y.GetLabel().GetValue()) for (x, rel, y) in grRels[ygr]
            ]
            yLabelsCnt[ygr] = Counter(yLabels[ygr])
            print("\t   {ygr} eyes-direction: {cnt}".format(
                cnt=counterWithPercent(yLabelsCnt[ygr]), **locals()))
        # transitions
        if nb != 'any':
            transitionsCnt = Counter()
            for x in xAnnots:
                if x in xtransitions:
                    transitionsCnt.update(xtransitions[x])
                    #for t in xtransitions[x]: transitionsCnt[t] += 1
            print("\t   transitions: {cnt}".format(cnt=counterWithPercent(
                transitionsCnt, sep="\n\t                "),
                                                   **locals()))
            allTransitionsCnt.update(transitionsCnt)
    # ALL
    print("\t  All : {x_len} feedbacks-eyes".format(x_len=len(xrels.keys()),
                                                    **locals()))
    for ygr in [
            'stable', 'before', '(before + stable)', 'inside',
            '(inside + stable)', 'after', '(after + stable)'
    ]:
        yLabels[ygr] = [
            str(y.GetLabel().GetValue()) for (x, rel, y) in allGrRels[ygr]
        ]
        yLabelsCnt[ygr] = Counter(yLabels[ygr])
        print("\t   {ygr} eyes-direction: {cnt}".format(cnt=counterWithPercent(
            yLabelsCnt[ygr]),
                                                        **locals()))
    # transitions cumulated over all the groups (not only the last group,
    # whose counter does not even exist when every feedback is 'stable')
    print("\t   transitions: {cnt}".format(cnt=counterWithPercent(
        allTransitionsCnt, sep="\n\t                "),
                                           **locals()))
Exemplo n.º 6
0
def process_feedback_after(trs,
                           pFb_tierName='P-Feedback',
                           mVoc_tierName='Vocabulaire',
                           after_Max=1.,
                           perLabel=False,
                           after_tierAppend=False,
                           after_tierName=None):
    """
    Process relation between (patient) feedbacks during/after another tier (b.e. Vocabulaire, ...)

    @param trs: the object in which both tiers are searched
    @param pFb_tierName: name of the feedbacks tier (X)
    @param mVoc_tierName: name of the other tier (Y)
    @param after_Max: upper bound of the (X start - Y end) delay
    @param perLabel: if true, statsPerLabel() is also called for each group
    @param after_tierAppend: if true, the tier of linked feedbacks is renamed
        after_tierName and appended to trs
    @param after_tierName: name (format) of the resulting tier; a string is
        formatted with the local variables (i.e. the parameters), any other
        value selects the default "<pFb_tierName> after <mVoc_tierName>"
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter  #, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, AndPredicates

    #TODO? parameters
    # NOTE: format(**locals()) exposes the parameters by name to the format
    if isinstance(after_tierName, basestring):
        after_tierName = after_tierName.format(**locals())
    else:
        # default
        after_tierName = "{pFb_tierName} after {mVoc_tierName}".format(
            **locals())
    #after_Max = 1.   # max time between feedback and previous vocabulary

    # NOTE: the namedtuple class itself is used as a plain attribute holder
    res = namedtuple(
        'res', "mVoc_tier, mVoc_durations, mVoc_duration_stats, mVoc_radius" +
        ", pFb_tier, pFb_durations, pFb_duration_stats, pFb_radius"
    )  # list of fields
    # Search  Vocabulaire and P-Feedback tiers
    not_found = 0

    # (a) 'Vocabulaire'
    res.mVoc_tier = sppas_tools.tierFind(trs, mVoc_tierName)
    if res.mVoc_tier is None:
        print("\t[{mVoc_tierName}] No tier found ;-(".format(**locals()))
        not_found += 1
    # (b) 'P-Feedback'
    res.pFb_tier = sppas_tools.tierFind(trs, pFb_tierName)
    if res.pFb_tier is None:
        print("[{pFb_tierName}] No tier found ;-(".format(**locals()))
        not_found += 1
    if not_found:
        # the placeholders are now filled (same "[after_tierName]" prefix as
        # the summary line below)
        print("[%s] %d unfound tier(s) => skip this file" %
              (after_tierName, not_found))
        return

    # Combine the 2 tiers
    # - create the predicates
    # [1] pDuringAfter <=> P-feedback(X) start during or a few time(1s) after Vocabulaire (Y)
    #pDuringAfter = IntervalsDelay.create('start_start', (None, 0) # X=Pfb starts after Y=MVoc starts <=> Xs >= Ys <=> Ys-Xs <= 0
    #        , 'start_end', (-after_Max, None) # AND X=Pfb starts at least 1s after Y=MVoc ends <=> -inf < Xs-Ye <= 1s <=> -inf > Ye-Xs => 1s
    #        )
    # (a) pStartStart : X (P-feedback) starts after Y (Vocabulaire) starts <=> delay(Xstart - Ystart) >= 0
    pStartStart = IntervalsDelay.create(
        'after', 'start_start_min',
        0)  # X=Pfb starts after Y=MVoc starts <=> Xs >= Ys <=> Ys-Xs <= 0
    # (b) pStartEnd : X (P-feedback) starts at the latest 1s after Y (Vocabulaire) ends <=> -infinity < delay(Xstart - Yends) <= 1s
    #   nota: -infinity as X can start during Y, the pStartStart allow to eliminate the case of X start before Y
    pStartEnd = IntervalsDelay.create('after', 'start_end', (None, after_Max))
    # =>
    pDuringAfter = AndPredicates(pStartStart, pStartEnd)

    fMVoc = Filter(res.mVoc_tier)
    fPFb = Filter(res.pFb_tier)
    rf = RelationFilter(pDuringAfter, fPFb, fMVoc)
    newtier = rf.Filter(annotformat="{x} [after({y})]")
    res.pFb_mVoc_tier = newtier
    if after_tierAppend:
        newtier.SetName(after_tierName)
        trs.Append(newtier)  # ?
    print(
        "\t[{after_tierName}] {tier_len} (of {pFb_len}) {pFb_tierName} during/after({after_Max}s) a {mVoc_tierName}"
        .format(tier_len=len(res.pFb_mVoc_tier),
                pFb_len=len(res.pFb_tier),
                **locals()))
    #-- # (1) Annotation, duration
    #-- res.pFb_mVoc_durations = durations(res.pFb_mVoc_tier)
    #-- res.pFb_mVoc_duration_stats = stats(res.pFb_mVoc_durations)
    #-- print("\t  durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f}, {s.Max:.3f}]".format(s=res.pFb_mVoc_duration_stats, **locals()))
    # Analyse rf results
    if True:
        groups = {'after': [], 'during': [], 'all': []}
        # group result between after/during
        for x, rel, y in rf:
            groups['all'].append((x, rel, y))
            if rel[1].delay > 0:  # rel[1] corresponds to pStartEnd => gives the Xstart-Yend delay
                # feedback starts strictly after the vocabulaire
                groups['after'].append((x, rel, y))
            else:
                # feedback starts during the vocabulaire
                groups['during'].append((x, rel, y))
        # print the statistics of each non-empty group
        for gkey in ['all', 'during', 'after']:
            group = groups[gkey]
            if not len(group):
                continue
            ssDelays = [rel[0].delay
                        for (x, rel, y) in group]  # rel[0] is pStartStart
            ssStats = stats(ssDelays)
            seDelays = [rel[1].delay
                        for (x, rel, y) in group]  # rel[1] is pStartEnd
            seStats = stats(seDelays)
            xDurStats = stats(durations([x for (x, rel, y) in group
                                         ]))  # p-Feedback durations
            yDurStats = stats(durations([y for (x, rel, y) in group
                                         ]))  # Vocabulary durations
            # (the names 'gkey' and 'linked_to' are read by the format strings through locals())
            linked_to = "'linked' to" if gkey == 'all' else gkey
            print(
                "\t  {gkey}: {gsize} {pFb_tierName} {linked_to} a {mVoc_tierName}"
                .format(gsize=len(group), **locals()))
            print(
                "\t    Start-Start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                .format(s=ssStats))
            if gkey != 'during':
                print(
                    "\t    End-Start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                    .format(s=seStats))
            print(
                "\t    {mVoc_tierName} durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"
                .format(s=yDurStats, **locals()))
            print(
                "\t    {pFb_tierName} durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"
                .format(s=xDurStats, **locals()))
            if perLabel:
                statsPerLabel([x for (x, rel, y) in group], "\t\t",
                              normLabelWithSep)
Exemplo n.º 7
0
def process_pFb_mVoc(trs,
                     pFb_tierName='P-Feedback',
                     mVoc_tierName='Vocabulaire',
                     perLabel=False):
    """
    Process relation between patient feedbacks and medical vocabulary

    Builds the tier of the feedbacks (X) starting during, or shortly after, a
    vocabulary annotation (Y), appends it to trs under the name
    'P-fb-after-M-Voc' and prints statistics on the links found.
    @param trs: the object in which both tiers are searched, and to which the
        resulting tier is appended
    @param pFb_tierName: name of the patient feedbacks tier
    @param mVoc_tierName: name of the medical vocabulary tier
    @param perLabel: if true, statsPerLabel() is also called for each group
    @return: None (results are only printed)
    """
    from annotationdata import Filter, RelationFilter  #, Rel
    from annotationdata.filter.delay_relations import IntervalsDelay, AndPredicates

    #TODO? parameters
    pFb_mVoc_tierName = 'P-fb-after-M-Voc'
    pFbStart_mVocEnd_Max = 1.  # max time between feedback and previous vocabulary

    # NOTE: the namedtuple class itself is used as a plain attribute holder
    res = namedtuple(
        'res', "mVoc_tier, mVoc_durations, mVoc_duration_stats, mVoc_radius" +
        ", pFb_tier, pFb_durations, pFb_duration_stats, pFb_radius"
    )  # list of fields
    # Search  Vocabulaire and P-Feedback tiers
    not_found = 0

    # (a) 'Vocabulaire'
    res.mVoc_tier = sppas_tools.tierFind(trs, mVoc_tierName)
    if res.mVoc_tier is None:
        print(
            "\t[{mVoc_tierName}] No medecin's medical vocabulary tier found ;-("
            .format(**locals()))
        not_found += 1
    # (b) 'P-Feedback'
    res.pFb_tier = sppas_tools.tierFind(trs, pFb_tierName)
    if res.pFb_tier is None:
        print("[{pFb_tierName}] No patient's feedbacks tier found ;-(".format(
            **locals()))
        not_found += 1
    if not_found:
        # the placeholders are now filled (same "[pFb_mVoc_tierName]" prefix
        # as the summary line below)
        print("[%s] %d unfound tier(s) => skip this file" %
              (pFb_mVoc_tierName, not_found))
        return

    # Combine the 2 tiers
    # - create the predicates
    # [1] pDuringAfter <=> P-feedback(X) start during or a few time(1s) after Vocabulaire (Y)
    #pDuringAfter = IntervalsDelay.create('start_start', (None, 0) # X=Pfb starts after Y=MVoc starts <=> Xs >= Ys <=> Ys-Xs <= 0
    #        , 'start_end', (-pFbStart_mVocEnd_Max, None) # AND X=Pfb starts at least 1s after Y=MVoc ends <=> -inf < Xs-Ye <= 1s <=> -inf > Ye-Xs => 1s
    #        )
    # (a) pStartStart : X (P-feedback) starts after Y (Vocabulaire) starts <=> delay(Xstart - Ystart) >= 0
    pStartStart = IntervalsDelay.create(
        'after', 'start_start_min',
        0)  # X=Pfb starts after Y=MVoc starts <=> Xs >= Ys <=> Ys-Xs <= 0
    # (b) pStartEnd : X (P-feedback) starts at the latest 1s after Y (Vocabulaire) ends <=> -infinity < delay(Xstart - Yends) <= 1s
    #   nota: -infinity as X can start during Y, the pStartStart allow to eliminate the case of X start before Y
    pStartEnd = IntervalsDelay.create('after', 'start_end',
                                      (None, pFbStart_mVocEnd_Max))
    # =>
    pDuringAfter = AndPredicates(pStartStart, pStartEnd)

    fMVoc = Filter(res.mVoc_tier)
    fPFb = Filter(res.pFb_tier)
    rf = RelationFilter(pDuringAfter, fPFb, fMVoc)
    newtier = rf.Filter(annotformat="{x} [after({y})]")
    newtier.SetName(pFb_mVoc_tierName)
    res.pFb_mVoc_tier = newtier
    print(
        "\t[{pFb_mVoc_tierName}] {tier_len} (of {pFb_len}) patient feedbacks during/after a medic vocabulary"
        .format(tier_len=len(res.pFb_mVoc_tier),
                pFb_len=len(res.pFb_tier),
                **locals()))
    trs.Append(newtier)  # ?
    #-- # (1) Annotation, duration
    #-- res.pFb_mVoc_durations = durations(res.pFb_mVoc_tier)
    #-- res.pFb_mVoc_duration_stats = stats(res.pFb_mVoc_durations)
    #-- print("\t  durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f}, {s.Max:.3f}]".format(s=res.pFb_mVoc_duration_stats, **locals()))
    # Analyse rf results
    if True:
        groups = {'after': [], 'during': [], 'all': []}
        # group result between after/during
        for x, rel, y in rf:
            groups['all'].append((x, rel, y))
            if rel[1].delay > 0:  # rel[1] corresponds to pStartEnd => gives the Xstart-Yend delay
                # feedback starts strictly after the vocabulaire
                groups['after'].append((x, rel, y))
            else:
                # feedback starts during the vocabulaire
                groups['during'].append((x, rel, y))
        # 'all' annotations
        if groups['all']:
            group = groups['all']
            ssDelays = [rel[0].delay
                        for (x, rel, y) in group]  # rel[0] is pStartStart
            ssStats = stats(ssDelays)
            xDurStats = stats(durations([x for (x, rel, y) in group
                                         ]))  # p-Feedback durations
            yDurStats = stats(durations([y for (x, rel, y) in group
                                         ]))  # Vocabulary durations
            print(
                "\t  all: {} feedbacks 'linked' to vocabulaire".format(
                    len(group)) +
                "\n\t    Start-Start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                .format(s=ssStats) +
                "\n\t    Vocabulary durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"
                .format(s=yDurStats) +
                "\n\t    Feedback durations: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f},{s.Max:.3f}]"
                .format(s=xDurStats))
            if perLabel:
                statsPerLabel([x for (x, rel, y) in group], "\t\t",
                              normLabelWithSep)

        # feedbacks starting during the vocabulaire
        if groups['during']:
            group = groups['during']
            ssDelays = [rel[0].delay
                        for (x, rel, y) in group]  # rel[0] is pStartStart
            ssStats = stats(ssDelays)
            ssPercents = [
                float((rel[0].delay) / y.GetLocation().GetDuration())
                for (x, rel, y) in group
            ]  # at which fraction of the Vocabulaire duration the Feedback starts
            ssPStats = stats(ssPercents)
            print(
                "\t  during: {} feedbacks starts during the vocabulaire".
                format(len(group)) +
                "\n\t    Start-Start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                .format(s=ssStats) +
                "\n\t    Percent of Vocabulary when P-Feedback starts: mean={s.Mean:.0%}, std.dev.={s.StdDev:.3f} [{s.Min:.0%},{s.Max:.0%}]"
                .format(s=ssPStats))
            if perLabel:
                statsPerLabel([x for (x, rel, y) in group], "\t\t",
                              normLabelWithSep)
        # feedbacks starting after the end of the vocabulaire
        if groups['after']:
            group = groups['after']
            ssDelays = [rel[0].delay
                        for (x, rel, y) in group]  # rel[0] is pStartStart
            ssStats = stats(ssDelays)
            seDelays = [rel[1].delay
                        for (x, rel, y) in group]  # rel[1] is pStartEnd
            seStats = stats(seDelays)
            print(
                "\t  after: {} feedbacks starts (at most {:.3f}s) after the vocabulaire"
                .format(len(group), pFbStart_mVocEnd_Max) +
                "\n\t    Start-Start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                .format(s=ssStats) +
                "\n\t    Vocabulaire end - P-Feedback start delays: mean={s.Mean:.3f}, std.dev.={s.StdDev:.3f} [{s.Min:.3f~},{s.Max:.3f~}]"
                .format(s=seStats))
            if perLabel:
                statsPerLabel([x for (x, rel, y) in group], "\t\t",
                              normLabelWithSep)
Exemplo n.º 8
0
def process_files(files, opts):
    """ Process each file: build a boundaries tier for every requested tier
        (and optionally an 'equals' tier comparing it to the first one),
        then save the result next to the input file.

        @param files the file(s) to process
        @param opts the options object; the attributes read here are
            keep_tiers, tiers_names, out_tier_format, radius, bound_type,
            begin_format, end_format, equals_tier, equals_label_format,
            equals_tier_format and out_file_format
    """
    annotationdata.aio = sppas_tools.getAnnotationdataAio()
    # import annotationdata.aio or annotationdata.io
    from annotationdata import Transcription, Tier
    for f in files:
        print("[%s] Loading annotation file..." % f)
        # Read an annotated file
        trs = annotationdata.aio.read(f)
        print("[%s] Number of tiers:%d" % (f, trs.GetSize()))

        # Prepare the output Transcription
        # default/'all' => work directly on trs (generated tiers are added
        # to the tiers already in the file)
        destTrs = trs
        destAppendProcessed = False
        if opts.keep_tiers.startswith('process'):
            # empty copy of trs, to which the processed tiers are appended
            # along with the generated ones
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())
            destAppendProcessed = True
        elif (opts.keep_tiers == 'any'
              or opts.keep_tiers.startswith('bound')):
            # empty copy of trs, holding only the generated tiers
            destTrs = Transcription(trs.GetName(), trs.GetMinTime(),
                                    trs.GetMaxTime())

        # Look for the tier(s) to process
        # Boundaries tier of the first requested tier: the reference the
        # 'equals' tiers of the following tiers are computed against
        equalsRefBoundTier = None
        for tier_name in opts.tiers_names:
            tier = sppas_tools.tierFind(trs, tier_name)
            if tier is None:
                # Report the name that was searched, then list the tiers of
                # the file (4 per line) to help the user pick a valid one
                print("[%s] Any tier with name similar to '%s' ;-(" %
                      (f, tier_name))
                print("[%s] Tiers are : %s" % (f, ''.join([
                    "{}[{}] '{}'".format("\n   " if (i % 4) == 0 else ", ", i,
                                         t.GetName())
                    for i, t in enumerate(trs)
                ])))
                # Stop looking for further tiers; what was built so far is
                # still saved below
                break
            print("[%s] Searched tier '%s' has %d annotations" %
                  (f, tier.GetName(), tier.GetSize()))
            if destAppendProcessed:
                destTrs.Append(tier)
            # Create the Boundaries tier
            boundName = opts.out_tier_format.format(tier_name)
            boundTier = Tier(boundName)
            bounds = boundaries(tier, opts.radius, opts.bound_type,
                                trs.GetMinTime(), trs.GetMaxTime(),
                                opts.begin_format, opts.end_format)
            for bound in bounds:
                boundTier.Append(bound)
            print("[%s] Boundaries tier '%s' has %d annotations" %
                  (f, boundTier.GetName(), boundTier.GetSize()))
            destTrs.Append(boundTier)
            # Create the 'equals' tier
            if opts.equals_tier:
                if tier_name == opts.tiers_names[0]:  # first => reference
                    equalsRefBoundTier = boundTier
                else:
                    equalsTier = filterEquals(boundTier, equalsRefBoundTier,
                                              opts.bound_type,
                                              opts.equals_label_format)
                    equalsName = opts.equals_tier_format.format(
                        tier_name, opts.tiers_names[0])
                    equalsTier.SetName(equalsName)
                    print("[%s] Equals tier '%s' has %d annotations" %
                          (f, equalsTier.GetName(), equalsTier.GetSize()))
                    destTrs.Append(equalsTier)
        # Saving file: the output name is built from the input path without
        # its extension and the '+'-joined tier names; the input extension
        # is re-appended
        (root, ext) = os.path.splitext(f)
        of = opts.out_file_format.format(root, "+".join(
            opts.tiers_names)) + ext
        print("[%s] Saving annotations into %s" % (f, of))
        annotationdata.aio.write(of, destTrs)