Example #1
import glob
import os

import pywrapfst as fst


def processLattices(lats_sets, folders, statePruneTh=10000, pruneTh=10, silence=False):
    '''Applies standard pre-processing operations to SMT lattices.
    @lats_sets: glob patterns of the lattices to be processed
    @folders: output folders for the processed lattices
    @statePruneTh: FSTs with more states than this threshold are pruned
    @pruneTh: pruning weight threshold
    @silence: if True, do not print which lattice is being processed'''
    for lats_set, folder in zip(lats_sets, folders):
        print(lats_set)
        print(folder)
        # numericalSort is a user-supplied sort key (defined elsewhere) that
        # orders file names by the number they contain.
        for f in sorted(glob.glob(lats_set), key=numericalSort):
            lattice = fst.Fst.read(f)
            # Check the size before rmepsilon() mutates the lattice in place.
            needs_pruning = lattice.num_states() > statePruneTh
            # Remove epsilons, determinize, minimize, then push weights in the
            # log semiring.
            detminpush = fst.push(
                fst.arcmap(fst.determinize(lattice.rmepsilon()).minimize(),
                           map_type="to_log"),
                push_weights=True)
            if needs_pruning:
                # Oversized lattices are additionally pruned in the tropical
                # semiring, minimized, and pushed again.
                out = fst.arcmap(
                    fst.push(
                        fst.arcmap(
                            fst.prune(
                                fst.arcmap(detminpush, map_type="to_standard"),
                                weight=pruneTh).minimize(),
                            map_type="to_log"),
                        push_weights=True),
                    map_type="to_standard")
            else:
                out = fst.arcmap(detminpush, map_type="to_standard")
            out.write(os.path.join(folder, os.path.basename(f)))
            if not silence:
                print(os.path.basename(f))
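A minimal usage sketch for processLattices; the glob patterns and output directories below are illustrative placeholders, not paths from the original project:

# Hypothetical call: process two sets of lattices into matching output folders.
lats_sets = ["lats/dev/*.fst", "lats/test/*.fst"]
folders = ["processed/dev/", "processed/test/"]
processLattices(lats_sets, folders, statePruneTh=10000, pruneTh=10)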
Example #2
    def word_sequence_history(self, eeg_saus):
        '''
        Generate a probable word sequence from the EEG samples by
        composing them with the word language model.
        '''

        word_seq = fst.compose(eeg_saus, self.ltr2wrd)
        # fst.push returns a new FST; assign the result, otherwise the
        # pushed weights are silently discarded.
        word_seq = fst.push(word_seq, push_weights=True, to_final=True)
        word_seq.project(project_output=True)
        word_seq.rmepsilon()
        return word_seq
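For intuition about the fst.push call above, a toy standalone sketch (arbitrary integer labels and weights) showing how push_weights=True with to_final=True moves the path weight toward the final state:

import pywrapfst as fst

# Toy three-state chain; after pushing toward the final state, the path
# weight accumulates at the end of the machine rather than on early arcs.
c = fst.Compiler()
c.write("0 1 1 1 0.5\n")
c.write("1 2 2 2 1.5\n")
c.write("2\n")
chain = c.compile()
pushed = fst.push(chain, push_weights=True, to_final=True)
print(pushed)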
Example #3
def samplePathFromFst(fst_lattice, id2label):
    """Sample a path from a lattice.

    Parameters
    ----------
    fst_lattice : fst.Fst
        Lattice in OpenFst format.
    id2label : dict
        Mapping from arc label id to human-readable label.

    Returns
    -------
    path : list
        Sequence of (human-readable) labels.

    """
    # Import here only, as some people may not have OpenFst installed.
    import pywrapfst as fst

    # Transform fst_lattice into a stochastic FST.
    stoc_fst_lattice = fst.push(fst_lattice,
                                push_weights=True,
                                remove_total_weight=True)

    # Random walk on the stochastic FST, starting from the initial state
    # (start() is a method and must be called).
    path = []
    __walkFst(stoc_fst_lattice, stoc_fst_lattice.start(), id2label, path)

    return path
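The __walkFst helper is not shown in this example; a plausible reimplementation, assuming the pushed tropical weights behave as negative log-probabilities, could look like this:

import math
import random

def __walkFst(f, state, id2label, path):
    # Hypothetical sketch of the missing helper: follow one outgoing arc at
    # a time, choosing each arc with probability exp(-weight), until a state
    # with no outgoing arcs is reached.
    while True:
        arcs = list(f.arcs(state))
        if not arcs:
            return
        probs = [math.exp(-float(a.weight)) for a in arcs]
        arc = random.choices(arcs, weights=probs, k=1)[0]
        if arc.olabel != 0:  # label id 0 is reserved for epsilon
            path.append(id2label[arc.olabel])
        state = arc.nextstate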
Example #4
    def topk_choice(self, word_sequence, topk_wds=None):
        '''
        Extracts the top-k choices of the LM given a word history (lattice).
        Input: lm.fst and a sentence string.
        Output: top-k words to complete the lattice.
        '''

        # Generate the sentence FST and intersect it with the LM.
        fstout = fst.intersect(word_sequence, self.lm)
        fst_det = fst.determinize(fstout)
        fst_p = fst.push(fst_det, push_weights=True, to_final=True)
        fst_p.rmepsilon()
        fst_rm = fst.determinize(fst_p)
        # Keep the 10 best paths, then normalize and refine the result.
        short = fst.shortestpath(fst_rm, nshortest=10)
        short_det = fst.determinize(short)
        short_det.rmepsilon()
        two_state = fst.compose(short_det, self.refiner)
        output = two_state.project(project_output=True)
        output.rmepsilon()
        output = fst.determinize(output)
        output.minimize()
        if topk_wds is not None:  # needs to distinguish None from []
            topk_wds.extend(self.get_topk_words(output))
        return output
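A hypothetical call site; dec stands for an instance of the surrounding class with its lm and refiner FSTs loaded, and sent_fst for a linear FST over the word history:

topk_wds = []
lattice = dec.topk_choice(sent_fst, topk_wds=topk_wds)
print(topk_wds)  # filled in place via get_topk_words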
Example #5
def fst_finalize(c, last_node, eos_node, path):
  # Close the machine: add the end-of-sentence arc and mark eos_node final.
  fst_arc(c, last_node, eos_node, args.eos_id)
  c.write("%d\n" % eos_node)
  f = c.compile()
  # Standard normalization pipeline before writing the FST to disk.
  f.rmepsilon()
  f = fst.determinize(f)
  f.minimize()
  f.topsort()
  f = fst.push(f, push_weights=True)
  f.write(path)
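The fst_arc helper and args.eos_id come from elsewhere in the script; fst_arc presumably writes a single arc in OpenFst text format to the compiler, along these lines (a sketch, not the original):

def fst_arc(c, src, dst, label, weight=0.0):
  # Hypothetical helper: one arc line in OpenFst text format,
  # "src dst ilabel olabel [weight]", written to the Compiler.
  c.write("%d %d %d %d %f\n" % (src, dst, label, label, weight))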
Example #6
    def get_prior(self):
        '''
        Sets an array with priors.
        In the future, priors will come from the RSVP EEG vector.

        OUTPUTS:
            an array of tuples, each consisting of a character and its
            corresponding probability.
        '''
        sigma_h = self.create_machine_history()
        print(sigma_h)
        # Intersect the history with the language model.
        sigma_h.arcsort(sort_type="olabel")
        output_dist = fst.intersect(sigma_h, self.lm)
        print(output_dist)
        # Process the result.
        output_dist = output_dist.rmepsilon()
        output_dist = fst.determinize(output_dist)
        output_dist.minimize()
        output_dist = fst.push(output_dist, push_weights=True, to_final=True)

        # Worth converting this history to np.array if vector computations
        # will be involved.

        # Walk the states to find the second-to-last one; the arcs from that
        # state to the final state hold the distribution we want.
        prev_stateid = curr_stateid = None
        for state in output_dist.states():
            if curr_stateid is not None:
                prev_stateid = curr_stateid
            curr_stateid = state
        priors = []
        for arc in output_dist.arcs(prev_stateid):
            ch = self.lm_syms.find(arc.ilabel)  # ilabel and olabel are the same
            w = float(arc.weight)

            # TODO: for this demo we only need the distribution over the
            # characters from 'a' to 'z'.
            if len(ch) == 1 and ch in self.legit_ch_dict:
                priors.append((ch, w))

        # Assuming the EEG input is an array like [("a", 0.3), ("b", 0.2), ...],
        # sort the array by probability.
        priors = sorted(priors, key=lambda prior: prior[1])
        normalized_dist = self._normalize([prob for _, prob in priors])
        return zip([ch for ch, _ in priors], normalized_dist)
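The _normalize helper is defined elsewhere in the class; since the weights collected above are tropical (negative-log) values, a plausible sketch converts them to probabilities and rescales them to sum to one:

import math

def _normalize(self, neg_log_weights):
    # Hypothetical sketch of the helper used above: turn negative-log
    # weights into probabilities and renormalize.
    probs = [math.exp(-w) for w in neg_log_weights]
    total = sum(probs)
    return [p / total for p in probs]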
Example #7
def printstrings(a,
                 nshortest=1,
                 project_output=False,
                 syms=None,
                 weight=False):
    """
    Return the nshortest unique input strings in the FST a. The FST a is
    projected onto the input or output prior to finding the shortest paths.
    An optional symbol table syms can be provided. Results are returned as
    strings; if the weight flag is set, the path scores are included.
    """
    import pywrapfst as fst
    b = a.copy().project(project_output=project_output)
    if nshortest == 1:
        c = fst.shortestpath(b)
    else:
        c = fst.shortestpath(b, nshortest=nshortest, unique=True)
    nba = fst.push(c, push_weights=True).rmepsilon()
    nb = []
    if nba.start() != -1:
        for arc1 in nba.arcs(nba.start()):
            w = arc1.weight
            nextstate = arc1.nextstate
            nbi = []
            if syms:
                nbi.append(syms.find(arc1.ilabel))
            else:
                nbi.append(str(arc1.ilabel))
            # After shortestpath each path is linear, so follow the single
            # outgoing arc of each state until none remains.
            while True:
                try:
                    nextarc = next(nba.arcs(nextstate))
                except StopIteration:
                    break
                if syms:
                    nbi.append(syms.find(nextarc.ilabel))
                else:
                    nbi.append(str(nextarc.ilabel))
                nextstate = nextarc.nextstate
            if weight:
                nb.append((' '.join(nbi), w.to_string()))
            else:
                nb.append(' '.join(nbi))
    return nb
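A quick smoke test with a toy two-path acceptor (integer labels, no symbol table); the exact weight strings depend on the semiring formatting:

import pywrapfst as fst

c = fst.Compiler()
c.write("0 1 1 1 0.5\n")  # cheaper path, label 1
c.write("0 1 2 2 1.0\n")  # costlier path, label 2
c.write("1\n")
a = c.compile()
print(printstrings(a, nshortest=2, weight=True))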