Example #1
    def test_class2tokens(self):
        classes = 'T-VKTV-R'
        tokens = 'tʰ ɔ x t ə r'.split(' ')

        out = class2tokens(classes, tokens)
        _ = class2tokens([['T'], ['-VKTV-'], ['R']], 'th o x t e r'.split(),
                         local=True)

        assert out[1] == '-' and out[-2] == '-'
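A minimal sketch of what test_class2tokens checks, assuming class2tokens is imported from lingpy (the import path below is an assumption): the gap symbols of the aligned sound-class string are projected back onto the token sequence.

from lingpy.sequence.sound_classes import class2tokens  # assumed import path

tokens = 'tʰ ɔ x t ə r'.split(' ')
out = class2tokens('T-VKTV-R', tokens)
# gaps from the class string are inserted into the token sequence, which is
# what the assertions on positions 1 and -2 verify:
# ['tʰ', '-', 'ɔ', 'x', 't', 'ə', '-', 'r']
print(out)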
Example #2
def test_class2tokens():

    classes = 'T-VKTV-R'
    tokens = 'tʰ ɔ x t ə r'.split(' ')

    out = class2tokens(classes, tokens)

    assert out[1] == '-' and out[-2] == '-'
Example #3
def get_correspondences(alms, ref='lexstatid'):
    """
    Compute sound correspondences for a given set of aligned cognates.
    """
    # store all correspondences
    corrs = {}

    # store occurrences
    occs = {}

    for key, msa in alms.msa[ref].items():
        # get basic stuff
        idxs = msa['ID']
        taxa = msa['taxa']
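        # note: cgi.escape was removed in Python 3.8; html.escape(value, quote=True)
        # is the modern equivalent of the call below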
        concept = cgi.escape(alms[idxs[0], 'concept'], True)

        # get numerical representation of alignments
        if 'numbers' in alms.header:
            alignment = [class2tokens(
                alms[idxs[i], 'numbers'],
                msa['alignment'][i]) for i in range(len(idxs))]
        else:
            alignment = msa['alignment']

        # create new array for the character matrix
        character_matrix = []

        # iterate over each taxon
        for i, taxon in enumerate(taxa):
            # get the numerical sequence
            nums = alignment[i]

            # store chars per line
            chars = []

            # iterate over the sequence
            for j, num in enumerate(nums):
                col = [alm[j] for alm in alignment]

                # get the char
                if num != '-':
                    charA = dotjoin(taxa[i], msa['alignment'][i][j], num.split('.')[2])
                    chars += [charA]
                    try:
                        occs[charA] += [concept]
                    except:
                        occs[charA] = [concept]
                else:
                    chars += ['-']

                for k, numB in enumerate(col):
                    if k != i:
                        if num == '-' and numB == '-':
                            pass
                        else:
                            if numB != '-' and num != '-':
                                # get the second char
                                charB = dotjoin(
                                    taxa[k],
                                    msa['alignment'][k][j],
                                    numB.split('.')[2])
                                try:
                                    corrs[charA][charB] += 1
                                except:
                                    try:
                                        corrs[charA][charB] = 1
                                    except:
                                        corrs[charA] = {charB: 1}

            character_matrix += [chars]

        # append character matrix to alignments
        alms.msa[ref][key]['_charmat'] = character_matrix

    return corrs, occs
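The nested try/except blocks above just build a two-level co-occurrence counter and an occurrence list; an illustrative sketch (not part of lingpy) of the same bookkeeping with collections.defaultdict:

from collections import defaultdict

# charA -> charB -> number of aligned non-gap co-occurrences
corrs = defaultdict(lambda: defaultdict(int))
# char -> list of concepts in which it occurs
occs = defaultdict(list)

# inside the alignment loops this replaces the try/except chains:
#   occs[charA].append(concept)     once per non-gap position
#   corrs[charA][charB] += 1        once per aligned pair of non-gap segments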
Example #4
def get_confidence(alms, scorer, ref='lexstatid', gap_weight=1):
    """
    Create confidence scores for a given set of alignments.

    Parameters
    ----------
    alms : :py:class:`~lingpy.align.sca.Alignments`
        An *Alignments* object containing already aligned strings.
    scorer : :py:class:`~lingpy.algorithm._misc.ScoreDict`
        A *ScoreDict* object which gives similarity scores for all segments in
        the alignment.
    ref : str (default="lexstatid")
        The reference entry-type, referring to the cognate-set to be used for
        the analysis.
    gap_weight : int (default=1)
        The weight assigned to comparisons that involve a gap when the scores
        of an alignment column are averaged.
    """
    # store all values for average scores
    values = []

    # store all correspondences
    corrs = {}

    # store occurrences
    occs = {}

    for key, msa in alms.msa[ref].items():
        # get basic stuff
        idxs = msa['ID']
        taxa = msa['taxa']
        concept = cgi.escape(alms[idxs[0], 'concept'], True)

        # get numerical representation of alignments
        if scorer:
            alignment = [class2tokens(
                alms[idxs[i], 'numbers'],
                msa['alignment'][i]) for i in range(len(idxs))]
        else:
            alignment = msa['alignment']

        # create new array for confidence
        confidence_matrix = []
        character_matrix = []

        # iterate over each taxon
        for i, taxon in enumerate(taxa):
            idx = alms.taxa.index(taxon) + 1

            # get the numerical sequence
            nums = alignment[i]

            # store confidences per line
            confidences = []

            # store chars per line
            chars = []

            # iterate over the sequence
            for j, num in enumerate(nums):
                col = [alm[j] for alm in alignment]
                score = 0
                count = 0

                # get the char
                if num != '-':
                    charA = dotjoin(taxa[i], msa['alignment'][i][j], num.split('.')[2])
                    chars += [charA]
                    try:
                        occs[charA] += [concept]
                    except:
                        occs[charA] = [concept]
                else:
                    chars += ['-']

                for k, numB in enumerate(col):
                    if k != i:
                        if num == '-' and numB == '-':
                            pass
                        else:
                            if numB != '-' and num != '-':
                                # get the second char
                                charB = dotjoin(
                                    taxa[k], msa['alignment'][k][j], numB.split('.')[2])
                                try:
                                    corrs[charA][charB] += 1
                                except:
                                    try:
                                        corrs[charA][charB] = 1
                                    except:
                                        corrs[charA] = {charB: 1}

                            gaps = False
                            if num == '-' and numB != '-':
                                numA = charstring(idx)
                                gaps = True
                            elif numB == '-' and num != '-':
                                numB = charstring(alms.taxa.index(taxa[k]))
                                numA = num
                                gaps = True
                            else:
                                numA = num

                            scoreA = scorer[numA, numB]
                            scoreB = scorer[numB, numA]
                            this_score = max(scoreA, scoreB)

                            if not gaps:
                                score += this_score
                                count += 1
                            else:
                                score += this_score * gap_weight
                                count += gap_weight

                if count:
                    score = score / count
                else:
                    score = -25

                confidences += [int(score + 0.5)]
                values += [int(score + 0.5)]
            confidence_matrix += [confidences]
            character_matrix += [chars]

        # append confidence and character matrices to alignments
        alms.msa[ref][key]['confidence'] = confidence_matrix
        alms.msa[ref][key]['_charmat'] = character_matrix

    # sort the values
    values = sorted(set(values + [1]))

    # make conversion to scale of 100 values
    converter = {}
    valsA = values[:values.index(1)]
    valsB = values[values.index(1):]
    stepA = 50 / (len(valsA) + 1)
    stepB = 75 / (len(valsB) + 1)
    for i, score in enumerate(valsA):
        converter[score] = int((stepA * i) / 4 + 0.5)
    for i, score in enumerate(valsB):
        converter[score] = int(stepB * i + 0.5) + 50

    # iterate over keys again
    for key, msa in alms.msa[ref].items():
        # get basic stuff
        for i, line in enumerate(msa['confidence']):
            for j, cell in enumerate(line):
                alms.msa[ref][key]['confidence'][i][j] = converter[cell]

    jsond = {}
    for key, corr in corrs.items():
        splits = [c.split('.') + [o] for c, o in corr.items()]
        sorts = sorted(splits, key=lambda x: (x[0], -x[3]))
        new_sorts = []

        # check for rowspan
        spans = {}
        for a, b, c, d in sorts:
            if a in spans:
                if spans[a] < 3 and d > 1:
                    spans[a] += 1
                    new_sorts += [[a, b, c, d]]
            else:
                if d > 1:
                    spans[a] = 1
                    new_sorts += [[a, b, c, d]]

        bestis = []
        old_lang = ''
        counter = 0
        for a, b, c, d in new_sorts:
            new_lang = a
            if new_lang != old_lang:
                old_lang = new_lang

                tmp = '<tr class="display">'
                tmp += '<td class="display" rowspan={0}>'.format(spans[a])
                tmp += a + '</td>'
                tmp += '<td class="display" onclick="show({0});"><span '.format(
                    "'" + dotjoin(a, b, c) + "'")
                tmp += 'class="char {0}">' + b + '</span></td>'
                tmp += '<td class="display">'
                tmp += c + '</td>'
                tmp += '<td class="display">' + str(d) + '</td>'
                tmp += '<td class="display">' + str(len(occs[dotjoin(a, b, c)])) + '</td>'
                tmp += '</tr>'
                t = 'dolgo_' + token2class(b, rcParams['dolgo'])

                # bad check for three classes named differently
                if t == 'dolgo__':
                    t = 'dolgo_X'
                elif t == 'dolgo_1':
                    t = 'dolgo_TONE'
                elif t == 'dolgo_0':
                    t = 'dolgo_ERROR'

                bestis += [tmp.format(t)]
                counter += 1

            elif counter > 0:
                tmp = '<tr class="display">'
                tmp += '<td class="display" onclick="show({0});"><span '.format(
                    "'" + dotjoin(a, b, c) + "'")
                tmp += 'class="char {0}">' + b + '</span></td>'
                tmp += '<td class="display">' + c + '</td>'
                tmp += '<td class="display">' + str(d) + '</td>'
                tmp += '<td class="display">' + str(len(occs[dotjoin(a, b, c)])) + '</td>'
                tmp += '</tr>'

                t = 'dolgo_' + token2class(b, rcParams['dolgo'])

                # bad check for three classes named differently
                if t == 'dolgo__':
                    t = 'dolgo_X'
                elif t == 'dolgo_1':
                    t = 'dolgo_TONE'
                elif t == 'dolgo_0':
                    t = 'dolgo_ERROR'

                bestis += [tmp.format(t)]
                counter += 1
                old_lang = new_lang
            else:
                old_lang = new_lang
                counter = 0

        jsond[key] = [''.join(bestis), occs[key]]

    return jsond
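The block in the middle of get_confidence rescales the raw averaged scores (which can be negative) onto a 0-100 confidence scale: scores below 1 are squeezed into the bottom of the range, while scores of 1 and above are spread over 50-100. A worked run of that conversion on made-up scores:

# Worked example of the score-to-confidence conversion above, on made-up values:
values = sorted(set([-12, -3, 4, 9] + [1]))     # -> [-12, -3, 1, 4, 9]
valsA = values[:values.index(1)]                # raw scores below 1
valsB = values[values.index(1):]                # raw scores from 1 upwards
stepA = 50 / (len(valsA) + 1)                   # 16.67
stepB = 75 / (len(valsB) + 1)                   # 18.75
converter = {}
for i, score in enumerate(valsA):
    converter[score] = int((stepA * i) / 4 + 0.5)
for i, score in enumerate(valsB):
    converter[score] = int(stepB * i + 0.5) + 50
print(converter)                                # {-12: 0, -3: 4, 1: 50, 4: 69, 9: 88}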
Example #5
    def align(self, **keywords):
        """
        Align a pair of sequences or multiple sequence pairs.

        Parameters
        ----------
        gop : int (default=-1)
            The gap opening penalty (GOP).
        scale : float (default=0.5)
            The gap extension penalty (GEP), calculated with help of a scaling
            factor.
        mode : {"global","local","overlap","dialign"}
            The alignment mode, see :evobib:`List2012a` for details.
        factor : float (default = 0.3)
            The factor by which matches in identical prosodic position are
            increased.
        restricted_chars : str (default="T\_")
            The restricted chars that function as an indicator of syllable or
            morpheme breaks for secondary alignment, see :evobib:`List2012c`
            for details.
        distance : bool (default=False)
            If set to *True*, return the distance instead of the similarity
            score. Distance is calculated using the formula by
            :evobib:`Downey2008`.
        model : { None, ~lingpy.data.model.Model }
            Specify the sound class model that shall be used for the analysis.
            If no model is specified, the default model of :evobib:`List2012a`
            will be used.
        pprint : bool (default=False)
            If set to *True*, the alignments are printed to the screen.

        """
        setdefaults(
            keywords,
            gop=-1,
            scale=0.5,
            mode='global',
            factor=0.3,
            restricted_chars='T_',
            distance=False,
            model=rcParams['sca'],
            pprint=False,
            transform=rcParams['align_transform'])

        if hasattr(self, 'model'):
            if keywords['model'] != self.model:
                self._set_model(**keywords)
        else:
            self._set_model(**keywords)

        # create the alignments array
        self._alignments = calign.align_pairs(
            self.classes,
            self.weights,
            self.prostrings,
            keywords['gop'],
            keywords['scale'],
            keywords['factor'],
            self.scoredict,
            keywords['mode'],
            keywords['restricted_chars'],
            distance=1 if keywords['distance'] else 0)

        # switch back to alignments
        self.alignments = []
        for i, (almA, almB, sim) in enumerate(self._alignments):
            self.alignments.append((
                class2tokens(self.tokens[i][0], almA, local=keywords['mode'] == "local"),
                class2tokens(self.tokens[i][1], almB, local=keywords['mode'] == "local"),
                sim))

        # print the alignments, if this is chosen
        as_string(self, pprint=keywords['pprint'])
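A hedged usage sketch: judging from the attributes it relies on (self.tokens, self.classes, self.scoredict), this align() looks like the method of lingpy's Pairwise class; the sequences below are purely illustrative.

from lingpy import Pairwise  # assumption: the align() above is Pairwise.align

pair = Pairwise('waldemar', 'woldemort')   # illustrative input sequences
pair.align(mode='global', distance=True, pprint=True)
# pair.alignments now holds (almA, almB, score) triples in which the gap symbols
# have been mapped back onto the original tokens via class2tokens.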
Example #6
def get_confidence(alms, scorer, ref='lexstatid', gap_weight=1):
    """
    Create confidence scores for a given set of alignments.

    Parameters
    ----------
    alms : :py:class:`~lingpy.align.sca.Alignments`
        An *Alignments* object containing already aligned strings.
    scorer : :py:class:`~lingpy.algorithm._misc.ScoreDict`
        A *ScoreDict* object which gives similarity scores for all segments in
        the alignment.
    ref : str (default="lexstatid")
        The reference entry-type, referring to the cognate-set to be used for
        the analysis.
    gap_weight : int (default=1)
        The weight assigned to comparisons that involve a gap when the scores
        of an alignment column are averaged.
    """
    # store all values for average scores
    values = []

    for key, msa in alms.msa[ref].items():
        # get basic stuff
        idxs = msa['ID']
        taxa = msa['taxa']

        # get numerical representation of alignments
        if scorer:
            alignment = [class2tokens(
                alms[idxs[i], 'numbers'],
                msa['alignment'][i]) for i in range(len(idxs))]
        else:
            alignment = msa['alignment']

        # create new array for confidence
        confidence_matrix = []

        # iterate over each taxon
        for i, taxon in enumerate(taxa):
            idx = alms.taxa.index(taxon) + 1

            # get the numerical sequence
            nums = alignment[i]

            # store confidences per line
            confidences = []

            # iterate over the sequence
            for j, num in enumerate(nums):
                col = [alm[j] for alm in alignment]

                score = 0
                count = 0

                for k, numB in enumerate(col):
                    if k != i:
                        if num == '-' and numB == '-':
                            pass
                        else:
                            gaps = False
                            if num == '-' and numB != '-':
                                numA = str(idx) + '.X.-'
                                gaps = True
                            elif numB == '-' and num != '-':
                                numB = str(alms.taxa.index(taxa[k])) + '.X.-'
                                numA = num
                                gaps = True
                            else:
                                numA = num

                            scoreA = scorer[numA, numB]
                            scoreB = scorer[numB, numA]
                            this_score = max(scoreA, scoreB)

                            if not gaps:
                                score += this_score
                                count += 1
                            else:
                                score += this_score * gap_weight
                                count += gap_weight

                if count:
                    score = score / count
                else:
                    score = -25

                confidences += [int(score + 0.5)]
                values += [int(score + 0.5)]
            confidence_matrix += [confidences]

        # append confidence matrix to alignments
        alms.msa[ref][key]['confidence'] = confidence_matrix

    # sort the values
    values = sorted(set(values))

    # make conversion to scale of 100 values
    converter = {}
    step = 100 / (len(values) + 1)
    for i, score in enumerate(values):
        converter[score] = int(step * i + 0.5)

    # iterate over keys again
    for key, msa in alms.msa[ref].items():
        # get basic stuff
        for i, line in enumerate(msa['confidence']):
            for j, cell in enumerate(line):
                msa['confidence'][i][j] = converter[cell]
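A hedged call sketch for either get_confidence variant above; the wordlist file, the thresholds, and the cscorer attribute are assumptions about the usual lingpy workflow, not taken from this page.

from lingpy import LexStat, Alignments   # assumed workflow, see note above

lex = LexStat('wordlist.tsv')            # hypothetical input wordlist
lex.get_scorer(runs=100)                 # build the language-specific scorer
lex.cluster(method='lexstat', threshold=0.6, ref='lexstatid')
alms = Alignments(lex, ref='lexstatid')
alms.align()
rows = get_confidence(alms, lex.cscorer, ref='lexstatid', gap_weight=1)
# the richer variant returns per-correspondence HTML rows; both variants attach a
# 'confidence' matrix to every MSA in alms.msa['lexstatid']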