Exemplo n.º 1
0
 def test_disjoint_windows_04(self):
     """Check the "spread" strategy with window_size=3 on 11 tokens.

     The expected windows are a-c, e-g and i-k.
     """
     sequence = "a b c d e f g h i j k".split()
     expected = [chunk.split() for chunk in ("a b c", "e f g", "i j k")]
     produced = windows.disjoint_windows(
         sequence, window_size=3, strategy="spread"
     )
     actual = [window.tokens for window in produced]
     self.assertEqual(actual, expected)
Exemplo n.º 2
0
 def test_disjoint_windows_01(self):
     """Check the "left" strategy with window_size=4 on 11 tokens.

     The expected windows are a-d and e-h.
     """
     sequence = "a b c d e f g h i j k".split()
     expected = [chunk.split() for chunk in ("a b c d", "e f g h")]
     produced = windows.disjoint_windows(
         sequence, window_size=4, strategy="left"
     )
     actual = [window.tokens for window in produced]
     self.assertEqual(actual, expected)
Exemplo n.º 3
0
 def test_disjoint_windows_03(self):
     """Check the "center" strategy with window_size=5 on 14 tokens.

     The expected windows are c-g and h-l.
     """
     sequence = "a b c d e f g h i j k l m n".split()
     expected = [chunk.split() for chunk in ("c d e f g", "h i j k l")]
     produced = windows.disjoint_windows(
         sequence, window_size=5, strategy="center"
     )
     actual = [window.tokens for window in produced]
     self.assertEqual(actual, expected)
def bootstrap(measure, tokens, window_size, strategy="spread", **kwargs):
    """Apply a measure to disjoint windows and summarize the results.

    Bootstrap procedure for surface-based measures as explained in
    Evert et al. (2017).

    ``measure`` is called once for every window yielded by
    ``windows.disjoint_windows(tokens, window_size, strategy)``; any
    extra keyword arguments are forwarded to ``measure`` unchanged.

    Returns a 3-tuple ``(value, interval, results)`` where ``results``
    is the list of per-window values.  With exactly one window,
    ``value`` is that single result and ``interval`` is ``0``;
    otherwise ``value`` is ``statistics.mean(results)`` and
    ``interval`` is ``misc.confidence_interval(results)``.

    Evert, Stefan, Sebastian Wankerl, Elmar Nöth (2017). Reliable
    measures of syntactic and lexical complexity: The case of Iris
    Murdoch. In: Proceedings of the Corpus Linguistics 2017
    Conference, Birmingham, UK.
    http://purl.org/stefan.evert/PUB/EvertWankerlNoeth2017.pdf

    """
    per_window = [
        measure(chunk, **kwargs)
        for chunk in windows.disjoint_windows(tokens, window_size, strategy)
    ]
    if len(per_window) != 1:
        average = statistics.mean(per_window)
        interval = misc.confidence_interval(per_window)
        return average, interval, per_window
    # A single result is reported as-is, with 0 in place of the
    # confidence interval.
    only_value = per_window[0]
    return only_value, 0, per_window