示例#1
0
    def compile_data_points(self, **kwargs):
        """Compile per-wordclass frequency data points and write them to disk.

        Iterates over single-gram entries, keeps the largest lexical item in
        each wordclass, and records one row per period in the item's
        frequency table.  Rows are grouped by wordclass category (plus an
        "ALL" bucket), sorted by size, and written as tab-separated text
        files into PREDICTIONS_DIR (one file per wordclass).

        Keyword Args:
            letters: optional letter restriction passed through to
                FrequencyIterator; None (the default) means no restriction.
        """
        letters = kwargs.get("letters", None)
        freq_iterator = FrequencyIterator(
            inDir=self.in_dir, outDir=None, letters=letters, message="Compiling data points"
        )

        # Keys will be wordclass values (NN, NNS, etc.); values will
        # be a list of data points
        self.data_points = defaultdict(list)

        for entry in freq_iterator.iterate():
            if entry.gram_count() == 1:  # and len(entry.lex_items) == 1:
                lex_items = self.largest_in_each_wordclass(entry.lex_items)
                for item in lex_items:
                    # Hoisted: the original recomputed item.frequency_table()
                    # for every period, once to drive the loop and once per row.
                    freq_table = item.frequency_table()
                    for period in freq_table.data:
                        # Only the period's start year is used; the end year
                        # from PERIODS is not needed here.
                        start, _ = PERIODS[period]
                        lifespan = start - item.start
                        # Keep items that already exist at this period, with a
                        # 20-year grace window before their recorded start.
                        if lifespan >= -20:
                            wc = wordclass_category(item.wordclass)
                            # Row layout: (size at period start, lifespan in
                            # years, period start year, frequency in period).
                            row = (
                                item.size(date=start),
                                int(lifespan),
                                start,
                                freq_table.frequency(period=period),
                            )
                            self.data_points[wc].append(row)
                            self.data_points["ALL"].append(row)

        for wordclass in self.data_points:
            # Sort by the first tuple element (size) before writing.
            self.data_points[wordclass].sort(key=lambda p: p[0])
            filepath = os.path.join(PREDICTIONS_DIR, wordclass + ".txt")
            with open(filepath, "w") as fh:
                for data_point in self.data_points[wordclass]:
                    fh.write("%0.3g\t%d\t%d\t%0.4g\n" % data_point)
示例#2
0
 def keyfunc(lex_item):
     """Sort/group key: the wordclass category of a lexical item."""
     category = wordclass_category(lex_item.wordclass)
     return category