Exemplo n.º 1
0
    def __init__(self, filename, guessers=(YZPTBGuesser, PrefacedPTBGuesser, CCGbankGuesser, PTBGuesser, CPTBGuesser), default=CCGbankGuesser):
        '''Initialises a GuessReader with a given set of guessers.

        Reads a preview from the start of the file, passes it to
        determine_reader() to pick a reader class, and then instantiates that
        class on the file.

        filename: the file to read. Anything after a final ':' is split off
            by padded_rsplit before the file is opened for the preview; the
            chosen reader still receives the full, unsplit filename.
        guessers: iterable of guesser classes, each of which provides
            bytes_of_context_needed().
        default: stored as self.default (presumably the reader used when no
            guesser matches -- confirm against determine_reader).
        '''
        # Materialise once: the guessers are both stored and measured below,
        # so a one-shot iterable (e.g. a generator) must not be consumed twice.
        self.guessers = list(guessers)
        self.default = default

        # Only the path component is needed to open the file for previewing.
        filename_only, _index = padded_rsplit(filename, ':', 1)

        # The preview has to be long enough for the most demanding guesser.
        preview_size = max(guesser.bytes_of_context_needed()
                           for guesser in self.guessers)

        with open(filename_only, 'r') as preview_file:
            self.preview = preview_file.read(preview_size)

        self.reader_class = self.determine_reader(self.preview)
        self.reader = self.reader_class(filename)
Exemplo n.º 2
0
    def __iter__(self):
        '''Yields every item produced by a reader built for self.path.

        The part of self.path before a final ':' is what gets checked on disk.
        A directory is read through MultiGuessReader (forwarding
        self.reader_class as its 'reader' argument when one is set); anything
        else is read through self.reader_class if set, otherwise through
        GuessReader, both of which receive the full, unsplit self.path.

        If the path does not exist, a warning is issued and nothing is
        yielded.
        '''
        path, _index = padded_rsplit(self.path, ':', 1)

        if not os.path.exists(path):
            warn("%s does not exist, so skipping.", path)
            # A bare return inside a generator ends the iteration, which is
            # how the missing path actually gets skipped instead of falling
            # through to a reader that would fail on it.
            return

        if os.path.isdir(path):
            # Only pass 'reader' when a class was explicitly chosen, so that
            # MultiGuessReader otherwise keeps its own default.
            reader_arg = {'reader': self.reader_class} if self.reader_class else {}
            reader = MultiGuessReader(path, verbose=self.verbose, **reader_arg)
        else:
            reader_class = self.reader_class or GuessReader
            reader = reader_class(self.path)

        for deriv_bundle in reader:
            yield deriv_bundle
Exemplo n.º 3
0
 def get_offset(filename):
     '''Splits filename on its last ':' using padded_rsplit.

     Returns whatever padded_rsplit returns for at most one split
     (presumably the path followed by the offset part -- confirm against
     padded_rsplit).
     '''
     path_and_offset = padded_rsplit(filename, ':', 1)
     return path_and_offset
Exemplo n.º 4
0
 def get_offset(filename):
     '''Splits filename on its last ':' using padded_rsplit.

     Returns whatever padded_rsplit returns for at most one split
     (presumably the path followed by the offset part -- confirm against
     padded_rsplit).
     '''
     path_and_offset = padded_rsplit(filename, ':', 1)
     return path_and_offset