class TreeCollection: """ Collection of Trees. """ def __init__(self, path, low_filenum, high_filenum): self.file_filter = NumberRangeFileFilter( ".mrg", low_filenum, high_filenum, True) self.files = self.get_files_under(path) self.trees = self.get_trees() self.index = 0 def __iter__(self): return self def next(self): if self.index < len(self.trees): tree = self.trees[self.index] self.index += 1 return tree else: raise StopIteration def get_files_under(self, path): files = [] self.add_files_under(path, files) return files def add_files_under(self, root, files): #if not filter(root, self.file_filter.accept): if not self.file_filter.accept_sequential(root): return if os.path.isfile(root): files.append(root) return if os.path.isdir(root): children = os.listdir(root) for child in children: self.add_files_under(os.path.join(root, child), files) def get_trees(self): trees = [] for i, tree_file in enumerate(self.files): if (i + 1) % 100 == 0: print "Tree %d" % (i + 1) ff = open(tree_file, 'rb') for tree in Trees.PennTreeReader(ff): trees.append(tree) ff.close() return trees
class TreeCollection: """ Collection of Trees. """ def __init__(self, path, extension, low_filenum, high_filenum): self.file_filter = NumberRangeFileFilter( extension, low_filenum, high_filenum, True) self.files = self.get_files_under(path) self.trees = self.get_trees() self.index = 0 def __iter__(self): return self def next(self): if self.index < len(self.trees): tree = self.trees[self.index] self.index += 1 return tree else: raise StopIteration def get_files_under(self, path): files = [] self.add_files_under(path, files) return files def add_files_under(self, root, files): #if not filter(root, self.file_filter.accept): if not self.file_filter.accept(root): return if os.path.isfile(root): files.append(root) return if os.path.isdir(root): children = os.listdir(root) for child in children: self.add_files_under(os.path.join(root, child), files) def get_trees(self): trees = [] for i, tree_file in enumerate(self.files): if (i + 1) % 100 == 0: print "Tree %d" % (i + 1) ff = open(tree_file, 'rb') for tree in Trees.PennTreeReader(ff): trees.append(tree) ff.close() return trees
def __init__(self, path, low_filenum, high_filenum): self.file_filter = NumberRangeFileFilter( ".mrg", low_filenum, high_filenum, True) self.files = self.get_files_under(path) self.trees = self.get_trees() self.index = 0
def __init__(self, path, low_filenum, high_filenum): self.file_filter = NumberRangeFileFilter(".mrg", low_filenum, high_filenum, True) self.files = self.get_files_under(path) self.trees = self.get_trees() self.index = 0