def test_purge(self):
    """
    Exercise purge() on a corpus joined with itself, with a second load of
    the same file and with a different file, then check the graphbanks
    after all graph pairs have been deleted.
    """
    fn1 = "data/corpus-1.pgc"
    fn2 = "data/corpus-2.pgc"

    # graph pairs with identical graphbanks (corpus joined with itself):
    # purging must leave the graphbanks unchanged
    corpus = ParallelGraphCorpus(inf=fn1)
    corpus += corpus
    before = corpus._graphbanks()
    self.assertEqual(len(before), 2)
    corpus.purge()
    after = corpus._graphbanks()
    self.assertEqual(before, after)

    # graph pairs with equal graphbanks (same file loaded twice):
    # four graphbanks before purging, two afterwards
    corpus = ParallelGraphCorpus(inf=fn1)
    other = ParallelGraphCorpus(inf=fn1)
    corpus += other
    before = corpus._graphbanks()
    self.assertEqual(len(before), 4)
    corpus.purge()
    after = corpus._graphbanks()
    self.assertEqual(len(after), 2)

    # graph pairs with different graphbanks (two different files):
    # purging must leave all four graphbanks in place
    corpus = ParallelGraphCorpus(inf=fn1)
    other = ParallelGraphCorpus(inf=fn2)
    corpus += other
    before = corpus._graphbanks()
    self.assertEqual(len(before), 4)
    corpus.purge()
    after = corpus._graphbanks()
    self.assertEqual(before, after)

    # deleting every graph pair removes the dependencies on graphbanks
    del corpus[:]
    remaining = corpus._graphbanks()
    self.assertEqual(len(remaining), 0)
) args = parser.parse_args() pgc_fns = multiglob(args.file) def log(s): if args.verbose: print >>sys.stderr, "***", s log("Reading corpus from " + pgc_fns[0]) corpus = ParallelGraphCorpus(inf=pgc_fns[0]) for fn in pgc_fns[1:]: log("Joining corpus from " + fn) # __iadd__ also checks if another corpus is compatible w.r.t. relations # and meta-data corpus += ParallelGraphCorpus(inf=fn) # Purge the corpus of duplicate graphbanks held in memory log("Purging corpus") corpus.purge() log("Writing corpus") corpus.write(pprint=args.format)
"--verbose", action="store_true", help="verbose ouput to stderr") args = parser.parse_args() pgc_fns = multiglob(args.file) def log(s): if args.verbose: print >> sys.stderr, "***", s log("Reading corpus from " + pgc_fns[0]) corpus = ParallelGraphCorpus(inf=pgc_fns[0]) for fn in pgc_fns[1:]: log("Joining corpus from " + fn) # __iadd__ also checks if another corpus is compatible w.r.t. relations # and meta-data corpus += ParallelGraphCorpus(inf=fn) # Purge the corpus of duplicate graphbanks held in memory log("Purging corpus") corpus.purge() log("Writing corpus") corpus.write(pprint=args.format)