convert a parallel text corpus from n-based alignment to id-based alignment """ from sys import stderr from os.path import splitext, join from daeso.utils.cli import ArgumentParser from daeso.ptc.document import HitaextDoc parser = ArgumentParser(description=__doc__) parser.add_argument( "corpus", nargs="+", default="parallel text corpus", help="" ) parser.add_argument( "-V", "--verbose", action="store_true", help="verbose output" )
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
convert a parallel text corpus from n-based alignment to id-based alignment
"""

from sys import stderr
from os.path import splitext, join

from daeso.utils.cli import ArgumentParser
from daeso.ptc.document import HitaextDoc


# The module docstring doubles as the program description shown by --help,
# so its wording is part of the command-line interface.
parser = ArgumentParser(description=__doc__)

# One or more corpus files. The descriptive text belongs in `help`; it was
# previously passed as `default`, which left the --help entry blank.
# NOTE(review): this assumes daeso's ArgumentParser follows argparse
# semantics, where a required nargs="+" positional never uses its default.
parser.add_argument(
    "corpus",
    nargs="+",
    help="parallel text corpus")

parser.add_argument(
    "-V", "--verbose",
    action="store_true",
    help="verbose output")

args = parser.parse_args()

if args.verbose:
    # stderr.write emits the same text as the old `print >> stderr`
    # statement but is valid under both Python 2 and Python 3.
    stderr.write("Reading corpus from %s\n" % (args.corpus,))
""" __authors__ = 'Erwin Marsi <*****@*****.**>' from glob import glob from os.path import basename, join from sys import stdout, stderr from string import uppercase from daeso.utils.cli import ArgumentParser from daeso.ptc.document import HitaextDoc parser = ArgumentParser(description=__doc__) parser.add_argument( "corpus", nargs="+", default="parallel text corpus", help="" ) parser.add_argument( "-d", "--dir", default="", help="new directory for source and target files " "(defaults to none, which means stripping the existing directory)" )
# along with this program. If not, see <http://www.gnu.org/licenses/>. """ change directory of source and target files in parallel text corpora """ __authors__ = 'Erwin Marsi <*****@*****.**>' from glob import glob from os.path import basename, join from sys import stdout, stderr from string import uppercase from daeso.utils.cli import ArgumentParser from daeso.ptc.document import HitaextDoc parser = ArgumentParser(description=__doc__) parser.add_argument("corpus", nargs="+", default="parallel text corpus", help="") parser.add_argument( "-d", "--dir", default="", help="new directory for source and target files " "(defaults to none, which means stripping the existing directory)") parser.add_argument( "-t",