示例#1
0
convert a parallel text corpus from n-based alignment to id-based alignment
"""


from sys import stderr
from os.path import splitext, join

from daeso.utils.cli import ArgumentParser
from daeso.ptc.document import HitaextDoc



    


# Command-line interface; the module docstring doubles as the description.
parser = ArgumentParser(description=__doc__)

# One or more corpus files. The description is passed as ``help`` so that it
# shows up in the usage message; it used to be passed as ``default`` with an
# empty help string. NOTE(review): assumes daeso's ArgumentParser follows
# argparse semantics, where a required ``nargs="+"`` positional never falls
# back to a default.
parser.add_argument(
    "corpus",
    nargs="+",
    help="parallel text corpus"
    )


# Boolean flag, False unless given on the command line.
parser.add_argument(
    "-V", "--verbose",
    action="store_true",
    help="verbose output"
    )
示例#2
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
convert a parallel text corpus from n-based alignment to id-based alignment
"""

from sys import stderr
from os.path import splitext, join

from daeso.utils.cli import ArgumentParser
from daeso.ptc.document import HitaextDoc

# Command-line interface; the module docstring doubles as the description.
parser = ArgumentParser(description=__doc__)

# One or more corpus files. The description is passed as ``help`` so that it
# shows up in the usage message; it used to be passed as ``default`` with an
# empty help string. NOTE(review): assumes daeso's ArgumentParser follows
# argparse semantics, where a required ``nargs="+"`` positional never falls
# back to a default.
parser.add_argument("corpus",
                    nargs="+",
                    help="parallel text corpus")

# Boolean flag, False unless given on the command line.
parser.add_argument("-V",
                    "--verbose",
                    action="store_true",
                    help="verbose output")

args = parser.parse_args()

# Progress messages go to stderr (Python 2 print-chevron syntax).
if args.verbose:
    print >> stderr, "Reading corpus from", args.corpus
示例#3
0
"""


__authors__ = 'Erwin Marsi <*****@*****.**>'


from glob import glob
from os.path import basename, join
from sys import stdout, stderr
from string import uppercase

from daeso.utils.cli import ArgumentParser
from daeso.ptc.document import HitaextDoc


# Command-line interface; the module docstring doubles as the description.
parser = ArgumentParser(description=__doc__)

# One or more corpus files. The description is passed as ``help`` so that it
# shows up in the usage message; it used to be passed as ``default`` with an
# empty help string. NOTE(review): assumes daeso's ArgumentParser follows
# argparse semantics, where a required ``nargs="+"`` positional never falls
# back to a default.
parser.add_argument(
    "corpus",
    nargs="+",
    help="parallel text corpus"
    )

# Optional replacement directory; defaults to the empty string.
parser.add_argument(
    "-d", "--dir",
    default="",
    help="new directory for source and target files "
    "(defaults to none, which means stripping the existing directory)"
    )
示例#4
0
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
change directory of source and target files in parallel text corpora
"""

__authors__ = 'Erwin Marsi <*****@*****.**>'

from glob import glob
from os.path import basename, join
from sys import stdout, stderr
from string import uppercase

from daeso.utils.cli import ArgumentParser
from daeso.ptc.document import HitaextDoc

# Command-line interface; the module docstring doubles as the description.
parser = ArgumentParser(description=__doc__)

# One or more corpus files. The description is passed as ``help`` so that it
# shows up in the usage message; it used to be passed as ``default`` with an
# empty help string. NOTE(review): assumes daeso's ArgumentParser follows
# argparse semantics, where a required ``nargs="+"`` positional never falls
# back to a default.
parser.add_argument("corpus",
                    nargs="+",
                    help="parallel text corpus")

# Optional replacement directory; defaults to the empty string.
parser.add_argument(
    "-d",
    "--dir",
    default="",
    help="new directory for source and target files "
    "(defaults to none, which means stripping the existing directory)")

parser.add_argument(
    "-t",