Exemplo n.º 1
0
# See the License for the specific language governing permissions and
# limitations under the License.
"""English-French WMT translation dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow_datasets.public_api as tfds
from tensorflow_datasets.translate import wmt

TRANSLATE_DATASETS = {
    "wmt10_giga_fren_enfr":
    wmt.TranslateData(url="http://www.statmt.org/wmt10/training-giga-fren.tar",
                      language_to_file={
                          "en": "giga-fren.release2.fixed.en.gz",
                          "fr": "giga-fren.release2.fixed.fr.gz",
                      }),
    "wmt13_commoncrawl_enfr":
    wmt.TranslateData(
        url="http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz",
        language_to_file={
            "en": "commoncrawl.fr-en.en",
            "fr": "commoncrawl.fr-en.fr",
        }),
    "wmt13_europarl_enfr":
    wmt.TranslateData(
        url="http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz",
        language_to_file={
            "en": "training/europarl-v7.fr-en.en",
            "fr": "training/europarl-v7.fr-en.fr",
Exemplo n.º 2
0
# See the License for the specific language governing permissions and
# limitations under the License.
"""English-German WMT translation dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow_datasets.public_api as tfds
from tensorflow_datasets.translate import wmt

TRANSLATE_DATASETS = {
    "wmt13_commoncrawl_ende":
    wmt.TranslateData(
        url="http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz",
        language_to_file={
            "en": "commoncrawl.de-en.en",
            "de": "commoncrawl.de-en.de",
        }),
    "wmt13_europarl_ende":
    wmt.TranslateData(
        url="http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz",
        language_to_file={
            "en": "training/europarl-v7.de-en.en",
            "de": "training/europarl-v7.de-en.de",
        }),
    "wmt17_newstest13":
    wmt.TranslateData(
        url="http://data.statmt.org/wmt17/translation-task/dev.tgz",
        language_to_file={
            "en": "dev/newstest2013.en",
            "de": "dev/newstest2013.de",