Пример #1
0
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Base URL of the raw data directory. It already ends in "/", so the entries
# below append the bare file name; adding another "/" would yield a
# "data//<file>" path containing an empty segment.
ROOT_URL = (
    "https://raw.githubusercontent.com/xiul-msr/e2e_dialog_challenge/master/data/"
)

# One entry per raw TSV file: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    # raw data files
    DownloadableFile(
        f"{ROOT_URL}movie_all.tsv",
        "movie_all.tsv",
        "d2291fd898d8c2d92d7c92affa5601a0561a28f07f6147e9c196c5a573a222d6",
        zipped=False,
    ),
    DownloadableFile(
        f"{ROOT_URL}restaurant_all.tsv",
        "restaurant_all.tsv",
        "0e297b2ac2e29f9771fed3cd348873b729eb079cc26f8c2333a28247671bdb28",
        zipped=False,
    ),
    DownloadableFile(
        f"{ROOT_URL}taxi_all.tsv",
        "taxi_all.tsv",
        "6d8ee9719b3d294b558eb53516c897108d1276e9dbcac0101d4e19a2ad801d20",
        zipped=False,
    ),
]
Пример #2
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/empatheticdialogues/empatheticdialogues.tar.gz',
        'empatheticdialogues.tar.gz',
        '56f234d77b7dd1f005fd365bb17769cfe346c3c84295b69bc069c8ccb83be03d',
    )
]


def build(opt):
    """
    Prepare the ``empatheticdialogues`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): this excerpt only (re)creates the directory; the step that
    fetches RESOURCES is presumably further down -- confirm in the full file.
    """
    dpath = os.path.join(opt['datapath'], 'empatheticdialogues')
    version = '1.0'

    # Nothing to do when build_data.built() already accepts dpath for `version`.
    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)
Пример #3
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/mnist/mnist.tar.gz',
        'mnist.tar.gz',
        'c4e2f85cdae81ebf3a76d7ac0f0af8c4d91f4d1fb9bc2fd942b669a72b80585d',
    )
]


def build(opt):
    """
    Prepare the ``mnist`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): this excerpt only (re)creates the directory; the step that
    fetches RESOURCES is presumably further down -- confirm in the full file.
    """
    dpath = os.path.join(opt['datapath'], 'mnist')
    # No explicit version string is used for this dataset.
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)
Пример #4
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
import numpy
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager

RESOURCES = [
    DownloadableFile(
        'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/train.en',
        'train.en',
        '845ee390042259f7512eabc6458b0fdb30db28d254c83232d97d4161c1fdae51',
        zipped=False,
    ),
    DownloadableFile(
        'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/train.de',
        'train.de',
        'a2e292ad1b1f3fec6224dc043460ba6c453932f470109579b8c1ce6d4df65262',
        zipped=False,
    ),
    DownloadableFile(
        'https://nlp.stanford.edu/projects/nmt/data/wmt14.en-de/newstest2014.en',
        'newstest2014.en',
        '2db4575449877142aef9187e5e8f58ec10af73a2589ad7a4690208f5234901bb',
        zipped=False,
    ),
    DownloadableFile(
Пример #5
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.tasks.light_dialog.builder import build_from_db
from parlai.core.build_data import DownloadableFile

# Three pickle files, each declared with zipped=False. The second argument
# differs from the file name in the URL, so it is presumably the local name the
# download is stored under -- confirm against DownloadableFile. The third
# argument is a 64-hex-digit checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/light/light-dialog-processed-small7.pkl',
        'light_data.pkl',
        '7c83cf49818586db9999ea67a4a6ad087afbd91c26ed629a9f00e21d0b84058f',
        zipped=False,
    ),
    DownloadableFile(
        'http://parl.ai/downloads/light/light-unseen-processed2.pkl',
        'light_unseen_data.pkl',
        '489b98d08dd94eaf1ba95439d04200ccc54623ade056839f87a5c4207bc5699c',
        zipped=False,
    ),
    DownloadableFile(
        'http://parl.ai/downloads/light/light-environment.pkl',
        'light_environment.pkl',
        '162389202f22063e1c32af7f9261aac13d20fc05598388d1e9748735996ec016',
        zipped=False,
    ),
]
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single bzip2 tarball for this task: source URL, file name for the download,
# and a 64-hex-digit checksum (presumably SHA-256 -- confirm against
# DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://nlp.cs.washington.edu/zeroshot/relation_splits.tar.bz2',
        'relation_splits.tar.bz2',
        'e33d0e367b6e837370da17a2d09d217e0a92f8d180f7abb3fd543a2d1726b2b4',
    )
]


def build(opt):
    dpath = os.path.join(opt['datapath'], 'QA-ZRE')
    version = None

    if not build_data.built(dpath, version_string=version):

        print('[building data: ' + dpath + ']')

        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
Пример #7
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import parlai.tasks.wikimovies.build as wikimovies_build
import os
from parlai.core.build_data import DownloadableFile

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/mturkwikimovies/mturkwikimovies.tar.gz',
        'mturkwikimovies.tar.gz',
        '41a85a17e813bfecd975d448f9a08178f65aba32fc10eaa1a48c0bed65431361',
    )
]


def build(opt):
    # Depends upon another dataset, wikimovies, build that first.
    wikimovies_build.build(opt)

    dpath = os.path.join(opt['datapath'], 'MTurkWikiMovies')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
Пример #8
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# The train and dev JSON files, both declared with zipped=False. Each entry
# gives the source URL, the file name for the download, and a 64-hex-digit
# checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json',
        'train-v2.0.json',
        '68dcfbb971bd3e96d5b46c7177b16c1a4e7d4bdef19fb204502738552dede002',
        zipped=False,
    ),
    DownloadableFile(
        'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json',
        'dev-v2.0.json',
        '80a5225e94905956a6446d296ca1093975c4d3b3260f1d6c8f68bc2ab77182d8',
        zipped=False,
    ),
]


def build(opt):
    """
    Compute the ``SQuAD2`` data directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): only the path/version setup is visible in this excerpt.
    """
    dpath = os.path.join(opt['datapath'], 'SQuAD2')
    # No explicit version string is used for this dataset.
    version = None
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/cbt/cbt.tar.gz',
        'cbt.tar.gz',
        '932df0cadc1337b2a12b4c696b1041c1d1c6d4b6bd319874c6288f02e4a61e92',
    )
]


def build(opt):
    """
    Prepare the ``CBT`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): this excerpt only (re)creates the directory; the step that
    fetches RESOURCES is presumably further down -- confirm in the full file.
    """
    dpath = os.path.join(opt['datapath'], 'CBT')
    # No explicit version string is used for this dataset.
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)
Пример #10
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single JSON-lines file, declared with zipped=False: source URL, file name
# for the download, and a 64-hex-digit checksum (presumably SHA-256 -- confirm
# against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/sensitive_topics_evaluation/data_valid.jsonl',
        'data_valid.jsonl',
        'df3a71da78bd231402237fded6df530c80f91814f03a2c3e0581be14fe24633d',
        zipped=False,
    )
]


def build(opt):
    """
    Prepare the ``sensitive_topics_evaluation`` directory under the data path.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): this excerpt only (re)creates the directory; the step that
    fetches RESOURCES is presumably further down -- confirm in the full file.
    """
    version = 'v1.0'
    dpath = os.path.join(opt['datapath'], 'sensitive_topics_evaluation')

    # Nothing to do when build_data.built() already accepts dpath for `version`.
    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)
Пример #11
0
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.


from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import jsonlines as jl
import numpy as np
import os,csv

# Two plain-text files identified by opaque IDs rather than URLs. The flags are
# spelled as keywords (ParlAI's signature is
# ``DownloadableFile(url, file_name, hashcode, zipped=True, from_google=False)``)
# so the meaning of the former positional ``False, True`` is explicit.
#
# NOTE(review): both checksums are empty strings. If DownloadableFile verifies
# the digest after downloading, an empty value cannot match -- fill in the real
# digests of the two files.
RESOURCES = [
    DownloadableFile(
        '1FUv2qit9wQ21NV_dbW5HeZVEzng5CMHE',
        'train_self_original.txt',
        '',
        zipped=False,
        from_google=True,
    ),
    DownloadableFile(
        '1lnrgxXCc7Y-6Ic_zl7b3tAXonmuGkjI5',
        'valid_self_original.txt',
        '',
        zipped=False,
        from_google=True,
    ),
]

def build_fb_format(q, a, task, dpath):
    """
    Start building the output for one data split.

    :param q: not used in the visible part -- presumably the questions.
    :param a: sized collection; its length determines ``N`` for ``'train'``.
    :param task: split name; only ``'train'`` is handled in the visible part.
    :param dpath: not used in the visible part -- presumably the output folder.

    NOTE(review): the rest of the body is not part of this excerpt.
    """
    if task == 'train':
        # 80% of len(a), truncated to an integer. The builtin ``int`` is used
        # because ``np.int`` was only an alias for it and no longer exists in
        # NumPy >= 1.24 (accessing it raises AttributeError).
        N = int(len(a) * 0.8)
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
import subprocess
import shutil
import csv
import time
from parlai.core.build_data import DownloadableFile

# Single zip of the repository's master branch. The second argument differs
# from the file name in the URL, so it is presumably the local name the
# download is stored under -- confirm against DownloadableFile. The third
# argument is a 64-hex-digit checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        'https://github.com/deepmind/narrativeqa/archive/master.zip',
        'narrative_qa.zip',
        'd9fc92d5f53409f845ba44780e6689676d879c739589861b4805064513d1476b',
    )
]


def get_rows_for_set(reader, req_set):
    """
    Collect the rows whose ``'set'`` field names the requested split.

    :param reader: iterable of mappings that each have a ``'set'`` entry.
    :param req_set: value the whitespace-stripped ``'set'`` field must equal.
    :return: list of the matching rows, in iteration order.
    """
    matching = []
    for entry in reader:
        # Surrounding whitespace in the field is ignored for the comparison.
        if entry['set'].strip() == req_set:
            matching.append(entry)
    return matching


def read_csv_to_dict_list(filepath):
    """
    Open a comma-separated file and wrap it in a ``csv.DictReader``.

    The file is opened with ``newline=''``, as the ``csv`` module asks for, so
    line breaks embedded in quoted fields reach the parser untranslated.

    :param filepath: path of the CSV file to read.
    :return: ``(reader, f)``. The reader pulls rows lazily from ``f``, so the
        file is left open; the caller must close ``f`` when done.
    """
    f = open(filepath, 'r', newline='')
    return csv.DictReader(f, delimiter=','), f

Пример #13
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive for this task. The second argument differs from the file name
# in the URL, so it is presumably the local name the download is stored under
# -- confirm against DownloadableFile. The third argument is a 64-hex-digit
# checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        'https://storage.googleapis.com/airdialogue/airdialogue_data.tar.gz',
        'airdialogue.tar.gz',
        '7d2130cdde73a59afd6ad6c463a25453d8ed677c1b3a4a4aaa2406db9c9712cb',
    )
]


def build(opt):
    dpath = os.path.join(opt['datapath'])
    airdialogue_path = os.path.join(dpath, 'airdialogue_data')
    version = '1.0'

    if not build_data.built(airdialogue_path, version_string=version):
        print('[building data: ' + airdialogue_path + ']')
        if build_data.built(airdialogue_path):
            build_data.remove_dir(airdialogue_path)

        # Download the data.
        for downloadable_file in RESOURCES:
Пример #14
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Train and test JSON files, declared with from_google=True and zipped=False.
# The first argument is an opaque ID rather than a URL (presumably a Google
# Drive file ID, given from_google=True -- confirm against DownloadableFile);
# the third is a 64-hex-digit checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        '1u5zzfENGbRYVo-HsyFXZc3sJ9FgDTNx4',
        'raw_train_data.json',
        '7380e41ca8c65084140af997057eb9e8f974e08a19fdb40de73a9f96e4b5bd6d',
        from_google=True,
        zipped=False,
    ),
    DownloadableFile(
        '1nRsAyuVZu7L2f2YcxNbxsT1gZzFnQy-P',
        'raw_test_data.json',
        '3fd2cc672fbae118f3545640fc4c4f45a2e9037c98eebd1e64ad2e0ce5d1fe35',
        from_google=True,
        zipped=False,
    ),
]


def build(opt):
    """
    Compute the ``holl_e`` data directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): only the path setup is visible in this excerpt.
    """
    dpath = os.path.join(opt['datapath'], 'holl_e')
Пример #15
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
import json
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager

# Train and validation JSON files (v0.2), both declared with zipped=False.
# Each entry gives the source URL, the file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'https://s3.amazonaws.com/my89public/quac/train_v0.2.json',
        'train_v0.2.json',
        'ff5cca5a2e4b4d1cb5b5ced68b9fce88394ef6d93117426d6d4baafbcc05c56a',
        zipped=False,
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/my89public/quac/val_v0.2.json',
        'val_v0.2.json',
        '09e622916280ba04c9352acb1bc5bbe80f11a2598f6f34e934c51d9e6570f378',
        zipped=False,
    ),
]

# Matches the "v0.2" suffix of the file names above.
VERSION = '0.2'

# Marker strings; where they are emitted or parsed is not visible in this
# excerpt.
SHOULD = '__SHOULD__'
MAYBE = '__MAYBE__'
SHOULD_NOT = '__SHOULDNOT__'
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os
from parlai.tasks.personality_captions.download_images import download_images

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/image_chat/image_chat.tgz',
        'image_chat.tgz',
        'ad733e181de33f1085166bb7af17fcf228504bd48228ed8cc20c5e7a9fa5d259',
    )
]


def build(opt):
    """
    Prepare the ``image_chat`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): the download step announced by the trailing comment is not
    part of this excerpt, and ``image_path`` is not used in the visible part.
    """
    dpath = os.path.join(opt['datapath'], 'image_chat')
    # Sibling folder of dpath under the same data path.
    image_path = os.path.join(opt['datapath'], 'yfcc_images')
    version = '1.0'
    # Nothing to do when build_data.built() already accepts dpath for `version`.
    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
Пример #17
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/personachat/personachat.tgz',
        'personachat.tgz',
        '507cf8641d333240654798870ea584d854ab5261071c5e3521c20d8fa41d5622',
    )
]


def build(opt):
    """
    Prepare the ``Persona-Chat`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): this excerpt only (re)creates the directory; the step that
    fetches RESOURCES is presumably further down -- confirm in the full file.
    """
    version = 'v1.0'
    dpath = os.path.join(opt['datapath'], 'Persona-Chat')

    # Nothing to do when build_data.built() already accepts dpath for `version`.
    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)
Пример #18
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import os

# Single zip for this task. The second argument differs from the file name in
# the URL, so it is presumably the local name the download is stored under --
# confirm against DownloadableFile. The third argument is a 64-hex-digit
# checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        'https://dl.dropboxusercontent.com/s/iyz6l7jhbt6jb7q/new_dataset_release.zip',
        'FVQA.zip',
        '66d1831a61d1282fb0c95c01435eda9b465961d507c1e166e4c32b89687c3c26',
    )
]


def build(opt):
    """
    Prepare the ``FVQA`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): the download step announced by the trailing comment is not
    part of this excerpt.
    """
    dpath = os.path.join(opt['datapath'], 'FVQA')
    # No explicit version string is used for this dataset.
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
Пример #19
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.tasks.dbll_babi.build as dbll_babi_build
import parlai.tasks.wikimovies.build as wikimovies_build

# Single archive: source URL, file name for the download, and a 64-hex-digit
# checksum (presumably SHA-256 -- confirm against DownloadableFile).
# NOTE(review): build() below does not reference this list.
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/dbll/dbll.tgz',
        'dbll.tgz',
        'd8c727dac498b652c7f5de6f72155dce711ff46c88401a303399d3fad4db1e68',
    )
]


def build(opt):
    """
    Build the datasets this task depends on.

    :param opt: options object handed unchanged to each dependency's
        ``build``.
    """
    # Depends upon other datasets: wikimovies is built first, then dbll_babi.
    for prerequisite in (wikimovies_build, dbll_babi_build):
        prerequisite.build(opt)
Пример #20
0
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
import subprocess
from os.path import join as pjoin
from os.path import isfile, isdir


# pre-computed files
# Each entry gives the source URL, the file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    # wet.paths.gz is declared with zipped=False because the archive format is
    # not recognized. It gets unzipped with subprocess after RESOURCES are
    # downloaded.
    DownloadableFile(
        'https://commoncrawl.s3.amazonaws.com/crawl-data/CC-MAIN-2018-34/wet.paths.gz',
        'wet.paths.gz',
        'e3a8addc6a33b54b1dd6488a98c875851ef1aca3b80133d39f6897330a8835fb',
        zipped=False,
    ),
    DownloadableFile(
        'https://dl.fbaipublicfiles.com/eli5qa/explainlikeimfive_ccrawl_ids.json.gz',
        'explainlikeimfive_ccrawl_ids.json.gz',
        '59cd7b6a8580421aecae66cd33d065073f2abf21d86097b3262bd460a7a14f0d',
        zipped=False,
    ),
    DownloadableFile(
        'https://dl.fbaipublicfiles.com/eli5qa/explainlikeimfive_unigram_counts.json',
        'explainlikeimfive_unigram_counts.json',
        '0433a4dda7532ba1dae2f5b6bf70cd5ab91fd2772f75e99b4c15c2e04ba80dfd',
        zipped=False,
    ),
]
Пример #21
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Two JSON dialogue files, both declared with zipped=False. Each entry gives
# the source URL, the file name for the download, and a 64-hex-digit checksum
# (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'https://storage.googleapis.com/dialog-data-corpus/TASKMASTER-1-2019/self-dialogs.json',
        'self-dialogs.json',
        '1e590ed0ccee279e40c2fb9e083d3b9417477c6bfe35ce5b2277167698dd858d',
        zipped=False,
    ),
    DownloadableFile(
        'https://storage.googleapis.com/dialog-data-corpus/TASKMASTER-1-2019/woz-dialogs.json',
        'woz-dialogs.json',
        'cd3bc4e968487315d412c044d30af2bf0a4b33c3ef8b74c589f1e1fa832bf72f',
        zipped=False,
    ),
]


def build(opt):
    """
    Compute the ``taskmaster-1`` data directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): only the path setup is visible in this excerpt.
    """
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'taskmaster-1')
    # define version if any
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import gzip
import json
import os
import tqdm
import parlai.core.build_data as build_data
from parlai.core.build_data import DownloadableFile

# Train/dev/eval files, stored under the shorter names train.gz, valid.gz and
# test.gz (the second argument is presumably the local file name -- confirm
# against DownloadableFile). They are declared with zipped=False although the
# names end in .gz; this module imports gzip, so they are presumably read in
# compressed form -- confirm in the full file. The third argument is a
# 64-hex-digit checksum (presumably SHA-256).
RESOURCES = [
    DownloadableFile(
        'https://msmarco.blob.core.windows.net/msmarco/train_v2.1.json.gz',
        'train.gz',
        'e91745411ca81e441a3bb75deb71ce000dc2fc31334085b7d499982f14218fe2',
        zipped=False,
    ),
    DownloadableFile(
        'https://msmarco.blob.core.windows.net/msmarco/dev_v2.1.json.gz',
        'valid.gz',
        '5b3c9c20d1808ee199a930941b0d96f79e397e9234f77a1496890b138df7cb3c',
        zipped=False,
    ),
    DownloadableFile(
        'https://msmarco.blob.core.windows.net/msmarco/eval_v2.1_public.json.gz',
        'test.gz',
        '05ac0e448450d507e7ff8e37f48a41cc2d015f5bd2c7974d2445f00a53625db6',
        zipped=False,
    ),
]
Пример #23
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import parlai.core.build_data as build_data
import os
import gzip
import json
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager

RESOURCES = [
    DownloadableFile(
        'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Appliances.json.gz',
        'qa_Appliances.json.gz',
        '9c613a5dfedd1071431faa29de903b1b0e592c5ac1c7861c26d8b69dfda8ac78',
        zipped=False,
    ),
    DownloadableFile(
        'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Arts_Crafts_and_Sewing.json.gz',
        'qa_Arts_Crafts_and_Sewing.json.gz',
        'c9aad6d615294571c1be7ea6a88730829a68e701ca7d1168f4d6b5234c37ac65',
        zipped=False,
    ),
    DownloadableFile(
        'http://jmcauley.ucsd.edu/data/amazon/qa/qa_Automotive.json.gz',
        'qa_Automotive.json.gz',
        'ca2da4b9d3afd3e6c915d69b34618bdcf9c6febadd7389f368fb51e9e1585009',
        zipped=False,
    ),
    DownloadableFile(
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

# Single archive, declared with from_google=True. The first argument is an
# opaque ID rather than a URL (presumably a Google Drive file ID -- confirm
# against DownloadableFile); the third is a 64-hex-digit checksum (presumably
# SHA-256).
RESOURCES = [
    DownloadableFile(
        '0BwmD_VLjROrfN0xhTDVteGQ3eG8',
        'qadailymail.tar.gz',
        '77bfe0d91dbc9774991bbce59895743adfc984eafffc328a7b1d34a89e2b5646',
        from_google=True,
    )
]


def _process(fname, fout):
    """
    Read ``fname`` and start assembling one example string from fixed lines.

    The visible part builds ``'1 <line 2> <line 4>\\t<line 6>'`` (0-based line
    indices), so the file needs at least seven lines or an IndexError is
    raised.

    :param fname: path of the text file to read.
    :param fout: not used in the visible part -- presumably the handle the
        finished example is written to; confirm in the full file.
    """
    with open(fname) as f:
        # Only newline characters are stripped; other whitespace is kept.
        lines = [line.strip('\n') for line in f]
    # main article
    s = '1 ' + lines[2]
    # add question
    s = s + ' ' + lines[4]
    # add answer
    s = s + '\t' + lines[6]
    # add candidates (and strip them of the real names)
Пример #25
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data

# Single-turn and multi-turn JSON files, both declared with zipped=False. Each
# entry gives the source URL, the file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/dialogue_safety/single_turn_safety.json',
        'single_turn_safety.json',
        'f3a46265aa639cfa4b55d2be4dca4be1c596acb5e8f94d7e0041e1a54cedd4cd',
        zipped=False,
    ),
    DownloadableFile(
        'http://parl.ai/downloads/dialogue_safety/multi_turn_safety.json',
        'multi_turn_safety.json',
        'e3e577f456d63d51eb7b5f98ffd251ad695476f186d422fa8de1a177742fa7b6',
        zipped=False,
    ),
]


def build(datapath):
    version = 'v1.0'
    dpath = os.path.join(datapath, 'dialogue_safety')

    if not build_data.built(dpath, version):
Пример #26
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
from parlai.core.build_data import DownloadableFile
from parlai.core import build_data

# Single zip fetched through a direct Google Drive download URL. The second
# argument is the file name for the download and the third a 64-hex-digit
# checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'https://drive.google.com/uc?export=download&id=0B2MvoQfXtqZmMTJqclpBdGN2bmc',
        'dialog-bAbI-plus.zip',
        'e67dfecbde5e6250833143a6148150a313204237b765d39e7b8ebc111cb3204e',
    )
]


def build(opt):
    """
    Prepare the ``dialog-bAbI-plus`` directory under ``opt['datapath']``.

    :param opt: mapping that provides the ``'datapath'`` entry.

    NOTE(review): the download step announced by the trailing comment is not
    part of this excerpt.
    """
    dpath = os.path.join(opt['datapath'], 'dialog-bAbI-plus')
    # No explicit version string is used for this dataset.
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
Пример #27
0
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager
import parlai.core.build_data as build_data
import codecs
import os

# Single archive for this task: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
RESOURCES = [
    DownloadableFile(
        'http://parl.ai/downloads/cornell_movie/cornell_movie_dialogs_corpus.tgz',
        'cornell_movie_dialogs_corpus.tgz',
        'ae77ab2e4743ce929087a4f529934059b920c4bdaa3143741b65b1e648ab45fd',
    )
]


def create_fb_format(lines_file, convo_file, outpath):
    """
    Open ``train.txt``, ``valid.txt`` and ``test.txt`` in ``outpath`` for writing.

    :param lines_file: not used in the visible part -- presumably the path of
        the corpus' lines file; confirm in the full file.
    :param convo_file: not used in the visible part -- presumably the path of
        the corpus' conversations file; confirm in the full file.
    :param outpath: directory in which the three output files are created.

    NOTE(review): the code that fills the files is not part of this excerpt.
    """
    print('[building fbformat]')
    # All three output files are opened in one `with` statement.
    with PathManager.open(os.path.join(outpath, 'train.txt'),
                          'w') as ftrain, PathManager.open(
                              os.path.join(outpath, 'valid.txt'),
                              'w') as fvalid, PathManager.open(
                                  os.path.join(outpath, 'test.txt'),
                                  'w') as ftest:
        lines = {}
Пример #28
0
from parlai.core.build_data import DownloadableFile
from parlai.utils.io import PathManager
import parlai.core.build_data as build_data
import os
import json

# Dataset version; it is embedded in each of the three file names below.
VERSION = '1'
TRAIN_FILENAME = 'hotpot_train_v{}.1.json'.format(VERSION)
DEV_DISTRACTOR_FILENAME = 'hotpot_dev_distractor_v{}.json'.format(VERSION)
DEV_FULLWIKI_FILENAME = 'hotpot_dev_fullwiki_v{}.json'.format(VERSION)

# The entries reuse the *_FILENAME constants instead of repeating the literal
# names, so the download targets cannot drift from the constants. The resulting
# strings are identical to the former literals for VERSION = '1'.
# NOTE: the checksums belong to the version-1 files and must be updated
# together with VERSION.
RESOURCES = [
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/' + TRAIN_FILENAME,
        TRAIN_FILENAME,
        '26650cf50234ef5fb2e664ed70bbecdfd87815e6bffc257e068efea5cf7cd316',
        zipped=False,
    ),
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/' + DEV_DISTRACTOR_FILENAME,
        DEV_DISTRACTOR_FILENAME,
        '4e9ecb5c8d3b719f624d66b60f8d56bf227f03914f5f0753d6fa1b359d7104ea',
        zipped=False,
    ),
    DownloadableFile(
        'http://curtis.ml.cmu.edu/datasets/hotpot/' + DEV_FULLWIKI_FILENAME,
        DEV_FULLWIKI_FILENAME,
        '2f1f3e594a3066a3084cc57950ca2713c24712adaad03af6ccce18d1846d5618',
        zipped=False,
    ),
]
Пример #29
0
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile

RESOURCES = [
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Train_mscoco.zip',
        'v2_Questions_Train_mscoco.zip',
        '05a64b6e2582d06d7585f5429674a9a33851878be1bff9f8668cdcf792df611e',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Val_mscoco.zip',
        'v2_Questions_Val_mscoco.zip',
        'e71f6c5c3e97a51d050f28243e262b28cd0c48d11a6b4632d769d30d3f93222a',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Questions_Test_mscoco.zip',
        'v2_Questions_Test_mscoco.zip',
        '982e2e687a86514b78ea83af356d151976c5e3fb4168a29ca543610574082ad7',
    ),
    DownloadableFile(
        'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/v2_Annotations_Val_mscoco.zip',
        'v2_Annotations_Val_mscoco.zip',
        '0caae7c8d1dafd852727f5ac046bc1efca9b72026bd6ffa34fc489f3a7b3291e',
Пример #30
0
import os

import parlai.core.build_data as build_data
from parlai.core.build_data import DownloadableFile
from parlai.core.opt import Opt

# Release tags; each one is embedded in the matching archive name below.
BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION = 'v0.2'
HUMAN_SAFETY_EVAL_TESTSET_VERSION = 'v0.1'

# Folder name of this task.
TASK_FOLDER_NAME = 'bot_adversarial_dialogue'

# Each list holds one archive: source URL, file name for the download, and a
# 64-hex-digit checksum (presumably SHA-256 -- confirm against DownloadableFile).
BOT_ADVERSARIAL_DIALOGUE_DATASETS_RESOURCES = [
    DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        f'dialogue_datasets_{BOT_ADVERSARIAL_DIALOGUE_DATASETS_VERSION}.tar.gz',
        '2178b022fac154ddd9b570f6386abc4cd3e7ceb4476f0bebfbce5941424461eb',
    )
]
# Uses the directly imported DownloadableFile (the same class that
# build_data.DownloadableFile names) so both lists are spelled the same way.
HUMAN_SAFETY_EVAL_TESTSET_RESOURCES = [
    DownloadableFile(
        f'http://parl.ai/downloads/bot_adversarial_dialogue/human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        f'human_safety_eval_{HUMAN_SAFETY_EVAL_TESTSET_VERSION}.tar.gz',
        'b8b351c3e5eefcd54fdd73cd6a04847cd1eeb9106fc53b92a87e2a4c7537a7b2',
    )
]


def get_adversarial_dialogue_folder(datapath: str) -> str:
    """
    Return the ``dialogue_datasets`` folder of this task under ``datapath``.

    :param datapath: root data directory.
    :return: ``datapath`` joined with ``TASK_FOLDER_NAME`` and
        ``'dialogue_datasets'``.
    """
    path_parts = (datapath, TASK_FOLDER_NAME, 'dialogue_datasets')
    return os.path.join(*path_parts)