Example #1
    def load_data(self):

        datas = []
        labels = []
        with codecs.open(self.path + self.data_name, "r",
                         encoding="utf-8") as fin:
            for ele in fin:
                if len(ele) < 10:  # skip blank or malformed lines
                    continue
                data, label = ele.split("\t")
                data = data.split()
                if not data:
                    continue
                datas.append(data[0])
                labels.append(self.label_dic[label[:-1]])  # strip the trailing newline from the label

        # Split each document into sentences on the Chinese full stop, then sort
        # by sentence count (descending) while keeping the labels aligned.
        datas = [data.split("。") for data in datas]
        pairs = sorted(zip(datas, labels), key=lambda p: len(p[0]), reverse=True)
        datas, labels = map(list, zip(*pairs))

        datas, segment_ids, input_masks = self.convert_data2id(datas)
        return datas, segment_ids, input_masks, labels
Example #2
    def __getitem__(self, index):
        data = self.datas[index]
        fields = data.split(' ')
        image = fields[0].replace('\\', '/')
        label1 = fields[1]
        label2 = fields[2].strip('\n')
        img = cv2.imread(data_path + image)
        img = cv2.resize(img, (224, 224))
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).float()
        time = torch.tensor(period.index(label1))
        weather = torch.tensor(weathers.index(label2))

        return img, time, weather
Example #3
    def __init__(self, data_root, data_list, transform=None):
        self.root = data_root
        self.transform = transform

        with open(data_list, 'r') as f:
            data_list = f.readlines()

        self.n_data = len(data_list)
        self.img_paths = []
        self.img_labels = []

        for data in data_list:
            fields = data.split()
            self.img_paths.append(fields[0])
            self.img_labels.append(fields[1])
Example #4
    def __init__(self, path, config):
        self.config = config
        if isinstance(path, list):
            data = ""
            for element in path:
                with open(element, 'r', encoding='utf-8', errors='ignore') as f:
                    data += f.read()
            data = data.split('\n\n')
        else:
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                data = f.read().split('\n\n')

        # Each blank-line-separated block is one sentence; each line is "token<TAB>tag".
        data = [[(element2.split('\t')[0], element2.split('\t')[1])
                 for element2 in element1.split('\n') if len(element2) != 0]
                for element1 in data if len(element1.strip('\n')) != 0]

        self.sentenceList = [[element2[0] for element2 in element1]
                             for element1 in data]

        self.tagList = [[element2[1] for element2 in element1]
                        for element1 in data]

        if self.config['model']['pretrained_model'] == 'Bert':
            self.tokenizer = BertTokenizer.from_pretrained(
                self.config['model']['bert_base_chinese'], do_lower_case=True)

        if self.config['model']['pretrained_model'] == 'XLNet':
            self.tokenizer = XLNetTokenizer.from_pretrained(
                self.config['model']['xlnet_base_chinese'],
                do_lower_case=False)
Example #5
def load_data(filename):
    with open(filename, 'r') as f:
        data = f.read()

    tokens = data.split()

    return tokens
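A minimal usage sketch for the loader above; corpus.txt is a hypothetical file name, and any whitespace-separated text works:

tokens = load_data('corpus.txt')  # hypothetical file name
print(len(tokens), tokens[:5])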
Example #6
 def read_data(self):
     """
     Load the data.
     """
     with open(self.path + "/text8.txt") as f:
         data = f.read()
     data = data.split()
     self.word2freq = {}
     # Build the word-frequency table.
     for word in data:
         if self.word2freq.get(word) is not None:
             self.word2freq[word] += 1
         else:
             self.word2freq[word] = 1
     word2id = {}
     id2word = {}
     # Build word2id/id2word, skipping words rarer than min_count.
     for word in self.word2freq:
         if self.word2freq[word] < self.min_count:
             continue
         if word2id.get(word) is None:
             word2id[word] = len(word2id)
             id2word[len(id2word)] = word
     self.word2id = word2id
     self.id2word = id2word
     print(len(self.word2id))
     return data
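The frequency count and vocabulary construction above can be written more compactly with collections.Counter; a standalone sketch, with min_count standing in for self.min_count:

from collections import Counter

def build_vocab(tokens, min_count=5):
    # Count word frequencies, then assign ids to words at or above the threshold.
    freq = Counter(tokens)
    word2id, id2word = {}, {}
    for word, count in freq.items():
        if count >= min_count:
            word2id[word] = len(word2id)
            id2word[len(id2word)] = word
    return freq, word2id, id2word

freq, w2i, i2w = build_vocab("the quick the lazy the".split(), min_count=2)
print(freq['the'], w2i)  # 3 {'the': 0}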
Example #7
    def __init__(self, root='/vulcanscratch/koutilya/kitti', data_file='train.txt', phase='train',
                 img_transform=None, joint_transform=None, depth_transform=None, depth_resize='bilinear'):

        self.root = root
        self.data_file = data_file
        self.files = []
        self.phase = phase
        self.img_transform = img_transform
        self.joint_transform = joint_transform
        self.depth_transform = depth_transform
        self.depth_resize = depth_resize
        depth_path = ''
        if self.depth_resize == 'bilinear':
            depth_path = 'Bilinear_model_pseudo_labels'
        elif self.depth_resize == 'bicubic':
            depth_path = 'Bicubic_model_pseudo_labels'

        with open(osp.join(self.root, self.data_file), 'r') as f:
            data_list = f.read().split('\n')
            for data in data_list:
                if len(data) == 0:
                    continue
                
                data_info = data.split(' ')

                self.files.append({
                        "l_rgb": data_info[0],
                        "r_rgb": data_info[1],
                        "cam_intrin": data_info[2],
                        "depth": osp.join('Depth_baseline_all_syn', depth_path, data_info[0])
                        })
Example #8
def latent_space_transition(items):  # input is a list of (a, b) tuples
    load_last_model()
    model.eval()
    data = [im for item in items for im in item[:-1]]
    data = [totensor(i) for i in data]
    data = torch.stack(data, dim=0)
    data = Variable(data)
    if args.cuda:
        data = data.cuda()
    z = model.get_latent_var(data.view(-1, model.nc, model.ndf, model.ngf))
    it = iter(z.split(1))
    z = zip(it, it)  # pair consecutive latent vectors (a, b)
    zs = []
    numsample = 11
    for i, j in z:
        for factor in np.linspace(0, 1, numsample):
            zs.append(i + (j - i) * factor)  # linearly interpolate from i to j
    z = torch.cat(zs, 0)
    recon = model.decode(z)

    it1 = iter(data.split(1))
    it2 = [iter(recon.split(1))] * numsample
    result = zip(it1, it1, *it2)
    result = [im for item in result for im in item]

    result = torch.cat(result, 0)
    torchvision.utils.save_image(result.data,
                                 '../imgs/trans.jpg',
                                 nrow=2 + numsample,
                                 padding=2)
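The zip(it, it) idiom above pairs consecutive elements of a single iterator; a standalone illustration:

it = iter([1, 2, 3, 4, 5, 6])
pairs = list(zip(it, it))  # consecutive pairs: [(1, 2), (3, 4), (5, 6)]
print(pairs)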
Example #9
    def preprocess(self):
        """Preprocess the Pixels attribute file."""
        if self.mode == 'train':
            trainFileList = os.listdir(self.train_image_dir)
            random.seed(1234)
            random.shuffle(trainFileList)
            for i, trainFile in enumerate(trainFileList):
                filename = trainFile
                n = trainFile[:-4].split('_')  # filename encodes the normal: "<id>_nx=..._ny=..._nz=..."
                nx = float(n[1].split('=')[1])
                ny = float(n[2].split('=')[1])
                nz = float(n[3].split('=')[1])
                normal = [nx, ny, nz]
                self.train_dataset.append([filename, normal])

        if self.mode == 'test':
            testFileList = os.listdir(self.test_image_dir)
            with open(os.path.join(self.test_image_dir, testFileList[0])) as f:
                data = f.read()
            lines = data.split('\n')
            for i in range(len(lines) - 1):
                line = lines[i].split('\\')
                ind = int(line[0])
                nx = float(line[1])
                ny = float(line[2])
                nz = float(line[3])
                normal = [nx, ny, nz]
                mapsind = [int(l) for l in line[4:14]]
                maps = [float(l) for l in line[14:24]]
                self.test_dataset.append([ind, normal, mapsind, maps])

        print('Finished preprocessing the Pixels dataset...')
Example #10
    def __init__(self,
                 root='./datasets',
                 data_file='train.txt',
                 phase='train',
                 img_transform=None,
                 depth_transform=None,
                 joint_transform=None):
        self.root = root
        self.data_file = data_file
        self.files = []
        self.phase = phase
        self.img_transform = img_transform
        self.depth_transform = depth_transform
        self.joint_transform = joint_transform
        self.to_tensor = torchvision.transforms.ToTensor()

        with open(osp.join('./datasets/vkitti/', self.data_file), 'r') as f:
            data_list = f.read().split('\n')
            for data in data_list:

                if len(data) == 0:
                    continue
                data_info = data.split(' ')

                self.files.append({"rgb": data_info[0], "depth": data_info[1]})
Example #11
    def __init__(self,
                 root='./datasets/kitti',
                 data_file='train.list',
                 phase='train',
                 joint_transform=None):

        self.root = root
        self.data_file = data_file
        self.files = []
        self.joint_transform = joint_transform
        self.phase = phase
        self.no_gt = False

        with open(osp.join(self.root, self.data_file), 'r') as f:
            data_list = f.read().split('\n')
            for data in data_list:
                if len(data) == 0:
                    continue

                data_info = data.split(' ')

                if len(data_info) == 3:
                    self.files.append({
                        "rgb": data_info[0],
                        "sparse": data_info[1],
                        "gt": data_info[2]
                    })
                else:
                    self.files.append({
                        "rgb": data_info[0],
                        "sparse": data_info[1],
                    })
                    self.no_gt = True
        self.nSamples = len(self.files)
Example #12
    def __init__(self,
                 root='./datasets',
                 data_file='test.list',
                 phase='test',
                 img_transform=None,
                 joint_transform=None,
                 depth_transform=None):
        self.root = root
        self.data_file = data_file
        self.files = []
        self.phase = phase
        self.img_transform = img_transform
        self.joint_transform = joint_transform

        with open(osp.join(self.root, self.data_file), 'r') as f:
            data_list = f.read().split('\n')
            for data in data_list:
                if len(data) == 0:
                    continue

                data_info = data.split(' ')

                self.files.append({
                    "rgb": data_info[0],
                })
Example #13
 def __obtain(self, file_paths):
     datas, boxes = [], []
     for file_idx, file_path in enumerate(file_paths):
         assert osp.isfile(file_path), 'The path : {} is not a file.'.format(file_path)
         listfile = open(file_path, 'r')
         listdata = listfile.read().splitlines()
         listfile.close()
         print('Load [{:d}/{:d}]-th list : {:} with {:} images'.format(
             file_idx, len(file_paths), file_path, len(listdata)))
         for idx, data in enumerate(listdata):
             alls = [x for x in data.split(' ') if x != '']  # drop empty tokens from repeated spaces
             assert len(alls) == 6 or len(alls) == 7, \
                 'The {:04d}-th line is wrong : {:}'.format(idx, data)
             datas.append(alls[0])
             box = np.array([
                 float(alls[2]),
                 float(alls[3]),
                 float(alls[4]),
                 float(alls[5])
             ])
             boxes.append(box)
     labels = []
     for idx, data in enumerate(datas):
         assert isinstance(
             data, str), 'The type of data is not correct : {}'.format(data)
         meta = Point_Meta(1, None, boxes[idx], data, self.dataset_name)
         labels.append(meta)
     return datas, labels
Example #14
    def load_list(self, file_lists, num_pts, reset):
        lists = load_file_lists(file_lists)
        print('GeneralDataset : load-list : load {:} lines'.format(len(lists)))

        datas, labels, boxes, face_sizes = [], [], [], []

        for idx, data in enumerate(lists):
            alls = [x for x in data.split(' ') if x != '']

            assert len(alls) == 6 or len(alls) == 7, \
                'The {:04d}-th line in {:} is wrong : {:}'.format(idx, data)
            datas.append(alls[0])
            if alls[1] == 'None':
                labels.append(None)
            else:
                labels.append(alls[1])
            box = np.array([
                float(alls[2]),
                float(alls[3]),
                float(alls[4]),
                float(alls[5])
            ])
            boxes.append(box)
            if len(alls) == 6:
                face_sizes.append(None)
            else:
                face_sizes.append(float(alls[6]))
        self.load_data(datas, labels, boxes, face_sizes, num_pts, reset)
Example #15
def perform_latent_space_arithmatics(
        items):  # input is a list of 3-tuples [(a1, b1, c1), (a2, b2, c2)]
    load_last_model()
    model.eval()
    data = [im for item in items for im in item]
    data = [totensor(i) for i in data]
    data = torch.stack(data, dim=0)
    data = Variable(data)
    if args.cuda:
        data = data.cuda()
    z = model.get_latent_var(data.view(-1, model.nc, model.ndf, model.ngf))
    it = iter(z.split(1))
    z = zip(it, it, it)  # group consecutive latent vectors into (a, b, c) triples
    zs = []
    numsample = 11
    for i, j, k in z:
        for factor in np.linspace(0, 1, numsample):
            zs.append((i - j) * factor + k)  # move k along the (i - j) direction
    z = torch.cat(zs, 0)
    recon = model.decode(z)

    it1 = iter(data.split(1))
    it2 = [iter(recon.split(1))] * numsample
    result = zip(it1, it1, it1, *it2)
    result = [im for item in result for im in item]

    result = torch.cat(result, 0)
    torchvision.utils.save_image(result.data,
                                 '../imgs/vec_math.jpg',
                                 nrow=3 + numsample,
                                 padding=2)
Example #16
 def _makelist(self, readtxt):
     with open(readtxt, 'r') as f:
         data = f.read()

     datalist = data.split('\n')
     datalist.pop()  # drop the empty entry left by the trailing newline

     return datalist
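Note that str.splitlines() drops the trailing empty entry automatically, so the pop() is unnecessary; a minimal alternative (list.txt is a hypothetical file name):

with open('list.txt') as f:  # hypothetical file name
    datalist = f.read().splitlines()  # no trailing '' entry to remove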
Example #17
 def load_data(self):
     datas = open(self.path + self.data_name,
                  encoding="utf-8").read().splitlines()
     datas = [
         data.split("\t\t")[-1].split() + [data.split("\t\t")[2]]
         for data in datas
     ]
     datas = sorted(datas, key=lambda x: len(x), reverse=True)
     labels = [int(data[-1]) - 1 for data in datas]
     datas = [data[0:-1] for data in datas]
     if self.word2id is None:
         self.get_word2id(datas)
     for i, data in enumerate(datas):
         datas[i] = " ".join(data).split("<sssss>")
         for j, sentence in enumerate(datas[i]):
             datas[i][j] = sentence.split()
     datas = self.convert_data2id(datas)
     return datas, labels
Example #18
 def __getitem__(self, index):
     data = self.info[index]
     data = data.split()
     img = Image.open(self.img_dir + data[0])
     img = (np.asarray(img.resize((224, 224))).astype(np.float32) -
            np.array([104., 117., 124.])) / 127.5  # subtract the per-channel mean, scale to roughly [-1, 1]
     img = torch.Tensor(np.transpose(img, [2, 0, 1])).cuda()
     age = torch.Tensor([int(data[1])]).long().cuda()
     return img, age
Example #19
 def __getitem__(self, index):
     data = Image.open(os.path.join(self.file_path, self.ims[index]))
     bic_im = self.trans_bic(data)
      data = data.convert("YCbCr")
      data_y, cb, cr = data.split()  # PIL Image.split(): separate the Y, Cb, Cr channels
      data = self.trans(data_y)
     # batch must contain tensors, numbers, dicts or lists;
     # data_dict = {'bic':bic_im,'name':self.ims[index]}
     return data, bic_im, self.ims[index]
Example #20
 def padding(self, data, max_len):
     data = data.split()
     if len(data) > max_len - 2:  # leave room for the two delimiter tokens
         data = data[:max_len - 2]
     Y = list(map(lambda t: self.w2id.get(t, 3), data))  # OOV tokens map to index 3
     Y = [1] + Y + [2]  # prepend 1 / append 2 as start and end markers
     length = len(Y)
     Y = torch.cat([torch.LongTensor(Y), torch.zeros(max_len - length).long()])  # pad with 0
     return Y
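A standalone sketch of the same padding scheme, using the special indices visible above (0 = pad, 1 = start, 2 = end, 3 = out-of-vocabulary); the vocabulary w2id here is hypothetical:

import torch

w2id = {'hello': 4, 'world': 5}  # hypothetical vocabulary

def padding(data, max_len):
    tokens = data.split()[:max_len - 2]  # leave room for the start/end markers
    ids = [1] + [w2id.get(t, 3) for t in tokens] + [2]
    return torch.cat([torch.LongTensor(ids),
                      torch.zeros(max_len - len(ids)).long()])

print(padding('hello unknown world', 8))  # tensor([1, 4, 3, 5, 2, 0, 0, 0])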
Example #21
    def __init__(self, state="train", k=0, embedding_type="word2vec"):

        self.path = os.path.abspath('')
        if "data" not in self.path:
            self.path += "/data"
        # Load the dataset.
        pos_samples = open(self.path + "/MR/rt-polarity.pos",
                           errors="ignore").readlines()
        neg_samples = open(self.path + "/MR/rt-polarity.neg",
                           errors="ignore").readlines()
        datas = pos_samples + neg_samples
        #datas = [nltk.word_tokenize(data) for data in datas]
        datas = [data.split() for data in datas]
        max_sample_length = max([len(sample) for sample in datas
                                 ])  # longest sentence; every sentence is padded to this length
        labels = [1] * len(pos_samples) + [0] * len(neg_samples)
        word2id = {"<pad>": 0}  # build the word2id vocabulary
        for i, data in enumerate(datas):
            for j, word in enumerate(data):
                if word2id.get(word) is None:
                    word2id[word] = len(word2id)
                datas[i][j] = word2id[word]
            datas[i] = datas[i] + [0] * (max_sample_length - len(datas[i]))
        self.n_vocab = len(word2id)
        self.word2id = word2id
        if embedding_type == "word2vec":
            self.get_word2vec()
        elif embedding_type == "glove":
            self.get_glove_embedding()
        else:
            pass
        c = list(zip(datas, labels))  # shuffle data and labels together
        random.seed(1)
        random.shuffle(c)
        datas[:], labels[:] = zip(*c)
        if state == "train":  # 生成训练集
            self.datas = datas[:int(k * len(datas) / 10)] + datas[int(
                (k + 1) * len(datas) / 10):]
            self.labels = labels[:int(k * len(datas) / 10)] + labels[int(
                (k + 1) * len(labels) / 10):]
            self.datas = np.array(self.datas[0:int(0.9 * len(self.datas))])
            self.labels = np.array(self.labels[0:int(0.9 * len(self.labels))])
        elif state == "valid":  # 生成验证集
            self.datas = datas[:int(k * len(datas) / 10)] + datas[int(
                (k + 1) * len(datas) / 10):]
            self.labels = labels[:int(k * len(datas) / 10)] + labels[int(
                (k + 1) * len(labels) / 10):]
            self.datas = np.array(self.datas[int(0.9 * len(self.datas)):])
            self.labels = np.array(self.labels[int(0.9 * len(self.labels)):])
        elif state == "test":  # 生成测试集
            self.datas = np.array(
                datas[int(k * len(datas) / 10):int((k + 1) * len(datas) / 10)])
            self.labels = np.array(labels[int(k * len(datas) /
                                              10):int((k + 1) * len(datas) /
                                                      10)])
Example #22
def extract_vertices(pth):

    with open(pth, "r") as f:
        data = f.read()
    data = [i for i in data.split('\n') if i != '']
    data = [i.split(',') for i in data]
    # The first eight fields are the box corners; '###' marks an ignored region.
    vertices = np.array([list(map(int, i[:8])) for i in data]).astype(np.int32)
    labels = np.array([0 if '###' in i else 1 for i in data])
    return vertices.reshape(labels.shape[0], 4, 2), labels
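A usage sketch, assuming an ICDAR-style annotation file in which each line holds eight comma-separated corner coordinates followed by a transcription; gt_img_1.txt is a hypothetical file name:

vertices, labels = extract_vertices('gt_img_1.txt')  # hypothetical file
print(vertices.shape)  # (N, 4, 2): four (x, y) corners per text box
print(labels)          # 0 for '###' (ignored) boxes, 1 otherwise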
Example #23
    def __init__(self,
                 path,
                 seq_len,
                 split=b'\n',
                 pad_idx=0,
                 eos_idx=1,
                 cache_dir='.cache'):
        super().__init__()

        self.path = path
        self.seq_len = seq_len

        self.pad_idx = pad_idx
        self.eos_idx = eos_idx

        self.vocab_size = 128  # byte values are used directly as token ids (assumes ASCII input)

        data = Path(path).read_bytes()

        m = hashlib.sha256()
        m.update(data)
        m.update(str((self.seq_len, self.pad_idx, self.eos_idx)).encode())

        if cache_dir is not None:
            cache_path = Path(cache_dir) / (m.hexdigest() + '.p')
            if cache_path.exists():
                print(f'Using cached file {cache_path}')
                self.samples = torch.load(str(cache_path))
                return

        if isinstance(split, str):
            split = split.encode('utf8')
        lines = data.split(split)

        self.samples = torch.full([len(lines), seq_len],
                                  fill_value=pad_idx,
                                  dtype=torch.long)

        for i, line in enumerate(tqdm(lines)):
            line = line[:seq_len]
            self.samples[i, :len(line)] = torch.tensor(list(line),
                                                       dtype=torch.long)

        if eos_idx is not None:
            lengths = torch.tensor(list(map(len, lines)),
                                   dtype=torch.long).clamp_max(seq_len - 1)
            self.samples[torch.arange(len(lines)), lengths] = eos_idx

        if cache_dir is not None:
            cache_path = Path(cache_dir) / (m.hexdigest() + '.p')
            cache_path.parent.mkdir(exist_ok=True)
            torch.save(self.samples, cache_path)
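A usage sketch for the byte-level dataset above; the class name ByteLineDataset and the file name are placeholders, since the snippet shows only the constructor:

ds = ByteLineDataset('corpus.txt', seq_len=64)  # hypothetical class and file names
print(ds.samples.shape)  # (num_lines, 64): byte ids padded with pad_idx, terminated with eos_idx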
Example #24
    def __init__(self,
                 root,
                 transform=None,
                 target_transform=None,
                 download=True):
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You can use download=True to download it')

        self.data = []
        self.labels = []
        fp = os.path.join(root, self.filename)
        with open(fp, 'r') as file:
            data = file.read()
        dataSplitted = data.split("\n")[:-1]
        datasetLength = len(dataSplitted)
        i = 0
        while i < datasetLength:
            # Get the 'i-th' row
            strings = dataSplitted[i]

            # Split the row into number strings, dropping the trailing blank.
            stringsSplitted = (strings[:-1]).split(" ")

            # Get the data (the first 256 columns) and reshape it into a 16x16 image.
            rawData = stringsSplitted[:256]
            dataFloat = [float(j) for j in rawData]
            img = np.array(dataFloat).reshape(16, 16)

            self.data.append(img)

            # Get label and convert it into numbers, then in a numpy array.
            labelString = stringsSplitted[256:]
            labelInt = [int(index) for index in labelString]
            self.labels.append(np.array(labelInt))
            i += 1
Example #25
def plot_text_loss():
    txt_file = 'p1a_validate.txt'
    plot_x = []
    plot_y = []
    with open(txt_file, 'r') as f:
        for line in f:
            data = line.strip()
            data = data.split(' ')
            plot_x.append(int(data[0]))
            plot_y.append(float(data[1]))
    plt.plot(plot_x, plot_y, 'b')
    plt.title('validate accuracy')
    plt.show()
Example #26
def plot_training_loss():
    txt_file = 'p1a_trainloss.txt'
    plot_x = []
    plot_y = []
    with open(txt_file, 'r') as f:
        for line in f:
            data = line.strip()
            data = data.split(' ')
            plot_x.append(int(data[0]))
            plot_y.append(float(data[1]))
    plt.plot(plot_x, plot_y, 'b')
    plt.title('training loss')
    plt.show()
Example #27
 def handle_data(self, data):
     if self.in_table:
         if data == 'Image Not Found':
             self.current_img = None
         elif self.current_tag == 'a':
             img_id = data.split('/')[-2]
             img_id = os.path.join(self.root, img_id + '_*.jpg')
             img_id = glob.glob(img_id)[0]
             self.current_img = img_id
             self.annotations[img_id] = []
         elif self.current_tag == 'li' and self.current_img:
             img_id = self.current_img
             self.annotations[img_id].append(data.strip())
Example #28
    def _parse_list(self):
        self.video_list = []

        if self.phase == 'Fntest':
            vid = 0
            for x in open(self.list_file):
                idx = 0
                for i in range(self.n_times):
                    for j in range(self.crop_num):
                        data = x.strip().split(' ')[0]
                        name = data.split('/')[-1].split('.')[0]
                        path = self.root_path
                        if os.path.exists(os.path.join(path, name)):
                            self.video_list.append(VideoRecord(
                                [name, x.split(' ')[1], x.split(' ')[2]],
                                self.root_path, phase='Val', copy_id=i, crop=j, vid=vid))
                            idx += 1
                vid += 1

        elif self.phase == 'Val':
            for x in open(self.list_file):
                data = x.strip().split(' ')[0]
                name = data.split('/')[-1].split('.')[0]
                path = self.root_path

                if os.path.exists(os.path.join(path, name)):
                    self.video_list.append(VideoRecord(
                        [name, x.split(' ')[1], x.split(' ')[2]], self.root_path))

        else:
            for x in open(self.list_file):
                data = x.strip().split(' ')[0]
                name = data.split('/')[-1].split('.')[0]
                path = self.root_path
                if os.path.exists(os.path.join(path, name)):
                    self.video_list.append(VideoRecord(
                        [name, x.split(' ')[1], x.split(' ')[2]], self.root_path))
            self.rng.shuffle(self.video_list)
Example #29
    def _create_class_idx_dict_val(self):
        val_annotations_file = os.path.join(self.val_dir, "val_annotations.txt")
        self.val_img_to_class = {}
        set_of_classes = set()
        with open(val_annotations_file, 'r') as fo:
            entry = fo.readlines()
            for data in entry:
                words = data.split("\t")
                self.val_img_to_class[words[0]] = words[1]
                set_of_classes.add(words[1])

        self.len_dataset = len(list(self.val_img_to_class.keys()))
        classes = sorted(list(set_of_classes))
        self.class_to_tgt_idx = {classes[i]: i for i in range(len(classes))}
        self.tgt_idx_to_class = {i: classes[i] for i in range(len(classes))}
Example #30
    def padding(data, max_len, language):
        if language == 'src':  # source language
            vocabs = src_vocabs
        elif language == 'tgt':  # target language
            vocabs = tgt_vocabs

        data = data.split()
        if len(data) > max_len - 2:  # leave room for the two delimiter tokens
            data = data[:max_len - 2]
        Y = list(map(lambda t: vocabs.get(t, 3), data))  # OOV tokens map to index 3
        Y = [1] + Y + [2]  # prepend 1 / append 2 as start and end markers
        length = len(Y)
        Y = torch.cat(
            [torch.LongTensor(Y),
             torch.zeros(max_len - length).long()])
        return Y
Example #31
 def __getitem__(self, index):
     '''
     Return the data for one image.
     '''
     index = self.img_idxs[index]
     data = self.image_data[index]
     data = data.split()
     input = [float(i) for i in data]
     input = np.array(input)
     input = input.reshape([48, 48]) / 255.  # 48x48 grayscale, scaled to [0, 1]
     image = Image.fromarray(input)  # build a PIL Image
     data = self.transforms(image)
     if self.test:
         id = index
         return id, data
     else:
         # label = self.image_label[index]
         # convert the multi-class label into a one-hot label vector
         # label_vector = torch.zeros(1,7).scatter_(1,label,1)
         label_vector = self.label_tensor[index]
         return label_vector, data