Exemplo n.º 1
0
def loadValidationData(percentage=0.3, size=224):
    '''
    Load the tail of every brand directory as the validation set.

    Reads "<idx> <brand>" pairs from ../json/label.dat. For each brand,
    lists ../train/<brand> and keeps the entries whose position in the
    listing is >= int(total * (1 - percentage)).

    percentage: fraction of each brand's images, taken from the end of the
        directory listing, to load as validation data. Values outside
        [0, 1] are effectively clamped (nothing / everything is selected).
    size: forwarded to preprocess.preprocess; the output tensor is
        allocated as (N, 3, size, size).
    return data, label
    data: Variable(torch.FloatTensor) (num of validation images, 3, size, size)
    label: torch.LongTensor (num of validation images)

    NOTE(review): the split follows os.listdir order, which is not
    guaranteed to be sorted -- confirm the train loader uses the same order.
    '''
    # constant
    labelPath = '../json/label.dat'
    imgPath = '../train/'

    # make label dict: brand name -> integer class index
    labelDict = dict()
    with open(labelPath, 'r') as labelFile:
        for line in labelFile:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            idx, brand = line.split(" ")
            labelDict[brand] = int(idx)

    # List every brand directory exactly once and remember which files are
    # selected, so the tensor allocated below always matches what is loaded.
    selected = []
    for brand in labelDict:
        brandPath = imgPath + brand
        allImg = os.listdir(brandPath)
        # index of the first validation image inside this brand
        num = int(len(allImg) * (1 - percentage))
        # max(..., 0): a negative start would slice from the end instead
        for img in allImg[max(num, 0):]:
            selected.append((brandPath + "/" + img, labelDict[brand]))

    valsize = len(selected)
    valx = torch.empty(valsize, 3, size, size)
    valy = []

    # read the selected images, brand by brand
    for row, (path, brandIdx) in enumerate(selected):
        imgTmp = preprocess.readImage(path)
        # presumably a numpy array shaped (1, size, size, 3); the permute
        # below moves the last axis to position 1 -- confirm in preprocess
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        valy.append(brandIdx)
        valx[row:(row + 1)] = imgout
    count = len(valy)

    # convert to Tensor
    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)
    return data, label
Exemplo n.º 2
0
def loadImagesList(filelist, size):
    '''
    Load the images named in filelist into one tensor.

    filelist: iterable of str. Each entry is passed unchanged to
        preprocess.readImage. Any iterable is accepted; it is materialised
        once so its length is known.
    size: forwarded to preprocess.preprocess; the output tensor is
        allocated as (N, 3, size, size).
    return: Variable(torch.FloatTensor) ( len(filelist), 3, size, size)
    '''
    # Materialise once: the length is needed up front to allocate the batch.
    filelist = list(filelist)
    valx = torch.empty(len(filelist), 3, size, size)

    # read images in the order given
    for row, imgName in enumerate(filelist):
        imgTmp = preprocess.readImage(imgName)
        # presumably a numpy array shaped (1, size, size, 3); the permute
        # below moves the last axis to position 1 -- confirm in preprocess
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1,
                                                   2).type(torch.FloatTensor)
        valx[row:(row + 1)] = imgout
    count = len(filelist)

    data = Variable(valx)
    print(count)
    print(data.shape)

    return data
Exemplo n.º 3
0
def loadValDataFromFile(valFile, size=224, percentage=1.0):
    '''
    load validation data from a txt file.
    the format of valFile: "spu_id img_path category_id", one record per
    line, fields separated by a single space. Blank lines are ignored.
    percentage: only take the first percentage*100% of the records in
        valFile as validation set (clamped to the range [0, 1]).
    percentage = 0.02 is usually used as validation set in learning curve
    size: forwarded to preprocess.preprocess; the output tensor is
        allocated as (N, 3, size, size).
    return data, label
    data: Variable(torch.FloatTensor) ( num of images , 3, size, size)
    label: torch.LongTensor (num of images)
    '''
    # Read everything once and close the file right away.
    with open(valFile, 'r') as val:
        records = [line.strip() for line in val]
    # drop blank lines so they neither count nor crash the unpack below
    records = [rec for rec in records if rec]

    # Clamp so the tensor never has more rows than records to fill them.
    valsize = max(0, min(len(records), int(len(records) * percentage)))

    valy = []
    valx = torch.empty(valsize, 3, size, size)

    for row, rec in enumerate(records[:valsize]):
        _, img, cat = rec.split(" ")
        imgTmp = preprocess.readImage(img)
        # presumably a numpy array shaped (1, size, size, 3); the permute
        # below moves the last axis to position 1 -- confirm in preprocess
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        valy.append(int(cat))
        valx[row:(row + 1)] = imgout
    count = len(valy)

    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)
    return data, label
Exemplo n.º 4
0
def loadTrainDataFromFile(trainFile, size=224, percentage=1):
    '''
    load training data from a txt file.
    the format of trainFile: "spu_id img_path category_id", one record per
    line, fields separated by a single space. Blank lines are ignored.
    percentage: only take the first percentage*100% of the records in
        trainFile as training set (clamped to the range [0, 1]).
    percentage = 0.1 is usually used for debug.
    size: forwarded to preprocess.preprocess; the output tensor is
        allocated as (N, 3, size, size).
    return: data, label
    data: torch.FloatTensor ( num of images, 3, size, size)
    label: torch.LongTensor ( num of images )
    '''
    # Read everything once and close the file right away.
    with open(trainFile, 'r') as train:
        records = [line.strip() for line in train]
    # drop blank lines so they neither count nor crash the unpack below
    records = [rec for rec in records if rec]

    # Clamp so the tensor never has more rows than records to fill them.
    trainsize = max(0, min(len(records), int(len(records) * percentage)))

    trainy = []
    trainx = torch.empty(trainsize, 3, size, size)

    for row, rec in enumerate(records[:trainsize]):
        _, img, cat = rec.split(" ")
        imgTmp = preprocess.readImage(img)
        # presumably a numpy array shaped (1, size, size, 3); the permute
        # below moves the last axis to position 1 -- confirm in preprocess
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        trainy.append(int(cat))
        trainx[row:(row + 1)] = imgout
    count = len(trainy)

    label = torch.LongTensor(trainy)
    print(count)
    print(label.shape)
    print(trainx.shape)
    return trainx, label
Exemplo n.º 5
0
def loadTrainData(percentage=1.0, size=224):
    '''
    Load the head of every brand directory as the training set.

    Reads "<idx> <brand>" pairs from ../json/label.dat. For each brand,
    lists ../train/<brand> and keeps the first int(total * percentage)
    entries of the listing.

    percentage: fraction of each brand's images, taken from the start of
        the directory listing, to load as training data. Values outside
        [0, 1] are effectively clamped (nothing / everything is selected).
    size: forwarded to preprocess.preprocess; the output tensor is
        allocated as (N, 3, size, size).
    return: data, label
    data: torch.FloatTensor (num of training images, 3, size, size)
    label: torch.LongTensor (num of training images)

    NOTE(review): the split follows os.listdir order, which is not
    guaranteed to be sorted -- confirm the validation loader uses the same
    order.
    '''
    # constant
    labelPath = '../json/label.dat'
    imgPath = '../train/'

    # make label dict: brand name -> integer class index
    labelDict = dict()
    with open(labelPath, 'r') as labelFile:
        for line in labelFile:
            line = line.strip()
            if not line:
                # tolerate blank / trailing empty lines
                continue
            idx, brand = line.split(" ")
            labelDict[brand] = int(idx)

    # List every brand directory exactly once and remember which files are
    # selected, so the tensor allocated below always matches what is loaded.
    selected = []
    for brand in labelDict:
        brandPath = imgPath + brand
        allImg = os.listdir(brandPath)
        # number of leading images of this brand that go to training
        num = int(len(allImg) * percentage)
        if len(allImg) > 1000:
            # Debug aid kept from the original: shows the 1001st listed
            # file so the listing order can be eyeballed across runs.
            # output should be:
            # d792d0eba6b447049a28686b9298915d.jpg
            # 6df9e31532af4810a02d5abb9548f1f1.jpg
            print(allImg[1000])
        # max(..., 0): a negative stop would drop files from the end instead
        for img in allImg[:max(num, 0)]:
            selected.append((brandPath + "/" + img, labelDict[brand]))

    trainsize = len(selected)
    trainx = torch.empty(trainsize, 3, size, size)
    trainy = []

    # read the selected images, brand by brand
    for row, (path, brandIdx) in enumerate(selected):
        imgTmp = preprocess.readImage(path)
        # presumably a numpy array shaped (1, size, size, 3); the permute
        # below moves the last axis to position 1 -- confirm in preprocess
        imgTmp2 = preprocess.preprocess(imgTmp, size)
        imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
            torch.FloatTensor)
        trainy.append(brandIdx)
        trainx[row:(row + 1)] = imgout
    count = len(trainy)

    # convert to Tensor
    label = torch.LongTensor(trainy)
    print(count)
    print(label.shape)
    print(trainx.shape)
    return trainx, label