Exemplo n.º 1
0
 def __getitem__(self, idx):
     """Load the idx-th captcha sample.

     Returns:
         (image, label, image_name): the (optionally transformed) image
         resized to 160x60, its one-hot encoded label, and the bare file
         name the label was derived from.
     """
     import os  # local import: file's top-level imports are outside this view

     image_root = self.train_image_file_paths[idx]
     # BUG FIX: the original split on a literal '/', which fails on
     # Windows paths; basename handles the platform separator.
     image_name = os.path.basename(image_root)
     image = Image.open(image_root)
     # Normalize every sample to a fixed (width, height) before transforms.
     image = image.resize((160, 60))
     if self.transform is not None:
         image = self.transform(image)
     # File names are either "LABEL_timestamp.ext" or "LABEL.ext"; the
     # label is the text before the first '_' (or '.'), upper-cased
     # before one-hot encoding. `ohe` is a module-level encoder.
     if '_' in image_name:
         label = ohe.encode(image_name.split('_')[0].upper())
     else:
         label = ohe.encode(image_name.split('.')[0].upper())
     return image, label, image_name
Exemplo n.º 2
0
 def __getitem__(self, idx):
     """Return (transformed image, one-hot label) for sample ``idx``."""
     path = self.train_image_file_paths[idx]
     file_name = path.split(os.path.sep)[-1]
     sample = Image.open(path)
     if self.transform is not None:
         sample = self.transform(sample)
     # The characters before the first '_' in the file name are the
     # captcha text; `ohe` one-hot encodes them into the label.
     encoded = ohe.encode(file_name.split('_')[0])
     return sample, encoded
Exemplo n.º 3
0
 def __getitem__(self, idx):
     """Fetch one dataset sample as (image, one-hot label)."""
     file_path = self.train_image_file_paths[idx]
     base_name = file_path.split(os.path.sep)[-1]
     img = Image.open(file_path)
     if self.transform is not None:
         img = self.transform(img)
     # For convenience, generated images are named
     # "<4 digits/letters>_<timestamp>.PNG"; the part before '_' is the
     # captcha value (upper-case), one-hot encoded here as the label.
     target = ohe.encode(base_name.split('_')[0])
     return img, target
Exemplo n.º 4
0
 def __getitem__(self, idx):
     """Return (image, label) for the sample at position ``idx``."""
     full_path = self.train_image_file_paths[idx]
     fname = full_path.split(os.path.sep)[-1]
     pic = Image.open(full_path)
     if self.transform is not None:
         pic = self.transform(pic)
     # Image files are named "4letters_timestamp.PNG"; the leading
     # letters are the captcha text, which is one-hot encoded.
     prefix = fname.split('_')[0]
     return pic, ohe.encode(prefix)
Exemplo n.º 5
0
 def _preload(self):
     """Load every image and its one-hot label into memory up front."""
     self.images = []
     self.labels = []
     for fn in self.filenames:
         # Store a defensive copy of the decoded image.
         img = cv2.imread(fn)
         self.images.append(img.copy())
         # The 4 characters just before the extension are the label,
         # e.g. ".../abcd.png" -> "abcd".
         code = fn[-8:-4]
         self.labels.append(ohe.encode(code))
Exemplo n.º 6
0
 def __getitem__(self, idx):
     """Load the idx-th captcha image and its one-hot label.

     The image is resized to the dataset's fixed (self.iw, self.ih)
     before the optional transform is applied.
     """
     image_root = self.train_image_file_paths[idx]
     image_name = image_root.split(os.path.sep)[-1]
     image = Image.open(image_root)
     image = image.resize((self.iw, self.ih))
     if self.transform is not None:
         image = self.transform(image)
     # BUG FIX: removed dead `x1 = image[np.newaxis, :]` — the result was
     # never used, and indexing with np.newaxis raises when transform is
     # None (image is still a PIL Image at that point).
     # File names follow "4digits_timestamp.PNG"; the first 4 characters
     # are the captcha value, one-hot encoded as the label.
     label = ohe.encode(image_name[0:4])
     return image, label
Exemplo n.º 7
0
 def __getitem__(self, idx):
     """Get a sample from the dataset (preloaded cache or disk)."""
     if self.images:
         # Preloaded path: image and label are already in memory.
         image, label = self.images[idx], self.labels[idx]
     else:
         # Lazy path: read the image as grayscale and derive the label
         # from the 4 characters just before the file extension.
         fname = self.filenames[idx]
         image = cv2.imread(fname, 0)
         label = ohe.encode(fname[-8:-4])
     # Apply any configured transform to the image only.
     if self.transform:
         image = self.transform(image)
     return image, label
Exemplo n.º 8
0
    # NOTE(review): this section sits inside a loop whose header is not
    # visible here. Everything below except the encode(...) call is
    # commented-out code from an earlier keyword-extraction/scoring
    # pipeline, kept for reference — TODO: confirm it can be deleted.
    # generate nouns

    #noun_file_main = main_file + str(i) + 'n' + '.txt'
    #noun_eval_file = eval_file + 'n' + '.txt'

    ## keywords extracted from the main file will normalize all the values
    #normalizing_val = extract_keywords(main_file+str(i)+'.txt',noun_file_main)
    #print(normalizing_val)
    #if normalizing_val>val:
    #           val = normalizing_val
    #           temp = i
    #           print(normalizing_val)

    #print(val,temp)

    ## keywords extracted from the eval file will be stored for clustering
    #count_eval = extract_keywords(eval_file+str(i)+'.txt',eval_file+str(i)+'n'+'.txt')
    #if count_eval <10:
    #os.remove(eval_file+str(i)+'.txt')
    #os.remove(eval_file+str(i)+'n'+'.txt')
    #print("kam",count_eval)
    #print(eval_file+str(i)+'.txt')
    # one hot encoding
    # NOTE(review): only live statement in this section — presumably
    # encodes the main noun file against the i-th eval file with mode
    # flag 0; `encode`, `main_file`, `eval_file` and `i` are defined
    # elsewhere in the file. Verify the meaning of the final argument.
    encode(main_file + 'n' + '.txt', eval_file + str(i) + '.txt', 0)
    #print(x,main_score)
# store the results in a file to be used by clustering
# values to be stored are main_score and count_eval
#results_file = '/home/manan/Desktop/Research/Learning-Perspectives/results/scores.txt'
#found = count_keywords(eval_file + str(i)+'n'+'.txt')
#write_score(x,main_score,found,results_file)