def __getitem__(self, idx):
    """Load, resize and encode the sample at position ``idx``.

    The image is opened from ``self.train_image_file_paths[idx]``, resized
    to 160x60 and passed through ``self.transform`` when one is set. The
    label text is the upper-cased part of the file name before the first
    underscore, or before the first dot when the name has no underscore.

    Args:
        idx: Index into ``self.train_image_file_paths``.

    Returns:
        A ``(image, label, image_name)`` tuple, where ``label`` is the
        result of ``ohe.encode`` on the label text and ``image_name`` is
        the file name without its directory part.
    """
    # Imported locally so this method does not rely on a module-level
    # ``import os`` being present.
    import os

    image_root = self.train_image_file_paths[idx]
    # basename() follows the platform's path rules (it also understands
    # "\\" on Windows); a hard-coded split on "/" would return the whole
    # path there and corrupt both the label and the returned name.
    image_name = os.path.basename(image_root)

    image = Image.open(image_root)
    fix_size = (160, 60)  # size handed to Image.resize
    image = image.resize(fix_size)
    if self.transform is not None:
        image = self.transform(image)

    # "<label>_<anything>.ext" takes the text before "_"; a name without
    # an underscore takes the text before the first ".".
    separator = '_' if '_' in image_name else '.'
    label = ohe.encode(image_name.split(separator)[0].upper())
    return image, label, image_name
def __getitem__(self, idx):
    """Return the ``(image, label)`` pair for the sample at ``idx``.

    The image is opened with ``Image.open`` from
    ``self.train_image_file_paths[idx]`` and, when ``self.transform`` is
    set, passed through it. The label is ``ohe.encode`` applied to the part
    of the file name that precedes the first underscore.
    """
    path = self.train_image_file_paths[idx]
    # Last path component (text after the final os.path.sep).
    file_name = path.rpartition(os.path.sep)[2]

    sample = Image.open(path)
    if self.transform is not None:
        sample = self.transform(sample)

    # Everything before the first "_" is the label text.
    label_text = file_name.partition('_')[0]
    return sample, ohe.encode(label_text)
def __getitem__(self, idx):
    """Fetch one sample: the (optionally transformed) image and its label.

    For convenience, image files are named
    ``<4 digits or letters>_<timestamp>.PNG`` when they are generated; the
    four leading characters (letters in upper case) are the captcha value,
    and that value is one-hot encoded to form the label.

    Args:
        idx: Index into ``self.train_image_file_paths``.

    Returns:
        ``(image, label)`` where ``label`` is the ``ohe.encode`` result.
    """
    file_path = self.train_image_file_paths[idx]
    base_name = file_path.rsplit(os.path.sep, 1)[-1]

    picture = Image.open(file_path)
    if self.transform is not None:
        picture = self.transform(picture)

    # The captcha value is the text in front of the first underscore.
    captcha_text = base_name.split('_', 1)[0]
    encoded = ohe.encode(captcha_text)
    return picture, encoded
def __getitem__(self, idx):
    """Load the image at ``idx`` and derive its label from the file name.

    Image files are named ``4letters_timestamp.PNG`` for convenience, so
    the text before the first underscore is the label text; it is one-hot
    encoded through ``ohe.encode``.

    Returns:
        ``(image, label)``; the image has been passed through
        ``self.transform`` when a transform is configured.
    """
    source = self.train_image_file_paths[idx]
    _, _, leaf = source.rpartition(os.path.sep)  # file name without folders

    img = Image.open(source)
    if self.transform is not None:
        img = self.transform(img)

    letters, _, _ = leaf.partition('_')
    return img, ohe.encode(letters)
def _preload(self):
    """Preload the dataset into memory.

    Resets ``self.labels`` and ``self.images`` to empty lists, then, for
    each path in ``self.filenames`` (in order), stores a copy of the image
    read by ``cv2.imread`` and the ``ohe.encode`` result for the path's
    label characters.
    """
    self.labels = []
    self.images = []
    for path in self.filenames:
        # NOTE(review): cv2.imread is documented to return None for files it
        # cannot read, in which case .copy() raises AttributeError - confirm
        # the paths are validated before this point.
        self.images.append(cv2.imread(path).copy())
        # Characters [-8:-4] of the path are the label text - presumably a
        # 4-character label directly before a 4-character suffix such as
        # ".png"; verify against the file naming scheme.
        self.labels.append(ohe.encode(path[-8:-4]))
def __getitem__(self, idx):
    """Load, resize and encode the sample at position ``idx``.

    The image is opened from ``self.train_image_file_paths[idx]``, resized
    to ``(self.iw, self.ih)`` and passed through ``self.transform`` when one
    is set.

    For convenience, image files are named ``<4 digits>_<timestamp>.PNG``
    when they are generated; the four digits are the captcha value, and that
    value is one-hot encoded to form the label.

    Args:
        idx: Index into ``self.train_image_file_paths``.

    Returns:
        ``(image, label)`` where ``label`` is the ``ohe.encode`` result for
        the first four characters of the file name.
    """
    image_root = self.train_image_file_paths[idx]
    image_name = image_root.split(os.path.sep)[-1]

    image = Image.open(image_root)
    image = image.resize((self.iw, self.ih))
    if self.transform is not None:
        image = self.transform(image)

    # The former unused local ``x1 = image[np.newaxis, :]`` was removed: its
    # value was never read, and indexing raised TypeError whenever no
    # transform was configured (a PIL image is not subscriptable).

    # The captcha value is the first four characters of the file name.
    label = ohe.encode(image_name[0:4])
    return image, label
def __getitem__(self, idx):
    """Return the ``(image, label)`` sample stored at ``idx``.

    When ``self.images`` is non-empty (data was preloaded) the image and
    label are taken from ``self.images`` / ``self.labels``. Otherwise the
    file ``self.filenames[idx]`` is read with ``cv2.imread`` and its label
    is encoded from the path. ``self.transform`` is applied to the image
    whenever it is truthy.
    """
    if not self.images:
        # Nothing preloaded: read the file and build the label on demand.
        path = self.filenames[idx]
        # NOTE(review): the second argument 0 presumably selects grayscale;
        # confirm it matches how any preloaded images were read.
        sample = cv2.imread(path, 0)
        # Characters [-8:-4] of the path hold the label text.
        target = ohe.encode(path[-8:-4])
    else:
        sample = self.images[idx]
        target = self.labels[idx]

    if self.transform:
        sample = self.transform(sample)
    return sample, target
# generate nouns #noun_file_main = main_file + str(i) + 'n' + '.txt' #noun_eval_file = eval_file + 'n' + '.txt' ## keywords extracted from the main file will normalize all the values #normalizing_val = extract_keywords(main_file+str(i)+'.txt',noun_file_main) #print(normalizing_val) #if normalizing_val>val: # val = normalizing_val # temp = i # print(normalizing_val) #print(val,temp) ## keywords extracted from the eval file will be stored for clustering #count_eval = extract_keywords(eval_file+str(i)+'.txt',eval_file+str(i)+'n'+'.txt') #if count_eval <10: #os.remove(eval_file+str(i)+'.txt') #os.remove(eval_file+str(i)+'n'+'.txt') #print("kam",count_eval) #print(eval_file+str(i)+'.txt') # one hot encoding encode(main_file + 'n' + '.txt', eval_file + str(i) + '.txt', 0) #print(x,main_score) # store the results in a file to be used by clustering # value to be store is main_score and count_eval #results_file = '/home/manan/Desktop/Research/Learning-Perspectives/results/scores.txt' #found = count_keywords(eval_file + str(i)+'n'+'.txt') #write_score(x,main_score,found,results_file)