def __init__(self):
    """set up the helper objects used by the Runner

    A FileManager and a DataPrepper are created immediately. The trainer
    (``tr``) and detector (``de``) slots start out as None. ``__location__``
    holds the real path of this module's directory, resolved against the
    current working directory.
    """
    self.fm = FileManager()
    self.dp = DataPrepper()
    self.tr = self.de = None
    module_dir = os.path.dirname(__file__)
    self.__location__ = os.path.realpath(os.path.join(os.getcwd(), module_dir))
def __init__(self, transforms, video_file, *args):
    """read every frame of a video file into memory

    Args:
        transforms: stored as ``self.transforms``; not applied in this method
        video_file (str): path to the video. The file name without its
            extension must end in ``_<int>``; that integer is the number
            given to the first frame
        *args: optional object(s) exposing a ``pid`` attribute. Only the last
            one is kept, as ``self.pfm``, and its ``pid`` is copied to
            ``self.pid``
    """
    self.video_name = video_file.split('/')[-1].split('.')[0]
    start = int(self.video_name.split('_')[-1])
    self.fm = FileManager()
    if args:
        self.pfm = args[-1]
        self.pid = self.pfm.pid
    self.transforms = transforms
    self.img_files = []
    self.frames = []

    cap = cv2.VideoCapture(video_file)
    # release the capture even if reading a frame raises
    try:
        self.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        count = start
        for i in range(self.len):
            ret, frame = cap.read()
            if not ret:
                # the message used to be built as `"..." + str(i) in video_file + "..."`,
                # which is a membership test and printed True/False instead of the text
                print("Couldn't read frame {} in {}. Using last good frame".format(i, video_file),
                      file=sys.stderr)
                break
            self.img_files.append('Frame_{}.jpg'.format(count))
            self.frames.append(frame)
            count += 1
        # NOTE(review): printed once after the read loop; confirm per-frame output was not intended
        print('video: ', self.video_name, '\tFrame: ', count)
    finally:
        cap.release()
class Runner:
    """user-friendly class for accessing the majority of module's functionality."""

    # video analysed by detect('fullvideo') when no video_path is supplied
    # NOTE(review): machine-specific absolute path, kept only so existing callers behave as before
    _DEFAULT_FULL_VIDEO = '/Users/rhiyasharma/Documents/_McGrathLab/CD_work/videos/short_ten.mp4'

    def __init__(self):
        """initiate the Runner class"""
        self.fm = FileManager()
        self.dp = DataPrepper()
        self.tr = None
        self.de = None
        self.__location__ = os.path.realpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__)))

    def download(self):
        """download all required data."""
        self.dp.download_all()

    def prep(self):
        """prep downloaded data"""
        self.dp.prep()

    def train(self, num_epochs, upload_results=True):
        """initiate a Trainer object and train the model.

        Args:
            num_epochs (int): number of epochs to train
            upload_results(bool): if True, automatically upload the results (weights, logs, etc.) after training

        Notes:
            NOTE(review): upload_results is forwarded positionally as the second Trainer argument; confirm
            that this matches the meaning of the second parameter of Trainer.__init__
        """
        self.tr = Trainer(num_epochs, upload_results)
        self.tr.train()

    def sync(self):
        """sync the training directory through the FileManager"""
        self.fm.sync_training_dir()

    def detect(self, img_dir, video_path=None):
        """initiate a Detector object and run it in the mode selected by img_dir

        Args:
            img_dir (str): 'test' calls Detector.test(5), 'fullvideo' calls Detector.frame_detect on a video
                file, and any other value is passed to Detector.detect
            video_path (str): video to use in 'fullvideo' mode. Defaults to None, which falls back to the
                path stored in _DEFAULT_FULL_VIDEO. Ignored in the other modes
        """
        self.de = Detector()
        if img_dir == 'test':
            self.de.test(5)
        elif img_dir == 'fullvideo':
            path = self._DEFAULT_FULL_VIDEO if video_path is None else video_path
            self.de.frame_detect(path)
        else:
            self.de.detect(img_dir)
def __init__(self, pid, video_path, video, csv_file, *args):
    """store the paths and names needed to annotate a video

    Args:
        pid (str): project id, used in the annotated video's file name
        video_path: stored unchanged as ``self.video``
        video (str): video file name; the part before the first '.' becomes ``self.video_name``
        csv_file (str): csv file name, joined onto the detection directory
        *args: optional extra objects; only the last one is kept, as ``self.pfm``
    """
    self.fm = FileManager()
    self.track = Tracking()
    if args:
        self.pfm = args[-1]
    self.detection_dir = self.fm.local_files['detection_dir']
    self.video = video_path
    stem = video.split('.')[0]
    self.video_name = stem
    self.ann_video_name = ''.join(['annotated_', pid, '_', stem, '_p2.mp4'])
    self.csv_file_path = join(self.detection_dir, csv_file)
def __init__(self, num_epochs, compare_annotations=True):
    """initialize trainer

    Args:
        num_epochs (int): number of epochs to train
        compare_annotations: If True, evaluate the model on the test set after each epoch. The end result
            of training is unaffected, but more data about model performance is produced at each epoch,
            at the cost of a significantly longer total runtime
    """
    self.num_epochs = num_epochs
    self.compare_annotations = compare_annotations
    self.fm = FileManager()
    # the setup steps run in this fixed order: loaders, then model, then loggers
    for setup_step in (self._initiate_loaders, self._initiate_model, self._initiate_loggers):
        setup_step()
def __init__(self, transforms, subset):
    """initialize DataLoader

    Args:
        transforms: Composition of Pytorch transformations to apply to the data when loading
        subset (str): data subset to use, options are 'train' and 'test'
    """
    self.fm = FileManager()
    self.transforms = transforms
    self.files_list = self.fm.local_files['{}_list'.format(subset)]
    self.img_dir = self.fm.local_files['{}_image_dir'.format(subset)]

    # the list file (train_list.txt or test_list.txt) holds one image file name per line
    with open(self.files_list, 'r') as f:
        image_names = f.read().splitlines()
    self.img_files = sorted(os.path.join(self.img_dir, name) for name in image_names)

    # each image has a label file of the same base name, with a .txt extension, inside label_dir
    label_dir = self.fm.local_files['label_dir']
    self.label_files = [
        join(label_dir, basename(img_path.replace('.jpg', '.txt')))
        for img_path in self.img_files
    ]
def __init__(self):
    """look up the figure directories through a FileManager, then load the data to plot"""
    self.fm = FileManager()
    figure_dir = self.fm.local_files['figure_dir']
    self.fig_dir = figure_dir
    # csv copies of the plotted data go in a 'figure_data' subdirectory
    self.fig_data_dir = join(figure_dir, 'figure_data')
    self._load_data()
class Plotter:
    """produce figures, and csv files of the data behind them, from the training log and per-epoch predictions

    Notes:
        the plotting methods take a ``fig`` argument but plot_all calls them without one, so the Figure is
        presumably created (and, unless save=False, saved) by plotter_decorator, which is defined elsewhere
    """

    def __init__(self):
        """look up the figure directories through a FileManager, then load the data to plot"""
        self.fm = FileManager()
        self.fig_dir = self.fm.local_files['figure_dir']
        self.fig_data_dir = join(self.fig_dir, 'figure_data')
        self._load_data()

    def save_fig(self, fig: Figure, file_stub: str):
        """save the figure as a pdf and close it

        Notes:
            saves the figure to the figure_dir specified in the FileManager object

        Args:
            fig (Figure): figure to save
            file_stub (str): name to use for the file. Don't include '.pdf'
        """
        fig.savefig(join(self.fig_dir, '{}.pdf'.format(file_stub)))
        plt.close('all')

    def plot_all(self):
        """create pdf's of every plot this class can produce"""
        self.total_loss_vs_epoch()
        self.n_boxes_vs_epoch()
        self.animated_learning()
        self.iou_vs_epoch()
        self.final_epoch_eval()

    @plotter_decorator
    def total_loss_vs_epoch(self, fig: Figure):
        """plot the training loss vs epoch and write the plotted values to total_loss_vs_epoch.csv

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch', ylabel='total loss', title='Training Loss vs. Epoch')
        sns.lineplot(data=self.train_log.loss_total, ax=ax)
        self.train_log.loc[:, ['loss_total']].to_csv(
            join(self.fig_data_dir, 'total_loss_vs_epoch.csv'))

    @plotter_decorator
    def n_boxes_vs_epoch(self, fig: Figure):
        """plot the average number of boxes predicted per frame vs the epoch

        The ground-truth average is drawn as a constant line for comparison, and both series are written
        to n_boxes_vs_epoch.csv

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        predicted = pd.Series(
            [df.boxes.apply(len).agg('mean') for df in self.epoch_predictions])
        actual = pd.Series([self.ground_truth.boxes.apply(len).agg('mean')] * len(predicted))
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch', ylabel='avg # detections', title='Average Number of Detections vs. Epoch')
        sns.lineplot(data=predicted, ax=ax, label='predicted')
        sns.lineplot(data=actual, ax=ax, label='actual')
        df = pd.DataFrame({'predicted': predicted, 'actual': actual})
        df.to_csv(join(self.fig_data_dir, 'n_boxes_vs_epoch.csv'))

    @plotter_decorator(save=False)
    def animated_learning(self, fig: Figure):
        """for a single frame, successively plot the predicted boxes and labels at each epoch to create an
        animation, saved as animated_learning.gif in the figure directory

        Notes:
            the frame is chosen from the final epoch among frames whose lowest box score exceeds 0.95. If no
            frame qualifies, the ``.iloc[0]`` lookup raises IndexError

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        # find a frame with a good balance of number of fish and final-epoch score for each box, and load that image
        final_epoch = self.epoch_predictions[-1].copy()
        final_epoch['n_detections'] = final_epoch['labels'].apply(len)
        # compare with ==, not `is`: identity of int objects is an implementation detail
        final_epoch['min_score'] = final_epoch['scores'].apply(
            lambda x: 0 if len(x) == 0 else min(x))
        final_epoch = final_epoch[final_epoch.min_score > 0.95]
        frame = final_epoch.sort_values(by=['n_detections', 'min_score'],
                                        ascending=False).iloc[0].name
        im = np.array(Image.open(
            join(self.fm.local_files['test_image_dir'], frame)), dtype=np.uint8)

        # build up the animation
        max_detections = 5
        ax = fig.add_subplot(111)
        plt.xlim(0, im.shape[1])
        # reversed y limits put the image origin at the top left
        plt.ylim(im.shape[0], 0)
        boxes = [
            Rectangle((0, 0), 0, 0, fill=False) for _ in range(max_detections)
        ]
        # label 0 is the padding label for unused rectangles and is drawn without an edge color
        color_lookup = {0: 'None', 1: '#FF1493', 2: '#00BFFF'}

        def init():
            for box in boxes:
                ax.add_patch(box)
            return boxes

        def animate(i):
            # pad (or truncate) the predictions for epoch i to exactly max_detections entries
            label_preds = self.epoch_predictions[i].loc[frame, 'labels']
            label_preds = (label_preds + ([0] * max_detections))[:max_detections]
            box_preds = self.epoch_predictions[i].loc[frame, 'boxes']
            box_preds = [xyminmax_to_xywh(*p) for p in box_preds]
            box_preds = (box_preds + ([[0, 0, 0, 0]] * max_detections))[:max_detections]
            for j in range(max_detections):
                boxes[j].set_xy([box_preds[j][0], box_preds[j][1]])
                boxes[j].set_width(box_preds[j][2])
                boxes[j].set_height(box_preds[j][3])
                boxes[j].set_edgecolor(color_lookup[label_preds[j]])
            return boxes

        anim = FuncAnimation(fig, animate, init_func=init,
                             frames=len(self.epoch_predictions),
                             blit=True, interval=200, repeat=False)
        ax.imshow(im, zorder=0)
        anim.save(join(self.fig_dir, 'animated_learning.gif'), writer='imagemagick')
        plt.close('all')

    @plotter_decorator
    def iou_vs_epoch(self, fig: Figure):
        """plot the average iou score vs the epoch and write the values to iou_vs_epoch.csv

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        ious = [self._calc_epoch_iou(ep) for ep in range(len(self.epoch_predictions))]
        ax = fig.add_subplot(111)
        ax.set(xlabel='epoch', ylabel='average iou', title='IOU score vs. Epoch')
        sns.lineplot(data=pd.Series(ious), ax=ax)
        pd.DataFrame({
            'iou': ious
        }).to_csv(join(self.fig_data_dir, 'iou_vs_epoch.csv'))

    @plotter_decorator
    def final_epoch_eval(self, fig: Figure):
        """plot a 2x2 summary of the last epoch: box-count error distribution, frames with vs without error,
        average iou distribution, and overestimation vs underestimation counts

        Args:
            fig (Figure): matplotlib Figure object into which to plot
        """
        fig.set_size_inches(11, 8.5)
        epoch_index = len(self.epoch_predictions) - 1
        # only the per-frame table is plotted; the summary is written to csv by _full_epoch_eval
        df, _ = self._full_epoch_eval(epoch_index)
        df = df.reset_index()

        no_err_val = df[df.n_boxes_predicted_error == 0].count()['Framefile']
        err_val = df[df.n_boxes_predicted_error != 0].count()['Framefile']
        pos = df[df.n_boxes_predicted_error > 0].count()['Framefile']
        neg = df[df.n_boxes_predicted_error < 0].count()['Framefile']

        ax1 = fig.add_subplot(221)  # top left
        ax2 = fig.add_subplot(222)  # top right
        ax3 = fig.add_subplot(223)  # bottom left
        ax4 = fig.add_subplot(224)  # bottom right

        sns.distplot(df.n_boxes_predicted_error,
                     hist_kws=dict(edgecolor="k", linewidth=0.5),
                     norm_hist=False, kde=False, ax=ax1)
        ax1.set(xlabel='Error Score', ylabel='No. of Frames')
        ax1.set_title("Distribution of Frames Over Error Values", fontsize=10)

        ax2.bar(x=['No Error', 'Error'], height=[no_err_val, err_val],
                color=['green', 'red'], width=0.4)
        ax2.set_ylabel('No. of Framefiles', fontsize=10)
        ax2.set_title('No. of Frames With Error vs Without Error', fontsize=10)

        sns.distplot(df.average_iou,
                     hist_kws=dict(edgecolor="k", linewidth=0.1),
                     norm_hist=False, kde=False, ax=ax3)
        ax3.set(xlabel='Average IOU', ylabel='No. of Frames')
        ax3.set_title("Distribution of Frames Over Average IOU Scores", fontsize=10)

        ax4.bar(x=['Overestimation', 'Underestimation'], height=[pos, neg],
                color='red', width=0.4)
        ax4.set_title('Analysis of Errors', fontsize=10)
        ax4.set_ylabel('No. of Frames', fontsize=10)

    def _load_data(self):
        """load and parse all relevant data. Automatically syncs training dir with cloud if any files are missing"""
        required_files = [
            self.fm.local_files[x] for x in ['boxed_fish_csv', 'train_log']
        ]
        required_files.append(
            join(self.fm.local_files['predictions_dir'], '0.csv'))
        # a single sync is enough, so stop checking after the first missing file
        for f in required_files:
            if not exists(f):
                self.fm.sync_training_dir(
                    exclude=['labels/**', 'train_images/**'])
                break
        self.train_log = self._parse_train_log()
        self.num_epochs = len(self.train_log)
        self.ground_truth = self._parse_epoch_csv()
        self.epoch_predictions = [
            self._parse_epoch_csv(epoch) for epoch in range(self.num_epochs)
        ]

    def _parse_train_log(self):
        """parse the logfile that tracked overall loss and learning rate at each epoch

        Returns:
            Pandas Dataframe of losses and learning rate, indexed by epoch number
        """
        return pd.read_csv(self.fm.local_files['train_log'], sep='\t', index_col='epoch')

    def _parse_epoch_csv(self, epoch=-1):
        """parse the csv file of predictions produced when Trainer.train() is run with compare_annotations=True

        Notes:
            if the epoch arg is left at the default value of -1, this function will instead parse
            'ground_truth.csv'

        Args:
            epoch(int): epoch number, where 0 refers to the first epoch. Defaults to -1, which parses the
                ground truth csv

        Returns:
            Pandas DataFrame of epoch data
        """
        if epoch == -1:
            path = self.fm.local_files['ground_truth_csv']
            usecols = ['Framefile', 'boxes', 'labels']
        else:
            path = join(self.fm.local_files['predictions_dir'], '{}.csv'.format(epoch))
            usecols = ['Framefile', 'boxes', 'labels', 'scores']
        # NOTE(review): eval() executes whatever text the csv cells contain. That is acceptable only while
        # these files are produced by this project; consider ast.literal_eval if every cell is a plain literal
        return pd.read_csv(
            path, usecols=usecols).set_index('Framefile').applymap(lambda x: eval(x))

    def _full_epoch_eval(self, epoch):
        """build a per-frame evaluation table and an overall summary for one epoch, and write both to csv

        Args:
            epoch (int): index into self.epoch_predictions

        Returns:
            Pandas DataFrame: ground truth joined with the epoch's predictions, plus per-frame box counts,
                box-count error, average iou, box mappings and label accuracy
            Pandas Series: classification accuracy and average iou (both weighted by the number of predicted
                boxes per frame), total predictions, total annotations and number of frames

        Notes:
            np.average raises ZeroDivisionError when the weights sum to zero, i.e. when the epoch predicted
            no boxes in any frame
        """
        ep = self.epoch_predictions[epoch]
        gt = self.ground_truth
        df = gt.join(ep, lsuffix='_actual', rsuffix='_predicted')
        df['n_boxes_actual'] = df.boxes_actual.apply(len)
        df['n_boxes_predicted'] = df.boxes_predicted.apply(len)
        df['n_boxes_predicted_error'] = df.n_boxes_predicted - df.n_boxes_actual
        # _calc_frame_iou returns (iou, mapping) per row; zip(*) splits that into two columns
        df['average_iou'], df['act_to_pred_map'] = zip(
            *df.apply(lambda x: self._calc_frame_iou(
                x.boxes_actual, x.boxes_predicted, map_boxes=True), axis=1))
        df['pred_to_act_map'] = df.apply(lambda x: self._flip_mapping(
            x.act_to_pred_map, x.n_boxes_predicted), axis=1)
        df['pred_accuracy'] = df.apply(lambda x: self._compare_labels(
            x.labels_actual, x.labels_predicted, x.pred_to_act_map), axis=1)
        # a frame with no predicted boxes counts as fully accurate
        df['avg_accuracy'] = df.pred_accuracy.apply(
            lambda x: sum(x) / len(x) if len(x) > 0 else 1.0)
        df.to_csv(join(self.fig_data_dir, 'epoch_{}_eval.csv'.format(epoch)))

        summary = pd.Series()
        summary['classification_accuracy'] = np.average(
            df.avg_accuracy, weights=df.n_boxes_predicted)
        summary['average_iou'] = np.average(df.average_iou,
                                            weights=df.n_boxes_predicted)
        summary['n_predictions'] = df.n_boxes_predicted.sum()
        summary['n_annotations'] = df.n_boxes_actual.sum()
        summary['n_frames'] = len(df)
        summary.to_csv(
            join(self.fig_data_dir, 'epoch_{}_eval_summary.csv'.format(epoch)))
        return df, summary

    def _compare_labels(self, labels_actual, labels_predicted, pred_to_act_map):
        """determine whether the label for each predicted box matches the label of the corresponding
        ground-truth box

        Args:
            labels_actual (list of ints): ground-truth label for each ground-truth box
            labels_predicted (list of ints): predicted label for each predicted box
            pred_to_act_map (list of ints): list mapping each predicted box to the ground truth box with
                the max iou

        Returns:
            list: outcomes, where outcomes[i] = 1 if labels_predicted[i] is correct, and 0 if it's incorrect
        """
        outcomes = []
        for i, pred in enumerate(labels_predicted):
            # append 0 if the predicted box does not overlap a ground truth box
            if pred_to_act_map[i] is None:
                outcomes.append(0)
            # else, append 1 if the predicted label was correct, or 0 if it was incorrect
            else:
                outcomes.append(1 if pred == labels_actual[pred_to_act_map[i]] else 0)
        return outcomes

    def _calc_precision(self):
        """placeholder; not implemented"""
        pass

    def _calc_recall(self):
        """placeholder; not implemented"""
        pass

    def _flip_mapping(self, a_to_b, len_b):
        """invert a mapping list

        Args:
            a_to_b (list): a_to_b[i] is the index in b that element i of a maps to (or None)
            len_b (int): number of elements in b

        Returns:
            list: of length len_b, where entry j is the first index i with a_to_b[i] == j, or None if no
                element of a maps to j
        """
        if len_b == 0:
            return []
        mapping = []
        for i in range(len_b):
            try:
                mapping.append(a_to_b.index(i))
            except ValueError:
                mapping.append(None)
        return mapping

    def _calc_epoch_iou(self, epoch):
        """calculate the average iou across all test frames for a given epoch

        Args:
            epoch (int): epoch number

        Returns:
            float: average iou value per predicted box for the epoch
        """
        gt = self.ground_truth
        ep = self.epoch_predictions[epoch]
        combo = gt.join(ep, lsuffix='_gt', rsuffix='_ep')
        combo['frame_iou'] = combo.apply(
            lambda x: self._calc_frame_iou(x.boxes_gt, x.boxes_ep), axis=1)
        combo['n_boxes_ep'] = combo.boxes_ep.apply(len)
        return np.average(combo.frame_iou, weights=combo.n_boxes_ep)

    def _calc_frame_iou(self, actual_boxes, predicted_boxes, map_boxes=False):
        """calculate the average iou for a frame

        Args:
            actual_boxes (list of lists of 4 ints): list of ground truth bounding boxes
            predicted_boxes (list of lists of 4 ints): list of predicted bounding boxes
            map_boxes: if True, also return a list of ints mapping the ground truth boxes to the predicted
                box with the highest iou value

        Returns:
            if map_boxes is False, returns:
                float: mean iou value for the given frame
            if map_boxes is True, returns:
                float: mean iou value for the given frame
                list of ints: list mapping the actual boxes to the predicted boxes, where actual_boxes[i]
                    maps to predicted_boxes[mapping_list[i]] (and predicted_boxes[j] maps to
                    actual_boxes[mapping_list.index(j)]). If the actual_boxes list is empty, returns an
                    empty list. For ground truth boxes that do not intersect a predicted box, the list
                    will contain a None element
        """
        a_bs = actual_boxes
        p_bs = predicted_boxes
        # if the model predicts no boxes for an empty frame, return a perfect score of 1.0 (and an empty mapping list)
        if (len(a_bs) == 0) and (len(p_bs) == 0):
            return (1.0, []) if map_boxes else 1.0
        # elif the model predicts no boxes for a frame with one or more fish, return a score of 0.0 (and mapping list of
        # null values with the same length as the number of actual boxes)
        elif (len(a_bs) > 0) and (len(p_bs) == 0):
            return (0.0, [None] * len(a_bs)) if map_boxes else 0.0
        # elif the model predicts 1 or more boxes for a frame with no fish, return a score of 0 (and an empty mapping
        # list)
        elif (len(a_bs) == 0) and (len(p_bs) > 0):
            return (0.0, []) if map_boxes else 0.0
        # elif the model predicted 1 or more boxes for a frame with 1 or more fish, calculate the iou for each
        # ground truth box with its best match, and average those values for the frame
        elif (len(a_bs) > 0) and (len(p_bs) > 0):
            ious = []
            mapping = []
            # for each actual box, find the largest iou score of that box with one of the predicted boxes
            for a_b in a_bs:
                max_iou = 0.0
                max_iou_mapping = None
                for i, p_b in enumerate(p_bs):
                    iou = self._calc_iou(a_b, p_b)
                    if iou > max_iou:
                        max_iou = iou
                        max_iou_mapping = i
                ious.append(max_iou)
                mapping.append(max_iou_mapping)
            # if the model predicted more boxes than there are objects, penalize by scoring the remaining boxes 0.0
            if len(a_bs) < len(p_bs):
                ious.extend([0.0] * (len(p_bs) - len(a_bs)))
            # return the mean
            return (np.mean(ious), mapping) if map_boxes else np.mean(ious)

    def _calc_iou(self, box_a, box_b):
        """calculate the iou between box_a and box_b

        Args:
            box_a, box_b: boxes indexed as [xmin, ymin, xmax, ymax]. Widths and heights are computed as
                max - min + 1, i.e. the coordinates are treated as inclusive

        Returns:
            float: intersection over union; 1.0 if both boxes are empty sequences, 0.0 if they do not
                intersect
        """
        a = box_a
        b = box_b
        if (len(box_a) == 0) and (len(box_b) == 0):
            return 1.0
        # find area of the box formed by the intersection of box_a and box_b
        xa, ya, xb, yb = (max(a[0], b[0]), max(a[1], b[1]),
                          min(a[2], b[2]), min(a[3], b[3]))
        intersection = max(0, xb - xa + 1) * max(0, yb - ya + 1)
        # if the boxes do not intersect, short-circuit and return 0.0
        if intersection == 0:
            return 0.0
        # else, calculate the area of the union of box_a and box_b, and return intersection/union
        a_area = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
        b_area = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
        union = float(a_area + b_area - intersection)
        return intersection / union
# NOTE(review): the next two statements use down, up, exclude and run, none of which are defined in this
# view. They are presumably the tail of a definition that starts earlier; confirm their indentation
# against the full file.
# Append a '--exclude', <pattern> pair to both command lists for every entry in exclude.
[
    com.extend(
        list(
            chain.from_iterable(
                zip(['--exclude'] * len(exclude), exclude))))
    for com in [down, up]
]
# Pass each command list to run(); the comprehension is used only for its side effects.
[run(com) for com in [down, up]]

# Measure duration of program
# s holds the start time as a string; it is not used again within this view
s = ctime(time.time())
print("Start Time (Full): ", ctime(time.time()))

# Initialize functions. Create project directory and download the specified files
fm = FileManager()
pfm = ProjectFileManager(args.pid, fm, args.download_images, args.download_video, args.video)
print('downloaded video, created directories!')

# Storing video path. Setting video and final csv file names.
# The project directory is looked up in pfm.local_files under the key '<pid>_dir'.
video_path = os.path.join(pfm.local_files['{}_dir'.format(args.pid)], args.video)
video_name = args.video.split('.')[0]
csv_name = '{}_{}_detections.csv'.format(args.pid, video_name)

# NOTE(review): the body of this branch continues past the end of this view; only its leading string
# statement is visible here.
if args.full:
    """ 1. Run all the processes - video trimming, detections, video annotation 2. Create intervals list and iterate through them to crop video and feed it into the model """
def __init__(self):
    """create a FileManager object and an empty dictionary that will hold one ProjectFileManager object
    per project"""
    self.proj_file_managers = dict()
    self.file_manager = FileManager()
def __init__(self):
    """resolve the evaluation csv path and the test image directory through a FileManager"""
    self.fm = FileManager()
    local_files = self.fm.local_files
    self.csv_dir = local_files['figure_data_dir']
    self.img_dir = local_files['test_image_dir']
    # the evaluation file name is fixed to epoch 99
    self.file = 'epoch_99_eval.csv'
    self.data = os.path.join(self.csv_dir, self.file)
def __init__(self):
    """create a FileManager and fix the project id to 'MC6_5'"""
    self.pid = 'MC6_5'
    self.fm = FileManager()
host = socket.gethostname() # if running from a PACE login node, assert that args.command == full_auto, then submit the train.pbs script if ('login' in host) and ('pace' in host): assert (args.command == 'full_auto' ), 'full_auto is the only mode currently on a PACE login node' pbs_dir = os.path.join(package_root, 'CichlidDetection/PBS') subprocess.run( ['qsub', 'train.pbs', '-v', 'EPOCHS={}'.format(args.Epochs)], cwd=pbs_dir) # if not on a PACE login node, begin the analysis specified by args.command else: if args.command == 'sync': from CichlidDetection.Classes.FileManager import FileManager FileManager().sync_training_dir() else: from CichlidDetection.Classes.Runner import Runner runner = Runner() if args.command == 'full_auto': runner.download() runner.prep() runner.train(num_epochs=args.Epochs) elif args.command == 'download': runner.download() elif args.command == 'train': runner.prep()
def __init__(self, *args):
    """initialize detector

    Args:
        *args: optional extra objects; only the last one is retained, as ``self.pfm``. With no
            positional arguments ``self.pfm`` is left unset
    """
    if args:
        self.pfm = args[-1]
    self.fm = FileManager()
    self._initiate_model()
def __init__(self, *args):
    """create the FileManager; positional arguments are accepted but not used"""
    file_manager = FileManager()
    self.fm = file_manager