import argparse
import collections
import csv
import os
import subprocess

import tensorflow as tf
from scipy import misc
from scipy.misc import imread, imresize

import AnnotationLib
al = AnnotationLib  # eval() below refers to the library through this short alias

# NOTE: annotation_from_result, bash_var_to_py, get_hit_name, add_rectangles,
# rescale_boxes, and the ATTR_VAL_* constants are assumed to be provided
# elsewhere in this project.


def merge_labelact_annolist(annolist):
    # collapse annotations that share an imageName into a single entry
    name_to_idx = {}
    res_annolist = AnnotationLib.AnnoList()

    for a in annolist:
        if a.imageName in name_to_idx:
            aidx = name_to_idx[a.imageName]
            res_annolist[aidx].rects.extend(a.rects)
        else:
            res_annolist.append(a)
            name_to_idx[a.imageName] = len(res_annolist) - 1

    return res_annolist
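# Minimal usage sketch (hypothetical data, not part of the original pipeline):
# two annotations with the same imageName collapse into one entry holding the
# union of their rects. Assumes AnnotationLib.AnnoRect accepts (x1, y1, x2, y2).
def _demo_merge_labelact_annolist():
    a1 = AnnotationLib.Annotation()
    a1.imageName = "frame_000.png"
    a1.rects = [AnnotationLib.AnnoRect(0, 0, 10, 10)]

    a2 = AnnotationLib.Annotation()
    a2.imageName = "frame_000.png"
    a2.rects = [AnnotationLib.AnnoRect(5, 5, 20, 20)]

    merged = merge_labelact_annolist([a1, a2])
    assert len(merged) == 1
    assert len(merged[0].rects) == 2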
def eval(self, weights, test_boxes, min_conf, tau, show_suppressed, expname):
    # integer division under Python 2: grid dims in units of region_size
    self.H["grid_width"] = self.H["image_width"] / self.H["region_size"]
    self.H["grid_height"] = self.H["image_height"] / self.H["region_size"]
    x_in = tf.placeholder(tf.float32, name='x_in',
                          shape=[self.H['image_height'], self.H['image_width'], 3])

    if self.H['use_rezoom']:
        pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas = \
            self.build_forward(tf.expand_dims(x_in, 0), 'test', reuse=None)
        grid_area = self.H['grid_height'] * self.H['grid_width']
        pred_confidences = tf.reshape(
            tf.nn.softmax(tf.reshape(pred_confs_deltas,
                                     [grid_area * self.H['rnn_len'], 2])),
            [grid_area, self.H['rnn_len'], 2])
        if self.H['reregress']:
            pred_boxes = pred_boxes + pred_boxes_deltas
    else:
        pred_boxes, pred_logits, pred_confidences = \
            self.build_forward(tf.expand_dims(x_in, 0), 'test', reuse=None)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, weights)

        pred_annolist = al.AnnoList()
        true_annolist = al.parse(test_boxes)
        data_dir = os.path.dirname(test_boxes)
        image_dir = self.get_image_dir(weights, expname, test_boxes)
        subprocess.call('mkdir -p %s' % image_dir, shell=True)

        for i in range(len(true_annolist)):
            true_anno = true_annolist[i]

            # load the image, drop any alpha channel, resize to network input size
            orig_img = imread('%s/%s' % (data_dir, true_anno.imageName))[:, :, :3]
            img = imresize(orig_img,
                           (self.H["image_height"], self.H["image_width"]),
                           interp='cubic')
            feed = {x_in: img}
            (np_pred_boxes, np_pred_confidences) = sess.run(
                [pred_boxes, pred_confidences], feed_dict=feed)

            pred_anno = al.Annotation()
            pred_anno.imageName = true_anno.imageName
            new_img, rects = add_rectangles(self.H, [img],
                                            np_pred_confidences, np_pred_boxes,
                                            use_stitching=True,
                                            rnn_len=self.H['rnn_len'],
                                            min_conf=min_conf, tau=tau,
                                            show_suppressed=show_suppressed)

            pred_anno.rects = rects
            pred_anno.imagePath = os.path.abspath(data_dir)
            # map predicted boxes back to the original image resolution
            pred_anno = rescale_boxes((self.H["image_height"], self.H["image_width"]),
                                      pred_anno, orig_img.shape[0], orig_img.shape[1])
            pred_annolist.append(pred_anno)

            imname = '%s/%s' % (image_dir, os.path.basename(true_anno.imageName))
            misc.imsave(imname, new_img)
            if i % 25 == 0:
                print(i)

    return pred_annolist, true_annolist
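# Hypothetical invocation sketch: eval() above is a method of the surrounding
# detection-model class (note the `self` parameter); it restores the given
# checkpoint, runs detection on every image listed in `test_boxes`, writes
# visualizations to the image dir, and returns parallel predicted/ground-truth
# annotation lists. The paths and argument values below are assumptions.
#
#   pred_annolist, true_annolist = model.eval(
#       weights='output/save.ckpt-100000',
#       test_boxes='data/val_boxes.json',
#       min_conf=0.2, tau=0.25,
#       show_suppressed=False, expname='')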
def pal2al(_annolist):
    annotations = AnnotationLib.AnnoList()

    for adesc in _annolist.attribute_desc:
        annotations.attribute_desc[adesc.name] = adesc
        print "attribute: ", adesc.name, adesc.id
        for valdesc in adesc.val_to_str:
            annotations.add_attribute_val(adesc.name, valdesc.s, valdesc.id)

    attribute_name_from_id = {adesc.id: aname
                              for aname, adesc in annotations.attribute_desc.iteritems()}
    attribute_dtype_from_id = {adesc.id: adesc.dtype
                               for aname, adesc in annotations.attribute_desc.iteritems()}

    for _a in _annolist.annotation:
        anno = AnnotationLib.Annotation()
        anno.imageName = _a.imageName
        anno.rects = []

        for _r in _a.rect:
            rect = AnnotationLib.AnnoRect()
            rect.x1 = _r.x1
            rect.x2 = _r.x2
            rect.y1 = _r.y1
            rect.y2 = _r.y2

            if _r.HasField("id"):
                rect.id = _r.id
            if _r.HasField("track_id"):
                rect.track_id = _r.track_id
            if _r.HasField("score"):
                rect.score = _r.score

            for _at in _r.attribute:
                try:
                    cur_aname = attribute_name_from_id[_at.id]
                    cur_dtype = attribute_dtype_from_id[_at.id]
                except KeyError as e:
                    print "attribute: ", _at.id
                    print e
                    assert False

                if cur_dtype == AnnotationLib.AnnoList.TYPE_INT32:
                    rect.at[cur_aname] = _at.val
                elif cur_dtype == AnnotationLib.AnnoList.TYPE_FLOAT:
                    rect.at[cur_aname] = _at.fval
                elif cur_dtype == AnnotationLib.AnnoList.TYPE_STRING:
                    rect.at[cur_aname] = _at.strval
                else:
                    assert False

            anno.rects.append(rect)

        annotations.append(anno)

    return annotations
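# Hedged usage sketch: pal2al() expects a decoded protobuf AnnoList message.
# AnnoList_pb2 stands for the generated protobuf module this converter implies
# (its field names match those accessed above); the loader itself is an
# illustration, not part of the original file.
def _load_pal(filename):
    import AnnoList_pb2
    _pal = AnnoList_pb2.AnnoList()
    with open(filename, "rb") as f:
        _pal.ParseFromString(f.read())
    return pal2al(_pal)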
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir", help="hit directory")
    parser.add_argument("--print_empty", action="store_true",
                        help="print info on empty hits")
    parser.add_argument("--save_worker_results", action="store_true",
                        help="save results for each individual worker")
    args = parser.parse_args()

    url_prefix = bash_var_to_py(args.hits_dir + "/hit_params.sh", "S3_HOST_DIR")
    args.output_ext = ".pal"

    if url_prefix[-1] != os.sep:
        url_prefix += os.sep

    hit_name = get_hit_name(args.hits_dir)
    pal_name = args.hits_dir + '/' + hit_name + args.output_ext
    results_filename = args.hits_dir + '/' + hit_name + '.results'

    results_by_worker_dir = '%s/results_by_worker_%s' % (args.hits_dir, hit_name)
    subprocess.call(['mkdir', '-p', results_by_worker_dir])

    try:
        with open(args.hits_dir + '/bad_workers.txt', 'r') as f:
            bad_workerids = set(x.strip() for x in f.readlines())
    except IOError:
        bad_workerids = set()

    with open(results_filename, 'r') as results_file:
        results_list = list(csv.reader(results_file, delimiter='\t'))

    # map column header name -> column index
    invert = lambda d: {v: k for k, v in d.iteritems()}
    columns = invert(dict(enumerate(results_list[0])))

    annotation_list = AnnotationLib.AnnoList()
    hit_type = ""

    for each in results_list[1:]:
        if each[columns['hitstatus']] == 'Reviewable' and \
                each[columns['workerid']] not in bad_workerids:
            cur_hit_type = each[columns['Answer.results']].split(',')[0]
            if not hit_type:
                hit_type = cur_hit_type
            else:
                assert hit_type == cur_hit_type

            a = annotation_from_result(each, columns, url_prefix)
            annotation_list.append(a)

    # MA: some hit types require special post-processing
    if hit_type == "label_act":
        annotation_list = merge_labelact_annolist(annotation_list)

        annotation_list.add_attribute("gender", int)
        annotation_list.add_attribute("ptype", int)
        annotation_list.add_attribute("act", int)

        annotation_list.add_attribute_val("gender", "male", ATTR_VAL_GENDER_MALE)
        annotation_list.add_attribute_val("gender", "female", ATTR_VAL_GENDER_FEMALE)
        annotation_list.add_attribute_val("ptype", "sales", ATTR_VAL_PTYPE_SALES)
        annotation_list.add_attribute_val("ptype", "customer", ATTR_VAL_PTYPE_CUST)
        annotation_list.add_attribute_val("act", "interact_with_customer", ATTR_VAL_ACT_SALES_INT)
        annotation_list.add_attribute_val("act", "clean", ATTR_VAL_ACT_SALES_CLEAN)
        annotation_list.add_attribute_val("act", "other_sales", ATTR_VAL_ACT_SALES_OTHER)
        annotation_list.add_attribute_val("act", "queue", ATTR_VAL_ACT_CUST_QUEUE)
        annotation_list.add_attribute_val("act", "interact_with_sales", ATTR_VAL_ACT_CUST_INT)
        annotation_list.add_attribute_val("act", "browse", ATTR_VAL_ACT_CUST_BROWSE)
        annotation_list.add_attribute_val("act", "other_customer", ATTR_VAL_ACT_CUST_OTHER)

    AnnotationLib.save(pal_name, annotation_list)

    if args.save_worker_results:
        for workerid in set(x[columns['workerid']] for x in results_list[1:]):
            if workerid == '':
                print '%s missing entries' % sum(1 for each in results_list[1:]
                                                 if each[columns['workerid']] == '')
                continue

            annotation_list = AnnotationLib.AnnoList(
                [annotation_from_result(each, columns, url_prefix)
                 for each in results_list[1:]
                 if each[columns['hitstatus']] == 'Reviewable'
                 and each[columns['workerid']] == workerid])

            output_filename = '%s/%s.pal' % (results_by_worker_dir, workerid)
            print "worker: {}, number of annotations: {}".format(workerid, len(annotation_list))
            AnnotationLib.save(output_filename, annotation_list)

    if args.print_empty:
        for res in results_list[1:]:
            if res[columns['Answer.results']] == "":
                print "empty hit: ", res

        # show statistics on empty results (happens due to unknown bug in javascript tool)
        empty_res_workers = [each[columns['workerid']] for each in results_list[1:]
                             if each[columns['Answer.results']] == ""]

        print "empty output by workerid: "
        for workerid, worker_empty_count in collections.Counter(empty_res_workers).items():
            print workerid, worker_empty_count

        num_empty = sum(1 for each in results_list[1:]
                        if each[columns['Answer.results']] == "")
        print "number of empty results: ", num_empty
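# Standard script entry point; an addition on the assumption that this module
# is run directly, consistent with the argparse usage in main().
if __name__ == "__main__":
    main()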