Пример #1
0
def merge_labelact_annolist(annolist):
    """Collapse annotations that share an ``imageName`` into a single entry.

    The first annotation seen for an image is appended to the result; the
    ``rects`` of every later annotation with the same ``imageName`` are
    appended to that first entry's ``rects``. Relative order of first
    occurrences is preserved.

    Args:
        annolist: iterable of annotations exposing ``imageName`` and ``rects``.

    Returns:
        A new ``AnnotationLib.AnnoList`` with one annotation per image name.
    """
    merged = AnnotationLib.AnnoList()
    slot_by_image = {}

    for anno in annolist:
        slot = slot_by_image.get(anno.imageName)
        if slot is None:
            # First time this image shows up: keep the annotation itself.
            # NOTE(review): the annotation is appended as-is (not copied), so
            # later merges presumably extend the input object's rects in
            # place -- confirm callers do not reuse the input list.
            merged.append(anno)
            slot_by_image[anno.imageName] = len(merged) - 1
        else:
            merged[slot].rects.extend(anno.rects)

    return merged
Пример #2
0
    def eval(self, weights, test_boxes, min_conf, tau, show_suppressed, expname):
        """Run the detector over an annotated test set and collect predictions.

        Builds the forward graph, restores ``weights`` into a fresh session and,
        for every annotation parsed from ``test_boxes``, reads the image,
        resizes it to the configured input size, runs inference and saves the
        image returned by ``add_rectangles`` into the output directory.

        Args:
            weights: checkpoint path handed to ``saver.restore``.
            test_boxes: annotation file parsed with ``al.parse``; image names
                are resolved relative to the directory containing this file.
            min_conf: forwarded unchanged to ``add_rectangles``.
            tau: forwarded unchanged to ``add_rectangles``.
            show_suppressed: forwarded unchanged to ``add_rectangles``.
            expname: forwarded to ``self.get_image_dir`` when choosing the
                output directory.

        Returns:
            Tuple ``(pred_annolist, true_annolist)``: the predictions rescaled
            by ``rescale_boxes`` to each original image size, and the parsed
            ground-truth annotations.

        Side effects:
            Writes ``grid_width`` and ``grid_height`` into ``self.H``, creates
            the output directory and writes one image per annotation into it.
        """
        # Floor division keeps the grid dimensions integral on Python 3 as
        # well; they are multiplied into the reshape shapes below.
        self.H["grid_width"] = self.H["image_width"] // self.H["region_size"]
        self.H["grid_height"] = self.H["image_height"] // self.H["region_size"]
        x_in = tf.placeholder(tf.float32, name='x_in', shape=[self.H['image_height'], self.H['image_width'], 3])
        if self.H['use_rezoom']:
            pred_boxes, pred_logits, pred_confidences, pred_confs_deltas, pred_boxes_deltas = self.build_forward(tf.expand_dims(x_in, 0), 'test', reuse=None)
            grid_area = self.H['grid_height'] * self.H['grid_width']
            # Softmax over the two-way confidence deltas, then restore the
            # (grid cell, rnn step, 2) layout.
            pred_confidences = tf.reshape(tf.nn.softmax(tf.reshape(pred_confs_deltas, [grid_area * self.H['rnn_len'], 2])),
                                          [grid_area, self.H['rnn_len'], 2])
            if self.H['reregress']:
                pred_boxes = pred_boxes + pred_boxes_deltas
        else:
            pred_boxes, pred_logits, pred_confidences = self.build_forward(tf.expand_dims(x_in, 0), 'test', reuse=None)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, weights)

            pred_annolist = al.AnnoList()

            true_annolist = al.parse(test_boxes)
            data_dir = os.path.dirname(test_boxes)
            image_dir = self.get_image_dir(weights, expname, test_boxes)
            # Argument-list form: no shell is involved, so directory names
            # containing spaces or shell metacharacters are handled correctly.
            subprocess.call(['mkdir', '-p', image_dir])
            for i, true_anno in enumerate(true_annolist):
                # Keep only the first three channels of the source image.
                orig_img = imread('%s/%s' % (data_dir, true_anno.imageName))[:,:,:3]
                img = imresize(orig_img, (self.H["image_height"], self.H["image_width"]), interp='cubic')
                feed = {x_in: img}
                (np_pred_boxes, np_pred_confidences) = sess.run([pred_boxes, pred_confidences], feed_dict=feed)
                pred_anno = al.Annotation()
                pred_anno.imageName = true_anno.imageName
                new_img, rects = add_rectangles(self.H, [img], np_pred_confidences, np_pred_boxes,
                                                use_stitching=True, rnn_len=self.H['rnn_len'], min_conf=min_conf, tau=tau, show_suppressed=show_suppressed)

                pred_anno.rects = rects
                pred_anno.imagePath = os.path.abspath(data_dir)
                # Boxes were predicted on the resized image; map them back to
                # the original image's height and width.
                pred_anno = rescale_boxes((self.H["image_height"], self.H["image_width"]), pred_anno, orig_img.shape[0], orig_img.shape[1])
                pred_annolist.append(pred_anno)

                imname = '%s/%s' % (image_dir, os.path.basename(true_anno.imageName))
                misc.imsave(imname, new_img)
                # Lightweight progress indicator.
                if i % 25 == 0:
                    print(i)
        return pred_annolist, true_annolist
Пример #3
0
def pal2al(_annolist):
    """Convert a serialized annotation list into an ``AnnotationLib.AnnoList``.

    Copies the attribute descriptors and their value names, then rebuilds
    every annotation with its rectangles, optional ``id`` / ``track_id`` /
    ``score`` fields and typed attribute values.

    Args:
        _annolist: source message exposing ``attribute_desc`` and
            ``annotation`` (presumably a protobuf message, since rectangles
            are queried with ``HasField``).

    Returns:
        The populated ``AnnotationLib.AnnoList``.

    Raises:
        AssertionError: if a rectangle references an attribute id that has no
            descriptor, or a descriptor has an unsupported dtype. Raised
            explicitly so the check still runs under ``python -O``.
    """
    annotations = AnnotationLib.AnnoList()

    for adesc in _annolist.attribute_desc:
        annotations.attribute_desc[adesc.name] = adesc
        print("attribute:  %s %s" % (adesc.name, adesc.id))

        for valdesc in adesc.val_to_str:
            annotations.add_attribute_val(adesc.name, valdesc.s, valdesc.id)

    # Rectangles refer to attributes by numeric id; build id -> name and
    # id -> dtype lookups in a single pass over the descriptors.
    attribute_name_from_id = {}
    attribute_dtype_from_id = {}
    for aname, adesc in annotations.attribute_desc.items():
        attribute_name_from_id[adesc.id] = aname
        attribute_dtype_from_id[adesc.id] = adesc.dtype

    for _a in _annolist.annotation:
        anno = AnnotationLib.Annotation()
        anno.imageName = _a.imageName
        anno.rects = []

        for _r in _a.rect:
            rect = AnnotationLib.AnnoRect()

            rect.x1 = _r.x1
            rect.x2 = _r.x2
            rect.y1 = _r.y1
            rect.y2 = _r.y2

            # Optional fields are copied only when present in the source.
            if _r.HasField("id"):
                rect.id = _r.id

            if _r.HasField("track_id"):
                rect.track_id = _r.track_id

            if _r.HasField("score"):
                rect.score = _r.score

            for _at in _r.attribute:
                try:
                    cur_aname = attribute_name_from_id[_at.id]
                    cur_dtype = attribute_dtype_from_id[_at.id]
                except KeyError as e:
                    print("attribute:  %s" % (_at.id,))
                    print(e)
                    # A bare ``assert False`` is stripped under -O and would
                    # let stale values from a previous iteration be reused.
                    raise AssertionError("unknown attribute id: %s" % (_at.id,))

                # The descriptor dtype selects which value field holds the data.
                if cur_dtype == AnnotationLib.AnnoList.TYPE_INT32:
                    rect.at[cur_aname] = _at.val
                elif cur_dtype == AnnotationLib.AnnoList.TYPE_FLOAT:
                    rect.at[cur_aname] = _at.fval
                elif cur_dtype == AnnotationLib.AnnoList.TYPE_STRING:
                    rect.at[cur_aname] = _at.strval
                else:
                    raise AssertionError(
                        "unsupported dtype %r for attribute %r" % (cur_dtype, cur_aname))

            anno.rects.append(rect)

        annotations.append(anno)

    return annotations
Пример #4
0
def main():
    """Convert a hit directory's tab-separated ``.results`` file to ``.pal`` files.

    Reads ``<hits_dir>/<hit_name>.results``, turns every ``Reviewable`` row
    whose worker is not listed in ``<hits_dir>/bad_workers.txt`` into an
    annotation via ``annotation_from_result`` and saves the combined list as
    ``<hits_dir>/<hit_name>.pal``. For the ``label_act`` hit type the
    annotations are merged per image and the attribute vocabulary is
    registered before saving.

    Command-line flags:
        --save_worker_results: also write one ``.pal`` per worker id into
            ``results_by_worker_<hit_name>``.
        --print_empty: print every row whose ``Answer.results`` is empty.

    Statistics on empty results are always printed at the end.

    Raises:
        AssertionError: if the accepted rows mix different hit types. Raised
            explicitly so the check still runs under ``python -O``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir", help="hit directory")
    parser.add_argument("--print_empty", action="store_true", help="print info on empty hits")
    parser.add_argument("--save_worker_results", action="store_true", help="save results for each individual worker")
    args = parser.parse_args()

    url_prefix = bash_var_to_py(args.hits_dir + "/hit_params.sh", "S3_HOST_DIR")
    # NOTE(review): an empty S3_HOST_DIR raises IndexError here; the right
    # fallback is not visible from this function, so behaviour is unchanged.
    if url_prefix[-1] != os.sep:
        url_prefix += os.sep

    output_ext = ".pal"
    hit_name = get_hit_name(args.hits_dir)
    pal_name = args.hits_dir + '/' + hit_name + output_ext

    results_filename = args.hits_dir + '/' + hit_name + '.results'
    results_by_worker_dir = '%s/results_by_worker_%s' % (args.hits_dir, hit_name)
    subprocess.call(['mkdir', '-p', results_by_worker_dir])

    # The bad-worker list is optional; a missing file means nobody is excluded.
    try:
        with open(args.hits_dir + '/bad_workers.txt', 'r') as f:
            bad_workerids = set(line.strip() for line in f)
    except IOError:
        bad_workerids = set()

    # Read everything up front so the file is closed before processing.
    with open(results_filename, 'r') as results_file:
        results_list = list(csv.reader(results_file, delimiter='\t'))

    # Header row: column name -> column index.
    columns = {name: idx for idx, name in enumerate(results_list[0])}
    rows = results_list[1:]

    annotation_list = AnnotationLib.AnnoList()
    hit_type = ""

    for each in rows:
        if each[columns['hitstatus']] != 'Reviewable':
            continue
        if each[columns['workerid']] in bad_workerids:
            continue

        # The hit type is the first comma-separated token of the answer.
        cur_hit_type = each[columns['Answer.results']].split(',')[0]
        if not hit_type:
            hit_type = cur_hit_type
        elif hit_type != cur_hit_type:
            raise AssertionError(
                "mixed hit types: %r vs %r" % (hit_type, cur_hit_type))

        annotation_list.append(annotation_from_result(each, columns, url_prefix))

    # MA: some hit types require special post-processing
    if hit_type == "label_act":
        annotation_list = merge_labelact_annolist(annotation_list)

        for attr_name in ("gender", "ptype", "act"):
            annotation_list.add_attribute(attr_name, int)

        labelact_values = (
            ("gender", "male", ATTR_VAL_GENDER_MALE),
            ("gender", "female", ATTR_VAL_GENDER_FEMALE),
            ("ptype", "sales", ATTR_VAL_PTYPE_SALES),
            ("ptype", "customer", ATTR_VAL_PTYPE_CUST),
            ("act", "interact_with_customer", ATTR_VAL_ACT_SALES_INT),
            ("act", "clean", ATTR_VAL_ACT_SALES_CLEAN),
            ("act", "other_sales", ATTR_VAL_ACT_SALES_OTHER),
            ("act", "queue", ATTR_VAL_ACT_CUST_QUEUE),
            ("act", "interact_with_sales", ATTR_VAL_ACT_CUST_INT),
            ("act", "browse", ATTR_VAL_ACT_CUST_BROWSE),
            ("act", "other_customer", ATTR_VAL_ACT_CUST_OTHER),
        )
        for attr_name, val_name, val_id in labelact_values:
            annotation_list.add_attribute_val(attr_name, val_name, val_id)

    AnnotationLib.save(pal_name, annotation_list)

    if args.save_worker_results:
        # Group rows by worker once instead of rescanning all rows per worker.
        rows_by_worker = collections.defaultdict(list)
        for each in rows:
            rows_by_worker[each[columns['workerid']]].append(each)

        for workerid, worker_rows in rows_by_worker.items():
            if workerid == '':
                print('%s missing entries' % len(worker_rows))
                continue

            # Bad workers are deliberately not filtered here, matching the
            # original per-worker export; a worker with no reviewable rows
            # still gets an (empty) file.
            worker_annotations = AnnotationLib.AnnoList(
                [annotation_from_result(each, columns, url_prefix)
                 for each in worker_rows
                 if each[columns['hitstatus']] == 'Reviewable'])

            output_filename = '%s/%s.pal' % (results_by_worker_dir, workerid)

            print("worker: {}, number of annotations: {}".format(workerid, len(worker_annotations)))
            AnnotationLib.save(output_filename, worker_annotations)

    # show statistics on empty results (happens due to unknown bug in javascript tool)
    empty_rows = [each for each in rows if each[columns['Answer.results']] == ""]

    if args.print_empty:
        for res in empty_rows:
            print("empty hit:  %s" % (res,))

    print("empty output by workerid: ")
    empty_by_worker = collections.Counter(each[columns['workerid']] for each in empty_rows)
    for workerid, worker_empty_count in empty_by_worker.items():
        print("%s %s" % (workerid, worker_empty_count))

    print("number of empty results:  %s" % len(empty_rows))