def run(cyto_job, parameters):
    """Re-create the annotations of one term in a project, then delete the originals.

    The annotations of ``cyto_job.project`` that carry the term
    ``parameters.terms_list`` are fetched, copied into new ``Annotation``
    objects (same location, image, term and project), saved as a collection,
    and the fetched annotations are then deleted one by one.

    :param cyto_job: Job wrapper; ``cyto_job.job`` receives the progress
        updates and ``cyto_job.project`` selects the project to work on.
    :param parameters: Job parameters; only ``terms_list`` is read and it is
        used as a single term identifier.
    :return: None
    """
    job = cyto_job.job
    project_id = cyto_job.project
    term_id = parameters.terms_list

    logging.info("########### Parameters = %s", parameters)
    logging.info("########### Term %s", term_id)
    logging.info("########### Project %s", project_id)

    annotations = AnnotationCollection()
    annotations.project = project_id
    annotations.terms = [term_id]
    annotations.fetch()

    annotation_count = len(annotations)
    if annotation_count == 0:
        # Nothing to convert or delete. Returning here also avoids the
        # division by zero the progress step computation would hit.
        logging.info("########### No annotation to convert")
        return

    progress = 0
    progress_delta = 1.0 / (1.50 * annotation_count)
    job.update(
        progress=progress,
        statusComment=f"Converting annotations from project {project_id}")

    new_annotations = AnnotationCollection()
    for a in annotations:
        # Some fetched annotations come without their location; fetch the
        # individual annotation before copying it.
        if a.location is None:
            a.fetch()
        new_annotations.append(
            Annotation(a.location, a.image, a.term, a.project))
    new_annotations.save(chunk=None)

    job.update(progress=0.25, statusComment="Deleting old annotations...")
    # NOTE(review): progress restarts from `progress_delta` here, i.e. below
    # the 0.25 reported just above, and tops out at about 0.67 — confirm the
    # intended progress scale.
    for a in annotations:
        a.delete()
        progress += progress_delta
        job.update(progress=progress)
def preprocess(cytomine, working_path, id_project, id_terms=None, id_tags_for_images=None):
    """
    Get data from Cytomine in order to train YOLO.

    Creates ``working_path`` with its image and annotation sub-directories,
    writes the class-name file (one term name per line), dumps every image of
    the project as ``<image id>.png`` and writes one ``<image id>.txt``
    annotation file per image in YOLO format.

    :param cytomine: The Cytomine client (not referenced in this function;
                     presumably it only has to be connected - verify against caller)
    :param working_path: The path where files will be stored
    :param id_project: The Cytomine project ID used to get data
    :param id_terms: Comma-separated string of Cytomine term IDs used to get data.
                     When empty or None, every term of the project is used.
    :param id_tags_for_images: The Cytomine tag IDs associated to images used to get data
                               (passed as ``tags`` to ``ImageInstanceCollection``)
    :return:
        classes_filename: The name of the file with classes
        image_filenames: A list of image filenames
        annotation_filenames: A list of filenames with annotations in YOLO format
    """
    images_path = os.path.join(working_path, IMG_DIRECTORY)
    annotations_path = os.path.join(working_path, ANNOTATION_DIRECTORY)
    for directory in (working_path, images_path, annotations_path):
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(directory, exist_ok=True)

    terms = TermCollection().fetch_with_filter("project", id_project)
    if id_terms:
        filtered_term_ids = {int(id_term) for id_term in id_terms.split(',')}
        filtered_terms = [term for term in terms if term.id in filtered_term_ids]
    else:
        filtered_terms = terms

    # YOLO class index of a term = its position in the classes file.
    terms_indexes = {term.id: i for i, term in enumerate(filtered_terms)}

    # https://github.com/eriklindernoren/PyTorch-YOLOv3#train-on-custom-dataset
    # Write obj.names
    classes_filename = os.path.join(working_path, CLASSES_FILENAME)
    with open(classes_filename, 'w') as f:
        for term in filtered_terms:
            # The file is opened in text mode, which already translates "\n"
            # to the platform line ending; os.linesep would be translated a
            # second time on Windows.
            f.write(term.name + "\n")

    # Download images
    image_filenames = []
    image_tags = id_tags_for_images if id_tags_for_images else None
    images = ImageInstanceCollection(tags=image_tags).fetch_with_filter("project", id_project)
    for image in images:
        image.dump(os.path.join(images_path, "{id}.png"), override=False)
        image_filenames.append(image.filename)

    # Create annotation files
    annotation_filenames = []
    requested_term_ids = [t.id for t in filtered_terms] if id_terms else None
    for image in images:
        annotations = AnnotationCollection()
        annotations.image = image.id
        annotations.terms = requested_term_ids
        annotations.showWKT = True
        annotations.showTerm = True
        annotations.fetch()

        filename = os.path.join(annotations_path, "{}.txt".format(image.id))
        with open(filename, 'w') as f:
            for annotation in annotations:
                geometry = wkt.loads(annotation.location)
                x, y, w, h = geometry_to_yolo(geometry, image.width, image.height)
                box = "{:.12f} {:.12f} {:.12f} {:.12f}".format(x, y, w, h)
                for term_id in annotation.term:
                    class_index = terms_indexes.get(term_id)
                    if class_index is None:
                        # The annotation also carries a term that is not one
                        # of the selected classes; skip it instead of failing
                        # with a KeyError.
                        continue
                    # <object-class> <x_center> <y_center> <width> <height>
                    f.write("{} {}\n".format(class_index, box))
        annotation_filenames.append(filename)

    return classes_filename, image_filenames, annotation_filenames
def main(argv):
    """Compute annotation area statistics per image and term, and attach them to the job as CSV.

    Reads the job parameters ``cytomine_id_terms``, ``cytomine_id_images``,
    ``cytomine_id_project``, ``cytomine_reviewed_only`` and, when present,
    ``cytomine_id_users`` / ``cytomine_id_jobs`` (comma-separated ID lists).
    The matching annotations are fetched, aggregated per image and per term
    (total area, count, ratio to the image total, mean area, per-annotation
    details), written to ``stat-area.csv`` through ``write_csv`` and uploaded
    as job data.

    :param argv: Command-line arguments handed to ``CytomineJob.from_cli``.
    :return: None
    """
    with CytomineJob.from_cli(argv) as cj:
        cj.job.update(progress=1, statusComment="Initialisation")
        params = cj.parameters
        cj.log(str(params))

        # Selected terms, restricted to those that exist in the project.
        term_ids = list(map(int, params.cytomine_id_terms.split(",")))
        project_id = params.cytomine_id_project
        project_terms = TermCollection().fetch_with_filter("project", project_id)
        terms = [t for t in project_terms if t.id in term_ids]

        # Selected images, restricted to those that exist in the project.
        image_ids = list(map(int, params.cytomine_id_images.split(",")))
        project_images = ImageInstanceCollection(light=True).fetch_with_filter("project", project_id)
        images = [img for img in project_images if img.id in image_ids]

        # Optional filter on human users.
        raw_user_ids = getattr(params, "cytomine_id_users", None)
        if raw_user_ids is None:
            user_ids = []
        else:
            user_ids = list(map(int, raw_user_ids.split(",")))

        # Optional filter on jobs; each selected job contributes its userJob ID.
        raw_job_ids = getattr(params, "cytomine_id_jobs", None)
        jobs = []
        if raw_job_ids is not None:
            job_ids = list(map(int, raw_job_ids.split(",")))
            project_jobs = JobCollection(project=project_id).fetch()
            jobs = [j for j in project_jobs if j.id in job_ids]

        all_user_ids = user_ids + [j.userJob for j in jobs]

        cj.job.update(progress=20, statusComment="Collect data")
        ac = AnnotationCollection()
        ac.terms = term_ids
        ac.images = image_ids
        ac.showMeta = True
        ac.showGIS = True
        ac.showTerm = True
        if params.cytomine_reviewed_only:
            ac.reviewed = True
        else:
            ac.reviewed = None
        # An empty user list means "no user filter".
        ac.users = all_user_ids or None
        ac.fetch()

        cj.job.update(progress=55, statusComment="Compute statistics")
        data = {}
        for image in images:
            in_image = [a for a in ac if a.image == image.id]
            image_areas = [a.area for a in in_image]
            total_area = np.sum(image_areas)
            stats = {'total': total_area, 'count': len(image_areas), 'ratio': 1.0}

            for term in terms:
                with_term = [a for a in in_image if term.id in a.term]
                term_areas = [a.area for a in with_term]
                term_total = np.sum(term_areas)
                stats[term.name] = {
                    'total': term_total,
                    'count': len(with_term),
                    'ratio': term_total / float(total_area) if total_area > 0 else 0,
                    'mean': np.mean(term_areas),
                    'annotations': [{"created": a.created, "area": a.area} for a in with_term],
                }

            data[image.instanceFilename] = stats

        cj.job.update(progress=90, statusComment="Write CSV report")
        report_name = "stat-area.csv"
        with open(report_name, "w") as report:
            for row in write_csv(data, terms):
                report.write("{}\n".format(row))

        job_data = JobData(id_job=cj.job.id, key="Area CSV report", filename=report_name)
        job_data = job_data.save()
        job_data.upload(report_name)

        cj.job.update(statusComment="Finished.", progress=100)