def samples_fix_patches_cmd(task):
    """Generate missing patches for the samples in the database.

    Every sample of `task` listed in the `v_full` view is checked: if its
    patch file is absent, cannot be read as an image, or does not have the
    task's display patch size, the patch is generated again.

    Args:
        task: id of the task whose sample patches should be verified.
    """
    # Get a list of all patches relevant to us, sorted by slide so we don't
    # have to sample slides out of order
    make_dbview_full('v_full')
    db = get_db()
    rc = db.execute('SELECT * FROM v_full '
                    'WHERE task=? ORDER BY slide_name', (task, )).fetchall()

    # Get the required patch dimensions (square, 512 unless configured)
    (project, t_data) = get_task_data(task)
    patch_dim = t_data['dltrain'].get('display-patch-size', 512)

    for row in rc:
        sample_id = row['id']
        fn = get_sample_patch_filename(sample_id)

        if os.path.exists(fn):
            # Open inside a context manager so the file handle is released,
            # and treat an unreadable file like a missing one instead of
            # aborting the whole run.
            # NOTE(review): assumes Image is PIL.Image, whose open() raises
            # OSError subclasses for unreadable files - confirm.
            try:
                with Image.open(fn) as img:
                    w, h = img.size
            except OSError:
                w = h = None
            if w == patch_dim and h == patch_dim:
                continue

        print('Missing or corrupt patch for sample %d, %s' % (sample_id, fn))
        rect = (float(row['x0']), float(row['y0']),
                float(row['x1']), float(row['y1']))

        # Generate the patch
        generate_sample_patch(row['slide'], sample_id, rect,
                              dims=(patch_dim, patch_dim))
def block_detail_by_name(task_id, specimen_name, block_name):
    """Render the task detail template for one block of a specimen.

    Args:
        task_id: id of the task being viewed.
        specimen_name: specimen name, passed through to the template.
        block_name: block name, passed through to the template.

    Returns:
        The rendered 'slide/task_detail.html' template.
    """
    # Resolve the project and task descriptor for this task id
    project, task = get_task_data(task_id)

    template_args = {
        'project': project,
        'project_name': ProjectRef(project).disp_name,
        'task': task,
        'task_id': task_id,
        'specimen_name': specimen_name,
        'block_name': block_name,
    }
    return render_template('slide/task_detail.html', **template_args)
def get_label_id_in_task(task_id, label_name):
    """Return the id of the label named `label_name` in the task's labelset.

    Args:
        task_id: id of the task whose labelset is searched.
        label_name: name of the label to look up.

    Returns:
        The `id` column of the matching `label` row.

    Raises:
        TypeError: if no label matches, because the empty query result
            (None) is subscripted.
    """
    db = get_db()

    # Resolve the labelset attached to this task
    project, task = get_task_data(task_id)
    ls_id = get_labelset_id(project, task)

    # Look up the label within that labelset
    row = db.execute('SELECT id FROM label WHERE name=? AND labelset=?',
                     (label_name, ls_id)).fetchone()
    return row['id']
def block_slide_listing(task_id, specimen_name, block_name):
    """List the slides of one block that belong to a task, as JSON.

    The columns returned depend on the task mode:
      * 'annot'   - slide_info columns plus n_paths, n_markers and n_annot
      * 'dltrain' - slide_info columns plus n_samples
      * 'browse'  - slide_info columns only

    Args:
        task_id: id of the task.
        specimen_name: specimen the block belongs to.
        block_name: name of the block within the specimen.

    Returns:
        A JSON array with one object per slide, ordered by section and
        slide. The array is empty when the block does not exist in the
        task's project or the task mode is not one of the three above.
    """
    db = get_db()

    # Get the current task data
    project, task = get_task_data(task_id)

    # Get the block descriptor
    block = db.execute(
        'SELECT * FROM block_info WHERE specimen_name=? AND block_name=? AND project=?',
        (specimen_name, block_name, project)).fetchone()
    if block is None:
        return json.dumps([])
    block_id = block['id']

    # Pick the slide query that matches the task mode
    mode = task['mode']
    if mode == 'annot':
        # Join with the annotations table
        query = """SELECT S.*,
                          IFNULL(SUM(A.n_paths),0) as n_paths,
                          IFNULL(SUM(A.n_markers),0) as n_markers,
                          IFNULL(SUM(A.n_paths),0) + IFNULL(SUM(A.n_markers),0) as n_annot
                   FROM task_slide_index TSI
                       LEFT JOIN slide_info S ON TSI.slide = S.id
                       LEFT JOIN annot A on A.slide_id = S.id AND A.task_id = TSI.task_id
                   WHERE TSI.task_id = ? AND S.block_id = ?
                   GROUP BY S.id, S.section, S.slide
                   ORDER BY section, slide"""
    elif mode == 'dltrain':
        # Join with the training samples table
        query = """SELECT S.*, COUNT(T.id) as n_samples
                   FROM task_slide_index TSI
                       LEFT JOIN slide_info S on S.id == TSI.slide
                       LEFT JOIN training_sample T on T.slide = S.id AND T.task = TSI.task_id
                   WHERE TSI.task_id = ? AND block_id = ?
                   GROUP BY S.id, S.section, S.slide
                   ORDER BY section, slide"""
    elif mode == 'browse':
        query = """SELECT S.*
                   FROM task_slide_index TSI
                       LEFT JOIN slide_info S on S.id == TSI.slide
                   WHERE TSI.task_id = ? AND S.block_id = ?
                   ORDER BY section, slide"""
    else:
        # Unrecognised mode: there is nothing to list. Without this branch
        # the result variable would be unbound and the function would fail
        # with UnboundLocalError.
        return json.dumps([])

    slides = db.execute(query, (task_id, block_id)).fetchall()
    return json.dumps([dict(row) for row in slides])
def add_labelset_label(task_id):
    """Insert a new label into the labelset of a task.

    Reads 'label_name', 'label_color' and 'label_desc' from the submitted
    form, inserts a row into `label` and commits.

    Args:
        task_id: id of the task whose labelset receives the label.

    Returns:
        A JSON object {"id": <new label id>}.
    """
    db = get_db()

    # Resolve the labelset attached to this task
    project, task = get_task_data(task_id)
    ls_id = get_labelset_id(project, task)

    # Pull the label properties out of the submitted form
    form = request.form
    label_name = form['label_name']
    color = form['label_color']
    desc = form['label_desc']

    cursor = db.execute(
        'INSERT INTO label (name, labelset, description, color) VALUES (?,?,?,?)',
        (label_name, ls_id, desc, color))
    label_id = cursor.lastrowid
    db.commit()

    return json.dumps({"id": label_id})
def make_slide_dbview(task_id, view_name):
    """Create a temporary view listing the slides that belong to a task.

    The view contains the slide_info columns for every slide indexed for
    the task, ordered by specimen, block, section and slide, plus
    mode-specific counts:
      * 'annot'   - n_paths, n_markers, n_annot
      * 'dltrain' - n_samples
      * 'browse'  - no extra columns

    For any other task mode no view is created.

    Args:
        task_id: id of the task; coerced with int() before being placed
            into the SQL text.
        view_name: name of the temporary view. It is interpolated into the
            statement verbatim, so it must be a trusted identifier.
    """
    db = get_db()

    # This call guarantees that there is no database spoofing
    project, task = get_task_data(task_id)

    # NOTE(review): an "AND S.stain COLLATE NOCASE IN (...)" clause used to
    # be built here from task['stains'] but was never inserted into any of
    # the queries. It has been removed as dead code; confirm that membership
    # in task_slide_index is the intended stain filter.

    group_and_order = """GROUP BY S.id, S.section, S.slide, specimen_name, block_name
               ORDER BY specimen_name, block_name, section, slide"""

    select_by_mode = {
        'annot': """SELECT S.*,
                      IFNULL(SUM(A.n_paths),0) as n_paths,
                      IFNULL(SUM(A.n_markers),0) as n_markers,
                      IFNULL(SUM(A.n_paths),0) + IFNULL(SUM(A.n_markers),0) as n_annot
               FROM task_slide_index TSI
                   LEFT JOIN slide_info S ON TSI.slide = S.id
                   LEFT JOIN annot A on A.slide_id = S.id AND A.task_id = TSI.task_id
               WHERE TSI.task_id = %d
               """ + group_and_order,
        'dltrain': """SELECT S.*, COUNT(T.id) as n_samples
               FROM task_slide_index TSI
                   LEFT JOIN slide_info S ON TSI.slide = S.id
                   LEFT JOIN training_sample T on T.slide = S.id AND T.task = TSI.task_id
               WHERE TSI.task_id = %d
               """ + group_and_order,
        'browse': """SELECT S.*
               FROM task_slide_index TSI
                   LEFT JOIN slide_info S on S.id == TSI.slide
               WHERE TSI.task_id = %d
               ORDER BY specimen_name, block_name, section, slide""",
    }

    select_sql = select_by_mode.get(task['mode'])
    if select_sql is None:
        return

    # SQLite does not accept bound parameters inside a view definition, so
    # the task id is formatted into the statement after int() coercion.
    statement = 'CREATE TEMP VIEW %s AS ' + select_sql
    db.execute(statement % (view_name, int(task_id)))
def check_rect(task_id, rect):
    """Abort the request if a box violates the task's size limits.

    The limits are the optional 'min-size' and 'max-size' entries of the
    task's 'dltrain' settings; a limit that is not configured is not
    enforced.

    Args:
        task_id: id of the task whose limits apply.
        rect: sequence (x0, y0, x1, y1) describing the box.

    Raises:
        Whatever `abort` raises, carrying a 401 response with the text
        'Box is too small' or 'Box is too large'.
    """
    project, t_data = get_task_data(task_id)
    dltrain_cfg = t_data['dltrain']
    min_size = dltrain_cfg.get('min-size')
    max_size = dltrain_cfg.get('max-size')

    # Box extent, rounded to whole units
    w = round(abs(rect[2] - rect[0]))
    h = round(abs(rect[3] - rect[1]))

    # The limits are formatted with %s because either may be None; %d would
    # raise TypeError for an unconfigured limit before any check runs.
    print('Checking %d %d against %s %s' % (w, h, min_size, max_size))

    if min_size is not None and (w < min_size or h < min_size):
        abort(Response('Box is too small', 401))
    if max_size is not None and (w > max_size or h > max_size):
        abort(Response('Box is too large', 401))
def specimen_block_listing(task_id, specimen_name):
    """List the blocks of a specimen that have slides in a task, as JSON.

    Each entry carries block_id, block_name, specimen_name and nslides.
    'annot' tasks additionally get nannot and 'dltrain' tasks get nsamples.
    Every other mode is handled as browse mode.

    Args:
        task_id: id of the task.
        specimen_name: name of the specimen whose blocks are listed.

    Returns:
        A JSON array with one object per block, ordered by block name.
    """
    db = get_db()

    # Get the current task data
    project, task = get_task_data(task_id)
    mode = task['mode']

    # Select the block query that matches the task mode
    if mode == 'annot':
        # Count slides and joined annotation rows per block
        sql = """SELECT block_id,block_name,specimen_name,
                        COUNT (S.id) as nslides,
                        COUNT(A.slide_id) as nannot
                 FROM task_slide_index TSI
                     LEFT JOIN slide_info S on S.id == TSI.slide
                     LEFT JOIN annot A on A.slide_id = S.id AND A.task_id = TSI.task_id
                 WHERE TSI.task_id = ? AND S.specimen_name = ?
                 GROUP BY block_id,block_name,specimen_name
                 ORDER BY block_name"""
    elif mode == 'dltrain':
        # Count distinct slides and joined training samples per block
        sql = """SELECT block_id,block_name,specimen_name,
                        COUNT (DISTINCT S.id) as nslides,
                        COUNT(T.slide) as nsamples
                 FROM task_slide_index TSI
                     LEFT JOIN slide_info S on S.id == TSI.slide
                     LEFT JOIN training_sample T on T.slide = S.id AND T.task = TSI.task_id
                 WHERE TSI.task_id = ? AND S.specimen_name = ?
                 GROUP BY block_id,block_name,specimen_name
                 ORDER BY block_name"""
    else:
        # Browse mode: slide counts only
        sql = """SELECT block_id,block_name,specimen_name,COUNT(S.id) as nslides
                 FROM task_slide_index TSI
                     LEFT JOIN slide_info S on S.id == TSI.slide
                 WHERE TSI.task_id = ? AND S.specimen_name = ?
                 GROUP BY block_id,block_name,specimen_name
                 ORDER BY block_name"""

    rows = db.execute(sql, (task_id, specimen_name)).fetchall()
    return json.dumps(list(map(dict, rows)))
def get_labelset_labels_table(task_id, slide_id):
    """Render the label table with per-label sample counts for one slide.

    Args:
        task_id: id of the task.
        slide_id: id of the slide whose samples are counted.

    Returns:
        The rendered 'dbtrain/label_table.html' template, given the label
        rows (each with an `n_samples` count), `task_id` and `slide_id`.
    """
    db = get_db()
    project, task = get_task_data(task_id)

    # One row per label, counting the samples of this task on this slide.
    # NOTE(review): unlike get_labelset_labels_table_json, this query is not
    # restricted to the task's labelset - confirm that this is intended.
    cursor = db.execute(
        'SELECT L.*, COUNT(T.id) as n_samples '
        'FROM label L LEFT JOIN training_sample T '
        ' ON T.label = L.id AND T.task=? AND T.slide=? '
        'GROUP BY L.id '
        'ORDER BY L.id', (task_id, slide_id))
    labels = list(map(dict, cursor.fetchall()))

    return render_template('dbtrain/label_table.html',
                           labels=labels,
                           task_id=task_id,
                           slide_id=slide_id)
def get_labelset_labels_table_json(task_id, slide_id):
    """Return the labels of the task's labelset with sample counts, as JSON.

    Only labels whose labelset matches the name in
    task['dltrain']['labelset'] and the task's project are included.

    Args:
        task_id: id of the task.
        slide_id: id of the slide whose samples are counted.

    Returns:
        A JSON array with one object per label (the label columns plus
        `n_samples`), ordered by label id.
    """
    db = get_db()
    project, task = get_task_data(task_id)

    # Each fragment ends with a space so the adjacent literals stay
    # separated once joined; the WHERE fragment previously ran straight
    # into GROUP BY ("LS.project=?GROUP BY").
    ll = db.execute(
        'SELECT L.*, COUNT(T.id) as n_samples '
        'FROM label L LEFT JOIN training_sample T '
        ' ON T.label = L.id AND T.task=? AND T.slide=? '
        ' LEFT JOIN labelset_info LS on L.labelset = LS.id '
        'WHERE LS.name=? AND LS.project=? '
        'GROUP BY L.id '
        'ORDER BY L.id',
        (task_id, slide_id, task['dltrain']['labelset'], project))

    ll_data = [dict(row) for row in ll.fetchall()]
    return json.dumps(ll_data)
def create_sample_base(task_id, slide_id, label_id, rect, osl_level=0):
    """Insert a training sample and queue generation of its image patch.

    Args:
        task_id: id of the task the sample belongs to.
        slide_id: id of the slide the sample was drawn on.
        label_id: id of the label assigned to the sample.
        rect: sequence (x0, y0, x1, y1) giving the sample box.
        osl_level: forwarded unchanged to `generate_sample_patch`.

    Returns:
        A JSON object with the new sample's 'id' and the 'patch_job_id' of
        the queued patch-generation job.
    """
    project, t_data = get_task_data(task_id)
    db = get_db()

    # Every sample row references a freshly created edit-meta record
    meta_id = create_edit_meta()

    # Insert the sample itself
    insert_values = (meta_id, rect[0], rect[1], rect[2], rect[3],
                     label_id, slide_id, task_id)
    sample_id = db.execute(
        'INSERT INTO training_sample (meta_id,x0,y0,x1,y1,label,slide,task) VALUES (?,?,?,?,?,?,?,?)',
        insert_values).lastrowid

    # Patches are square; the side length comes from the task settings
    patch_dim = t_data['dltrain'].get('display-patch-size', 512)
    patch_size = (patch_dim, patch_dim)

    # The patch is sampled by a queued job rather than inline: a server
    # hosting the slide might have gone down and the slide would need to be
    # downloaded again, and the user should not have to wait for that.
    queue = Queue(current_app.config['PRELOAD_QUEUE'], connection=Redis())
    job = queue.enqueue(generate_sample_patch,
                        slide_id, sample_id, rect, patch_size, osl_level,
                        job_timeout="120s", result_ttl="60s")

    # Stick the properties into the job
    job.meta['args'] = (slide_id, sample_id, rect)
    job.save_meta()

    # Only commit once the job has been saved
    db.commit()

    # Return the sample id and the patch generation job id
    reply = {'id': sample_id, 'patch_job_id': job.id}
    return json.dumps(reply)
def slide_view(task_id, slide_id, resolution, affine_mode):
    """Render the slide viewer page for one slide of a task.

    If the 'affine' mode or the 'x16' resolution is requested but the slide
    lacks the corresponding resource, the client is redirected to the same
    view with the unavailable setting replaced by 'raw'.

    Args:
        task_id: id of the current task.
        slide_id: id of the slide to display.
        resolution: requested resolution.
        affine_mode: requested affine mode.

    Returns:
        A redirect to 'slide.slide_view' with corrected settings, or the
        rendered 'slide/slide_view.html' template.
    """
    # Get the current task data
    project, task = get_task_data(task_id)

    # Get the slide record, the next/previous slides for this task, the
    # stain list and the user's stored preferences
    si, prev_slide, next_slide, stain_list, user_prefs = get_slide_info(
        task_id, slide_id)

    # Check that the affine mode and resolution requested are available.
    # Each resource is looked up with both values of resource_exists'
    # second flag.
    pr = ProjectRef(project)
    sr = SlideRef(pr, si['specimen_name'], si['block_name'],
                  si['slide_name'], si['slide_ext'])
    have_affine = (sr.resource_exists('affine', True)
                   or sr.resource_exists('affine', False))
    have_x16 = (sr.resource_exists('x16', True)
                or sr.resource_exists('x16', False))

    # If a requested resource is missing, redirect to what is available
    if (affine_mode == 'affine' and not have_affine) or \
            (resolution == 'x16' and not have_x16):
        return redirect(
            url_for('slide.slide_view',
                    task_id=task_id,
                    slide_id=slide_id,
                    resolution=resolution if have_x16 else 'raw',
                    affine_mode=affine_mode if have_affine else 'raw'))

    # The overlay listing is only needed when the page is rendered, so it
    # is fetched after the redirect decision
    overlays = sr.get_available_overlays(local=False)

    # Form the URL templates for preloading and actual dzi access, so that
    # in JS we can just do a quick substitution
    url_ctx = {
        'project': project,
        'specimen': si['specimen_name'],
        'block': si['block_name'],
        'slide_name': si['slide_name'],
        'slide_ext': si['slide_ext'],
        'mode': affine_mode,
        'resource': 'XXXXX'
    }
    url_tmpl_preload = url_for('dzi.dzi_preload_endpoint', **url_ctx)
    url_tmpl_dzi = url_for('dzi.dzi', **url_ctx)
    url_tmpl_download = url_for('dzi.dzi_download', **url_ctx)

    # Build the template context
    context = {
        'slide_id': slide_id,
        'slide_info': si,
        'next_slide': next_slide,
        'prev_slide': prev_slide,
        'stain_list': stain_list,
        'affine_mode': affine_mode,
        'have_affine': have_affine,
        'have_x16': have_x16,
        'resolution': resolution,
        'seg_mode': task['mode'],
        'task_id': task_id,
        'project': si['project'],
        'project_name': pr.disp_name,
        'block_id': si['block_id'],
        'url_tmpl_preload': url_tmpl_preload,
        'url_tmpl_dzi': url_tmpl_dzi,
        'url_tmpl_download': url_tmpl_download,
        'task': task,
        'fixed_box_size': get_dltrain_fixed_box_size(task),
        'user_prefs': user_prefs,
        'overlays': overlays
    }

    # Add optional fields to context when they were submitted with the form
    for field in ('sample_id', 'sample_cx', 'sample_cy'):
        if field in request.form:
            context[field] = request.form[field]

    # Render the template
    return render_template('slide/slide_view.html', **context)
def get_slide_info(task_id, slide_id):
    """Collect a slide's record, its neighbours and the user's preferences.

    Args:
        task_id: id of the current task.
        slide_id: id of the slide being viewed.

    Returns:
        A tuple (slide_info, prev_slide, next_slide, stain_list, user_prefs):
          * slide_info - the `slide_info` row of the slide
          * prev_slide / next_slide - the best matching slide in the nearest
            lower / higher section of the same block, or None if there is
            no such section
          * stain_list - dicts with id, slide and stain for every slide in
            the current section of the block
          * user_prefs - the stored JSON preferences of the current user
            for this task and slide, or {} when none are stored
    """
    db = get_db()

    # Get the info on the current slide
    slide_info = db.execute('SELECT * from slide_info WHERE id = ?',
                            (slide_id, )).fetchone()
    block_id = slide_info['block_id']
    section = slide_info['section']
    slideno = slide_info['slide']
    stain = slide_info['stain']

    # Load the task data, then create a view of the slides in this task
    project, task = get_task_data(task_id)
    make_slide_dbview(task_id, 'v_full')

    # Get a list of slides/stains for this section (for drop-down menu)
    rc_slide = db.execute(
        'SELECT id, slide, stain FROM v_full '
        'WHERE block_id=? AND section=? '
        'ORDER BY slide', (block_id, section))
    stain_list = list(map(dict, rc_slide.fetchall()))

    def corresponding_slide(section_query):
        """Return the best matching slide in the section found by the query.

        A slide with the same stain is preferred; ties are broken by the
        smallest difference in slide number. If the same stain is not
        available, the closest slide of any stain is returned. Returns None
        when the query finds no section.
        """
        neighbour = db.execute(section_query, (block_id, section)).fetchone()
        if neighbour is None:
            return None
        return db.execute(
            'SELECT *,(stain<>?)*1000+abs(slide-?) AS X '
            'FROM v_full WHERE block_id=? AND section=? '
            'ORDER BY X LIMIT 1',
            (stain, slideno, block_id, neighbour['section'])).fetchone()

    # Closest section below the current one
    prev_slide = corresponding_slide(
        'SELECT section FROM v_full '
        'WHERE block_id=? AND section<? '
        'ORDER BY section DESC limit 1')

    # Closest section above the current one
    next_slide = corresponding_slide(
        'SELECT section FROM v_full '
        'WHERE block_id=? AND section>? '
        'ORDER BY section ASC limit 1')

    # Load the user preferences for this slide
    rc = db.execute(
        'SELECT json FROM user_task_slide_preferences '
        ' WHERE user=? AND task_id=? AND slide=?',
        (g.user['id'], task_id, slide_id)).fetchone()
    if rc is None:
        user_prefs = {}
    else:
        user_prefs = json.loads(rc['json'])

    return slide_info, prev_slide, next_slide, stain_list, user_prefs
def samples_import_csv_command(task, input_file, user):
    """Import training samples from a CSV file.

    Each line must have at least six comma-separated fields:
    slide_name, label_name, x, y, w, h. A line is skipped, with a message,
    when it has too few fields, names an unknown slide or label, has
    non-numeric coordinates, or overlaps an existing sample of the same
    task and slide.

    Args:
        task: id of the task to import into; it must have 'dltrain' settings.
        input_file: readable text file object holding the CSV data.
        user: username to record as the current user (stored in `g.user`).

    Returns:
        -1 if the task has no 'dltrain' settings or the user is unknown,
        otherwise None.
    """
    db = get_db()

    # Look up the labelset for the current task
    project, tdata = get_task_data(task)
    if 'dltrain' not in tdata:
        print('Task %s is not the right type for importing samples' %
              tdata['name'])
        return -1
    lsid = get_labelset_id(project, tdata)

    # Look up the user
    g.user = db.execute('SELECT * FROM user WHERE username=?',
                        (user, )).fetchone()
    if g.user is None:
        print('User %s is not in the system' % user)
        return -1

    for line in input_file.read().splitlines():
        fields = line.split(',')
        if len(fields) < 6:
            print('skipping ill-formatted line "%s"' % (line, ))
            continue
        slide_name, label_name, x, y, w, h = fields[:6]

        # Look up the slide
        rcs = db.execute('SELECT id FROM slide WHERE slide_name=?',
                         (slide_name, )).fetchone()
        if rcs is None:
            print('Slide %s does not exist, skipping line %s' %
                  (slide_name, line))
            continue

        # Look up the label
        rcl = db.execute('SELECT id FROM label WHERE name=? AND labelset=?',
                         (label_name, lsid)).fetchone()
        if rcl is None:
            print('Label %s does not exist, skipping line %s' %
                  (label_name, line))
            continue

        # Build the box (x0, y0, x1, y1). Non-numeric coordinates make the
        # line ill-formatted; skip it rather than abort the whole import
        # after earlier samples have already been committed.
        try:
            rect = (float(x), float(y),
                    float(x) + float(w), float(y) + float(h))
        except ValueError:
            print('skipping ill-formatted line "%s"' % (line, ))
            continue

        # Check for overlapping samples: a stored box overlaps when its
        # intersection with the new box is non-empty in both x and y
        rc_intercept = db.execute(
            'SELECT max(x0,?) as p0, min(x1,?) as p1, '
            '       max(y0,?) as q0, min(y1,?) as q1, * '
            'FROM training_sample '
            'WHERE p0 < p1 AND q0 < q1 AND task=? and slide=?',
            (rect[0], rect[2], rect[1], rect[3], task,
             rcs['id'])).fetchall()
        if rc_intercept:
            print('There are %d overlapping samples for sample "%s"' %
                  (len(rc_intercept), line))
            continue

        # Create the sample
        result = json.loads(
            create_sample_base(task, rcs['id'], rcl['id'], rect))

        # Success
        print('Imported new sample %d from line "%s"' % (result['id'], line))