def compare(self, filename, fix_local_path, fix_length, sha1, fix_time):
    """Check a fixity entry against the current state of the data in the filesystem"""
    try:
        local_path = fix_local_path
        if not os.path.exists(fix_local_path):
            if self.resolvers is not None:
                # use resolvers['binpid2path'] to try to find file
                (bin_lid, format) = re.split(r'\.', filename)
                hit = self.resolvers['binpid2path'].resolve(pid=bin_lid, format=format)
                if hit is not None:
                    local_path = hit.value
                    print 'WARNING on %s: file has been moved to %s' % (fix_local_path, local_path)
                else:
                    raise FixityError('file is missing')
        file_stat = os.stat(local_path)
        file_length = file_stat.st_size
        file_time = file_stat.st_mtime
        time_delta = file_time - fix_time
        fix_date = iso8601(time.gmtime(fix_time))
        file_date = iso8601(time.gmtime(file_time))
        if fix_length != file_length:
            raise FixityError('file was %d bytes at fix time of %s, but is %d bytes as of %s' % (fix_length, fix_date, file_length, file_date))
        if local_path == fix_local_path and time_delta > self.time_threshold:
            checksum = sha1_file(local_path)
            if checksum != sha1:
                raise FixityError('file modified at %s, after fix date of %s' % (file_date, fix_date))
            else:
                raise FixityError('file touched at %s, after fix date of %s, but checksums match' % (file_date, fix_date))
    except KeyboardInterrupt:
        raise
    except FixityError as e:
        print 'FAILED on %s: %s' % (local_path, e)
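# Hedged usage sketch (illustrative, not part of the original module): driving
# compare() over a collection of fixity records. The `checker` object and the
# record attributes are assumptions; the attribute names mirror the File fields
# serialized by file2dict() elsewhere in this codebase.
def _example_check_fixity(checker, records):
    for f in records:
        checker.compare(f.filename, f.local_path, f.length, f.sha1, f.fix_time)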
def product2dict(product):
    d = {
        'id': product.id,
        'pid': product.pid,
        STATE: product.state,
        EVENT: product.event,
        MESSAGE: product.message,
        TTL: product.ttl,
        'ts': iso8601(product.ts.timetuple()),
        'expires': iso8601(product.expires.timetuple()) if product.expires is not None else None
    }
    return d
def binpid2zip(pid, outfile, log_callback=None):
    """Generate a zip file given a canonical pid"""
    def log(msg):
        if log_callback is not None:
            log_callback(msg)
    parsed = parse_pid(pid)
    bin_pid = ''.join([parsed[NAMESPACE], parsed[BIN_LID]])
    timestamp = iso8601(strptime(parsed[TIMESTAMP], parsed[TIMESTAMP_FORMAT]))
    log('copying raw data for %s to temp files ...' % bin_pid)
    with tempfile.NamedTemporaryFile() as hdr_tmp:
        hdr_path = hdr_tmp.name
        drain(UrlSource(bin_pid+'.hdr'), LocalFileSink(hdr_path))
        hdr = parse_hdr_file(hdr_path)
        with tempfile.NamedTemporaryFile() as adc_tmp:
            adc_path = adc_tmp.name
            drain(UrlSource(bin_pid+'.adc'), LocalFileSink(adc_path))
            adc = Adc(adc_path, parsed[SCHEMA_VERSION])
            unstitched_targets = add_pids(adc.get_targets(), bin_pid)
            stitched_targets = list_stitched_targets(unstitched_targets)
            with tempfile.NamedTemporaryFile() as roi_tmp:
                roi_path = roi_tmp.name
                drain(UrlSource(bin_pid+'.roi'), LocalFileSink(roi_path))
                canonical_pid = bin_pid
                log('copied raw data for %s' % canonical_pid)
                # bin2zip arguments:
                # * parsed_pid - result of parsing pid
                # * canonical_pid - canonicalized with URL prefix
                # * targets - list of (stitched) targets
                # * hdr - result of parsing header file
                # * timestamp - timestamp (FIXME in what format?)
                # * roi_path - path to ROI file
                # * outfile - where to write resulting zip file
                log('creating zip file for %s' % bin_pid)
                with open(outfile, 'wb') as fout:
                    return bin2zip(parsed, bin_pid, stitched_targets, hdr, timestamp, roi_path, fout)
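# Hedged usage sketch: generating a zip for one bin and echoing progress to stdout.
# The pid and output path are illustrative placeholders, not real identifiers.
def _example_binpid2zip():
    def log_print(msg):
        print msg
    binpid2zip('http://example.org/mvco/IFCB1_2011_001_123456',
               '/tmp/IFCB1_2011_001_123456.zip', log_callback=log_print)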
def random_annotation(self):
    p = gen_id(self.namespace)
    i = gen_id(self.namespace)
    b = [[123, 234], [345, 456]] # tuples do not survive JSON roundtripping
    t = gen_id(self.namespace)
    a = gen_id(self.namespace)
    ts = iso8601()
    return structs(timestamp=ts, pid=p, image=i, geometry=b, category=t, annotator=a)
def log(self, message):
    """Call this in run_callback to send messages to the log exchange"""
    debug('log %s' % message)
    ename = self.qname + '_log'
    if self.log_channel is None:
        self.log_channel, self.log_connection = declare_log_exchange(ename, self.host)
    prefix = '%s %s ' % (iso8601(), self.workerid)
    log(prefix + message, ename, channel=self.log_channel)
def annotations_for(fin):
    for raw in csv.DictReader(fin, ['bin','roi','category','annotator']):
        yield {
            PID: gen_id(ANNOTATION_NAMESPACE),
            IMAGE: DATA_NAMESPACE + raw['bin'].replace('.mat','_') + raw['roi'],
            TIMESTAMP: iso8601(),
            CATEGORY: raw['category'],
            ANNOTATOR: raw['annotator'],
        }
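# Hedged usage sketch: one made-up CSV row pushed through annotations_for(); the
# column values are illustrative, the real inputs are the IFCB*.csv manual
# annotation exports read by the loader script.
def _example_annotations_for():
    from StringIO import StringIO
    row = 'IFCB1_2011_001_123456.mat,00042,Diatom,annotator1\n'
    for ann in annotations_for(StringIO(row)):
        print ann[IMAGE], ann[CATEGORY], ann[ANNOTATOR]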
def file2dict(f):
    return {
        'filename': f.filename,
        'filetype': f.filetype,
        'length': f.length,
        'sha1': f.sha1,
        'fix_time': iso8601(f.fix_time.timetuple()),
        'local_path': f.local_path
    }
def view_metrics(ts_label, metrics):
    with Feed(session, ts_label) as feed:
        # FIXME configurable time range
        for b in feed.latest():
            break
        then = iso8601(b.sample_time.timetuple())
    tmpl = {
        'static': STATIC,
        'timeseries': ts_label,
        'metrics': [{
            'endpoint': '/%s/api/feed/%s/end/%s' % (ts_label, metric, then),
            'metric': metric,
            'y_label': metric
        } for metric in metrics]
    }
    return template_response('instrument.html', **tmpl)
def win_callback(self, params):
    start_time = params['start_time']
    end_time = iso8601()
    img_in = params['img_in']
    tmp_out = params['img_out'] # temporary output file
    img_out = params['final_out'] # final output file
    process_id = gen_id()
    if not os.path.exists(tmp_out):
        self.log('FAIL temporary output file does not exist: %s' % tmp_out)
        return FAIL
    in_md5 = md5_file(img_in)
    out_md5 = md5_file(tmp_out)
    in_length = os.stat(img_in).st_size
    out_length = os.stat(tmp_out).st_size
    used = {
        'process_id': process_id,
        'algorithm_id': '201203_ic', # FIXME uncontrolled
        'direction': 'used',
        # FIXME local id
        'pathname': img_in, # FIXME local pathname
        'no_earlier_than': start_time,
        'no_later_than': end_time,
        'fixity_md5': in_md5,
        'fixity_length': in_length
    }
    generated_by = {
        'process_id': process_id,
        'algorithm_id': '201203_ic', # FIXME uncontrolled
        'direction': 'generated by',
        # FIXME local id
        'pathname': img_out, # FIXME local pathname
        'no_earlier_than': start_time,
        'no_later_than': end_time,
        'fixity_md5': out_md5,
        'fixity_length': out_length
    }
    # FIXME emit provenance record
    prov_qname = '%s_prov' % self.qname
    try:
        self.enqueue(json.dumps(used), prov_qname)
        self.enqueue(json.dumps(generated_by), prov_qname)
    except:
        raise JobExit('Failed to enqueue provenance records', FAIL)
    try:
        os.rename(tmp_out, img_out)
    except:
        raise JobExit('Cannot move temporary file into place: %s -> %s' % (tmp_out, img_out), FAIL)
    return WIN
def resolve(pid):
    """Resolve a URL to some data endpoint in a time series, including bin and target
    metadata endpoints, and image endpoints"""
    # use the PID resolver (which also works for LIDs)
    hit = resolve_pid(pid)
    if hit is None:
        abort(404)
    # construct the namespace from the configuration and time series ID
    try:
        hit.date = iso8601(strptime(hit.date, hit.date_format))
    except:
        abort(404) # if the name is malformed, there's no resource to serve
    # determine extension
    if hit.extension is None: # default is .rdf
        hit.extension = 'rdf'
    # determine MIME type
    filename = '%s.%s' % (hit.lid, hit.extension)
    (mimetype, _) = mimetypes.guess_type(filename)
    if mimetype is None:
        mimetype = 'application/octet-stream'
    # is this request for a product?
    if hit.product is not None:
        if re.match(r'blob.*', hit.product):
            return serve_blob(hit.time_series, hit.pid)
        if re.match(r'features', hit.product):
            return serve_features(hit.time_series, hit.pid)
        if re.match(r'class_scores', hit.product):
            return serve_class_scores(hit.time_series, hit.pid)
    # is the request for a single target?
    if hit.target is not None:
        hit.target_no = int(hit.target) # parse target number
        if major_type(mimetype) == 'image': # need an image?
            mask = False
            if hit.product == 'stitch2':
                return serve_roi(hit, mask=None, stitch_version=2)
            if hit.product == 'mask':
                mask = True
            return serve_roi(hit, mask=mask) # serve it, or its mask
        else: # otherwise serve metadata
            hit.target_pid = hit.namespace + hit.lid # construct target pid
            return serve_target(hit, mimetype)
    else: # nope, it's for a whole bin
        return serve_bin(hit, mimetype)
    # nothing recognized, so return Not Found
    abort(404)
def feed_response(time_series, dicts, format='json'):
    app.logger.debug(dicts)
    if len(dicts) > 0:
        max_date = max([entry['date'] for entry in dicts]) # FIXME doesn't work for RFC822
    else:
        max_date = iso8601() # now
    ns = get_namespace(time_series)
    context = dict(max_date=max_date, namespace=ns, feed=dicts)
    if format == 'json':
        return jsonr(dicts)
    if format == 'html':
        return template_response('feed.html', **context)
    elif format == 'atom':
        #return template_response('feed.atom', mimetype='application/xml+atom', ttl=feed_ttl, **context)
        return template_response('feed.atom', **context)
    elif format == 'rss':
        #return template_response('feed.rss', mimetype='application/xml+rss', ttl=feed_ttl, **context)
        return template_response('feed.rss', **context)
def serve_after_before(ts_label, after_before, n=1, pid=None):
    if after_before not in ['before', 'after']:
        abort(400)
    try:
        parsed = next(ifcb().pid(pid))
    except StopIteration:
        abort(404)
    bin_lid = parsed['bin_lid']
    with Feed(session, ts_label) as feed:
        if after_before == 'before':
            bins = list(feed.before(bin_lid, n))
        else:
            bins = list(feed.after(bin_lid, n))
    resp = []
    for bin in bins:
        sample_time_str = iso8601(bin.sample_time.timetuple())
        pid = canonicalize(get_url_root(), ts_label, bin.lid)
        resp.append(dict(pid=pid, date=sample_time_str))
    return Response(json.dumps(resp), mimetype=MIME_JSON)
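# Hedged example of the response body (values illustrative): a JSON list with one
# {"pid": ..., "date": ...} entry per neighboring bin, e.g.
# [{"pid": "http://example.org/mvco/IFCB1_2011_001_123456", "date": "<ISO 8601 sample time>"}]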
def get_parameters(self, message):
    img_in = message
    if not os.path.exists(img_in):
        self.log('FAIL input file does not exist: %s %s' % (self.config.out_dir, img_in))
        raise JobExit('Input file does not exist: %s' % img_in, FAIL)
    img_out_file = re.sub(self.config.in_prefix, '', re.sub(r'\.tif', '.png', img_in)).lstrip('/')
    print 'img_out_file = %s' % img_out_file
    img_out = os.path.join(self.config.out_dir, img_out_file)
    print 'img_out = %s' % img_out
    if os.path.exists(img_out):
        self.log('SKIP output file already exists: %s %s %s' % (self.config.out_dir, img_in, img_out))
        raise JobExit('Output file already exists: %s' % img_out, SKIP)
    od = os.path.dirname(img_out)
    if freespace(od) < 104857600: # 100MB
        msg = 'free disk space in output location <100MB: %s' % od
        self.log('WARNING %s' % msg)
        raise JobExit(msg, FAIL)
    if not os.path.exists(od):
        os.makedirs(od)
    tmp_out = re.sub(r'\.png', '_part.png', img_out)
    return dict(img_in=img_in, img_out=tmp_out, final_out=img_out, start_time=iso8601())
def deposit(pid):
    req = DashboardRequest(pid, request)
    try:
        destpath = files.get_product_destination(session, pid)
    except NotFound:
        abort(404)
    product_data = request.data
    destpath_part = '%s_%s.part' % (destpath, gen_id())
    try:
        os.makedirs(os.path.dirname(destpath))
    except:
        pass
    with open(destpath_part, 'w') as out:
        shutil.copyfileobj(StringIO(product_data), out)
    os.rename(destpath_part, destpath)
    utcnow = iso8601()
    message = '%s wrote %d bytes to %s' % (utcnow, len(product_data), destpath)
    return Response(json.dumps(dict(
        status='OK',
        time=utcnow,
        message=message,
        pid=pid,
        path=destpath
    )), mimetype=MIME_JSON)
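# Hedged client sketch: POSTing product bytes to whatever route is mapped onto
# deposit() elsewhere in this app. `deposit_url` is a placeholder and `requests`
# is assumed to be available; the expected reply is the JSON dict built above.
def _example_deposit(deposit_url, blob_path):
    import requests
    with open(blob_path, 'rb') as f:
        return requests.post(deposit_url, data=f.read()).json()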
def get_timestamp(parsed_pid):
    return iso8601(strptime(parsed_pid[TIMESTAMP], parsed_pid[TIMESTAMP_FORMAT]))
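# Hedged usage sketch: parsed_pid is expected to be a dict carrying TIMESTAMP and
# TIMESTAMP_FORMAT entries, e.g. the result of parse_pid() used elsewhere in this
# codebase; the pid below is an illustrative placeholder.
def _example_get_timestamp():
    parsed = parse_pid('http://example.org/mvco/IFCB1_2011_001_123456')
    return get_timestamp(parsed)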
import re

#MOUNT_POINT = '/Volumes/d_work'
#DATA_DIR = os.path.join(MOUNT_POINT,'IFCB1','ifcb_data_mvco_jun06','Manual_fromClass','annotations_csv')
MOUNT_POINT = '/Users/jfutrelle/dev/ifcb'
DATA_DIR = os.path.join(MOUNT_POINT, 'annotations_csv')

def annotations_for(fin):
    for raw in csv.DictReader(fin, ['bin','roi','category','annotator']):
        yield {
            PID: gen_id(ANNOTATION_NAMESPACE),
            IMAGE: DATA_NAMESPACE + raw['bin'].replace('.mat','_') + raw['roi'],
            TIMESTAMP: iso8601(),
            CATEGORY: raw['category'],
            ANNOTATOR: raw['annotator'],
        }

store = PsqlAnnotationStore(sys.argv[1]) # arg must be full psql connect string: dbname=xxx user=xxx password=xxx
print 'initializing store...'
store.create(False)
for file in os.listdir(DATA_DIR):
    if re.match(r'IFCB.*\.csv', file):
        with open(os.path.join(DATA_DIR, file), 'r') as fin:
            anns = list(annotations_for(fin))
            store.bulk_create_annotations(anns)
            now = iso8601()
            print '%s created %d annotation(s) for %s' % (now, len(anns), file)
print 'creating indexes ... this will take a long time'
store.create_indexes()
print 'done'
def canonicalize_bin(ts_label, b):
    return {
        'pid': canonicalize(get_url_root(), ts_label, b.lid),
        'date': iso8601(b.sample_time.timetuple())
    }
def to_url(self, value):
    return iso8601(value.timetuple())
def message(msg='WARNING'):
    return ' '.join([iso8601(), str(msg)])