Пример #1
0
def enqueue_blobs(time_series, queue):
    """Enqueue blob-extraction tasks for recent bins that lack blobs.

    config needs psql_connect, resolver.

    :param time_series: name of the time series to scan
    :param queue: name of the celery queue to dispatch tasks to
    """
    config = get_config(CONFIG_FILE, time_series)
    feed = IfcbFeed(config.psql_connect)
    r = parse_stream(config.resolver)
    blob_resolver = r['mvco_blob']
    pid_resolver = r['pid']
    for lid in feed.latest_bins(n=10000):
        # only enqueue bins for which no blob product resolves yet
        if blob_resolver.resolve(pid=lid, time_series=time_series) is None:
            pid = pid_resolver.resolve(pid=lid, time_series=time_series).bin_pid
            # parenthesized print is valid in both Python 2 and Python 3
            print('No blobs found for %s, enqueuing' % pid)
            extract_blobs.apply_async(args=[time_series, pid], queue=queue)
Пример #2
0
def list_new_filesets(time_series, psql_connect, resolver, after_year=2012):
    """Yield resolved raw filesets that are not yet present in the time series.

    :param time_series: name of the time series to check against
    :param psql_connect: postgres connection string for the feed database
    :param resolver: path/stream of the resolver configuration
    :param after_year: only consider ADC files from after this year
    :yields: resolved fileset objects (hdr/adc/roi paths) for new bins
    """
    feed = IfcbFeed(psql_connect)
    r = parse_stream(resolver)
    for s in list_adcs(time_series, resolver, after_year):
        if feed.exists(s.pid):
            logging.info('%s EXISTS in time series %s' % (s.pid, time_series))
        else:
            logging.info('%s NEW, not already in time series %s' % (s.pid, time_series))
            fs = r['fileset'].resolve(pid=s.pid, product='raw', time_series=time_series, day_dir=s.day_dir)
            if fs is None:
                # warning(): warn() is a deprecated alias in the logging module
                logging.warning('%s UNRESOLVABLE cannot find raw files' % s.pid)
            else:
                yield fs
Пример #3
0
def enqueue_features(time_series, queue):
    """Enqueue feature-extraction tasks for recent bins that have blobs but no features.

    config needs psql_connect, resolver.

    :param time_series: name of the time series to scan
    :param queue: name of the celery queue to dispatch tasks to
    """
    config = get_config(CONFIG_FILE, time_series)
    feed = IfcbFeed(config.psql_connect)
    r = parse_stream(config.resolver)
    blob_resolver = r['mvco_blob']
    feature_resolver = r['features']
    pid_resolver = r['pid']
    for lid in feed.latest_bins(n=5000):
        # features depend on blobs, so require blobs to exist first
        if blob_resolver.resolve(pid=lid, time_series=time_series) is not None:
            pid = pid_resolver.resolve(pid=lid, time_series=time_series).bin_pid
            if feature_resolver.resolve(pid=lid, time_series=time_series) is None:
                # parenthesized print is valid in both Python 2 and Python 3
                print('found blobs but no features for %s' % pid)
                extract_features.apply_async(args=[time_series, pid], queue=queue)
Пример #4
0
def accede(config_file, time_series):
    """Accession pass: integrity-check, fix, and register new filesets.

    For each new fileset in the time series: verify hdr/adc/roi integrity,
    compute fixity records for all three files, and register the bin in the
    feed. Each fileset is committed independently; failures roll back and
    move on to the next fileset.

    :param config_file: path to the config file (needs psql_connect, resolver)
    :param time_series: name of the time series to accede into
    """
    config = get_config(config_file, time_series)
    logging.info('parsed config file %s:%s' % (config_file, time_series))
    fx = IfcbFixity(config.psql_connect)
    feed = IfcbFeed(config.psql_connect)
    # fall back to matching any four-character year when config omits it
    year_pattern = getattr(config, 'year_pattern', '....')
    with xa(config.psql_connect) as (c, db):
        for s in list_new_filesets(time_series,config.psql_connect,config.resolver,year_pattern=year_pattern): # FIXME hardcoded
            try:
                check_integrity(s.pid, s.hdr_path, s.adc_path, s.roi_path, s.schema_version)
            except Exception as e:  # 'as' form works on Python 2.6+ and Python 3
                logger.warn('%s FAIL integrity checks: %s' % (s.pid, e))
                continue
            # hot diggity, we've got some good data
            # compute fixity
            try:
                fx.fix(s.pid, s.hdr_path, cursor=db, filetype='hdr')
                logger.info('%s FIXITY computed for %s' % (s.pid, s.hdr_path))
                fx.fix(s.pid, s.adc_path, cursor=db, filetype='adc')
                logger.info('%s FIXITY computed for %s' % (s.pid, s.adc_path))
                fx.fix(s.pid, s.roi_path, cursor=db, filetype='roi')
                logger.info('%s FIXITY computed for %s' % (s.pid, s.roi_path))
            except Exception:  # was bare except: don't swallow SystemExit/KeyboardInterrupt
                logger.error('%s FAIL fixity cannot be computed!' % s.pid)
                c.rollback()
                continue
            # register bin
            try:
                ts = text2utcdatetime(s.date, s.date_format)
                feed.create(s.pid, ts, cursor=db)
                c.commit()
                logger.info('%s DONE' % s.pid)
            except Exception:  # was bare except; see note above
                logger.error('%s FAILED' % s.pid)
                continue
Пример #5
0
    # NOTE(review): fragment — the enclosing def begins before this excerpt,
    # so the hdr check presumably happened above; confirm against the full file.
    logging.info('%s PASS integrity check %s' % (pid, hdr_path))
    # materialize targets so check_roi can cross-check ROI data against the ADC
    targets = list(integrity.check_adc(LocalFileSource(adc_path), schema_version=schema_version))
    logging.info('%s PASS integrity check %s' % (pid, adc_path))
    integrity.check_roi(LocalFileSource(roi_path), targets)
    logging.info('%s PASS integrity check %s' % (pid, roi_path))

if __name__=='__main__':
    # usage: accession.py <config file> <time series name>
    try:
        time_series=sys.argv[2]
        config = get_config(sys.argv[1], time_series)
    except:
        sys.stderr.write('usage: [python] oii/ifcb/accession.py [config file] [time series name]\n')
        sys.exit(-1)
    logging.basicConfig(level=logging.INFO)
    fx = IfcbFixity(config.psql_connect)
    feed = IfcbFeed(config.psql_connect)
    # one transaction context for the run; commit/rollback is per fileset
    with xa(config.psql_connect) as (c, db):
        for s in list_new_filesets(time_series,config.psql_connect,config.resolver,after_year=2005): # FIXME hardcoded
            try:
                check_integrity(s.pid, s.hdr_path, s.adc_path, s.roi_path, s.schema_version)
            except Exception, e:
                logging.warn('%s FAIL integrity checks: %s' % (s.pid, e))
                continue
            # hot diggity, we've got some good data
            # compute fixity
            try:
                fx.fix(s.pid, s.hdr_path, cursor=db, filetype='hdr')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.hdr_path))
                fx.fix(s.pid, s.adc_path, cursor=db, filetype='adc')
                logging.info('%s FIXITY computed for %s' % (s.pid, s.adc_path))
                fx.fix(s.pid, s.roi_path, cursor=db, filetype='roi')
                # NOTE(review): excerpt truncated here — the roi fixity logging
                # and remainder of the loop continue beyond this view
Пример #6
0
# CLI: optional time-series name argument; defaults to 'mvco'
try:
    time_series = sys.argv[1]
except:
    time_series = 'mvco'

config = get_config('./db.conf',time_series)
outdir = config.outdir

# full libpq connection string: base params plus database name
psql_connect = '%s dbname=%s' % (config.psql_connect, config.dbname)

R = parse_stream(config.resolver)

# namespace prefix used to build bin pids from lids
NAMESPACE='http://demi.whoi.edu/mvco/'

feed = IfcbFeed(psql_connect)

# fixed query window — presumably the deployment span of interest; TODO confirm
start=strptime('2005-01-01T00:00:00Z',ISO_8601_FORMAT);
end=strptime('2014-01-01T00:00:00Z',ISO_8601_FORMAT);

with xa(psql_connect) as (c, db):
    bin_lids = list(feed.between(start,end))

# fan out over N forked child processes, each taking an interleaved
# slice of the bin list and writing to its own output file
N=8
pids = []
for n in range(N):
    pid = os.fork()
    if pid == 0:
        # child process n writes its scores to scores_<n>.csv
        outfile = os.path.join(outdir,'scores_%d.csv' % n)
        with open(outfile,'w') as of:
            for bin_lid in bin_lids[n::N]:
                # NOTE(review): excerpt truncated — loop body not visible here
Пример #7
0
from oii.times import text2utcdatetime, ISO_8601_FORMAT

# CLI: optional time-series name argument; defaults to 'mvco'
try:
    time_series = sys.argv[1]
except:
    time_series = 'mvco'

config = get_config('./db.conf',time_series)

# full libpq connection string: base params plus database name
psql_connect = '%s dbname=%s' % (config.psql_connect, config.dbname)

R = parse_stream(config.resolver)

# namespace prefix used to build bin pids from lids
NAMESPACE='http://demi.whoi.edu/mvco/'

feed = IfcbFeed(psql_connect)

# fixed query window — presumably the deployment span of interest; TODO confirm
start=strptime('2005-01-01T00:00:00Z',ISO_8601_FORMAT);
end=strptime('2014-01-01T00:00:00Z',ISO_8601_FORMAT);

# parameterized SQL: existence check and insert for autoclass rows
s = 'select count(*) from autoclass where bin_lid=%s'
q = 'insert into autoclass (bin_lid, class_label, roinums, scores) values (%s, %s, %s, %s)'

with xa(psql_connect) as (c, db):
    n = 0
    for bin_lid in feed.between(start,end):
        # pids are namespaced lids
        bin_pid = NAMESPACE + bin_lid

        # skip bins that already have autoclass rows
        db.execute(s,(bin_lid,))
        count = db.fetchone()[0]
        if count == 0:
            # NOTE(review): excerpt truncated — the insert logic presumably follows