def check_pushzone(pzdir, destdir):
    '''Scan a push zone and report the files that cannot be pushed.

    Every file found under `pzdir` is parsed against the patterns of a
    CloudyCheckbase built on `pzdir`. A file that parses gets its
    'database' attribute replaced by `destdir`; the path rebuilt from
    those attributes is then tested for existence on disk. Database
    entries are not inspected by this function.

    Returns a pair of lists: the files matching no pattern, and the
    rebuilt destination paths that already exist.
    '''
    import os
    import os.path as osp
    from pluricent import checkbase as cb

    checker = cb.CloudyCheckbase(pzdir)
    unknown, already_existing = [], []

    for dirpath, _subdirs, filenames in os.walk(pzdir):
        for name in filenames:
            source = osp.join(dirpath, name)
            print(source)
            parsed = cb.parsefilepath(source, checker.patterns)
            if parsed is None:
                unknown.append(source)
                continue

            datatype, att = parsed
            # NOTE(review): push_to_repo rebuilds its destination with
            # osp.join(destdir, <study dir>) as the database root, whereas
            # this check uses destdir itself - confirm that both are meant
            # to resolve to the same location.
            att['database'] = destdir
            target = cb.getfilepath(datatype, att, checker.patterns)
            if osp.exists(target):
                already_existing.append(target)

    return unknown, already_existing
def test_respect_hierarchy(destdir):
    '''Check that every file under `destdir` is identified by the hierarchy.

    Each file is parsed against the patterns of a CloudyCheckbase built on
    `destdir`; files matching no pattern are collected as unknown. The
    file pluricent.db located directly in `destdir` is tolerated and
    removed from that list.

    Returns True if the unknown list is empty once the tolerated entries
    have been removed.
    '''
    import os
    import os.path as osp
    from pluricent import checkbase as cb

    checker = cb.CloudyCheckbase(destdir)
    unknown = []
    scanned = 0

    for dirpath, _subdirs, filenames in os.walk(destdir):
        for name in filenames:
            path = osp.join(dirpath, name)
            scanned += 1
            print(path)
            if cb.parsefilepath(path, checker.patterns) is None:
                unknown.append(path)

    # The summary is printed before the tolerated files are filtered out.
    print('%s %s' % ('unknown', unknown))
    print('%s %s' % ('scanned items :', scanned))

    # Files allowed to live in the repository without matching a pattern
    tolerated = [osp.join(destdir, name) for name in ['pluricent.db']]
    nb_excep = 0
    for path in tolerated:
        if path in unknown:
            unknown.remove(path)
            nb_excep += 1
    print('%s %s' % ('exceptions :', nb_excep))

    return not unknown
def push_to_repo(pzdir, destdir):
    '''Walk through the push of the images found in `pzdir` to `destdir`.

    This function is only meant to push images, provided the studies and
    subjects they belong to have already been created in the database.

    The push zone is first validated with check_pushzone. If it reports
    unknown or already existing items, they are printed and nothing else
    happens. Otherwise, for every file, the destination path is rebuilt
    under osp.join(destdir, <study directory name>) and the function
    checks that the study directory and the subject are known to the
    database.

    The copy, the action creation and the database insertion are not
    implemented yet: those steps are only announced on standard output.

    Raises AssertionError if a study directory or a subject is not found
    in the database.
    '''
    unknown, already_existing = check_pushzone(pzdir, destdir)
    from pluricent import checkbase as cb
    import os
    import os.path as osp
    import pluricent as pl
    from pluricent.web import settings
    cl = cb.CloudyCheckbase(pzdir)

    if unknown or already_existing:
        print('pushzone errors')
        print('unknown %s' % (unknown,))
        print('already_existing %s' % (already_existing,))
        return

    print('pushzone ok')
    # The session does not depend on the file being processed, so it is
    # created once instead of once per file.
    s = pl.create_session(settings.DATABASE)

    for root, dirs, files in os.walk(pzdir):
        for f in files:
            fp = osp.join(root, f)
            # check_pushzone found no unknown file, so parsing is expected
            # to succeed here.
            datatype, att = cb.parsefilepath(fp, cl.patterns)
            study_dir = osp.split(att['database'])[-1]
            att['database'] = osp.join(destdir, study_dir)
            fp2 = cb.getfilepath(datatype, att, cl.patterns)
            print('cp %s %s' % (fp, fp2))

            print('...checking directory %s already referring to a study'
                  % study_dir)
            # Verify that a study matches *before* picking the first one,
            # so that a missing study fails with an explicit message
            # rather than a bare IndexError.
            candidates = [e for e in pl.studies(s)
                          if study_dir == pl.study_dir(s, e)]
            assert candidates, \
                'no study refers to directory %s' % study_dir
            study = candidates[0]
            print('=> yes (%s)' % study)

            print('...checking %s already exists in %s'
                  % (att['subject'], study))
            subjects = pl.subjects(s, study)
            assert att['subject'] in subjects, \
                'subject %s not found in study %s' % (att['subject'], study)
            print('=> yes')

            # relpath stays correct when pzdir ends with a path separator.
            print('...checking that the image %s is not already existing'
                  % osp.relpath(fp, pzdir))
            # TODO: the images are fetched but not compared with the
            # pushed file yet.
            t1images = pl.t1images(s, study)

            print('...creating action')
            # TODO: register the action (pl.add_action)
            print('...copying file')
            print('...adding entry in database')
def test_matching_t1images():
    '''Check that the T1 image entries of the database match the raw
    files found in the repository.

    The repository is the directory holding the database file given by
    global_settings()['database']. Every file under it that parses as
    datatype 'raw' is compared, as a path relative to the repository,
    with the `path` of every entry returned by Pluricent.t1images().
    Each item present on one side only is printed.

    Returns True if no mismatch was found in either direction.
    '''
    # NOTE: a second definition of test_matching_t1images follows later in
    # this file; being defined last, that one is bound to the name at
    # import time.
    import os
    import os.path as osp
    from pluricent import checkbase as cb

    # global_settings and pl are resolved from the module scope.
    db = global_settings()['database']
    p = pl.Pluricent(db)
    destdir = osp.dirname(db)
    cl = cb.CloudyCheckbase(destdir)
    print(destdir)

    # Length of the "<destdir><separator>" prefix stripped from each path
    prefix = len(destdir) + 1
    scanned = 0
    raw_files = []
    for root, dirs, files in os.walk(destdir):
        for f in files:
            scanned += 1
            fp = osp.join(root, f)
            res = cb.parsefilepath(fp, cl.patterns)
            if res is None:
                continue
            datatype, att = res
            if datatype == 'raw':
                raw_files.append(fp[prefix:])

    raw_entries = [e.path for e in p.t1images()]

    # Comparing raw_files and raw_entries. Sets give constant-time
    # membership tests; the lists are still iterated so that the report
    # order is unchanged.
    known_entries = set(raw_entries)
    known_files = set(raw_files)
    matching = True
    for f in raw_files:
        if f not in known_entries:
            print('%s missing from raw_entries' % (f,))
            matching = False
    for f in raw_entries:
        if f not in known_files:
            print('%s missing from raw_files' % (f,))
            matching = False

    print('items in %s : %s' % (destdir, scanned))
    print('entries in db: %s' % len(raw_entries))
    return matching
def test_matching_t1images():
    '''Check that the T1 image entries of the database match the raw
    files found in the repository.

    The repository is the directory holding the database file given by
    global_settings()['database']. Every file under it that parses as
    datatype 'raw' is compared, as a path relative to the repository,
    with the `path` of every entry returned by Pluricent.t1images().
    Each item present on one side only is printed.

    Returns True if no mismatch was found in either direction.
    '''
    # NOTE: an earlier definition of test_matching_t1images precedes this
    # one in the file; this later definition is the one bound to the name
    # at import time.
    import os
    import os.path as osp
    from pluricent import checkbase as cb

    # global_settings and pl are resolved from the module scope.
    db = global_settings()['database']
    p = pl.Pluricent(db)
    destdir = osp.dirname(db)
    cl = cb.CloudyCheckbase(destdir)
    print(destdir)

    # Length of the "<destdir><separator>" prefix stripped from each path
    prefix = len(destdir) + 1
    scanned = 0
    raw_files = []
    for root, dirs, files in os.walk(destdir):
        for f in files:
            scanned += 1
            fp = osp.join(root, f)
            res = cb.parsefilepath(fp, cl.patterns)
            if res is None:
                continue
            datatype, att = res
            if datatype == 'raw':
                raw_files.append(fp[prefix:])

    raw_entries = [e.path for e in p.t1images()]

    # Comparing raw_files and raw_entries. Sets give constant-time
    # membership tests; the lists are still iterated so that the report
    # order is unchanged.
    known_entries = set(raw_entries)
    known_files = set(raw_files)
    matching = True
    for f in raw_files:
        if f not in known_entries:
            print('%s missing from raw_entries' % (f,))
            matching = False
    for f in raw_entries:
        if f not in known_files:
            print('%s missing from raw_files' % (f,))
            matching = False

    print('items in %s : %s' % (destdir, scanned))
    print('entries in db: %s' % len(raw_entries))
    return matching
def populate_from_directory(self, rootdir, answer_yes=False):
    '''Rebuild the database from the contents of a repository directory.

    `rootdir` should be the root directory containing multiple studies:
    its only regular file must be pluricent.db and each of its
    sub-directories is processed as one study.

    For every study the function checks the hierarchy
    (tests.test_respect_hierarchy), reads the study name from its
    dataset description file and queues, in this order: one 'add_study'
    action, one 'add_subject' action per sub-directory of the study, then
    one 'add_image' / 'add_processing' / 'add_measurements' action per
    recognised file. Paths stored in the actions are relative to
    `rootdir`.

    The action list is printed, then two confirmations are requested
    (both skipped when `answer_yes` is true). Once confirmed, the
    database is recreated with models.create_database and the actions
    are run with self.make_actions.

    Raises Exception if `rootdir` holds regular files other than
    pluricent.db, and AssertionError if a study does not respect the
    hierarchy. A missing dataset description file is reported, then the
    attempt to open it fails.
    '''
    import json
    import os
    import os.path as osp

    rootdir = osp.abspath(rootdir)
    entries = os.listdir(rootdir)
    # os.listdir never reports '.' or '..', so no extra filtering is needed.
    dirlist = [e for e in entries if osp.isdir(osp.join(rootdir, e))]
    filelist = [e for e in entries if osp.isfile(osp.join(rootdir, e))]
    # Validate the layout before loading anything else, to fail fast.
    if filelist != ['pluricent.db']:
        raise Exception(
            '%s should contain only pluricent.db and study folder (contains %s)'
            % (rootdir, filelist))

    from pluricent import checkbase as cb
    from pluricent import tests

    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3

    # Datatypes queued as processing outputs of a raw T1 image
    processed_datatypes = frozenset([
        'left_greywhite', 'right_greywhite', 'nobias', 'spm_nobias',
        'split', 'brainmask', 'left_white', 'right_white', 'left_hemi',
        'right_hemi', 'left_sulci', 'right_sulci', 'spm_greymap',
        'spm_whitemap', 'spm_csfmap', 'spm_greymap_warped',
        'spm_whitemap_warped', 'spm_csfmap_warped',
        'spm_greymap_modulated', 'spm_whitemap_modulated',
        'spm_csfmap_modulated',
    ])

    # Length of the "<rootdir><separator>" prefix stripped from stored
    # paths; rootdir was normalised by abspath above.
    prefix = len(rootdir) + 1
    actions = []

    # Then, go for the browsing
    for each in dirlist:
        studydir = osp.join(rootdir, each)
        print('processing %s' % studydir)
        assert tests.test_respect_hierarchy(studydir)
        cl = cb.CloudyCheckbase(studydir)

        # first look for dataset_description.json and add study
        description = cb.getfilepath(
            'dataset_description', {'database': studydir}, cl.patterns)
        if not osp.exists(description):
            # Only reported here: the open() below then fails on the
            # missing file, which stops the whole import.
            print('%s is missing' % (description,))
        with open(description) as handle:
            studyname = json.load(handle)['name']
        actions.append([
            'add_study', {
                'name': studyname,
                'directory': studydir[prefix:],
                'description_file':
                    description[len(osp.dirname(description)) + 1:],
            }
        ])
        print('study %s (%s)' % (studyname, studydir))

        # Every sub-directory of the study is registered as a subject.
        for s in os.listdir(studydir):
            if osp.isdir(osp.join(studydir, s)):
                actions.append(
                    ['add_subject', {'subjects': [s], 'study': studyname}])

        for root, dirs, files in os.walk(studydir):
            for f in files:
                fp = osp.join(root, f)
                res = cb.parsefilepath(fp, cl.patterns)
                if res is None:
                    continue
                datatype, att = res
                if datatype == 'raw':
                    actions.append([
                        'add_image', {
                            'path': fp[prefix:],
                            'study': studyname,
                            'subject': att['subject'],
                        }
                    ])
                elif datatype in processed_datatypes:
                    # The raw image path is rebuilt from the same
                    # attributes and recorded as the processing input.
                    t1image = cb.getfilepath('raw', att, cl.patterns)
                    if datatype.startswith('spm'):
                        software = 'spm8'
                    else:
                        software = 'morphologist'
                    actions.append([
                        'add_processing', {
                            'path': fp[prefix:],
                            'inputfp': t1image[prefix:],
                            'datatype': datatype,
                            'software': software,
                        }
                    ])
                elif datatype == 'measurements':
                    actions.append([
                        'add_measurements', {
                            'csvfile': fp[prefix:],
                            'study': studyname,
                        }
                    ])

    print(actions)
    print('%s actions to make' % len(actions))
    if not (answer_yes or ask('proceed ? y/n') == 'y'):
        return
    print('warning: erasing database contents')
    if not (answer_yes or ask('proceed ? y/n') == 'y'):
        return
    models.create_database(self.filepath, from_existing_repository=True)
    self.make_actions(actions)