def _project_ftp(objectId):
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    ftp_path = config['ftpPath']
    project_dir = config['projectDir']
    ftp_project_path = os.path.join(ftp_path, project_dir, objectId)

    # Change ftp home directory to the project root.
    cmd = ('pure-pw usermod {} -d {} -m '
           '-f /etc/pure-ftpd/passwd/pureftpd.passwd').format(
               objectId, ftp_project_path)

    try:
        ftp_name = config['repoName'] + '_' + config['ftpService'] + '_1'
        client = docker.from_env()
        ftp = client.containers.get(ftp_name)

        # Run command.
        out = ftp.exec_run(cmd)
        exit_code = out[0]
        if exit_code != 0:
            raise Exception('non-zero exit code on ftp user modification')
    except Exception as e:
        print('error occurred while modifying ftp user {}'.format(objectId),
              file=sys.stderr)
        raise e

    return jsonify({'result': project.paths['root']})


def _sample_initialize(projId, objectId, name):
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=projId)

    config = Config.get()
    data_path = config['dataPath']
    sample_dir = config['sampleDir']
    sample_path = os.path.join(data_path, sample_dir, objectId)
    os.makedirs(sample_path, exist_ok=True)

    # Get analyses that apply to samples.
    sample_analyses = _get_analyses().filter(type='sample')

    paths = {}
    for analysis in sample_analyses:
        if analysis.code in project.paths:
            source_path = os.path.join(project.paths[analysis.code], name)
            target_path = os.path.join(sample_path, analysis.code)
            os.makedirs(source_path, exist_ok=True)

            rel_path = os.path.relpath(source_path,
                                       os.path.dirname(target_path))
            print(rel_path, target_path, file=sys.stderr)
            os.symlink(rel_path, target_path, target_is_directory=True)
            paths[analysis.code] = source_path

    return jsonify({'result': {'paths': paths}})


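# `_get_analyses` is called above but is not defined in this excerpt. A
# minimal sketch of what it likely does, assuming analyses are stored as
# Parse `Analysis` objects with an `active` flag (both assumptions; see
# check_images below, which queries the same class):
def _get_analyses():
    # Return a queryset of active analyses; callers chain further filters,
    # e.g. _get_analyses().filter(type='sample').
    Analysis = Object.factory('Analysis')
    return Analysis.Query.filter(active=True)

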
def _project_reads(objectId):
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    extensions = Config.get()['readExtensions']

    reads = {}
    for root, dirs, files in os.walk(project.paths['read']):
        for f in files:
            if f.endswith(tuple(extensions)):
                name = os.path.basename(
                    root.replace(project.paths['read'], ''))

                # In case files are at the root.
                if name == '':
                    name = '/'

                if name not in reads:
                    reads[name] = []

                path = os.path.join(root, f)
                size = os.path.getsize(path)
                reads[name].append({'path': path, 'size': size})
                print(path, file=sys.stderr)

    return jsonify({'result': reads})


def _project_email(objectId, subject, message):
    """Send mail with the given arguments."""
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    host = config['host']
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    datetime = dt.datetime.now()
    url = 'http://{}/?id={}'.format(host, objectId)
    fr = 'AlaskaProject_{}@{}'.format(objectId, host)
    to = project.email
    format_dict = {
        'message': message,
        'objectId': objectId,
        'url': url,
        'host': host,
        'password': project.ftpPassword,
        'to': to,
        'datetime': datetime.strftime('%Y-%m-%d %H:%M:%S') + ' PDT'
    }

    # Footer that is appended to every email.
    full_message = (
        '<html>'
        '<head></head>'
        '<body>'
        '<p>{message}</p>'
        '<br>'
        '<hr>'
        '<p>Project ID: {objectId}<br>'
        'Unique URL: <a href="{url}">{url}</a><br>'
        'FTP server: {host}<br>'
        'FTP port: 21<br>'
        'FTP username: {objectId}<br>'
        'FTP password: {password}<br>'
        'This message was sent to {to} at {datetime}.<br>'
        '<b>Please do not reply to this email.</b></p>'
        '</body>'
        '</html>').format(**format_dict)

    email = {'to': to, 'from': fr, 'subject': subject,
             'message': full_message}
    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)
    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})


def _referenceBuild(reference):
    '''Helper function that blocks until the given reference is built.'''
    # Track the running container at module scope so the SIGTERM handler
    # can see it (the module-level index_container suggests this intent).
    global index_container

    # Make sure the index hasn't been built yet.
    if reference.ready:
        return

    config = Config.get()
    index_image = config['indexImage']
    data_volume = config['repoName'] + '_' + config['dataVolume']
    data_path = config['dataPath']
    script_volume = config['repoName'] + '_' + config['scriptVolume']
    script_path = config['scriptPath']
    script = config['indexScript']
    network = config['repoName'] + '_' + config['backendNetworkName']
    cpus = config['cpus']

    # Begin container variables.
    cmd = 'python3 {} {}'.format(script, reference.objectId)
    volumes = {
        data_volume: {'bind': data_path, 'mode': 'rw'},
        script_volume: {'bind': script_path, 'mode': 'rw'}
    }
    environment = {
        'PARSE_HOSTNAME': PARSE_HOSTNAME,
        'PARSE_APP_ID': PARSE_APP_ID,
        'PARSE_MASTER_KEY': PARSE_MASTER_KEY,
        'SENTRY_DSN': os.getenv('SENTRY_INDEX_DSN', ''),
        'ENVIRONMENT': os.getenv('ENVIRONMENT', 'default')
    }
    wdir = script_path
    name = 'index-{}'.format(reference.objectId)
    print(cmd, volumes, wdir, file=sys.stderr)

    # Docker client. With detach=False this call blocks until the
    # container exits.
    client = docker.from_env()
    index_container = client.containers.run(index_image,
                                            cmd,
                                            detach=False,
                                            stderr=True,
                                            auto_remove=True,
                                            volumes=volumes,
                                            working_dir=wdir,
                                            cpuset_cpus=cpus,
                                            network=network,
                                            environment=environment,
                                            name=name)
    index_container = None


def _project_initialize(objectId):
    config = Config.get()
    data_path = config['dataPath']
    project_dir = config['projectDir']
    read_dir = config['readDir']
    ftp_path = config['ftpPath']
    project_archive = config['projectArchive']

    # Make directories.
    root_path = os.path.join(data_path, project_dir, objectId)
    read_path = os.path.join(root_path, read_dir)
    ftp_project_path = os.path.join(ftp_path, project_dir, objectId)
    ftp_read_path = os.path.join(ftp_project_path, read_dir)
    paths = {'root': root_path, 'read': read_path}

    # Make sure this is actually a new project.
    if os.path.exists(root_path):
        return jsonify({'error': 'root folder exists'})

    for _, path in paths.items():
        os.makedirs(path, exist_ok=True)

    # Make UPLOAD_HERE file.
    upload_here = os.path.join(read_path, 'UPLOAD_HERE')
    with open(upload_here, 'w') as f:
        f.write('')

    # Make ftp user.
    # Generate random password.
    passwd = _generate_password(5)

    # Begin container variables.
    cmd = ('/bin/bash -c "chmod -R 0777 {} && (echo {}; echo {}) | '
           'pure-pw useradd {} -m -f /etc/pure-ftpd/passwd/pureftpd.passwd '
           '-u ftpuser -d {}"').format(ftp_project_path, passwd, passwd,
                                       objectId, ftp_read_path)
    print(cmd, file=sys.stderr)

    try:
        ftp_name = config['repoName'] + '_' + config['ftpService'] + '_1'
        client = docker.from_env()
        ftp = client.containers.get(ftp_name)

        # Run command.
        out = ftp.exec_run(cmd)
        exit_code = out[0]
        if exit_code != 0:
            raise Exception('non-zero exit code on ftp user creation')
    except Exception as e:
        print('error occurred while making ftp user {}'.format(objectId),
              file=sys.stderr)
        raise e

    return jsonify({'result': {'paths': paths, 'ftpPassword': passwd}})


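# `_generate_password` is called above but is not defined in this excerpt.
# A minimal sketch, assuming the argument is the password length and that
# alphanumeric characters are acceptable (both assumptions):
def _generate_password(length):
    import secrets
    import string

    # Use the secrets module so the password is cryptographically random.
    chars = string.ascii_letters + string.digits
    return ''.join(secrets.choice(chars) for _ in range(length))

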
def _project_sleuth(objectId, port):
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    # Check if there is a sleuth container open for this project.
    config = Config.get()
    data_volume = config['repoName'] + '_' + config['dataVolume']
    data_path = config['dataPath']
    script_volume = config['repoName'] + '_' + config['scriptVolume']
    script_path = config['scriptPath']
    network = config['repoName'] + '_' + config['backendNetworkName']
    shiny_script = config['shinyScript']
    so_path = project.files[config['diffDir']]['sleuth']

    # Start a new docker container.
    cmd = 'Rscript {} -p {} --alaska'.format(shiny_script, so_path)
    volumes = {
        data_volume: {'bind': data_path, 'mode': 'rw'},
        script_volume: {'bind': script_path, 'mode': 'rw'}
    }
    environment = {
        'PARSE_HOSTNAME': PARSE_HOSTNAME,
        'PARSE_APP_ID': PARSE_APP_ID,
        'PARSE_MASTER_KEY': PARSE_MASTER_KEY
    }
    ports = {42427: port}
    wdir = script_path
    name = 'shiny-{}'.format(project.objectId)

    # Docker client.
    client = docker.from_env()
    container = client.containers.run(config['diffImage'],
                                      cmd,
                                      detach=True,
                                      auto_remove=True,
                                      volumes=volumes,
                                      working_dir=wdir,
                                      network=network,
                                      environment=environment,
                                      name=name,
                                      ports=ports)

    return jsonify({'result': {'containerId': container.id,
                               'containerName': name}})


def upload(project, host, username, password, fname):
    print('uploading project {}'.format(project.objectId))

    archive_path = project.files['geo']
    geo_dir = Config.get()['geoDir']

    # Open a new FTP connection.
    try:
        with ftplib.FTP(host, username, password) as conn:
            conn.cwd(geo_dir)
            with open(archive_path, 'rb') as f:
                conn.storbinary('STOR {}'.format(fname), f)
    except Exception as e:
        # Chain the original exception so the root cause isn't lost.
        raise Exception('error occurred while uploading {}'.format(
            project.objectId)) from e


def _sample_citation(objectId):
    # Get project from server.
    Sample = Object.factory('Sample')
    sample = Sample.Query.get(objectId=objectId)

    config = Config.get()
    genus = sample.reference.organism.genus
    species = sample.reference.organism.species
    ref_version = sample.reference.version

    arg = '-b {} --bias'.format(config['kallistoBootstraps'])
    if sample.readType == 'single':
        arg += ' --single -l {} -s {}'.format(sample.readLength,
                                              sample.readStd)

    format_dict = {
        'genus': genus,
        'species': species,
        'ref_version': ref_version,
        'arg': arg,
        **config
    }

    info = [
        'RNA-seq data was analyzed with the Alaska pipeline '
        '(alaska.caltech.edu).',
        ('Quality control was performed using Bowtie2 (v{versionBowtie}), '
         'Samtools (v{versionSamtools}), RSeQC (v{versionRseqc}), '
         'FastQC (v{versionFastqc}), with results aggregated with '
         'MultiQC (v{versionMultiqc}).').format(**format_dict),
        ('Reads were aligned to the {genus} {species} genome version '
         '{ref_version} as provided by Wormbase using Kallisto '
         '(v{versionKallisto}) with the following flags: {arg}').format(
             **format_dict),
        ('Differential expression analyses with Sleuth (v{versionSleuth}) '
         'were performed using a Wald Test corrected for '
         'multiple-testing.').format(**format_dict)
    ]

    if genus == 'caenorhabditis' and species == 'elegans':
        info.append('Enrichment analysis was performed using the Wormbase '
                    'Enrichment Suite.')

    return jsonify({'result': info})


def check_images():
    index_image = Config.get()['indexImage']
    Analysis = Object.factory('Analysis')
    images = [analysis.image
              for analysis in Analysis.Query.filter(active=True)]
    images.append(index_image)

    client = docker.from_env()
    for image in images:
        print(image, flush=True)
        try:
            client.images.get(image)
        except Exception as e:
            capture_exception(e)
            print('error while checking image {}'.format(image))
            sys.exit(1)


def _project_upload(project, host, username, password, geo_username):
    objectId = project.objectId
    with app.app_context():
        try:
            _project_email(
                objectId,
                'Submission started for project {}'.format(objectId),
                ('Alaska has started submitting project {} to the GEO. '
                 'You may view the progress of your upload through the '
                 'public GEO FTP.').format(objectId))

            file = '{}_files.tar.gz'.format(geo_username)
            with configure_scope() as scope:
                scope.set_tag('upload', objectId)
                upload(project, host, username, password, file)

            # Once done, update progress.
            project.progress = 'uploaded'
            project.save()

            _project_email(
                objectId,
                'Submission finished for project {}'.format(objectId),
                ('Alaska has finished submission of project {} to the '
                 'GEO.<br>'
                 'Please fill out this form: '
                 '<a href="mailto:{}">GEO submission form</a> '
                 'with the following information:<br>'
                 '1) Select <i>Notify GEO about your FTP file '
                 'transfer</i><br>'
                 '2) Select <i>Yes, all my data have finished '
                 'transferring</i><br>'
                 '3) The name of the uploaded file is: '
                 '<strong>{}</strong><br>'
                 '4) Select <i>New</i> as the submission kind.<br>'
                 '5) Select your preferred release date.<br>'
                 'Failure to submit this form may result in the removal '
                 'of your data!').format(objectId,
                                         Config.get()['geoForm'],
                                         file))
        except Exception as e:
            project.progress = 'compiled'
            project.save()
            _project_email(
                objectId,
                'Upload failed for project {}'.format(objectId),
                ('Alaska encountered an error while uploading project {} '
                 'to the GEO.<br>{}<br>'
                 'Please submit an issue on Github if '
                 'this keeps happening.').format(objectId, str(e)))


def send_reset_email():
    data = request.get_json()
    to = data['email']
    fr = '*****@*****.**'
    datetime = dt.datetime.now()

    config = Config.get()
    host = config['host']
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    key = ''
    for i in range(24):
        key += str(random.choice(string.digits))
    reset[key] = to

    url = 'http://{}/webhook/reset/verify/{}'.format(host, key)
    subject = 'Password reset verification for Alaska'
    message = (
        '<html><head></head><body>'
        'Please click on the following link to complete password '
        'reset.<br>'
        '<a href="{}">{}</a><br>'
        'If you did not make this request, please do not click on the '
        'link.<br>'
        'This message was sent to {} at {}.<br>'
        '<b>Please do not reply to this email.</b></body>').format(
            url, url, to, datetime.strftime('%Y-%m-%d %H:%M:%S'))

    email = {'to': to, 'from': fr, 'subject': subject, 'message': message}
    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)
    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})


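# `reset` is assigned to in send_reset_email above but is not defined in
# this excerpt. Given the usage `reset[key] = to`, it is presumably a
# module-level dict mapping verification keys to email addresses (an
# assumption):
reset = {}

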
def reset_notify():
    data = request.get_json()
    to = data['email']
    fr = '*****@*****.**'
    datetime = dt.datetime.now().strftime('%Y%m%d_%H%M%S')

    config = Config.get()
    host = config['host']
    data_path = config['dataPath']
    email_dir = config['emailDir']
    email_path = os.path.join(data_path, email_dir)

    subject = 'Password reset for Alaska'
    message = 'Your password for Alaska has been reset.'

    email = {'to': to, 'from': fr, 'subject': subject, 'message': message}
    email_file = '{}.json'.format(datetime)
    output_path = os.path.join(email_path, email_file)
    with open(output_path, 'w') as f:
        json.dump(email, f, indent=4)

    return jsonify({'result': email_file})


def run_post(project, code='post', requires='diff'):
    print_with_flush('# starting post for project {}'.format(
        project.objectId))

    organism = project.relation('samples').query()[0].reference.organism
    if organism.genus != 'caenorhabditis' or organism.species != 'elegans':
        print_with_flush('# Currently, post analysis is only supported for '
                         'C. elegans')
        return

    config = Config.get()
    q_threshold = config['qThreshold']
    tea_types = config['teaTypes']
    diff_path = project.paths[requires]
    post_path = project.paths[code]

    for file in os.listdir(diff_path):
        file_name = os.path.splitext(os.path.basename(file))[0]
        file_path = os.path.join(diff_path, file)

        if file.startswith('betas') and file.endswith('.csv'):
            df = pd.read_csv(file_path, index_col=0)
            gene_list = df[df.qval < q_threshold].ens_gene

            # Skip if gene list is empty.
            if len(gene_list) == 0:
                print_with_flush(('# there are no genes with q < {} in '
                                  '{}!').format(q_threshold, file))
                print_with_flush('# this means there are no significantly '
                                 'differentially-expressed genes for '
                                 'this set of conditions.')
                continue

            for tea_type in tea_types:
                tea_file = '{}_{}'.format(
                    file_name.replace('betas_wt', 'enrichment'), tea_type)
                tea_title = os.path.join(post_path, tea_file)

                print_with_flush(('# performing {} enrichment analysis '
                                  'for {}').format(tea_type, file))

                df_dict = tea.fetch_dictionary(tea_type)
                df_results = tea.enrichment_analysis(gene_list,
                                                     df_dict,
                                                     aname=tea_title + '.csv',
                                                     save=True,
                                                     show=False)
                tea.plot_enrichment_results(df_results,
                                            analysis=tea_type,
                                            title=tea_title,
                                            save=True)

    # Archive.
    archive_path = archive(project, code)
    if code not in project.files:
        project.files[code] = {}
    project.files[code]['archive'] = archive_path
    project.save()

    print_with_flush('# done')


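# Neither `print_with_flush` nor `archive` is defined in this excerpt.
# Minimal sketches of what they likely do; the tar layout and return value
# of `archive` are assumptions:
def print_with_flush(*args, **kwargs):
    # Flush immediately so the worker tailing the container's logs sees
    # output line by line.
    print(*args, flush=True, **kwargs)


def archive(project, code):
    import tarfile

    # Compress the analysis output directory into <dir>.tar.gz next to it.
    out_path = project.paths[code] + '.tar.gz'
    with tarfile.open(out_path, 'w:gz') as tar:
        tar.add(project.paths[code], arcname=code)
    return out_path

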
def wait():
    # Get wait time.
    interval = Config.get()['workerInterval']
    time.sleep(interval)


def start():
    global container
    while True:
        # Dequeue job.
        job = dequeue()

        if job:
            try:
                project = job.project
                analysis = job.analysis
                print('Retrieved job {} for project {}'.format(
                    job.objectId, project.objectId), flush=True)

                # Make directory if it doesn't exist.
                if analysis.code not in project.paths:
                    path = os.path.join(project.paths['root'], analysis.code)
                    os.makedirs(path, exist_ok=True)
                    project.paths[analysis.code] = path
                    project.save()

                # Also for each sample, if it needs one.
                if analysis.type == 'sample':
                    samples = project.relation('samples').query()
                    for sample in samples:
                        if analysis.code not in sample.paths:
                            path = os.path.join(
                                project.paths[analysis.code], sample.name)
                            os.makedirs(path, exist_ok=True)
                            sample.paths[analysis.code] = path
                            sample.save()

                config = Config.get()
                data_volume = config['repoName'] + '_' + config['dataVolume']
                data_path = config['dataPath']
                script_volume = (config['repoName'] + '_'
                                 + config['scriptVolume'])
                script_path = config['scriptPath']
                network = (config['repoName'] + '_'
                           + config['backendNetworkName'])
                cpus = config['cpus']

                # Begin container variables.
                cmd = 'python3 -u {} {} {}'.format(analysis.script,
                                                   project.objectId,
                                                   analysis.code)
                if getattr(analysis, 'requires', None) is not None:
                    cmd += ' ' + analysis.requires.code
                if job.archive:
                    cmd += ' --archive'

                volumes = {
                    data_volume: {'bind': data_path, 'mode': 'rw'},
                    script_volume: {'bind': script_path, 'mode': 'rw'}
                }
                environment = {
                    'PARSE_HOSTNAME': PARSE_HOSTNAME,
                    'PARSE_APP_ID': PARSE_APP_ID,
                    'PARSE_MASTER_KEY': PARSE_MASTER_KEY,
                    'ENVIRONMENT': os.getenv('ENVIRONMENT', 'default'),
                    'SENTRY_QC_DSN': os.getenv('SENTRY_QC_DSN', ''),
                    'SENTRY_QUANT_DSN': os.getenv('SENTRY_QUANT_DSN', ''),
                    'SENTRY_DIFF_DSN': os.getenv('SENTRY_DIFF_DSN', ''),
                    'SENTRY_POST_DSN': os.getenv('SENTRY_POST_DSN', '')
                }
                wdir = script_path
                name = '{}-{}'.format(analysis.code, project.objectId)

                # Output path.
                output_file = '{}_output.txt'.format(analysis.code)
                output_path = os.path.join(project.paths[analysis.code],
                                           output_file)
                job.outputPath = output_path
                start = time.time()
                job.save()

                # Remove output file if it already exists.
                if os.path.exists(output_path):
                    os.remove(output_path)

                progress = config['progress']
                key = analysis.code + '_started'
                if key in progress:
                    project.oldProgress = progress[key]
                    project.save()

                # Docker client.
                client = docker.from_env()
                container = client.containers.run(analysis.image,
                                                  cmd,
                                                  detach=True,
                                                  auto_remove=True,
                                                  volumes=volumes,
                                                  working_dir=wdir,
                                                  cpuset_cpus=cpus,
                                                  network=network,
                                                  environment=environment,
                                                  name=name)
                print('started container with id {} and name {}'.format(
                    container.id, name))

                hook = container.logs(stdout=True, stderr=True, stream=True)
                for line in hook:
                    decoded = line.decode('utf-8').strip().encode(
                        'ascii', 'ignore').decode('ascii')

                    if '\n' in decoded:
                        outs = decoded.split('\n')
                    else:
                        outs = [decoded]

                    for out in outs:
                        # Detect commands.
                        if out.startswith('##'):
                            job.commands.append(out.strip('# '))
                            job.save()

                        # Save output.
                        print(out, flush=True)
                        with open(output_path, 'a') as f:
                            f.write(out + '\n')

                # Container finished.
                exitcode = container.wait()['StatusCode']
                runtime = time.time() - start

                if exitcode != 0:
                    log = container.attach(stdout=True,
                                           stderr=True,
                                           stream=False,
                                           logs=True)
                    msg = 'container {} exited with code {}\n{}'.format(
                        name, exitcode, log)
                    raise Exception(msg)
                else:
                    print('{} success'.format(container.name))
                    Function('jobSuccess')(objectId=job.objectId,
                                           runtime=runtime)
                    continue
            except Exception as e:
                capture_exception(e)
                print(traceback.format_exc(), file=sys.stderr, flush=True)

                # Notify that there was an error.
                Function('jobError')(objectId=job.objectId)
            finally:
                container = None

        # Wait.
        wait()


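# `dequeue` is called by start() above but is not defined in this excerpt.
# A minimal sketch, assuming jobs are stored as Parse `Job` objects with a
# 'queued' status (all assumptions; the real schema may differ):
def dequeue():
    # Fetch the oldest queued job, or None if the queue is empty.
    Job = Object.factory('Job')
    for job in Job.Query.filter(status='queued').order_by('createdAt').limit(1):
        return job
    return None

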
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Build indices.')
    parser.add_argument('objectId',
                        type=str,
                        help='objectId of the reference for which to build '
                             'the index')
    args = parser.parse_args()
    objectId = args.objectId

    with configure_scope() as scope:
        scope.user = {'id': objectId}

    # Get number of threads.
    config = Config.get()
    nthreads = config['threads']

    # Get reference object.
    Reference = Object.factory('Reference')
    reference = Reference.Query.get(objectId=objectId)

    # Build bowtie2 index.
    build_bowtie2(reference, nthreads)

    # Build kallisto index.
    build_kallisto(reference, nthreads)

    # Success. This reference is ready to be used.
    reference.ready = True
    reference.save()


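# `build_bowtie2` and `build_kallisto` are defined elsewhere in this repo.
# Minimal sketches of the underlying commands, assuming the reference's
# `paths` dict populated by organismNew and that the standard bowtie2-build
# and kallisto binaries are on PATH (both assumptions):
import subprocess


def build_bowtie2(reference, nthreads):
    # bowtie2-build [--threads N] <fasta> <index prefix>
    subprocess.run(['bowtie2-build', '--threads', str(nthreads),
                    reference.paths['dna'], reference.paths['bowtieIndex']],
                   check=True)


def build_kallisto(reference, nthreads):
    # kallisto index -i <output.idx> <cdna fasta>; kallisto index is
    # single-threaded, so nthreads is unused here.
    subprocess.run(['kallisto', 'index', '-i',
                    reference.paths['kallistoIndex'],
                    reference.paths['cdna']],
                   check=True)

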
def _project_citation(objectId):
    # Get project from server.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    config = Config.get()
    citation_file = config['citationFile']
    citation_path = os.path.join(project.paths['root'], citation_file)

    samples = project.relation('samples').query()
    args = ''
    genus = ''
    species = ''
    ref_version = ''
    for sample in samples:
        genus = sample.reference.organism.genus
        species = sample.reference.organism.species
        ref_version = sample.reference.version

        arg = '-b {} --bias'.format(config['kallistoBootstraps'])
        if sample.readType == 'single':
            arg += ' --single -l {} -s {}'.format(sample.readLength,
                                                  sample.readStd)
        args += '{}({}):\t{}.\n'.format(sample.objectId, sample.name, arg)

    format_dict = {
        'factor': str(len(project.factors)),
        'genus': genus.capitalize(),
        'species': species,
        'ref_version': ref_version,
        'args': args,
        'datetime': project.createdAt,
        'id': project.objectId,
        'n_samples': len(project.relation('samples').query()),
        **config
    }

    info = ('alaska_info.txt for {id}\n'
            'This project was created on {datetime} PST with '
            '{n_samples} samples.\n\n').format(**format_dict)
    info += ('RNA-seq data was analyzed with Alaska using the '
             '{factor}-factor design option.\nBriefly, Alaska '
             'performs quality control using\nBowtie2 (v{versionBowtie}), '
             'Samtools (v{versionSamtools}), RSeQC (v{versionRseqc}), '
             'FastQC (v{versionFastqc}) and outputs\n'
             'a summary report generated using MultiQC (v{versionMultiqc}). '
             'Read quantification and\ndifferential expression analysis of '
             'transcripts were performed using\nKallisto '
             '(v{versionKallisto}) and Sleuth (v{versionSleuth}), '
             'respectively. Kallisto (v{versionKallisto}) was run using '
             'the\nfollowing flags for each sample:\n{args}\n'
             'Reads were aligned using\n{genus} {species} genome '
             'version {ref_version}\nas provided by Wormbase.\n\n'
             'Differential expression analyses with Sleuth '
             '(v{versionSleuth}) were performed using a\nWald Test '
             'corrected for multiple-testing.\n\n').format(**format_dict)

    # Add more info if enrichment analysis was performed.
    if genus == 'caenorhabditis' and species == 'elegans':
        info += ('Enrichment analysis was performed using the WormBase '
                 'Enrichment Suite:\n'
                 'https://doi.org/10.1186/s12859-016-1229-9\n'
                 'https://www.wormbase.org/tools/enrichment/tea/tea.cgi\n')

    # if self.epistasis:
    #     info += ('Alaska performed epistasis analyses as first '
    #              'presented in\nhttps://doi.org/10.1073/pnas.1712387115\n')

    with open(citation_path, 'w') as f:
        f.write(info)

    project.files['citation'] = citation_path
    project.save()

    return jsonify({'result': info})


PARSE_APP_ID = os.getenv('PARSE_APP_ID', 'alaska')
PARSE_MASTER_KEY = os.getenv('PARSE_MASTER_KEY', 'MASTER_KEY')
print(PARSE_HOSTNAME, PARSE_APP_ID, PARSE_MASTER_KEY)

# Setup for parse_rest.
os.environ["PARSE_API_ROOT"] = PARSE_HOSTNAME

from parse_rest.config import Config
from parse_rest.datatypes import Function, Object, GeoPoint
from parse_rest.connection import register, SessionToken
from parse_rest.query import QueryResourceDoesNotExist
from parse_rest.connection import ParseBatcher
from parse_rest.core import ResourceRequestBadRequest, ParseError

register(PARSE_APP_ID, '', master_key=PARSE_MASTER_KEY)

sys.path.append(Config.get()['scriptPath'])
from compile import compile
from upload import upload

compiling = {}
uploading = {}
index_container = None


def sigterm_handler(signal, frame):
    print('SIGTERM received', file=sys.stderr, flush=True)
    print(compiling, uploading, file=sys.stderr, flush=True)

    Project = Object.factory('Project')
    for objectId, t in compiling.items():


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Perform qc.')
    parser.add_argument('objectId', type=str)
    # nargs='?' makes the positional optional so the default takes effect
    # (a positional with a bare default is still required by argparse).
    parser.add_argument('code', type=str, nargs='?', default='qc')
    parser.add_argument('--archive', action='store_true')
    args = parser.parse_args()
    objectId = args.objectId

    with configure_scope() as scope:
        scope.user = {'id': objectId}

    # Get number of threads.
    config = Config.get()
    nthreads = config['threads']
    code = args.code

    # Get project with specified objectId.
    Project = Object.factory('Project')
    project = Project.Query.get(objectId=objectId)

    # Run QC.
    run_qc(project, code=code, nthreads=nthreads)

    # Archive the project if requested.
    if args.archive:
        archive_path = archive_project(project,
                                       Config.get()['projectArchive'])


def organismNew():
    '''Method to scan for new organisms.'''
    print('scanning for new organisms', file=sys.stderr)

    config = Config.get()
    data_path = config['dataPath']
    reference_dir = config['referenceDir']
    kallisto_dir = config['kallistoIndexDir']
    bowtie_dir = config['bowtieIndexDir']
    organism_dir = config['organismDir']
    organism_path = os.path.join(data_path, organism_dir)

    # Make the directory in case it doesn't exist.
    os.makedirs(organism_path, exist_ok=True)

    organisms = Function('getOrganismsDict')()['result']

    for genus in os.listdir(organism_path):
        genus_path = os.path.join(organism_path, genus)
        if not os.path.isdir(genus_path):
            continue

        for species in os.listdir(genus_path):
            species_path = os.path.join(genus_path, species)
            if not os.path.isdir(species_path):
                continue

            for version in os.listdir(species_path):
                version_path = os.path.join(species_path, version)
                reference_path = os.path.join(version_path, reference_dir)
                if not os.path.isdir(reference_path):
                    continue

                # Make new organism.
                Organism = Object.factory('Organism')
                if genus not in organisms or species not in organisms[genus]:
                    organism = Organism(genus=genus,
                                        species=species,
                                        path=species_path)
                    organism.save()

                    if genus not in organisms:
                        organisms[genus] = {}
                    if species not in organisms[genus]:
                        organisms[genus][species] = organism
                else:
                    # Otherwise, the organism already exists.
                    found = Organism.Query.filter(genus=genus,
                                                  species=species)
                    assert len(found) == 1
                    organism = found[0]

                # Get all reference versions.
                references = organism.relation('references').query()
                versions = [reference.version for reference in references]

                if version not in versions:
                    # Get reference files.
                    bed = None
                    annotation = None
                    cdna = None
                    dna = None
                    for fname in os.listdir(reference_path):
                        path = os.path.join(reference_path, fname)
                        if fname.endswith('.bed'):
                            bed = path
                        elif '_annotation' in fname:
                            annotation = path
                        elif '_cdna' in fname:
                            cdna = path
                        elif '_dna' in fname:
                            dna = path

                    if bed and annotation and cdna and dna:
                        print('found {}-{}-{}'.format(genus, species,
                                                      version),
                              file=sys.stderr)

                        index_prefix = '{}_{}_{}'.format(genus, species,
                                                         version)
                        kallisto_index_name = index_prefix + '.idx'
                        kallisto_index_path = os.path.join(
                            version_path, kallisto_dir, kallisto_index_name)
                        bowtie_index_path = os.path.join(
                            version_path, bowtie_dir, index_prefix)

                        # Paths.
                        paths = {
                            'root': version_path,
                            'dna': dna,
                            'cdna': cdna,
                            'bed': bed,
                            'annotation': annotation,
                            'kallistoIndex': kallisto_index_path,
                            'bowtieIndex': bowtie_index_path
                        }

                        # Make new reference.
                        Reference = Object.factory('Reference')
                        reference = Reference(version=version,
                                              organism=organism,
                                              paths=paths,
                                              indexBuilt=False,
                                              ready=False)
                        reference.save()
                        organism.relation('references').add([reference])

    return jsonify({'status': 'done'})