def project_experiment_subset_pending(dbs, confs):
    """Build a table of dashboard replicates that are still pending.

    A replicate counts as pending when its key is reported by
    ``rnadashboard_results_pending`` but is not among the keys returned by
    ``_project_experimentstable_experiments``.  Of those, only replicates
    whose values equal every entry of ``meta['parameter_values']`` are kept.

    Side effects: sets ``hgversion`` to ``'hg19'`` on the first configuration
    and adds the ``'RNA Type'``, ``'Localization'`` and ``'Lab'`` keys to each
    inspected dashboard item.

    Returns a dict with ``table_description`` and ``table_data``.  When no
    replicate qualifies, ``table_data`` holds a single all-``None`` row.
    """
    confs['configurations'][0]['hgversion'] = 'hg19'
    dashboard = rnadashboard_results_pending(dbs, confs)
    grape = _project_experimentstable_experiments(dbs, confs,
                                                  raw=True, where=True)
    meta = get_experiment_dict(confs)
    parameter_labels = confs['request'].environ['parameter_labels']

    description = [
        ('Replicate', 'string'),
        ('Lab', 'string'),
        ('Cell Type', 'string'),
        ('Localization', 'string'),
        ('RNA Type', 'string'),
        ('Read Length', 'string'),
        ('Paired', 'string'),
    ]

    known_keys = set(grape.keys())
    pending_keys = set(dashboard.keys()).difference(known_keys)

    rows = []
    for key in pending_keys:
        item = dashboard[key]
        # Alias the dashboard fields; presumably the label lookup below
        # addresses the item through these names -- TODO confirm.
        item['RNA Type'] = item['RNA Extract Id']
        item['Localization'] = item['Localization Id']
        item['Lab'] = item['Replicate Lab']

        matches = True
        # Every parameter is evaluated; there is no early exit.
        for position, parameter in enumerate(meta['parameter_list']):
            value = (item[parameter_labels[parameter][0]]
                     if parameter in parameter_labels else None)
            if value != meta['parameter_values'][position]:
                matches = False

        if matches:
            rows.append((
                key,
                item['Replicate Lab'],
                item['Cell Type'],
                item['Localization'],
                item['RNA Extract'],
                item['Read Length'],
                item['Paired'],
            ))

    if not rows:
        # Placeholder row so the table always has at least one line.
        rows = [(None,) * len(description)]

    return {'table_description': description, 'table_data': rows}
def experiment_replicates(dbs, confs):
    """Compile the list of replicates for the experiment.

    Reads ``projectid``, ``parameter_list`` and ``parameter_values`` from
    ``confs['kwargs']``, selects the matching ``experiment_id`` values and
    returns one table row per id, including the replicate URL.

    NOTE(review): this module defines ``experiment_replicates`` a second time
    further down; if both are at module level, the later definition replaces
    this one at import time.
    """
    description = [
        ('Project Id', 'string'),
        ('Parameter List', 'string'),
        ('Parameter Values', 'string'),
        ('Replicate Id', 'string'),
        ('Replicate Url', 'string'),
    ]

    kwargs = confs['kwargs']
    projectid = kwargs['projectid']
    parameter_list = kwargs['parameter_list']
    parameter_values = kwargs['parameter_values']

    meta = get_experiment_dict(confs)
    where = get_experiment_where(confs, meta)
    # Adjacent literals: the resulting statement text is unchanged.
    template = (""" select experiment_id from experiments %s"""
                """ order by experiment_id;""")
    sql = template % where

    cursor = dbs[projectid]['RNAseqPipelineCommon'].query(sql)
    rows = cursor.fetchall()
    cursor.close()

    url = '/project/%s/%s/%s/replicate/%s'
    table_data = [
        (projectid,
         parameter_list,
         parameter_values,
         row[0],
         url % (projectid, parameter_list, parameter_values, row[0]))
        for row in rows
    ]
    return {'table_description': description, 'table_data': table_data}
def experiment_replicates(dbs, confs):
    """Compile the list of replicates for the experiment.

    The project id, parameter list and parameter values come from
    ``confs['kwargs']``.  The returned chart has one row per experiment id
    found, ordered by ``experiment_id``, with the replicate URL as last cell.

    NOTE(review): an earlier definition with the same name exists in this
    module; if both are at module level, this later one is the binding that
    remains after import.
    """
    chart = {
        'table_description': [
            ('Project Id', 'string'),
            ('Parameter List', 'string'),
            ('Parameter Values', 'string'),
            ('Replicate Id', 'string'),
            ('Replicate Url', 'string'),
        ],
    }

    request_args = confs['kwargs']
    projectid = request_args['projectid']
    parameter_list = request_args['parameter_list']
    parameter_values = request_args['parameter_values']

    # The where clause is produced by get_experiment_where from the
    # experiment description of this request.
    condition = get_experiment_where(confs, get_experiment_dict(confs))
    sql = (""" select experiment_id from experiments %s """
           """order by experiment_id;""") % condition

    cursor = dbs[projectid]['RNAseqPipelineCommon'].query(sql)
    fetched = cursor.fetchall()
    cursor.close()

    url_template = '/project/%s/%s/%s/replicate/%s'
    prefix = (projectid, parameter_list, parameter_values)
    table_data = []
    for replicateid in [record[0] for record in fetched]:
        link = url_template % (prefix + (replicateid,))
        table_data.append(prefix + (replicateid, link))

    chart['table_data'] = table_data
    return chart
def project_experiment_subset_start(dbs, confs):
    """Build a table of the values seen for each experiment parameter.

    XXX This is not used yet.

    The idea is to use this as a start for searching the parameter space
    of a project.

    For every parameter in ``meta['parameter_list']`` the distinct values
    found in the project's experiments are listed, together with the number
    of experiment groups whose first experiment carries that value.

    Returns a dict with ``table_description`` and ``table_data``.  When there
    is nothing to list, ``table_data`` holds a single all-``None`` row.

    NOTE(review): the description declares six columns, but each data row
    carries five values (there is no value for 'Parameter Values').  The
    intended content of that column cannot be determined here, so the row
    layout is left unchanged -- confirm before this function is put to use.
    """
    experimentids = _project_experimentstable_experiments(dbs, confs,
                                                          raw=True,
                                                          where=True)
    conf = confs['configurations'][0]
    projectid = conf['projectid']
    meta = get_experiment_dict(confs)
    parameter_labels = confs['request'].environ['parameter_labels']

    variations = {}       # parameter -> set of distinct values
    variation_count = {}  # parameter -> value of each group's first experiment
    for experiment_list in experimentids.values():
        for parameter in meta['parameter_list']:
            variation_count.setdefault(parameter, []).append(
                experiment_list[0][parameter])
            for experiment in experiment_list:
                if parameter in experiment:
                    variations.setdefault(parameter, set()).add(
                        experiment[parameter])

    description = [
        ('Project', 'string'),
        ('Parameter Names', 'string'),
        ('Parameter Values', 'string'),
        ('Parameter Type', 'string'),
        ('Parameter Value', 'string'),
        ('Replicates for this Parameter Value', 'string'),
    ]

    table_data = []
    for parameter in meta['parameter_list']:
        # A parameter without any recorded value (e.g. no experiments were
        # found) contributes no rows instead of raising KeyError, so the
        # empty-table fallback below can actually be reached.
        for variation in variations.get(parameter, ()):
            table_data.append((
                projectid,
                confs['kwargs']['parameter_list'],
                parameter_labels[parameter][0],
                str(variation),
                str(variation_count[parameter].count(variation)),
            ))

    if not table_data:
        table_data.append([None] * len(description))

    return {'table_description': description, 'table_data': table_data}
def project_experiment_subset_selection(dbs, confs):
    """Build a table of links that narrow the current experiment selection.

    XXX Needs refactoring.

    Candidate parameters are those of ``parameter_mapping[projectid]`` that
    are neither in ``meta['parameter_list']`` nor a key of ``meta``.  For
    each candidate with at least two distinct values among the experiments,
    one row per value is produced.  The row extends the requested parameter
    list and parameter values (from ``confs['kwargs']``) with the candidate
    and its value, joined by ``-``.

    Returns a dict with ``table_description`` and ``table_data``.  When no
    row is produced, ``table_data`` holds a single all-``None`` row.
    """
    experimentids = _project_experimentstable_experiments(dbs, confs,
                                                          raw=True,
                                                          where=True)
    conf = confs['configurations'][0]
    projectid = conf['projectid']
    meta = get_experiment_dict(confs)
    parameter_mapping = confs['request'].environ['parameter_mapping']
    parameter_labels = confs['request'].environ['parameter_labels']

    # Parameters that are not part of the current selection yet.
    subsets = []
    for parameter in parameter_mapping[projectid]:
        if parameter in meta['parameter_list']:
            continue
        if parameter not in meta:
            subsets.append(parameter)

    variations = {}       # parameter -> set of distinct values
    variation_count = {}  # parameter -> value of each group's first experiment
    for experiment_list in experimentids.values():
        for parameter in parameter_mapping[projectid]:
            variation_count.setdefault(parameter, []).append(
                experiment_list[0][parameter])
            for experiment in experiment_list:
                if parameter in experiment:
                    variations.setdefault(parameter, set()).add(
                        experiment[parameter])

    description = [
        ('Project', 'string'),
        ('Parameter Names', 'string'),
        ('Parameter Values', 'string'),
        ('Parameter Type', 'string'),
        ('Parameter Value', 'string'),
        ('Replicates for this Parameter Value', 'string'),
    ]

    table_data = []
    for subset in subsets:
        # Only offer links where there is actual variation to choose from.
        if subset not in variations:
            continue
        if len(variations[subset]) < 2:
            continue
        for variation in variations[subset]:
            table_data.append((
                projectid,
                '%s-%s' % (confs['kwargs']['parameter_list'], subset),
                '%s-%s' % (confs['kwargs']['parameter_values'], variation),
                parameter_labels[subset][0],
                str(variation),
                str(variation_count[subset].count(variation)),
            ))

    if not table_data:
        table_data.append([None] * len(description))

    return {'table_description': description, 'table_data': table_data}
def _project_experimentstable_experiments(dbs, confs, raw=True, where=False):
    """Return the project's experiments grouped by their parameter values.

    The result is a dict mapping each ``parameter_values`` value (as computed
    by ``get_parameter_values``) to the list of experiment dicts sharing it.

    ``where`` -- when true, restrict the query with the clause built by
    ``get_experiment_where``; otherwise select by the project id only.
    ``raw`` -- when false, ``get_experiment_labels`` is applied to every
    experiment dict before it is stored.
    """
    conf = confs['configurations'][0]

    # Adjacent literals: the resulting statement text is unchanged.
    select_clause = (
        " select experiment_id, species_info.species, genome_files.genome,"
        " genome_files.location, genome_files.assembly, genome_files.gender,"
        " annotation_files.annotation, annotation_files.location,"
        " annotation_files.version, template_file, read_length, mismatches,"
        " exp_description, expDate, CellType, RNAType, Compartment,"
        " Bioreplicate, partition, annotation_version, lab, paired"
        " from experiments, species_info, genome_files, annotation_files "
    )
    if where:
        restriction = get_experiment_where(confs, get_experiment_dict(confs))
    else:
        restriction = "where project_id = '%s'" % conf['projectid']
    sql = "%s %s and " % (select_clause, restriction)

    # Join conditions tying the experiment to its species, genome and
    # annotation records.
    sql = sql + (
        " experiments.species_id = species_info.species_id"
        " and experiments.genome_id = genome_files.genome_id"
        " and experiments.annotation_id = annotation_files.annotation_id "
    )
    sql = "%s %s" % (sql, get_experiment_order_by(confs))

    cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
    rows = cursor.fetchall()
    cursor.close()

    rna_extracts = get_rna_extract_display_mapping(dbs)
    cells = get_cell_display_mapping(dbs)
    localizations = get_localization_display_mapping(dbs)

    experimentids = {}
    for row in rows:
        experiment = {
            'projectid': conf['projectid'],
            'read_length': row[10],
            'cell': row[14],
            'rnaExtract': row[15],
            'localization': row[16],
            'bio_replicate': row[17],
            'partition': row[18],
            'annotation_version': row[19],
            'lab': row[20],
            'paired': row[21],
        }
        if experiment['paired'] is not None:
            # The stored flag is converted to its ordinal value.
            experiment['paired'] = ord(experiment['paired'])
        experiment['parameter_list'] = get_parameter_list(confs)
        experiment['parameter_values'] = get_parameter_values(confs,
                                                              experiment)
        if not raw:
            get_experiment_labels(experiment, rna_extracts, cells,
                                  localizations)
        group = experimentids.setdefault(experiment['parameter_values'], [])
        group.append(experiment)
    return experimentids
def experiment_info(dbs, confs):
    """Return a one-row table describing the selected experiment.

    The first experiment matching the where clause built by
    ``get_experiment_where`` is used.  Cell type, RNA type and localization
    are replaced by their display labels when a mapping entry exists.
    Species, annotation and genome details are looked up by the ids stored
    on the experiment.

    Returns a dict with ``table_description`` and ``table_data``.  When no
    experiment matches, ``table_data`` is a single all-``None`` row.  When a
    species, annotation or genome record is missing, the corresponding cells
    are ``None`` instead of the lookup failing with ``IndexError``.
    """
    conf = confs['configurations'][0]
    chart = {}
    chart['table_description'] = [
        ('Read Length', 'number'),
        ('Mismatches', 'number'),
        ('Description', 'string'),
        ('Date', 'string'),
        ('Cell Type', 'string'),
        ('RNA Type', 'string'),
        ('Localization', 'string'),
        ('Bio Replicate', 'string'),
        ('Partition', 'string'),
        ('Paired', 'number'),
        ('Species', 'string'),
        ('Annotation Version', 'string'),
        ('Annotation Source', 'string'),
        ('Genome Assembly', 'string'),
        ('Genome Source', 'string'),
        ('Genome Gender', 'string'),
    ]
    meta = get_experiment_dict(confs)

    def fetch_rows(sql):
        """Run sql on the project's common database and return all rows."""
        cursor = dbs[conf['projectid']]['RNAseqPipelineCommon'].query(sql)
        try:
            return cursor.fetchall()
        finally:
            # Release the cursor even when fetching fails.
            cursor.close()

    def lookup(sql, columns):
        """Return the given columns of the first row, or Nones if no row."""
        found = fetch_rows(sql)
        if not found:
            return [None] * len(columns)
        return [found[0][column] for column in columns]

    # Adjacent literals: the resulting statement text is unchanged.
    experiment_sql = (
        " select experiment_id, project_id, species_id, genome_id,"
        " annotation_id, template_file, read_length, mismatches,"
        " exp_description, expDate, CellType, RNAType, Compartment,"
        " Bioreplicate, partition, paired from experiments %s"
        " order by experiment_id;"
    )
    rows = fetch_rows(experiment_sql % get_experiment_where(confs, meta))
    if not rows:
        chart['table_data'] = [[None] * len(chart['table_description'])]
        return chart

    experiment = rows[0]
    species_id = experiment[2]
    genome_id = experiment[3]
    annotation_id = experiment[4]

    result = [
        int(experiment[6]),
        int(experiment[7]),
        experiment[8],
        str(experiment[9]),
    ]

    # Use labels instead of the raw values
    mapping = get_cell_display_mapping(dbs)
    result.append(mapping.get(experiment[10], experiment[10]))
    mapping = get_rna_extract_display_mapping(dbs)
    result.append(mapping.get(experiment[11], experiment[11]))
    mapping = get_localization_display_mapping(dbs)
    result.append(mapping.get(experiment[12], experiment[12]))

    result.append(experiment[13])
    result.append(experiment[14])
    paired = experiment[15]
    if paired is not None:
        # The stored flag is converted to its ordinal value.
        paired = ord(paired)
    result.append(paired)

    # Species name.
    sql = """ select species_id, species, genus, sp_alias, abbreviation from species_info where species_id='%s' """ % species_id
    result.extend(lookup(sql, (1,)))

    # Annotation version and source.
    sql = """ select annotation_id, species_id, annotation, location, version, source from annotation_files where annotation_id='%s' """ % annotation_id
    result.extend(lookup(sql, (4, 5)))

    # Genome assembly, source and gender.
    sql = """ select genome_id, species_id, genome, location, assembly, source, gender from genome_files where genome_id='%s' """ % genome_id
    result.extend(lookup(sql, (4, 5, 6)))

    chart['table_data'] = [result, ]
    return chart