def _build_study_info(user, search_type, study_proc=None, proc_samples=None):
    """Select the studies for the studies table and build their listing

    Parameters
    ----------
    user : User object
        The logged in user
    search_type : {'user', 'public'}
        Which group of studies to list
    study_proc : dict of lists, optional
        Processed data keyed on study_id. Must be given together with
        proc_samples.
    proc_samples : dict of lists, optional
        Samples keyed on proc_data_id. Must be given together with
        study_proc.

    Returns
    -------
    list
        An empty list when no study is selected, otherwise the value
        returned by ``generate_study_list`` for the selected study ids

    Raises
    ------
    IncompetentQiitaDeveloperError
        If only one of study_proc and proc_samples is given
    ValueError
        If search_type is neither 'user' nor 'public'
    """
    has_proc = study_proc is not None
    has_samples = proc_samples is not None
    if has_proc and not has_samples:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    if has_samples and not has_proc:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    # past the checks above either both arguments were given or neither was
    build_samples = not has_proc

    if search_type == 'user':
        own_and_shared = user.user_studies.union(user.shared_studies)
        if user.level == 'admin':
            # admins additionally get every sandbox and private study
            own_and_shared = (own_and_shared |
                              Study.get_by_status('sandbox') |
                              Study.get_by_status('private'))
        study_set = own_and_shared - Study.get_by_status('public')
    elif search_type == 'public':
        study_set = Study.get_by_status('public')
    else:
        raise ValueError('Not a valid search type')

    if has_proc:
        # keep only the studies that are keys of study_proc
        study_set = study_set.intersection(study_proc)

    if not study_set:
        return []

    study_ids = [s.id for s in study_set]
    return generate_study_list(study_ids, build_samples,
                               public_only=(search_type == 'public'))
def _build_study_info(studytype, user=None):
    """Collect one StudyInfo namedtuple per study of the requested type

    Parameters
    ----------
    studytype : {"private", "shared", "public"}
        Which studies to list
    user : User object, optional
        Its ``user_studies`` are read for "private" and its
        ``shared_studies`` for "shared"

    Returns
    -------
    list of StudyInfo
        Namedtuples with the fields id, title, meta_complete,
        num_samples_collected, shared, num_raw_data, pi, pmids, owner
        and status

    Raises
    ------
    IncompetentQiitaDeveloperError
        If studytype is not one of the three accepted values
    """
    if studytype == "private":
        study_ids = user.user_studies
    elif studytype == "shared":
        study_ids = user.shared_studies
    elif studytype == "public":
        study_ids = Study.get_by_status('public')
    else:
        raise IncompetentQiitaDeveloperError(
            "Must use private, shared, or public!")

    StudyInfo = namedtuple(
        'StudyInfo',
        'id title meta_complete num_samples_collected shared num_raw_data '
        'pi pmids owner status')

    rows = []
    for study_id in study_ids:
        study = Study(study_id)
        status = study.status
        # the owner email is used as the display name too, since name is
        # not a required field in qiita.qiita_user
        owner_link = study_person_linkifier((study.owner, study.owner))
        info = study.info
        pi_person = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((pi_person.email, pi_person.name))
        pmid_links = ", ".join(
            pubmed_linkifier([pmid]) for pmid in study.pmids)
        shared_links = _get_shared_links_for_study(study)
        rows.append(StudyInfo(
            id=study.id,
            title=study.title,
            meta_complete=info["metadata_complete"],
            num_samples_collected=info["number_samples_collected"],
            shared=shared_links,
            num_raw_data=len(study.raw_data()),
            pi=pi_link,
            pmids=pmid_links,
            owner=owner_link,
            status=status))
    return rows
def _build_study_info(studytype, user=None):
    """Return the study listing for one study type as StudyInfo namedtuples

    ``studytype`` selects the source of the studies: "private" reads
    ``user.user_studies``, "shared" reads ``user.shared_studies`` and
    "public" asks ``Study.get_by_status('public')``. Any other value raises
    IncompetentQiitaDeveloperError.
    """
    if studytype == "public":
        studylist = Study.get_by_status('public')
    elif studytype == "private":
        studylist = user.user_studies
    elif studytype == "shared":
        studylist = user.shared_studies
    else:
        raise IncompetentQiitaDeveloperError("Must use private, shared, "
                                             "or public!")

    StudyTuple = namedtuple('StudyInfo',
                            'id title meta_complete '
                            'num_samples_collected shared num_raw_data pi '
                            'pmids owner status')

    def _as_tuple(study):
        # gather every displayed field of a single study
        status = study.status
        # email address passed as the name as well, because name is not a
        # required field in qiita.qiita_user
        owner = study_person_linkifier((study.owner, study.owner))
        info = study.info
        person = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((person.email, person.name))
        links = [pubmed_linkifier([pmid]) for pmid in study.pmids]
        shared = _get_shared_links_for_study(study)
        return StudyTuple(study.id, study.title, info["metadata_complete"],
                          info["number_samples_collected"], shared,
                          len(study.raw_data()), pi_link, ", ".join(links),
                          owner, status)

    return [_as_tuple(Study(s_id)) for s_id in studylist]
def get(self):
    """Render stats.html with the site stats and one random public study

    The stats dict is obtained by yielding ``Task(self._get_stats)``. When
    there is no public study the three ``random_study_*`` template values
    are passed as None.
    """
    stats = yield Task(self._get_stats)

    # pick the study to showcase, if any public study exists
    candidates = Study.get_by_status('public')
    picked = None
    if candidates:
        picked = choice(list(candidates))

    if picked is not None:
        random_study_info = picked.info
        random_study_title = picked.title
        random_study_id = picked.id
    else:
        random_study_info = random_study_title = random_study_id = None

    # NOTE(review): eval() runs the string stored under 'lat_longs' as
    # Python code; confirm that value can only be written by trusted code.
    self.render(
        'stats.html',
        number_studies=stats['number_studies'],
        number_of_samples=stats['number_of_samples'],
        num_users=stats['num_users'],
        lat_longs=(eval(stats['lat_longs'])
                   if stats['lat_longs'] else []),
        num_studies_ebi=stats['num_studies_ebi'],
        num_samples_ebi=stats['num_samples_ebi'],
        number_samples_ebi_prep=stats['number_samples_ebi_prep'],
        img=stats['img'],
        time=stats['time'],
        num_processing_jobs=stats['num_processing_jobs'],
        random_study_info=random_study_info,
        random_study_title=random_study_title,
        random_study_id=random_study_id)
def get(self):
    """Render the stats page together with a randomly chosen public study

    The values shown come from the dict produced by ``self._get_stats``;
    the random study fields are None when no public study exists.
    """
    stats = yield Task(self._get_stats)

    # Pull a random public study from the database
    public_studies = Study.get_by_status('public')
    study = None
    if public_studies:
        study = choice(list(public_studies))

    if study is not None:
        study_fields = (study.info, study.title, study.id)
    else:
        study_fields = (None, None, None)
    random_study_info, random_study_title, random_study_id = study_fields

    # NOTE(review): eval() executes the stored 'lat_longs' string; confirm
    # that it can never hold untrusted input.
    template_values = {
        'number_studies': stats['number_studies'],
        'number_of_samples': stats['number_of_samples'],
        'num_users': stats['num_users'],
        'lat_longs': (eval(stats['lat_longs'])
                      if stats['lat_longs'] else []),
        'num_studies_ebi': stats['num_studies_ebi'],
        'num_samples_ebi': stats['num_samples_ebi'],
        'number_samples_ebi_prep': stats['number_samples_ebi_prep'],
        'img': stats['img'],
        'time': stats['time'],
        'random_study_info': random_study_info,
        'random_study_title': random_study_title,
        'random_study_id': random_study_id,
    }
    self.render('stats.html', **template_values)
def get(self):
    """Render stats.html with the four stats and one random public study

    ``self._get_stats`` yields the number of studies, samples and users
    plus the lat/long values. The ``random_study_*`` template values are
    None when there is no public study.
    """
    stats = yield Task(self._get_stats)
    num_studies, num_samples, num_users, lat_longs = stats

    # choose the study to showcase among the public ones, if there are any
    public = Study.get_by_status('public')
    picked = None
    if public:
        picked = choice(list(public))

    if picked is not None:
        info, title, study_id = picked.info, picked.title, picked.id
    else:
        info = title = study_id = None

    self.render(
        'stats.html',
        num_studies=num_studies,
        num_samples=num_samples,
        num_users=num_users,
        lat_longs=lat_longs,
        random_study_info=info,
        random_study_title=title,
        random_study_id=study_id)
def test_get_by_status(self):
    """get_by_status returns the set of studies holding each status

    No study is in sandbox at first; after creating a study the test
    expects private to be {1}, sandbox to be {2} and the remaining
    statuses to be empty.
    """
    self.assertEqual(Study.get_by_status('sandbox'), set())

    Study.create(User('*****@*****.**'),
                 'NOT Identification of the Microbiomes for Cannabis Soils',
                 [1], self.info)

    expected_by_status = (
        ('private', {1}),
        ('sandbox', {2}),
        ('public', set()),
        ('awaiting_approval', set()),
    )
    for status, expected in expected_by_status:
        self.assertEqual(Study.get_by_status(status), expected)
def test_get_by_status(self):
    """Check the sets returned by Study.get_by_status around a creation"""
    # nothing is in sandbox before the new study exists
    sandbox_before = Study.get_by_status("sandbox")
    self.assertEqual(sandbox_before, set())

    owner = User("*****@*****.**")
    title = "NOT Identification of the Microbiomes for Cannabis Soils"
    Study.create(owner, title, [1], self.info)

    self.assertEqual(Study.get_by_status("private"), {1})
    # the study just created is expected under sandbox with id 2
    self.assertEqual(Study.get_by_status("sandbox"), {2})
    self.assertEqual(Study.get_by_status("public"), set())
    self.assertEqual(Study.get_by_status("awaiting_approval"), set())
def get(self):
    """Render admin_approval.html with the studies awaiting approval

    Raises HTTPError 403 when the current user's level is not 'admin'.
    The template receives ``study_info``, a list of (id, title, owner)
    tuples.
    """
    if self.current_user.level != 'admin':
        raise HTTPError(403, 'User %s is not admin' % self.current_user)

    pending = Study.get_by_status('awaiting_approval')
    study_info = [(study.id, study.title, study.owner)
                  for study in map(Study, pending)]
    self.render('admin_approval.html', study_info=study_info)
def __call__(self, searchstr, user):
    """Run a study search and return the matching samples per study

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : User object
        User making the search, used to restrict the studies searched

    Returns
    -------
    dict
        Rows found for each study, keyed on study id:
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...]}
        Studies without matching rows are left out.
    list
        Metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive

    Both return values are also stored on the instance as ``results`` and
    ``meta_headers``.
    """
    with TRN:
        parsed = self._parse_study_search_string(searchstr, True)
        study_sql, sample_sql, meta_headers = parsed

        # every study that has the requested metadata headers
        TRN.add(study_sql)
        candidates = set(TRN.execute_fetchflatten())

        # levels other than admin, dev and superuser only search public,
        # owned and shared studies
        if user.level not in {'admin', 'dev', 'superuser'}:
            visible = (Study.get_by_status('public') | user.user_studies |
                       user.shared_studies)
            candidates = candidates.intersection(visible)

        results = {}
        for study_id in candidates:
            TRN.add(sample_sql.format(study_id))
            rows = TRN.execute_fetchindex()
            if not rows:
                # studies without matching samples are not reported
                continue
            results[study_id] = rows

        self.results = results
        self.meta_headers = meta_headers
        return results, meta_headers
def __call__(self, searchstr, user):
    """Run a study search and return the matching samples per study

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : User object
        User making the search, used to restrict the studies searched

    Returns
    -------
    dict
        Rows found for each study, keyed on study id:
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...]}
        Studies without matching rows are left out.
    list
        Metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive
    """
    parsed = self._parse_study_search_string(searchstr, True)
    study_sql, sample_sql, meta_headers = parsed
    conn_handler = SQLConnectionHandler()

    # first column of every row returned by the study query
    candidates = set(row[0]
                     for row in conn_handler.execute_fetchall(study_sql))

    # levels other than admin, dev and superuser only search public,
    # owned and shared studies
    if user.level not in {'admin', 'dev', 'superuser'}:
        visible = (Study.get_by_status('public') + user.user_studies +
                   user.shared_studies)
        candidates = candidates.intersection(visible)

    results = {}
    for study_id in candidates:
        rows = conn_handler.execute_fetchall(sample_sql.format(study_id))
        if not rows:
            # studies without matching samples are not reported
            continue
        results[study_id] = rows
    return results, meta_headers
def __call__(self, searchstr, user):
    """Run a study search and return the matching samples per study

    Parameters
    ----------
    searchstr : str
        Search string to use
    user : str
        Identifier of the user making the search; it is passed to ``User``
        to look up the studies that user owns or was shared

    Returns
    -------
    dict
        Rows found for each study, keyed on study id:
        {study_id: [[samp_id1, meta1, meta2, ...],
                    [samp_id2, meta1, meta2, ...], ...]}
        Studies without matching rows are left out.
    list
        Metadata column names searched for

    Notes
    -----
    Metadata information for each sample is in the same order as the
    metadata columns list returned

    Metadata column names and string searches are case-sensitive
    """
    parsed = self._parse_study_search_string(searchstr, True)
    study_sql, sample_sql, meta_headers = parsed
    conn_handler = SQLConnectionHandler()

    # first column of every row returned by the study query
    found = set(row[0] for row in conn_handler.execute_fetchall(study_sql))

    # restrict the search to public, owned and shared studies
    userobj = User(user)
    visible = (Study.get_by_status('public') + userobj.user_studies +
               userobj.shared_studies)
    found = found.intersection(visible)

    # one sample query per remaining study, evaluated lazily
    per_study = (
        (sid, conn_handler.execute_fetchall(sample_sql.format(sid)))
        for sid in found)
    # only studies that actually have samples in the results are kept
    results = dict((sid, rows) for sid, rows in per_study if rows)
    return results, meta_headers
def get(self):
    """Render stats.html with the four stats and one random public study

    ``Study.get_by_status('public')`` supplies the candidates; one of them
    is drawn with ``choice`` and wrapped in ``Study``. The
    ``random_study_*`` template values are None when there are no
    candidates.
    """
    stats = yield Task(self._get_stats)
    num_studies, num_samples, num_users, lat_longs = stats

    candidates = Study.get_by_status('public')
    picked = None
    if candidates:
        picked = Study(choice(list(candidates)))

    if picked is not None:
        info, title, study_id = picked.info, picked.title, picked.id
    else:
        info = title = study_id = None

    self.render(
        'stats.html',
        num_studies=num_studies,
        num_samples=num_samples,
        num_users=num_users,
        lat_longs=lat_longs,
        random_study_info=info,
        random_study_title=title,
        random_study_id=study_id)
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Build the list of per-study entries for the studies table

    Parameters
    ----------
    user : User object
        The logged in user
    study_proc : dict of lists, optional
        Processed data keyed on study_id. Must be given together with
        proc_samples.
    proc_samples : dict of lists, optional
        Samples keyed on proc_data_id. Must be given together with
        study_proc.

    Returns
    -------
    list
        One entry per study, each being the value returned by
        ``_build_single_study_info``; empty when no study is selected

    Raises
    ------
    IncompetentQiitaDeveloperError
        If only one of study_proc and proc_samples is given

    Notes
    -----
    When neither optional argument is given, both are rebuilt for every
    study from that study's artifacts of type 'BIOM'.
    """
    has_proc = study_proc is not None
    has_samples = proc_samples is not None
    if has_proc and not has_samples:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    if has_samples and not has_proc:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    # past the checks above either both arguments were given or neither was
    build_samples = not has_proc

    # own, public and shared studies
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if has_proc:
        # keep only the studies that are keys of study_proc
        study_set = study_set.intersection(study_proc)
    if not study_set:
        return []

    cols = ['study_id', 'email', 'principal_investigator_id',
            'publication_doi', 'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    rows = Study.get_info([s.id for s in study_set], cols)

    infolist = []
    for row in rows:
        # Convert DictCursor to proper dict
        info = dict(row)
        study = Study(info['study_id'])
        if build_samples:
            biom_artifacts = [artifact for artifact in study.artifacts()
                              if artifact.artifact_type == 'BIOM']
            proc_samples = {}
            by_data_type = defaultdict(list)
            study_proc = {study.id: by_data_type}
            for artifact in biom_artifacts:
                by_data_type[artifact.data_type].append(artifact.id)
                # there is only one prep template for each processed data
                proc_samples[artifact.id] = \
                    artifact.prep_templates[0].keys()
        infolist.append(
            _build_single_study_info(study, info, study_proc, proc_samples))
    return infolist
# Parameters of the 'list_remote_files' command, passed to create_command
# together with qiita_plugin, the command name and its description
parameters = {'url': ['string', None],
              'private_key': ['string', None],
              'study_id': ['integer', None]}
create_command(qiita_plugin, "list_remote_files",
               "retrieves list of valid study files from remote dir",
               parameters)

# Create the 'download_remote_files' command
parameters = {'url': ['string', None],
              'destination': ['string', None],
              'private_key': ['string', None]}
create_command(qiita_plugin, "download_remote_files",
               "downloads valid study files from remote dir", parameters)

# August 31, 2018
# Strip any UTF-8 characters that are not also printable ASCII characters
# from study titles. As some analysis packages cannot interpret UTF-8
# characters, it becomes important to remove them from study titles, as
# they are used as metadata/identifiers when creating new analyses.

# insert new status_types into list, or replace w/a call to an appropriate
# method.
status_types = ['awaiting_approval', 'sandbox', 'private', 'public']

for status_type in status_types:
    for study in Study.get_by_status(status_type):
        # the pattern matches runs of characters outside \x20-\x7E (space
        # through tilde); sub is presumably re.sub - confirm at the import
        new_title = sub(r'[^\x20-\x7E]+', '', study.title)
        # only assign the title when the substitution changed it
        if new_title != study.title:
            study.title = new_title
def _build_study_info(user, results=None):
    """Build the rows of the studies table, with the HTML already rendered

    Parameters
    ----------
    user : User object
        User whose own and shared studies are listed next to the public ones
    results : iterable, optional
        When given, only the studies also present in it are kept

    Returns
    -------
    list of dict
        One dict per study with the keys checkbox, id, title, meta_complete,
        num_samples, shared, num_raw_data, pi, pmid, status and abstract;
        empty when no study is left to show
    """
    # own, public and shared studies
    study_list = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if results is not None:
        study_list = study_list.intersection(results)
    if not study_list:
        return []

    cols = ['study_id', 'email', 'principal_investigator_id', 'pmid',
            'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']

    # HTML templates of the table cells; {0}, {1} and {2} are filled per row
    title_html = ("<a href='#' data-toggle='modal' "
                  "data-target='#study-abstract-modal' "
                  "onclick='fillAbstract(\"studies-table\", {0})'>"
                  "<span class='glyphicon glyphicon-file' "
                  "aria-hidden='true'></span></a> | "
                  "<a href='/study/description/{1}' "
                  "id='study{0}-title'>{2}</a>")
    shared_html = ("<span id='shared_html_{0}'>{1}</span><br/>"
                   "<a class='btn btn-primary btn-xs' data-toggle='modal' "
                   "data-target='#share-study-modal-view' "
                   "onclick='modify_sharing({0});'>Modify</a>")

    infolist = []
    for row, info in enumerate(Study.get_info(study_list, cols)):
        study = Study(info['study_id'])
        status = study.status

        pi_person = StudyPerson(info['principal_investigator_id'])
        pi_link = study_person_linkifier((pi_person.email, pi_person.name))

        if info['pmid'] is None:
            pmids = ""
        else:
            pmids = ", ".join(pubmed_linkifier([p]) for p in info['pmid'])

        # a missing sample count is shown as "0"
        if info["number_samples_collected"] is None:
            info["number_samples_collected"] = "0"

        shared_links = _get_shared_links_for_study(study)
        glyph = "ok" if info["metadata_complete"] else "remove"

        title = title_html.format(str(row), str(study.id),
                                  info["study_title"])
        meta_complete = "<span class='glyphicon glyphicon-%s'></span>" % glyph
        if status == 'public':
            shared = "Not Available"
        else:
            shared = shared_html.format(study.id, shared_links)

        entry = {
            "checkbox": "<input type='checkbox' value='%d' />" % study.id,
            "id": study.id,
            "title": title,
            "meta_complete": meta_complete,
            "num_samples": info["number_samples_collected"],
            "shared": shared,
            "num_raw_data": len(study.raw_data()),
            "pi": pi_link,
            "pmid": pmids,
            "status": status,
            "abstract": info["study_abstract"],
        }
        infolist.append(entry)
    return infolist
def _build_study_info(user, study_proc=None, proc_samples=None):
    """Build the list of per-study entries for the studies table

    Parameters
    ----------
    user : User object
        The logged in user
    study_proc : dict of lists, optional
        Processed data keyed on study_id. Must be given together with
        proc_samples.
    proc_samples : dict of lists, optional
        Samples keyed on proc_data_id. Must be given together with
        study_proc.

    Returns
    -------
    list
        One entry per study, each being the value returned by
        ``_build_single_study_info``; empty when no study is selected

    Raises
    ------
    IncompetentQiitaDeveloperError
        If only one of study_proc and proc_samples is given

    Notes
    -----
    When neither optional argument is given, both are rebuilt for every
    study from the ids returned by ``study.processed_data()``.
    """
    has_proc = study_proc is not None
    has_samples = proc_samples is not None
    if has_proc and not has_samples:
        raise IncompetentQiitaDeveloperError(
            'Must pass proc_samples when study_proc given')
    if has_samples and not has_proc:
        raise IncompetentQiitaDeveloperError(
            'Must pass study_proc when proc_samples given')
    # past the checks above either both arguments were given or neither was
    build_samples = not has_proc

    # own, public and shared studies
    study_set = user.user_studies.union(
        Study.get_by_status('public')).union(user.shared_studies)
    if has_proc:
        # keep only the studies that are keys of study_proc
        study_set = study_set.intersection(study_proc)
    if not study_set:
        return []

    cols = ['study_id', 'email', 'principal_investigator_id', 'pmid',
            'study_title', 'metadata_complete',
            'number_samples_collected', 'study_abstract']
    rows = Study.get_info(study_set, cols)

    infolist = []
    for row in rows:
        # Convert DictCursor to proper dict
        info = dict(row)
        study = Study(info['study_id'])
        if build_samples:
            processed_ids = study.processed_data()
            proc_samples = {}
            by_data_type = defaultdict(list)
            study_proc = {study.id: by_data_type}
            for pid in processed_ids:
                processed = ProcessedData(pid)
                by_data_type[processed.data_type()].append(pid)
                proc_samples[pid] = processed.samples
        infolist.append(
            _build_single_study_info(study, info, study_proc, proc_samples))
    return infolist
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from qiita_db.study import Study

# Give the artifact of every prep template the most open visibility found
# among itself and its descendants.

studies = (Study.get_by_status('private') |
           Study.get_by_status('public') |
           Study.get_by_status('sandbox'))

# artifacts attached to the prep templates; templates without one are skipped
raw_data = []
for study in studies:
    for prep in study.prep_templates():
        if prep.artifact is not None:
            raw_data.append(prep.artifact)

for rd in raw_data:
    # visibilities of all the children in the pipeline, then of rd itself
    vis = [child.visibility for child in rd.descendants.nodes()]
    vis.append(rd.visibility)
    # 'public' wins over 'private'; 'sandbox' is used when neither is found
    rd.visibility = next(
        (level for level in ('public', 'private') if level in vis),
        'sandbox')
# replacing all \t and \n for space as those chars brake QIIME from qiita_db.study import Study from qiita_db.sql_connection import TRN def searcher(df): search = r"\t|\n" return [ col for col in df if df[col].str.contains(search, na=False, regex=True).any() ] studies = Study.get_by_status('private').union( Study.get_by_status('public')).union(Study.get_by_status('sandbox')) # we will start search using pandas as is much easier and faster # than using pgsql. remember that to_dataframe actually transforms what's # in the db to_fix = [] for s in studies: st = s.sample_template if st is None: continue cols = searcher(st.to_dataframe()) if cols: to_fix.append((st, cols)) for pt in s.prep_templates(): if pt is None:
Notes ----- Both study_proc and proc_samples must be passed, or neither passed. """ # Logic check to make sure both needed parts passed if study_proc is not None and proc_samples is None: raise IncompetentQiitaDeveloperError( 'Must pass proc_samples when study_proc given') elif proc_samples is not None and study_proc is None: raise IncompetentQiitaDeveloperError( 'Must pass study_proc when proc_samples given') # get list of studies for table <<<<<<< HEAD study_set = user.user_studies.union( Study.get_by_status('public')).union(user.shared_studies) ======= user_study_set = user.user_studies.union(user.shared_studies) if search_type == 'user': if user.level == 'admin': user_study_set = (user_study_set | Study.get_by_status('sandbox') | Study.get_by_status('private') | Study.get_by_status('awaiting_approval') - Study.get_by_status('public')) study_set = user_study_set elif search_type == 'public': study_set = Study.get_by_status('public') - user_study_set else: raise ValueError('Not a valid search type') >>>>>>> 405cbef0c9f71c620da95a0c1ba6c7d3d588b3ed
def test_get_by_status(self):
    """After creating a study, the private studies are still just [1]"""
    owner = User('*****@*****.**')
    title = 'NOT Identification of the Microbiomes for Cannabis Soils'
    Study.create(owner, title, [1], self.info)

    self.assertEqual(Study.get_by_status('private'), [1])
def test_get_by_status(self):
    """get_by_status('private') returns [1] once another study is created"""
    Study.create(
        User('*****@*****.**'),
        'NOT Identification of the Microbiomes for Cannabis Soils',
        [1],
        self.info)

    private_studies = Study.get_by_status('private')
    self.assertEqual(private_studies, [1])