示例#1
0
    def write_error(self, status_code, **kwargs):
        '''Render the custom error page and record the failure in the log

        Overrides the error page created by Tornado.

        Parameters
        ----------
        status_code : int
            The HTTP status code being reported
        kwargs : dict
            Extra keyword arguments forwarded to this method. ``exc_info``,
            when present, is passed to ``traceback.format_exception`` and
            its second element is logged as the error.
        '''
        if status_code == 404:
            # just use the 404 page as the error
            self.render("404.html")
            return

        is_admin = False
        user = self.get_current_user()
        if user:
            try:
                is_admin = user.level == 'admin'
            except Exception:
                # Any issue with this check leaves default as not admin
                pass

        # render error page
        self.render('error.html', status_code=status_code, is_admin=is_admin)

        # log the error
        from traceback import format_exception
        # exc_info is only supplied when an exception triggered the error,
        # so do not assume the key is there
        exc_info = kwargs.get("exc_info")
        if exc_info:
            trace_info = ''.join(
                ["%s\n" % line for line in format_exception(*exc_info)])
            error = exc_info[1]
        else:
            trace_info = ''
            error = ('HTTP %s (no exception information available)' %
                     status_code)

        # Work on a shallow copy: trimming the body for the log entry must
        # not truncate the body stored on the live request object
        req_dict = dict(self.request.__dict__)
        # must trim body to 1024 chars to prevent huge error messages
        req_dict['body'] = (req_dict.get('body') or '')[:1024]
        request_info = ''.join([
            "<strong>%s</strong>: %s\n" % (k, req_dict[k])
            for k in req_dict.keys()
        ])
        LogEntry.create(
            'Runtime', 'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' %
            (error, trace_info, request_info))
示例#2
0
    def post(self, analysis_id):
        """Delete an analysis and redirect to the analysis listing

        Parameters
        ----------
        analysis_id : str
            The analysis id taken from the URL; only the part before the
            first "/" is used

        Raises
        ------
        QiitaPetAuthorizationError
            If the id sent in the form does not match the id in the URL or
            the requested action is not 'delete_analysis'
        """
        analysis_id = int(analysis_id.split("/")[0])
        analysis_id_sent = int(self.get_argument('analysis_id'))
        action = self.get_argument('action')

        # The form must target this very analysis and ask for a deletion
        is_delete_request = (analysis_id == analysis_id_sent and
                             action == 'delete_analysis')
        if not is_delete_request:
            raise QiitaPetAuthorizationError(
                self.current_user.id,
                'analysis/results/%d-delete' % analysis_id)

        analysis = Analysis(analysis_id)
        # Read the name before deleting so it can be shown afterwards
        analysis_name = analysis.name
        check_analysis_access(self.current_user, analysis)

        level = "success"
        try:
            Analysis.delete(analysis_id)
            msg = "Analysis <b><i>%s</i></b> has been deleted." % (
                analysis_name)
        except Exception as e:
            reason = str(e)
            level = "danger"
            msg = "Couldn't remove <b><i>%s</i></b> analysis: %s" % (
                analysis_name, reason)
            LogEntry.create(
                'Runtime',
                "Couldn't remove analysis ID %d: %s" % (analysis_id, reason))

        target = u"/analysis/show/?level=%s&message=%s" % (level, msg)
        self.redirect(target)
示例#3
0
    def post(self):
        """Generate a password reset code and email it to the user

        Reads the ``email`` request argument. If no user exists for it, the
        lost password page is rendered again with an error. Otherwise a
        reset code is generated and emailed; a failure to send the email is
        logged and reported on the lost password page.
        """
        message = ""
        level = ""
        page = "lost_pass.html"
        user_id = None

        try:
            user = User(self.get_argument("email"))
        except QiitaDBUnknownIDError:
            message = "ERROR: Unknown user."
            level = "danger"
        else:
            user_id = user.id
            user.generate_reset_code()
            # Read the info after generating the code so it holds the new one
            info = user.info
            try:
                send_email(user.id, "Qiita: Password Reset", "Please go to "
                           "the following URL to reset your password: "
                           "http://qiita.colorado.edu/auth/reset/%s" %
                           info["pass_reset_code"])
                message = ("Check your email for the reset code.")
                level = "success"
                page = "index.html"
            except Exception as e:
                message = ("Unable to send email. Error has been registered. "
                           "Your password has not been reset.")
                level = "danger"
                LogEntry.create('Runtime', "Unable to send forgot password "
                                "email: %s" % str(e), info={'User': user.id})

        self.render(page, user=user_id, message=message, level=level)
示例#4
0
文件: ebi.py 项目: ryanusahk/qiita
    def _generate_demultiplexed_fastq_demux(self, mtime):
        """Write one gzipped file per sample from the artifact's demux file

        Parameters
        ----------
        mtime : object
            Passed through as the ``mtime`` argument of ``GzipFile``

        Returns
        -------
        set
            The samples for which at least one record was written

        Raises
        ------
        EBISubmissionError
            If the opened demux handle is not a ``File`` instance
        """
        # An artifact holds a single `preprocessed_demux` file, so the first
        # match is the only one present
        demux_candidates = [fp for _, fp, fp_type in self.artifact.filepaths
                            if fp_type == 'preprocessed_demux']
        demux = demux_candidates[0]

        samples_with_seqs = set()
        with open_file(demux) as demux_fh:
            if not isinstance(demux_fh, File):
                error_msg = "'%s' doesn't look like a demux file" % demux
                LogEntry.create('Runtime', error_msg)
                raise EBISubmissionError(error_msg)

            per_sample = to_per_sample_ascii(demux_fh,
                                             self.prep_template.keys())
            for sample, records in per_sample:
                out_fp = self.sample_demux_fps[sample]
                n_written = 0
                with GzipFile(out_fp, mode='w', mtime=mtime) as gz_fh:
                    for record in records:
                        gz_fh.write(record)
                        n_written += 1

                if n_written:
                    samples_with_seqs.add(sample)
                    continue

                # Nothing was written for this sample: forget about it and
                # remove the empty file that was just created
                del self.samples[sample]
                del self.samples_prep[sample]
                del self.sample_demux_fps[sample]
                remove(out_fp)
        return samples_with_seqs
示例#5
0
    def post(self):
        """Generate a password reset code and email the reset URL

        Reads the ``email`` request argument. An unknown user gets the lost
        password page with an error. Otherwise a reset code is generated and
        emailed; on success the index page is rendered, on failure the error
        is logged and the lost password page is rendered.
        """
        try:
            user = User(self.get_argument("email"))
        except QiitaDBUnknownIDError:
            # Nothing else to do without a user
            self.render("lost_pass.html", user=None,
                        message="ERROR: Unknown user.", level="danger")
            return

        user_id = user.id
        user.generate_reset_code()
        info = user.info
        try:
            send_email(
                user.id, "Qiita: Password Reset",
                "Please go to "
                "the following URL to reset your password: \n"
                "%s/auth/reset/%s  \nYou "
                "have 30 minutes from the time you requested a "
                "reset to change your password. After this period, "
                "you will have to request another reset." %
                (qiita_config.base_url, info["pass_reset_code"]))
        except Exception as e:
            page = "lost_pass.html"
            level = "danger"
            message = ("Unable to send email. Error has been registered. "
                       "Your password has not been reset.")
            LogEntry.create('Runtime', "Unable to send forgot password "
                            "email: %s" % str(e), info={'User': user.id})
        else:
            page = "index.html"
            level = "success"
            message = "Check your email for the reset code."

        self.render(page, user=user_id, message=message, level=level)
示例#6
0
    def get_filepaths(self):
        r"""Retrieves the list of (filepath_id, filepath)

        Returns
        -------
        list of tuple
            The (filepath_id, filepath) pairs attached to this object,
            ordered by descending filepath_id. Each filepath is joined to
            the first 'templates' mountpoint.

        Raises
        ------
        Exception
            Any error raised by the query is logged and then re-raised
        """
        # Check that this function has been called from a subclass
        self._check_subclass()

        conn_handler = SQLConnectionHandler()

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(
                    self._filepath_table, self._id_column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            # Bare raise keeps the traceback of the original failure
            raise

        _, fb = get_mountpoint('templates')[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
示例#7
0
    def write_error(self, status_code, **kwargs):
        '''Render the custom error page and record the failure in the log

        Overrides the error page created by Tornado.

        Parameters
        ----------
        status_code : int
            The HTTP status code being reported
        kwargs : dict
            Extra keyword arguments forwarded to this method. ``exc_info``,
            when present, is passed to ``traceback.format_exception`` and
            its second element is logged as the error.
        '''
        if status_code == 404:
            # just use the 404 page as the error
            self.render("404.html")
            return

        is_admin = False
        user = self.get_current_user()
        if user:
            try:
                is_admin = user.level == 'admin'
            except Exception:
                # Any issue with this check leaves default as not admin
                pass

        # render error page
        self.render('error.html', status_code=status_code, is_admin=is_admin)

        # log the error
        from traceback import format_exception
        # Not every error comes with an exception attached, so the key may
        # be missing
        exc_info = kwargs.get("exc_info")
        if exc_info:
            error = exc_info[1]
            trace_info = ''.join(["%s\n" % line for line in
                                 format_exception(*exc_info)])
        else:
            error = ('HTTP %s (no exception information available)' %
                     status_code)
            trace_info = ''

        # Copy the request attributes so the trimmed body only affects the
        # log entry and not the request object itself
        req_dict = dict(self.request.__dict__)
        # must trim body to 1024 chars to prevent huge error messages
        req_dict['body'] = (req_dict.get('body') or '')[:1024]
        request_info = ''.join(["<strong>%s</strong>: %s\n" %
                               (k, req_dict[k]) for k in
                                req_dict.keys()])
        LogEntry.create(
            'Runtime',
            'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' %
            (error, trace_info, request_info))
示例#8
0
文件: ebi.py 项目: josenavas/QiiTa
    def _generate_demultiplexed_fastq_demux(self, mtime):
        """Split the artifact's demux file into gzipped per-sample files

        Parameters
        ----------
        mtime : object
            Forwarded as the ``mtime`` argument of ``GzipFile``

        Returns
        -------
        set
            The samples that had at least one record written

        Raises
        ------
        EBISubmissionError
            If the opened demux handle is not a ``File`` instance
        """
        # Only one `preprocessed_demux` file is expected per artifact, hence
        # taking the first match
        matching = [path for _, path, ftype in self.artifact.filepaths
                    if ftype == 'preprocessed_demux']
        demux = matching[0]

        kept = set()
        with open_file(demux) as demux_fh:
            if not isinstance(demux_fh, File):
                error_msg = "'%s' doesn't look like a demux file" % demux
                LogEntry.create('Runtime', error_msg)
                raise EBISubmissionError(error_msg)

            sample_ids = self.prep_template.keys()
            for sample_id, seqs in to_per_sample_ascii(demux_fh, sample_ids):
                gz_path = self.sample_demux_fps[sample_id]
                has_records = False
                with GzipFile(gz_path, mode='w', mtime=mtime) as out:
                    for seq in seqs:
                        out.write(seq)
                        has_records = True

                if not has_records:
                    # Empty sample: drop its bookkeeping and the empty file
                    del self.samples[sample_id]
                    del self.samples_prep[sample_id]
                    del self.sample_demux_fps[sample_id]
                    remove(gz_path)
                else:
                    kept.add(sample_id)
        return kept
示例#9
0
    def write_error(self, status_code, **kwargs):
        '''Render the custom error page and record the failure in the log

        Overrides the error page created by Tornado.

        Parameters
        ----------
        status_code : int
            The HTTP status code being reported
        kwargs : dict
            Extra keyword arguments forwarded to this method. ``exc_info``,
            when present, is passed to ``traceback.format_exception`` and
            its second element is logged as the error.
        '''
        if status_code == 404:
            # just use the 404 page as the error
            self.render("404.html", user=self.current_user)
            return

        is_admin = False
        if self.current_user:
            try:
                is_admin = User(self.current_user).level == 'admin'
            except Exception:
                # A failing user lookup must not break the error page
                # itself; default to not admin
                pass

        # render error page
        self.render('error.html', user=self.current_user,
                    status_code=status_code, is_admin=is_admin)

        # log the error
        from traceback import format_exception
        # exc_info is only supplied when an exception triggered the error
        exc_info = kwargs.get("exc_info")
        if exc_info:
            trace_info = ''.join(["%s\n" % line for line in
                                 format_exception(*exc_info)])
            error = exc_info[1]
        else:
            trace_info = ''
            error = ('HTTP %s (no exception information available)' %
                     status_code)

        req_dict = self.request.__dict__
        request_info = ''.join(["<strong>%s</strong>: %s\n" %
                               (k, req_dict[k]) for k in req_dict.keys()])
        LogEntry.create(
            'Runtime',
            'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' %
            (error, trace_info, request_info))
示例#10
0
    def post(self, analysis_id):
        """Remove an analysis on behalf of the current user

        Parameters
        ----------
        analysis_id : str
            The id captured from the URL; the text after the first "/" is
            discarded

        Raises
        ------
        QiitaPetAuthorizationError
            If the submitted id differs from the URL id or the action is
            not 'delete_analysis'
        """
        analysis_id = int(analysis_id.split("/")[0])
        analysis_id_sent = int(self.get_argument('analysis_id'))
        action = self.get_argument('action')

        ids_differ = analysis_id != analysis_id_sent
        if ids_differ or action != 'delete_analysis':
            resource = 'analysis/results/%d-delete' % analysis_id
            raise QiitaPetAuthorizationError(self.current_user.id, resource)

        analysis = Analysis(analysis_id)
        # Keep the name around; it is shown to the user after the deletion
        analysis_name = analysis.name
        check_analysis_access(self.current_user, analysis)

        try:
            Analysis.delete(analysis_id)
            level, msg = "success", (
                "Analysis <b><i>%s</i></b> has been deleted." %
                (analysis_name))
        except Exception as e:
            error_text = str(e)
            level, msg = "danger", (
                "Couldn't remove <b><i>%s</i></b> analysis: %s" %
                (analysis_name, error_text))
            log_msg = "Couldn't remove analysis ID %d: %s" % (
                analysis_id, error_text)
            LogEntry.create('Runtime', log_msg)

        self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
示例#11
0
    def get_filepaths(self, conn_handler=None):
        r"""Retrieves the list of (filepath_id, filepath)

        Parameters
        ----------
        conn_handler : SQLConnectionHandler, optional
            The connection handler to use. A new one is created if it is
            not provided.

        Returns
        -------
        list of tuple
            The (filepath_id, filepath) pairs attached to this object,
            ordered by descending filepath_id. Each filepath is joined to
            the first 'templates' mountpoint.

        Raises
        ------
        QiitaDBNotImplementedError
            If ``self._table`` is neither 'required_sample_info' nor
            'common_prep_info'
        Exception
            Any error raised by the query is logged and then re-raised
        """
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

        if self._table == 'required_sample_info':
            table = 'sample_template_filepath'
            column = 'study_id'
        elif self._table == 'common_prep_info':
            table = 'prep_template_filepath'
            column = 'prep_template_id'
        else:
            raise QiitaDBNotImplementedError(
                'get_filepath for %s' % self._table)

        try:
            filepath_ids = conn_handler.execute_fetchall(
                "SELECT filepath_id, filepath FROM qiita.filepath WHERE "
                "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
                "{1}=%s) ORDER BY filepath_id DESC".format(table, column),
                (self.id, ))
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            # Bare raise keeps the traceback of the original failure
            raise

        _, fb = get_mountpoint('templates', conn_handler)[0]
        base_fp = partial(join, fb)

        return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
示例#12
0
    def _failure_callback(self, msg=None):
        """Mark the preprocessed data as failed and log the failure

        Parameters
        ----------
        msg : str, optional
            The failure message; it is stored in the processing status and
            written to a 'Fatal' log entry
        """
        failed_status = 'failed: %s' % msg
        self.preprocessed_data.processing_status = failed_status

        log_info = {'preprocessed_data': self.preprocessed_data.id}
        LogEntry.create('Fatal', msg, info=log_info)
示例#13
0
 def test_create_log_entry(self):
     """Entries are created for severities 1-3; severity 4 is rejected"""
     LogEntry.create(2, 'runtime message')
     LogEntry.create(3, 'fatal message', info={1: 2})
     LogEntry.create(1, 'warning message', info={9: 0})
     with self.assertRaises(QiitaDBExecutionError):
         # This severity level does not exist in the test schema
         LogEntry.create(4, 'warning message', info={9: 0})
示例#14
0
    def _failure_callback(self, msg=None):
        """Record that one of the job nodes failed

        The preprocessed data processing status is set to 'failed: <msg>'
        and a 'Fatal' log entry is created with the same message.

        Parameters
        ----------
        msg : str, optional
            Description of the failure
        """
        self.preprocessed_data.processing_status = 'failed: %s' % msg
        # Attach the id of the affected preprocessed data to the log entry
        details = dict(preprocessed_data=self.preprocessed_data.id)
        LogEntry.create('Fatal', msg, info=details)
示例#15
0
    def get(self, ignore):
        """Search the studies and write the result table as compact JSON

        Reads the ``user``, ``query`` and ``sEcho`` request arguments. An
        empty query lists the studies without any search filtering.

        Parameters
        ----------
        ignore : object
            Not used by this method

        Raises
        ------
        HTTPError
            403 if the ``user`` argument is not the current user's id
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')
        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # Join the exception arguments explicitly: iterating the
                # exception object itself is not supported on Python 3
                searchmsg = ''.join('%s' % (arg,) for arg in e.args)
                self.write(searchmsg)
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime',
                                str(e),
                                info={
                                    'User': self.current_user.id,
                                    'query': query
                                })
                return
        else:
            study_proc = proc_samples = None
        info = _build_study_info(self.current_user,
                                 study_proc=study_proc,
                                 proc_samples=proc_samples)
        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len(info),
            "iTotalDisplayRecords": len(info),
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
示例#16
0
    def _failure_callback(self, msg=None):
        """Flag the analysis and its unfinished jobs as errored

        Parameters
        ----------
        msg : str, optional
            The failure message written to the 'Runtime' log entry
        """
        self.analysis.status = 'error'

        # Notify the status listener, if one was registered
        notify = self._update_status
        if notify is not None:
            notify("Failed")

        # Jobs that already finished, one way or the other, are left alone
        finished = ('error', 'completed')
        for job in self.analysis.jobs:
            if job.status in finished:
                continue
            job.status = 'error'

        log_info = {'analysis': self.analysis.id}
        LogEntry.create('Runtime', msg, info=log_info)
示例#17
0
文件: artifact.py 项目: jlab/qiita
def artifact_status_put_req(artifact_id, user_id, visibility):
    """Set the status of the artifact given

    Parameters
    ----------
    artifact_id : int
        Artifact being acted on
    user_id : str
        The user requesting the action
    visibility : {'sandbox', 'awaiting_approval', 'private', 'public'}
        What to change the visibility to

    Returns
    -------
    dict
        Status of action, in the form {'status': status, 'message': msg}
        status: status of the action, either success or error
        message: Human readable message for status
    """
    if visibility not in get_visibilities():
        return {'status': 'error',
                'message': 'Unknown visibility value: %s' % visibility}

    artifact = Artifact(int(artifact_id))
    study_id = artifact.study.id
    access_error = check_access(study_id, user_id)
    if access_error:
        return access_error

    user = User(str(user_id))
    result = {'status': 'success',
              'message': 'Artifact visibility changed to %s' % visibility}

    # Making an artifact private needs an admin only when the system is
    # configured to require approval; every other change is applied directly
    needs_admin = visibility == 'private' and qiita_config.require_approval
    if needs_admin and user.level != 'admin':
        # Trying to set approval without admin privileges
        result = {'status': 'error',
                  'message': 'User does not have permissions to approve '
                             'change'}
    else:
        artifact.visibility = visibility

    # The request is logged whether or not the change was applied
    LogEntry.create('Warning', '%s changed artifact %s (study %d) to %s' % (
        user_id, artifact_id, study_id, visibility))

    return result
示例#18
0
    def _failure_callback(self, msg=None):
        """Handle a failed run: error out the analysis and pending jobs

        Parameters
        ----------
        msg : str, optional
            The failure message written to the 'Runtime' log entry
        """
        terminal_states = {'error', 'completed'}

        # set the analysis to errored
        self.analysis.status = 'error'

        status_callback = self._update_status
        if status_callback is not None:
            status_callback("Failed")

        # any job that did not reach a terminal state is errored as well
        for pending in self.analysis.jobs:
            if pending.status not in terminal_states:
                pending.status = 'error'

        LogEntry.create('Runtime', msg,
                        info=dict(analysis=self.analysis.id))
示例#19
0
    def post(self):
        """Handle the profile and password forms of the user profile page

        The ``action`` request argument selects the operation:

        - ``profile``: store the submitted form values in ``user.info``
        - ``password``: change the password using the ``oldpass`` and
          ``newpass`` request arguments

        Any other action changes nothing and renders the page with the
        stored profile.
        """
        passmsg = ""
        msg = ""
        user = self.current_user
        action = self.get_argument("action")
        if action == "profile":
            # tuple of colmns available for profile
            # FORM INPUT NAMES MUST MATCH DB COLUMN NAMES
            form_data = UserProfile()
            form_data.process(data=self.request.arguments)
            profile = {
                name: data[0]
                for name, data in viewitems(form_data.data)
            }

            # Turn default value as list into default strings
            for field in form_data:
                field.data = field.data[0]
            try:
                user.info = profile
                msg = "Profile updated successfully"
            except Exception as e:
                msg = "ERROR: profile could not be updated"
                LogEntry.create('Runtime',
                                "Cound not update profile: %s" % str(e),
                                info={'User': user.id})

        elif action == "password":
            form_data = UserProfile()
            form_data.process(data=user.info)
            oldpass = self.get_argument("oldpass")
            newpass = self.get_argument("newpass")
            try:
                changed = user.change_password(oldpass, newpass)
            except Exception as e:
                passmsg = "ERROR: could not change password"
                LogEntry.create('Runtime',
                                "Could not change password: %s" % str(e),
                                info={'User': user.id})
            else:
                if changed:
                    passmsg = "Password changed successfully"
                else:
                    passmsg = "Incorrect old password"
        else:
            # Unknown action: without this branch form_data would be
            # undefined and the render below would raise a NameError
            form_data = UserProfile()
            form_data.process(data=user.info)

        self.render("user_profile.html",
                    user=user.id,
                    profile=form_data,
                    msg=msg,
                    passmsg=passmsg)
示例#20
0
    def get(self, ignore):
        """Search the studies and write the result table as compact JSON

        Reads the ``user``, ``query`` and ``sEcho`` request arguments. An
        empty query lists the studies without any search filtering.

        Parameters
        ----------
        ignore : object
            Not used by this method

        Raises
        ------
        HTTPError
            403 if the ``user`` argument is not the current user's id
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')
        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # Build the message from the exception arguments; joining
                # the exception object directly fails on Python 3
                searchmsg = ''.join('%s' % (arg,) for arg in e.args)
                self.write(searchmsg)
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime', str(e),
                                info={'User': self.current_user.id,
                                      'query': query})
                return
        else:
            study_proc = proc_samples = None
        info = _build_study_info(self.current_user, study_proc=study_proc,
                                 proc_samples=proc_samples)
        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len(info),
            "iTotalDisplayRecords": len(info),
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
示例#21
0
 def test_create_log_entry(self):
     """Known severities create entries; an unknown severity raises"""
     LogEntry.create('Runtime', 'runtime message')
     LogEntry.create('Fatal', 'fatal message', info={1: 2})
     LogEntry.create('Warning', 'warning message', info={9: 0})

     # This severity level does not exist in the test schema
     unknown_severity = 'Chicken'
     with self.assertRaises(IncompetentQiitaDeveloperError):
         LogEntry.create(unknown_severity, 'warning message', info={9: 0})
示例#22
0
文件: ebi.py 项目: jwdebelius/qiita
    def send_xml(self):
        # Send the XML files
        curl_command = self.generate_curl_command()
        curl_command_parts = shsplit(curl_command)
        temp_fd, temp_fp = mkstemp()
        call(curl_command_parts, stdout=temp_fd)
        close(temp_fd)

        with open(temp_fp, 'U') as curl_output_f:
            curl_result = curl_output_f.read()

        study_accession = None
        submission_accession = None

        if 'success="true"' in curl_result:
            LogEntry.create('Runtime', curl_result)

            print curl_result
            print "SUCCESS"

            accessions = search(
                '<STUDY accession="(?P<study>.+?)".*?'
                '<SUBMISSION accession="(?P<submission>.+?)"', curl_result)
            if accessions is not None:
                study_accession = accessions.group('study')
                submission_accession = accessions.group('submission')

                LogEntry.create('Runtime',
                                "Study accession:\t%s" % study_accession)
                LogEntry.create(
                    'Runtime',
                    "Submission accession:\t%s" % submission_accession)

                print "Study accession:\t", study_accession
                print "Submission accession:\t", submission_accession
            else:
                LogEntry.create('Runtime', ("However, the accession numbers "
                                            "could not be found in the output "
                                            "above."))
                print(
                    "However, the accession numbers could not be found in "
                    "the output above.")
        else:
            LogEntry.create('Fatal', curl_result)
            print curl_result
            print "FAILED"

        return (study_accession, submission_accession)
示例#23
0
 def test_time_property(self):
     """The entry time lies between DB timestamps taken around creation"""
     timestamp_sql = "SELECT localtimestamp"
     fetch_one = self.conn_handler.execute_fetchone

     before = fetch_one(timestamp_sql)[0]
     log_entry = LogEntry.create('Warning', 'warning test', info=None)
     after = fetch_one(timestamp_sql)[0]

     self.assertTrue(before < log_entry.time < after)
示例#24
0
def execute(job_id):
    """Launch the plugin that runs the given processing job

    Parameters
    ----------
    job_id : str
        The id of the job to execute

    Notes
    -----
    When the launcher command returns a non-zero value, the job status is
    set to 'error' and a 'Runtime' log entry holding the captured standard
    output and standard error is attached to the job.
    """
    job = ProcessingJob(job_id)
    work_dir = join(get_work_base_dir(), job.id)
    plugin = job.command.software
    start_script = plugin.start_script
    env_script = plugin.environment_script

    # Launcher followed by its five double-quoted arguments
    launch_args = (qiita_config.plugin_launcher, env_script, start_script,
                   qiita_config.base_url, job.id, work_dir)
    cmd = '%s "%s" "%s" "%s" "%s" "%s"' % launch_args

    std_out, std_err, return_value = system_call(cmd)
    if return_value == 0:
        # The plugin was started, nothing else to do here
        return

    # Something wrong happened during the plugin start procedure
    job.status = 'error'
    failure_details = (plugin.name, std_out, std_err)
    job.log = LogEntry.create(
        'Runtime',
        "Error starting plugin '%s':\nStd output:%s\nStd error:%s"
        % failure_details)
示例#25
0
 def test_time_property(self):
     """Verify `LogEntry.time` against timestamps taken around creation"""
     def db_now():
         # Ask the database for its current local timestamp
         return self.conn_handler.execute_fetchone(
             "SELECT localtimestamp")[0]

     started = db_now()
     entry = LogEntry.create('Warning', 'warning test', info=None)
     finished = db_now()
     self.assertTrue(started < entry.time < finished)
示例#26
0
 def test_create_log_entry(self):
     """Known severities create entries; an unknown severity raises"""
     accepted = (
         ('Runtime', 'runtime message', {}),
         ('Fatal', 'fatal message', {'info': {1: 2}}),
         ('Warning', 'warning message', {'info': {9: 0}}),
     )
     for severity, message, extra in accepted:
         LogEntry.create(severity, message, **extra)

     # This severity level does not exist in the test schema
     self.assertRaises(IncompetentQiitaDeveloperError, LogEntry.create,
                       'Chicken', 'warning message', info={9: 0})
示例#27
0
    def send_xml(self):
        """Send the XML files with curl and extract the accession numbers

        The command returned by `self.generate_curl_command()` is executed
        with its standard output redirected to a temporary file. The output
        is logged and printed, and searched for the study and submission
        accessions.

        Returns
        -------
        tuple of (str or None, str or None)
            The study accession and the submission accession. Both are None
            when the output does not contain 'success="true"' or when the
            accessions cannot be found in it.
        """
        # Imported locally so the clean-up below does not depend on `remove`
        # being part of the module-level imports
        from os import remove

        # Send the XML files
        curl_command = self.generate_curl_command()
        curl_command_parts = shsplit(curl_command)
        temp_fd, temp_fp = mkstemp()
        try:
            try:
                call(curl_command_parts, stdout=temp_fd)
            finally:
                # Release the descriptor even if curl could not be launched
                close(temp_fd)

            with open(temp_fp, 'U') as curl_output_f:
                curl_result = curl_output_f.read()
        finally:
            # The temporary file is only a transport for curl's output;
            # mkstemp never deletes it, so remove it here
            remove(temp_fp)

        study_accession = None
        submission_accession = None

        # NOTE: every print below uses a single parenthesized argument so the
        # output is the same under the Python 2 print statement and the
        # Python 3 print function
        if 'success="true"' in curl_result:
            LogEntry.create('Runtime', curl_result)

            print(curl_result)
            print("SUCCESS")

            accessions = search('<STUDY accession="(?P<study>.+?)".*?'
                                '<SUBMISSION accession="(?P<submission>.+?)"',
                                curl_result)
            if accessions is not None:
                study_accession = accessions.group('study')
                submission_accession = accessions.group('submission')

                LogEntry.create('Runtime', "Study accession:\t%s" %
                                study_accession)
                LogEntry.create('Runtime', "Submission accession:\t%s" %
                                submission_accession)

                print("Study accession:\t%s" % study_accession)
                print("Submission accession:\t%s" % submission_accession)
            else:
                LogEntry.create('Runtime', ("However, the accession numbers "
                                            "could not be found in the output "
                                            "above."))
                print("However, the accession numbers could not be found in "
                      "the output above.")
        else:
            LogEntry.create('Fatal', curl_result)
            print(curl_result)
            print("FAILED")

        return (study_accession, submission_accession)
示例#28
0
    def add_filepath(self, filepath, fp_id=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath

        Parameters
        ----------
        filepath : str
            The filepath to store
        fp_id : optional
            Value paired with `filepath` when calling `insert_filepaths`
            (presumably the filepath type id - TODO confirm). When None,
            `self._fp_id` is used.

        Raises
        ------
        Exception
            Any error raised while storing the filepath is recorded in a
            'Runtime' log entry and then re-raised
        """
        with TRN:
            fp_id = self._fp_id if fp_id is None else fp_id

            try:
                fpp_id = insert_filepaths(
                    [(filepath, fp_id)], None, "templates", "filepath",
                    move_files=False)[0]
                sql = """INSERT INTO qiita.{0} ({1}, filepath_id)
                         VALUES (%s, %s)""".format(
                    self._filepath_table, self._id_column
                )
                TRN.add(sql, [self._id, fpp_id])
                TRN.execute()
            except Exception as e:
                LogEntry.create("Runtime", str(e),
                                info={self.__class__.__name__: self.id})
                # Bare raise re-raises the active exception with its
                # original traceback
                raise
示例#29
0
 def test_add_info(self):
     """`add_info` appends a second dict after the one given at creation"""
     entry = LogEntry.create('Warning', 'warning test',
                             info={1: 2, 'test': 'yeah'})
     entry.add_info({'another': 'set', 'of': 'entries', 'test': 3})

     # The integer key of the first dict is expected back as a string
     first = {'1': 2, 'test': 'yeah'}
     second = {'another': 'set', 'of': 'entries', 'test': 3}
     self.assertEqual(entry.info, [first, second])
示例#30
0
 def test_info_property(self):
     """`info` is a one-element list holding the dict given at creation"""
     creation_info = {1: 2, 'test': 'yeah'}
     entry = LogEntry.create('Warning', 'warning test', info=creation_info)
     # The integer key is expected back as the string '1'
     expected = [{'1': 2, 'test': 'yeah'}]
     self.assertEqual(entry.info, expected)
示例#31
0
 def test_clear_info(self):
     """After `clear_info` the entry reports an empty info list"""
     creation_info = {1: 2, 'test': 'yeah'}
     entry = LogEntry.create('Warning', 'warning test', info=creation_info)
     entry.clear_info()
     self.assertEqual(entry.info, [])
示例#32
0
    def post(self):
        """Process the profile or password form and render the profile page

        The "action" argument selects the operation:

        - "profile": stores the submitted form values as the user's info
        - "password": changes the password using the "oldpass" and "newpass"
          arguments

        Any other action renders the page with an empty form and no message.
        Failures are reported to the user through `msg`/`passmsg` and
        recorded in a 'Runtime' log entry.
        """
        passmsg = ""
        msg = ""
        user = self.current_user
        action = self.get_argument("action")
        # Create the form up front: the template is rendered for every
        # request, so `form_data` must exist even when `action` is neither
        # "profile" nor "password"
        form_data = UserProfile()
        if action == "profile":
            # tuple of columns available for profile
            # FORM INPUT NAMES MUST MATCH DB COLUMN NAMES
            form_data.process(data=self.request.arguments)
            profile = {name: data[0] for name, data in
                       viewitems(form_data.data)}

            # Turn default value as list into default strings
            for field in form_data:
                field.data = field.data[0]
            try:
                user.info = profile
                msg = "Profile updated successfully"
            except Exception as e:
                msg = "ERROR: profile could not be updated"
                LogEntry.create('Runtime', "Cound not update profile: %s" %
                                str(e), info={'User': user.id})

        elif action == "password":
            form_data.process(data=user.info)
            oldpass = self.get_argument("oldpass")
            newpass = self.get_argument("newpass")
            try:
                changed = user.change_password(oldpass, newpass)
            except Exception as e:
                passmsg = "ERROR: could not change password"
                LogEntry.create('Runtime', "Could not change password: %s" %
                                str(e), info={'User': user.id})
            else:
                if changed:
                    passmsg = "Password changed successfully"
                else:
                    passmsg = "Incorrect old password"
        self.render("user_profile.html", user=user.id, profile=form_data,
                    msg=msg, passmsg=passmsg)
示例#33
0
 def post(self):
     """Generate a password reset code and email the reset URL to the user

     The user is looked up through the "email" argument. The outcome
     (unknown user, email sent, email failure) is passed to the
     "lost_pass.html" template as `error`.
     """
     error = ""
     try:
         user = User(self.get_argument("email"))
     except QiitaDBUnknownIDError:
         error = "ERROR: Unknown user."
     else:
         user.generate_reset_code()
         info = user.info
         try:
             # Adjacent literals are concatenated before `%` is applied
             send_email(user, "QIITA: Password Reset", "Please go to the "
                        "following URL to reset your password: "
                        "http://qiita.colorado.edu/auth/reset/%s" %
                        info["pass_reset_code"])
             error = "Password reset. Check your email for the reset code."
         except Exception as e:
             error = "Unable to send email."
             # The message needs a placeholder for the error text; without
             # it the `%` operation itself raises TypeError in this handler
             LogEntry.create('Runtime', "Unable to send forgot password "
                             "email: %s" % str(e), info={'User': user.id})
     self.render("lost_pass.html", user=None, error=error)
示例#34
0
    def post(self):
        """Delete the requested analysis and redirect to the analysis list

        The analysis is identified by the "analysis_id" argument. The
        redirect URL carries a level ("success" or "danger") and a message
        describing the outcome; failures are also recorded in a 'Runtime'
        log entry.
        """
        analysis_id = int(self.get_argument('analysis_id'))
        analysis = Analysis(analysis_id)
        # Read the name before deleting so it can be used in the messages
        analysis_name = analysis.name

        check_analysis_access(self.current_user, analysis)

        try:
            Analysis.delete(analysis_id)
            outcome = (
                "success",
                "Analysis <b><i>%s</i></b> has been deleted." % analysis_name)
        except Exception as e:
            reason = str(e)
            outcome = (
                "danger",
                "Couldn't remove <b><i>%s</i></b> analysis: %s" % (
                    analysis_name, reason))
            LogEntry.create('Runtime', "Couldn't remove analysis ID %d: %s" %
                            (analysis_id, reason))

        level, msg = outcome
        self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
示例#35
0
    def post(self):
        """Remove an analysis and report the result through a redirect

        Reads "analysis_id" from the request, checks that the current user
        can access the analysis and deletes it. The user is redirected to
        "/analysis/show/" with the level and message of the outcome; a
        failed deletion is also stored as a 'Runtime' log entry.
        """
        redirect_url = u"/analysis/show/?level=%s&message=%s"

        analysis_id = int(self.get_argument('analysis_id'))
        analysis = Analysis(analysis_id)
        # The name is needed for the messages once the analysis is gone
        analysis_name = analysis.name

        check_analysis_access(self.current_user, analysis)

        try:
            Analysis.delete(analysis_id)
            msg = "Analysis <b><i>%s</i></b> has been deleted." % analysis_name
            level = "success"
        except Exception as e:
            details = str(e)
            msg = "Couldn't remove <b><i>%s</i></b> analysis: %s" % (
                analysis_name, details)
            level = "danger"
            log_text = "Couldn't remove analysis ID %d: %s" % (
                analysis_id, details)
            LogEntry.create('Runtime', log_text)

        self.redirect(redirect_url % (level, msg))
示例#36
0
文件: ebi.py 项目: ryanusahk/qiita
    def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
        """Stage one gzipped forward-read file per sample

        Each sample is matched to the first remaining 'raw_forward_seqs'
        file of the artifact whose basename starts with the sample's run
        prefix. The prefix comes from the 'run_prefix' category of the prep
        template when present; otherwise it is the sample name without the
        text up to its first '.'. Matched files are copied (when already
        '.gz') or gzipped to the path in `self.sample_demux_fps`.

        Returns
        -------
        set, set
            The samples that were matched to a file, and the keys of
            `self.samples` that were not

        Raises
        ------
        EBISubmissionError
            If gzip returns a non-zero value
            If some forward-read files are not matched to any sample
        """
        artifact = self.artifact
        forward_files = sorted(
            [(basename(path), path) for _, path, ftype in artifact.filepaths
             if ftype == 'raw_forward_seqs'],
            key=lambda pair: pair[1])

        if 'run_prefix' in self.prep_template.categories():
            prefixes = list(viewitems(
                self.prep_template.get_category('run_prefix')))
        else:
            prefixes = [(name, name.split('.', 1)[1])
                        for name in self.prep_template.keys()]
        prefixes.sort(key=lambda pair: pair[1])

        processed = set()
        for sample_name, prefix in prefixes:
            # Position of the first remaining file that matches the prefix
            match_idx = next(
                (idx for idx, (fname, _) in enumerate(forward_files)
                 if fname.startswith(prefix)), None)
            if match_idx is None:
                continue

            source_fp = forward_files[match_idx][1]
            processed.add(sample_name)
            target_fp = self.sample_demux_fps[sample_name]
            if source_fp.endswith('.gz'):
                copyfile(source_fp, target_fp)
            else:
                cmd = "gzip -c %s > %s" % (source_fp, target_fp)
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = (
                        "Error:\nStd output:%s\nStd error:%s" %
                        (stdout, stderr))
                    raise EBISubmissionError(error_msg)
            # Each file can only be assigned to a single sample
            forward_files.pop(match_idx)

        if forward_files:
            leftover = ', '.join([entry[0] for entry in forward_files])
            error_msg = (
                'Discrepancy between filepaths and sample names. Extra'
                ' filepaths: %s' % leftover)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        return processed, set(self.samples.keys()) - processed
示例#37
0
文件: ebi.py 项目: josenavas/QiiTa
    def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
        """Copy or gzip the forward-read file that belongs to each sample

        The artifact's 'raw_forward_seqs' files are assigned to samples by
        run prefix: a file belongs to the first sample (in prefix order)
        whose prefix starts the file's basename. Prefixes are read from the
        'run_prefix' category of the prep template if it exists; otherwise
        the part of the sample name after its first '.' is used.

        Returns
        -------
        set, set
            The samples for which a file was written, and the keys of
            `self.samples` without a file

        Raises
        ------
        EBISubmissionError
            If gzip returns a non-zero value
            If any forward-read file is left without a sample
        """
        def second_item(pair):
            return pair[1]

        def stage(src_fp, dest_fp):
            # Files that are already gzipped are copied as they are
            if src_fp.endswith('.gz'):
                copyfile(src_fp, dest_fp)
                return
            stdout, stderr, rv = system_call(
                "gzip -c %s > %s" % (src_fp, dest_fp))
            if rv != 0:
                raise EBISubmissionError(
                    "Error:\nStd output:%s\nStd error:%s"
                    % (stdout, stderr))

        ar = self.artifact
        candidates = [(basename(fp), fp) for _, fp, fpt in ar.filepaths
                      if fpt == 'raw_forward_seqs']
        candidates.sort(key=second_item)

        if 'run_prefix' in self.prep_template.categories():
            run_prefixes = list(viewitems(
                self.prep_template.get_category('run_prefix')))
        else:
            run_prefixes = [(sample, sample.split('.', 1)[1])
                            for sample in self.prep_template.keys()]
        run_prefixes.sort(key=second_item)

        staged = set()
        for sample, prefix in run_prefixes:
            for position, (name, path) in enumerate(candidates):
                if not name.startswith(prefix):
                    continue
                staged.add(sample)
                stage(path, self.sample_demux_fps[sample])
                # The file is consumed; leaving the loop right away makes
                # the in-place removal safe
                del candidates[position]
                break

        if candidates:
            error_msg = (
                'Discrepancy between filepaths and sample names. Extra'
                ' filepaths: %s' % ', '.join([c[0] for c in candidates]))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        not_staged = set(self.samples.keys()).difference(staged)
        return staged, not_staged
示例#38
0
    def get_filepaths(self):
        r"""Retrieves the list of (filepath_id, filepath)

        Returns
        -------
        list of tuples
            The (filepath_id, filepath) pairs linked to this object, ordered
            by descending filepath_id. Each filepath is joined to the base
            directory of the first 'templates' mountpoint.

        Raises
        ------
        Exception
            Any error raised while querying is recorded in a 'Runtime' log
            entry and then re-raised
        """
        with TRN:
            try:
                sql = """SELECT filepath_id, filepath
                         FROM qiita.filepath
                         WHERE filepath_id IN (
                            SELECT filepath_id FROM qiita.{0}
                            WHERE {1}=%s)
                         ORDER BY filepath_id DESC""".format(
                    self._filepath_table, self._id_column)

                TRN.add(sql, [self.id])
                filepath_ids = TRN.execute_fetchindex()
            except Exception as e:
                LogEntry.create('Runtime', str(e),
                                info={self.__class__.__name__: self.id})
                # Bare raise re-raises the active exception with its
                # original traceback
                raise

            _, fb = get_mountpoint('templates')[0]
            base_fp = partial(join, fb)

            return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
示例#39
0
 def post(self):
     """Start the password reset procedure for the submitted email

     A reset code is generated for the user identified by the "email"
     argument and the reset URL is emailed. The "lost_pass.html" template
     is rendered with the outcome in `error` (unknown user, email sent or
     email failure).
     """
     error = ""
     try:
         user = User(self.get_argument("email"))
     except QiitaDBUnknownIDError:
         error = "ERROR: Unknown user."
     else:
         user.generate_reset_code()
         info = user.info
         try:
             # Adjacent literals are concatenated before `%` is applied
             send_email(
                 user, "QIITA: Password Reset", "Please go to the "
                 "following URL to reset your password: "
                 "http://qiita.colorado.edu/auth/reset/%s" %
                 info["pass_reset_code"])
             error = "Password reset. Check your email for the reset code."
         except Exception as e:
             error = "Unable to send email."
             # The message needs a placeholder for the error text; without
             # it the `%` operation itself raises TypeError in this handler
             LogEntry.create('Runtime',
                             "Unable to send forgot password "
                             "email: %s" % str(e),
                             info={'User': user.id})
     self.render("lost_pass.html", user=None, error=error)
示例#40
0
    def add_filepath(self, filepath, fp_id=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath

        Parameters
        ----------
        filepath : str
            The filepath to store
        fp_id : optional
            Value paired with `filepath` when calling `insert_filepaths`
            (presumably the filepath type id - TODO confirm). When None,
            `self._fp_id` is used.

        Raises
        ------
        Exception
            Any error raised while storing the filepath is recorded in a
            'Runtime' log entry and then re-raised
        """
        # Check that this function has been called from a subclass
        self._check_subclass()

        # A new connection handler is always created for this operation
        conn_handler = SQLConnectionHandler()
        fp_id = self._fp_id if fp_id is None else fp_id

        try:
            fpp_id = insert_filepaths([(filepath, fp_id)], None,
                                      "templates", "filepath", conn_handler,
                                      move_files=False)[0]
            values = (self._id, fpp_id)
            conn_handler.execute(
                "INSERT INTO qiita.{0} ({1}, filepath_id) "
                "VALUES (%s, %s)".format(
                    self._filepath_table, self._id_column), values)
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            # Bare raise re-raises the active exception with its original
            # traceback
            raise
示例#41
0
    def add_filepath(self, filepath, conn_handler=None):
        r"""Populates the DB tables for storing the filepath and connects the
        `self` objects with this filepath

        Parameters
        ----------
        filepath : str
            The filepath to store
        conn_handler : SQLConnectionHandler, optional
            The connection handler to use. A new one is created when it is
            not provided.

        Raises
        ------
        QiitaDBNotImplementedError
            If `self._table` is neither 'required_sample_info' nor
            'common_prep_info'
        Exception
            Any error raised while storing the filepath is recorded in a
            'Runtime' log entry and then re-raised
        """
        # Check that this function has been called from a subclass
        self._check_subclass()

        # Check if the connection handler has been provided. Create a new
        # one if not.
        conn_handler = conn_handler if conn_handler else SQLConnectionHandler()

        # The table of the object decides the filepath type and the table
        # and column used to link the filepath
        if self._table == 'required_sample_info':
            fp_id = convert_to_id("sample_template", "filepath_type",
                                  conn_handler)
            table = 'sample_template_filepath'
            column = 'study_id'
        elif self._table == 'common_prep_info':
            fp_id = convert_to_id("prep_template", "filepath_type",
                                  conn_handler)
            table = 'prep_template_filepath'
            column = 'prep_template_id'
        else:
            raise QiitaDBNotImplementedError(
                'add_filepath for %s' % self._table)

        try:
            fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates",
                                      "filepath", conn_handler,
                                      move_files=False)[0]
            values = (self._id, fpp_id)
            conn_handler.execute(
                "INSERT INTO qiita.{0} ({1}, filepath_id) "
                "VALUES (%s, %s)".format(table, column), values)
        except Exception as e:
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            # Bare raise re-raises the active exception with its original
            # traceback
            raise
示例#42
0
 def test_add_info(self):
     """Info added with `add_info` is stored after the creation info"""
     entry = LogEntry.create('Warning', 'warning test',
                             info={1: 2, 'test': 'yeah'})
     entry.add_info({'another': 'set', 'of': 'entries', 'test': 3})

     # One dict per call, in call order; the integer key of the first
     # dict is expected back as a string
     expected = [
         {'1': 2, 'test': 'yeah'},
         {'another': 'set', 'of': 'entries', 'test': 3},
     ]
     self.assertEqual(entry.info, expected)
示例#43
0
文件: ebi.py 项目: josenavas/QiiTa
    def parse_EBI_reply(self, curl_result):
        """Extract the accession numbers from the reply sent by EBI

        Parameters
        ----------
        curl_result : str
            The reply sent by EBI after sending XML files

        Returns
        -------
        str
            The study accession number, or None if the reply has no STUDY
            tag
        dict of {str: str}
            The sample accession numbers, keyed by sample id
        dict of {str: str}
            The biosample accession numbers, keyed by sample id
        dict of {str: str}
            The experiment accession numbers, keyed by sample id
        dict of {str: str}
            The run accession numbers, keyed by sample id

        Raises
        ------
        EBISubmissionError
            If curl_result is not a valid XML file
            If the EBI submission has not been successful
            If multiple study tags are found in the curl result
        """
        try:
            receipt = ET.fromstring(curl_result)
        except ParseError:
            # Keep the full reply in the log and only give the log id to
            # the caller
            error_msg = ("The curl result from the EBI submission doesn't "
                         "look like an XML file:\n%s" % curl_result)
            le = LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(
                "The curl result from the EBI submission doesn't look like "
                "an XML file. Contact and admin for more information. "
                "Log id: %d" % le.id)

        if receipt.get('success') != 'true':
            raise EBISubmissionError("The EBI submission failed:\n%s"
                                     % curl_result)

        studies = receipt.findall("STUDY")
        if len(studies) > 1:
            raise EBISubmissionError(
                "Multiple study tags found in EBI reply: %d"
                % len(studies))
        study_accession = studies[0].get('accession') if studies else None

        def accessions_by_alias(tag, alias_to_id):
            # Map the translated alias of every `tag` element to the
            # accession of that element
            return {alias_to_id[node.get('alias')]: node.get('accession')
                    for node in receipt.iter(tag)}

        sample_accessions = {}
        biosample_accessions = {}
        for sample in receipt.iter("SAMPLE"):
            sample_id = self._sample_aliases[sample.get('alias')]
            sample_accessions[sample_id] = sample.get('accession')
            # The biosample accession is read from the EXT_ID child
            biosample = sample.find('EXT_ID')
            biosample_accessions[sample_id] = biosample.get('accession')

        experiment_accessions = accessions_by_alias(
            "EXPERIMENT", self._experiment_aliases)
        run_accessions = accessions_by_alias("RUN", self._run_aliases)

        return (study_accession, sample_accessions, biosample_accessions,
                experiment_accessions, run_accessions)
示例#44
0
def artifact_patch_request(user, artifact_id, req_op, req_path,
                           req_value=None, req_from=None):
    """Modifies an attribute of the artifact

    Parameters
    ----------
    user : qiita_db.user.User
        The user performing the patch operation
    artifact_id : int
        Id of the artifact in which the patch operation is being performed
    req_op : str
        The operation to perform on the artifact
    req_path : str
        The attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element. Not used by this function.

    Raises
    ------
    QiitaHTTPError
        If `req_op` != 'replace'
        If the path parameter is incorrect
        If missing req_value
        If the attribute to replace is not known
        If the visibility value is unknown, cannot be approved by the user
        or cannot be set
        If the email to the admins cannot be sent
    """
    if req_op != 'replace':
        raise QiitaHTTPError(
            400, 'Operation "%s" not supported. Current '
            'supported operations: replace' % req_op)

    # The path must name exactly one attribute
    path_parts = [part for part in req_path.split('/') if part]
    if len(path_parts) != 1:
        raise QiitaHTTPError(404, 'Incorrect path parameter')
    attribute = path_parts[0]

    # Check if the user actually has access to the artifact
    artifact = Artifact(artifact_id)
    check_artifact_access(user, artifact)

    if not req_value:
        raise QiitaHTTPError(404, 'Missing value to replace')

    if attribute == 'name':
        artifact.name = req_value
        return

    if attribute != 'visibility':
        # We don't understand the attribute so return an error
        raise QiitaHTTPError(
            404, 'Attribute "%s" not found. Please, '
            'check the path parameter' % attribute)

    if req_value not in get_visibilities():
        raise QiitaHTTPError(
            400, 'Unknown visibility value: %s' % req_value)

    if req_value == 'private' and qiita_config.require_approval:
        # With approval required, only admins can make an artifact private
        if not user.level == 'admin':
            raise QiitaHTTPError(
                403, 'User does not have permissions '
                'to approve change')

    try:
        artifact.visibility = req_value
    except Exception as e:
        raise QiitaHTTPError(403, str(e).replace('\n', '<br/>'))

    sid = artifact.study.id
    if artifact.visibility != 'awaiting_approval':
        msg = '%s changed artifact %s (study %d) to %s' % (
            user.email, artifact_id, sid, req_value)
        LogEntry.create('Warning', msg)
        return

    # Let the admins know that there is an artifact waiting for approval
    email_to = '*****@*****.**'
    prep_id = artifact.prep_templates[0].id
    subject = ('QIITA: Artifact %s awaiting_approval. Study %d, '
               'Prep %d' % (artifact_id, sid, prep_id))
    message = ('%s requested approval. <a '
               'href="https://qiita.ucsd.edu/study/description/'
               '%d">Study %d</a>.' % (user.email, sid, sid))
    try:
        send_email(email_to, subject, message)
    except Exception:
        msg = ("Couldn't send email to admins, please email us "
               "directly to <a href='mailto:{0}'>{0}</a>.".format(
                   email_to))
        raise QiitaHTTPError(400, msg)
示例#45
0
文件: ebi.py 项目: mdehollander/qiita
    def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
        """Generates demultiplexed fastq

        Parameters
        ----------
        rewrite_fastq : bool, optional
            If true, it forces the rewrite of the fastq files
        mtime : float, optional
            The time to use when creating the gz files. If None, the current
            time will be used by gzip.GzipFile. This is useful for testing.

        Returns
        -------
        demux_samples
            Set of successfully demultiplexed samples

        Notes
        -----
        - As a performance feature, this method will check if
        self.full_ebi_dir already exists and, if it does, the script will
        assume that in a previous execution this step was performed correctly
        and will simply read the file names from self.full_ebi_dir
        - When the object is created (init), samples, samples_prep and
        sample_demux_fps hold values for all available samples in the database.
        Here some of those values will be deleted (del's, within the loops) for
        those cases where the fastq.gz files weren't written or exist. This is
        an indication that they had no sequences and this kind of files are not
        accepted in EBI

        Raises
        ------
        EBISubmissionError
            - The demux file couldn't be read
            - All samples are removed
        """
        ar = self.artifact

        dir_not_exists = not isdir(self.full_ebi_dir)
        if dir_not_exists or rewrite_fastq:
            # Only create the directory when it is missing: makedirs raises
            # OSError on an existing path, which made rewrite_fastq=True
            # fail whenever a previous run had left the directory in place
            if dir_not_exists:
                makedirs(self.full_ebi_dir)

            # An artifact will hold only one file of type `preprocessed_demux`
            # Thus, we only use the first one (the only one present)
            demux = [
                path for _, path, ftype in ar.filepaths
                if ftype == 'preprocessed_demux'
            ][0]

            demux_samples = set()
            with open_file(demux) as demux_fh:
                if not isinstance(demux_fh, File):
                    error_msg = "'%s' doesn't look like a demux file" % demux
                    LogEntry.create('Runtime', error_msg)
                    raise EBISubmissionError(error_msg)
                for s, i in to_per_sample_ascii(demux_fh,
                                                self.prep_template.keys()):
                    sample_fp = self.sample_demux_fps[s]
                    wrote_sequences = False
                    with GzipFile(sample_fp, mode='w', mtime=mtime) as fh:
                        for record in i:
                            fh.write(record)
                            wrote_sequences = True

                    if wrote_sequences:
                        demux_samples.add(s)
                    else:
                        # Samples without sequences are dropped from the
                        # submission and their empty file is deleted
                        del self.samples[s]
                        del self.samples_prep[s]
                        del self.sample_demux_fps[s]
                        remove(sample_fp)
        else:
            # Reuse the files of a previous execution: the sample name is
            # the file name without the extension
            demux_samples = set()
            extension = '.fastq.gz'
            extension_len = len(extension)
            for f in listdir(self.full_ebi_dir):
                fpath = join(self.full_ebi_dir, f)
                if isfile(fpath) and f.endswith(extension):
                    demux_samples.add(f[:-extension_len])

            missing_samples = set(self.samples.keys()).difference(
                set(demux_samples))
            for ms in missing_samples:
                del self.samples[ms]
                del self.samples_prep[ms]
                del self.sample_demux_fps[ms]

        if not demux_samples:
            error_msg = ("All samples were removed from the submission "
                         "because the demux file is empty or the sample names "
                         "do not match.")
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)
        return demux_samples
示例#46
0
文件: ebi.py 项目: ryanusahk/qiita
    def __init__(self, artifact_id, action):
        error_msgs = []

        if action not in self.valid_ebi_actions:
            error_msg = ("%s is not a valid EBI submission action, valid "
                         "actions are: %s" %
                         (action, ', '.join(self.valid_ebi_actions)))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        self.action = action
        self.artifact = Artifact(artifact_id)
        if not self.artifact.can_be_submitted_to_ebi:
            error_msg = ("Artifact %d cannot be submitted to EBI" %
                         self.artifact.id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.study = self.artifact.study
        self.sample_template = self.study.sample_template
        # If we reach this point, there should be only one prep template
        # attached to the artifact. By design, each artifact has at least one
        # prep template. Artifacts with more than one prep template cannot be
        # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
        # be set to false, which is checked in the previous if statement
        self.prep_template = self.artifact.prep_templates[0]

        if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
            error_msg = ("Cannot resubmit! Artifact %d has already "
                         "been submitted to EBI." % artifact_id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.artifact_id = artifact_id
        self.study_title = self.study.title
        self.study_abstract = self.study.info['study_abstract']

        it = self.prep_template.investigation_type
        if it in ena_ontology.terms:
            self.investigation_type = it
            self.new_investigation_type = None
        elif it in ena_ontology.user_defined_terms:
            self.investigation_type = 'Other'
            self.new_investigation_type = it
        else:
            # This should never happen
            error_msgs.append("Unrecognized investigation type: '%s'. This "
                              "term is neither one of the official terms nor "
                              "one of the user-defined terms in the ENA "
                              "ontology." % it)
        _, base_fp = get_mountpoint("preprocessed_data")[0]
        self.ebi_dir = '%d_ebi_submission' % artifact_id
        self.full_ebi_dir = join(base_fp, self.ebi_dir)
        self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
        self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
        self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.publications = self.study.publications

        # getting the restrictions
        st_restrictions = [self.sample_template.columns_restrictions['EBI']]
        pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
        if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
            # adding restictions on primer and barcode as these are
            # conditionally requiered for target gene
            pt_restrictions.append(
                PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
        st_missing = self.sample_template.check_restrictions(st_restrictions)
        pt_missing = self.prep_template.check_restrictions(pt_restrictions)
        # testing if there are any missing columns
        if st_missing:
            error_msgs.append("Missing column in the sample template: %s" %
                              ', '.join(list(st_missing)))
        if pt_missing:
            error_msgs.append("Missing column in the prep template: %s" %
                              ', '.join(list(pt_missing)))

        # generating all samples from sample template
        self.samples = {}
        self.samples_prep = {}
        self.sample_demux_fps = {}
        get_output_fp = partial(join, self.full_ebi_dir)
        nvp = []
        nvim = []
        for k, v in viewitems(self.sample_template):
            if k not in self.prep_template:
                continue
            sample_prep = self.prep_template[k]

            # validating required fields
            if ('platform' not in sample_prep
                    or sample_prep['platform'] is None):
                nvp.append(k)
            else:
                platform = sample_prep['platform'].upper()
                if platform not in self.valid_platforms:
                    nvp.append(k)
                else:
                    if ('instrument_model' not in sample_prep
                            or sample_prep['instrument_model'] is None):
                        nvim.append(k)
                    else:
                        im = sample_prep['instrument_model'].upper()
                        if im not in self.valid_platforms[platform]:
                            nvim.append(k)

            self.samples[k] = v
            self.samples_prep[k] = sample_prep
            self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

        if nvp:
            error_msgs.append("These samples do not have a valid platform "
                              "(instrumet model wasn't checked): %s" %
                              (', '.join(nvp)))
        if nvim:
            error_msgs.append("These samples do not have a valid instrument "
                              "model: %s" % (', '.join(nvim)))
        if error_msgs:
            error_msgs = ("Errors found during EBI submission for study #%d, "
                          "artifact #%d and prep template #%d:\n%s" %
                          (self.study.id, artifact_id, self.prep_template.id,
                           '\n'.join(error_msgs)))
            LogEntry.create('Runtime', error_msgs)
            raise EBISubmissionError(error_msgs)

        self._sample_aliases = {}
        self._experiment_aliases = {}
        self._run_aliases = {}

        self._ebi_sample_accessions = \
            self.sample_template.ebi_sample_accessions
        self._ebi_experiment_accessions = \
            self.prep_template.ebi_experiment_accessions
示例#47
0
文件: ebi.py 项目: ryanusahk/qiita
    def parse_EBI_reply(self, curl_result, test=False):
        """Parse and verify reply from EBI after sending XML files

        Parameters
        ----------
        curl_result : str
            The reply sent by EBI after sending XML files
        test : bool
            If true we will assume is a test and ignore some parsing errors:
            once the reply is valid XML flagged as successful, a placeholder
            study accession and empty dictionaries are returned

        Returns
        -------
        str
            The study accession number. None if the reply has no STUDY tag
        dict of {str: str}
            The sample accession numbers, keyed by sample id
        dict of {str: str}
            The biosample accession numbers, keyed by sample id. Samples
            whose SAMPLE tag has no EXT_ID child are not included
        dict of {str: str}
            The experiment accession numbers, keyed by sample id
        dict of {str: str}
            The run accession numbers, keyed by sample id

        Raises
        ------
        EBISubmissionError
            If curl_result is not a valid XML file
            If the ebi submission has not been successful
            If multiple study tags are found in the curl result
        """
        try:
            root = ET.fromstring(curl_result)
        except ParseError:
            error_msg = ("The curl result from the EBI submission doesn't "
                         "look like an XML file:\n%s" % curl_result)
            le = LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(
                "The curl result from the EBI submission doesn't look like "
                "an XML file. Contact and admin for more information. "
                "Log id: %d" % le.id)

        success = root.get('success') == 'true'
        if not success:
            # here we want to parse out the errors so the failures are clearer.
            # Duplicates are dropped but the order of the reply is kept so
            # the message is deterministic; ERROR tags without text (their
            # .text is None) are skipped because str.join rejects None
            errors = []
            for elem in root.iter("ERROR"):
                text = elem.text
                if text is not None and text not in errors:
                    errors.append(text)

            raise EBISubmissionError("The EBI submission failed:\n%s" %
                                     '\n'.join(errors))
        if test:
            study_accession = 'MyStudyAccession'
            sample_accessions = {}
            biosample_accessions = {}
            experiment_accessions = {}
            run_accessions = {}

            return (study_accession, sample_accessions, biosample_accessions,
                    experiment_accessions, run_accessions)

        # only direct children of the root are considered study tags
        study_elem = root.findall("STUDY")
        if study_elem:
            if len(study_elem) > 1:
                raise EBISubmissionError(
                    "Multiple study tags found in EBI reply: %d" %
                    len(study_elem))
            study_elem = study_elem[0]
            study_accession = study_elem.get('accession')
        else:
            study_accession = None

        sample_accessions = {}
        biosample_accessions = {}
        for elem in root.iter("SAMPLE"):
            # the aliases sent to EBI are translated back to sample ids
            alias = elem.get('alias')
            sample_id = self._sample_aliases[alias]
            sample_accessions[sample_id] = elem.get('accession')
            ext_id = elem.find('EXT_ID')
            # find returns None when the tag is missing; do not lose the
            # rest of the accessions because of a missing biosample id
            if ext_id is not None:
                biosample_accessions[sample_id] = ext_id.get('accession')

        def data_retriever(key, trans_dict):
            # maps {sample id: accession} for every `key` tag in the reply
            res = {}
            for elem in root.iter(key):
                alias = elem.get('alias')
                res[trans_dict[alias]] = elem.get('accession')
            return res

        experiment_accessions = data_retriever("EXPERIMENT",
                                               self._experiment_aliases)
        run_accessions = data_retriever("RUN", self._run_aliases)

        return (study_accession, sample_accessions, biosample_accessions,
                experiment_accessions, run_accessions)
示例#48
0
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                searchmsg = ''.join(e)
                self.write(searchmsg)
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime', str(e),
                                info={'User': self.current_user.id,
                                      'query': query})
                return
        else:
            study_proc = proc_samples = None
<<<<<<< HEAD
        info = _build_study_info(self.current_user, study_proc=study_proc,
                                 proc_samples=proc_samples)
=======
        info = _build_study_info(self.current_user, search_type, study_proc,
                                 proc_samples)
        # linkifying data
        len_info = len(info)
        for i in range(len_info):
            info[i]['shared'] = ", ".join([study_person_linkifier(element)
                                           for element in info[i]['shared']])
示例#49
0
文件: ebi.py 项目: ryanusahk/qiita
    def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
        """Make sure the per-sample fastq.gz files of the submission exist

        Parameters
        ----------
        rewrite_fastq : bool, optional
            When True the files are regenerated even if self.full_ebi_dir is
            already present
        mtime : float, optional
            Forwarded to self._generate_demultiplexed_fastq_demux; it is not
            used for 'per_sample_FASTQ' artifacts. Mostly useful for testing

        Returns
        -------
        demux_samples
            The names of the samples that have a demultiplexed file

        Notes
        -----
        - If self.full_ebi_dir exists and rewrite_fastq is False nothing is
        generated: the sample names are read back from the '.fastq.gz' files
        found in that directory
        - Otherwise the directory is recreated from scratch and filled by the
        helper that matches the artifact type
        - Samples reported as missing (no file on disk, or returned as such
        by the per-sample helper) are deleted from self.samples,
        self.samples_prep and self.sample_demux_fps

        Raises
        ------
        EBISubmissionError
            - No sample is left after the generation
        """
        out_dir = self.full_ebi_dir
        dropped = []

        if rewrite_fastq or not isdir(out_dir):
            # wipe whatever a previous run left behind before regenerating
            if isdir(out_dir):
                rmtree(out_dir)
            makedirs(out_dir)

            if self.artifact.artifact_type != 'per_sample_FASTQ':
                demux_samples = self._generate_demultiplexed_fastq_demux(mtime)
            else:
                demux_samples, dropped = \
                    self._generate_demultiplexed_fastq_per_sample_FASTQ()
        else:
            # trust the files written by a previous execution
            suffix = '.fastq.gz'
            demux_samples = {
                fname[:-len(suffix)] for fname in listdir(out_dir)
                if fname.endswith(suffix) and isfile(join(out_dir, fname))}
            dropped = set(self.samples) - demux_samples

        for sample_id in dropped:
            del self.samples[sample_id]
            del self.samples_prep[sample_id]
            del self.sample_demux_fps[sample_id]

        if demux_samples:
            return demux_samples

        error_msg = ("All samples were removed from the submission "
                     "because the demux file is empty or the sample names "
                     "do not match.")
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
示例#50
0
文件: ebi.py 项目: mcmk3/qiita
    def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
        """Modularity helper"""

        # helper function to write files in this method
        def _rename_file(fp, new_fp):
            if fp.endswith('.gz'):
                copyfile(fp, new_fp)
            else:
                cmd = "gzip -c %s > %s" % (fp, new_fp)
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                                 (stdout, stderr))
                    raise EBISubmissionError(error_msg)

        fwd_reads = []
        rev_reads = []
        for x in self.artifact.filepaths:
            if x['fp_type'] == 'raw_forward_seqs':
                fwd_reads.append((basename(x['fp']), x['fp']))
            elif x['fp_type'] == 'raw_reverse_seqs':
                rev_reads.append((basename(x['fp']), x['fp']))
        fwd_reads.sort(key=lambda x: x[1])
        rev_reads.sort(key=lambda x: x[1])
        if rev_reads:
            self.per_sample_FASTQ_reverse = True

        # merging forward and reverse into a single list, note that at this
        # stage the files have passed multiple rounds of reviews: validator
        # when the artifact was created, the summary generator, etc. so we can
        # assure that if a rev exists for 1 fwd, there is one for all of them
        fps = []
        for f, r in zip_longest(fwd_reads, rev_reads):
            sample_name = f[0]
            fwd_read = f[1]
            rev_read = r[1] if r is not None else None
            fps.append((sample_name, (fwd_read, rev_read)))

        if 'run_prefix' in self.prep_template.categories():
            rps = [(k, v) for k, v in self.prep_template.get_category(
                'run_prefix').items()]
        else:
            rps = [(v, v.split('.', 1)[1]) for v in self.prep_template.keys()]
        rps.sort(key=lambda x: x[1])

        demux_samples = set()
        for sn, rp in rps:
            for i, (bn, fp) in enumerate(fps):
                if bn.startswith(rp):
                    demux_samples.add(sn)
                    new_fp = self.sample_demux_fps[sn] + self.FWD_READ_SUFFIX
                    _rename_file(fp[0], new_fp)

                    if fp[1] is not None:
                        new_fp = self.sample_demux_fps[
                            sn] + self.REV_READ_SUFFIX
                        _rename_file(fp[1], new_fp)
                    del fps[i]
                    break
        if fps:
            error_msg = (
                'Discrepancy between filepaths and sample names. Extra'
                ' filepaths: %s' % ', '.join([fp[0] for fp in fps]))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        return demux_samples, \
            set(self.samples.keys()).difference(set(demux_samples))
示例#51
0
文件: ebi.py 项目: josenavas/QiiTa
    def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
        """Generates demultiplexed fastq

        Parameters
        ----------
        rewrite_fastq : bool, optional
            If true, it forces the rewrite of the fastq files
        mtime : float, optional
            The time to use when creating the gz files. If None, the current
            time will be used by gzip.GzipFile. This is useful for testing.

        Returns
        -------
        demux_samples
            The successfully demultiplexed samples

        Notes
        -----
        - As a performance feature, this method will check if
        self.full_ebi_dir already exists and, if it does, the script will
        assume that in a previous execution this step was performed correctly
        and will simply read the file names from self.full_ebi_dir
        - If the files have to be (re)written, any existing self.full_ebi_dir
        is removed first so no stale files are left behind
        - When the object is created (init), samples, samples_prep and
        sample_demux_fps hold values for all available samples in the database.
        Here some of those values will be deleted (del's, within the loops) for
        those cases where the fastq.gz files weren't written or exist. This is
        an indication that they had no sequences and this kind of files are not
        accepted in EBI

        Raises
        ------
        EBISubmissionError
            - The demux file couldn't be read
            - All samples are removed
        """
        # local import so this method does not depend on the module imports
        from shutil import rmtree

        dir_not_exists = not isdir(self.full_ebi_dir)
        missing_samples = []
        if dir_not_exists or rewrite_fastq:
            # if it exists, remove folder and start from scratch; makedirs
            # fails on an existing directory, which made rewrite_fastq=True
            # unusable once the files had been generated
            if not dir_not_exists:
                rmtree(self.full_ebi_dir)

            makedirs(self.full_ebi_dir)

            if self.artifact.artifact_type == 'per_sample_FASTQ':
                demux_samples, missing_samples = \
                    self._generate_demultiplexed_fastq_per_sample_FASTQ()
            else:
                demux_samples = self._generate_demultiplexed_fastq_demux(mtime)
        else:
            # read the sample names back from the files already on disk
            demux_samples = set()
            extension = '.fastq.gz'
            extension_len = len(extension)
            for f in listdir(self.full_ebi_dir):
                fpath = join(self.full_ebi_dir, f)
                if isfile(fpath) and f.endswith(extension):
                    demux_samples.add(f[:-extension_len])

            missing_samples = set(
                self.samples.keys()).difference(demux_samples)

        for ms in missing_samples:
            del self.samples[ms]
            del self.samples_prep[ms]
            del self.sample_demux_fps[ms]

        if not demux_samples:
            error_msg = ("All samples were removed from the submission "
                         "because the demux file is empty or the sample names "
                         "do not match.")
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        return demux_samples
示例#52
0
文件: commands.py 项目: jlab/qiita
def submit_EBI(artifact_id, action, send, test=False, test_size=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in parse_EBI_reply
    test_size : bool
        If True the EBI-ENA restriction size will be changed to 5000

    Returns
    -------
    tuple
        The study accession, the sample accessions, the biosample accessions,
        the experiment accessions and the run accessions, as returned by
        EBISubmission.parse_EBI_reply. The five values are None if `send` is
        False or if the reply is not parsed (`action` is not 'ADD' and `test`
        is False)

    Raises
    ------
    ComputeError
        If the XML files are still too large after removing sample columns
        If sending the sequences or the XML files fails
        If the reply from EBI reports a failure or can't be parsed
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        error_msg = format_exc()
        # do not leave a half-written directory behind, a later submission
        # would take it as a finished one
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    # before we continue let's check the size of the submission
    to_review = [
        ebi_submission.study_xml_fp, ebi_submission.sample_xml_fp,
        ebi_submission.experiment_xml_fp, ebi_submission.run_xml_fp,
        ebi_submission.submission_xml_fp
    ]
    total_size = sum([stat(tr).st_size for tr in to_review if tr is not None])
    # note that the max for EBI is 10M but let's play it safe
    max_size = 10e+6 if not test_size else 5000
    if total_size > max_size:
        LogEntry.create(
            'Runtime', 'The submission: %d is larger than allowed (%d), will '
            'try to fix: %d' % (artifact_id, max_size, total_size))
        # transform current metadata to dataframe for easier curation
        rows = {k: dict(v) for k, v in ebi_submission.samples.items()}
        df = pd.DataFrame.from_dict(rows, orient='index')
        # remove unique columns and same value in all columns
        nunique = df.apply(pd.Series.nunique)
        nsamples = len(df.index)
        cols_to_drop = set(nunique[(nunique == 1) |
                                   (nunique == nsamples)].index)
        # maximize deletion by removing also columns that are almost all the
        # same or almost all unique. This is a union: with less than 100
        # samples the lower threshold is 0, so on its own it would keep the
        # constant columns selected above
        cols_to_drop |= set(nunique[(nunique <= int(nsamples * .01)) |
                                    (nunique >= int(nsamples * .5))].index)
        # these columns are never removed from the sample XML
        cols_to_drop = cols_to_drop - {
            'taxon_id', 'scientific_name', 'description'
        }
        # only the samples without an accession are (re)written
        all_samples = ebi_submission.sample_template.ebi_sample_accessions
        samples = [k for k in ebi_submission.samples if all_samples[k] is None]
        if samples:
            ebi_submission.write_xml_file(
                ebi_submission.generate_sample_xml(samples, cols_to_drop),
                ebi_submission.sample_xml_fp)

        # now let's recalculate the size to make sure it's fine
        new_total_size = sum(
            [stat(tr).st_size for tr in to_review if tr is not None])
        LogEntry.create(
            'Runtime', 'The submission: %d after cleaning is %d and was %d' %
            (artifact_id, new_total_size, total_size))
        if new_total_size > max_size:
            raise ComputeError(
                'Even after cleaning the submission: %d is too large. Before '
                'cleaning: %d, after: %d' %
                (artifact_id, total_size, new_total_size))

    st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None
    if send:
        # getting aspera's password: a value already present in the
        # environment has priority over the configured one
        old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
        if old_ascp_pass == '':
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
        ascp_passwd = environ['ASPERA_SCP_PASS']

        # step 4: sending sequences
        try:
            if action != 'MODIFY':
                LogEntry.create('Runtime',
                                ("Submitting sequences for pre_processed_id: "
                                 "%d" % artifact_id))
                for cmd in ebi_submission.generate_send_sequences_cmd():
                    stdout, stderr, rv = system_call(cmd)
                    if rv != 0:
                        error_msg = (
                            "ASCP Error:\nStd output:%s\nStd error:%s" %
                            (stdout, stderr))
                        raise ComputeError(error_msg)
                    with open(ebi_submission.ascp_reply, 'a') as reply:
                        reply.write(
                            'stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
                # only report success once all the commands have finished
                LogEntry.create('Runtime',
                                ('Submission of sequences of pre_processed_id: '
                                 '%d completed successfully' % artifact_id))
        finally:
            # the environment is restored no matter how step 4 ends
            environ['ASPERA_SCP_PASS'] = old_ascp_pass

        # step 5: sending xml
        xmls_cmds = ebi_submission.generate_curl_command(
            ebi_seq_xfer_pass=ascp_passwd)
        LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                    "%d" % artifact_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                         (xml_content, stderr))
            raise ComputeError(error_msg)
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))
        with open(ebi_submission.curl_reply, 'w') as reply:
            reply.write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

        # parsing answer / only if adding
        if action == 'ADD' or test:
            try:
                st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                    ebi_submission.parse_EBI_reply(xml_content, test=test)
            except EBISubmissionError as e:
                error = str(e)
                le = LogEntry.create('Fatal',
                                     "Command: %s\nError: %s\n" %
                                     (xml_content, error),
                                     info={'ebi_submission': artifact_id})
                raise ComputeError("EBI Submission failed! Log id: %d\n%s" %
                                   (le.id, error))

            # store the accessions that EBI returned
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
示例#53
0
def submit_EBI(preprocessed_data_id, action, send):
    """Submit a preprocessed data to EBI

    Parameters
    ----------
    preprocessed_data_id : int
        The preprocessed data id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files

    Returns
    -------
    tuple
        The study accession, the sample accessions, the biosample accessions,
        the experiment accessions and the run accessions, as returned by
        EBISubmission.parse_EBI_reply. The five values are None if `send` is
        False

    Raises
    ------
    ComputeError
        If sending the sequences or the XML files fails
        If the reply from EBI reports a failure or can't be parsed

    Notes
    -----
    The study's ebi_submission_status is set to 'submitting' at the start,
    to 'submitted' once the reply has been parsed and to a 'failed: ...'
    message on the failures handled here, so a failed attempt does not leave
    the study marked as being submitted.
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(preprocessed_data_id, action)

    # step 2: generate demux fastq files
    ebi_submission.study.ebi_submission_status = 'submitting'
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except BaseException:
        # catch-all on purpose: clean up, record the failure and re-raise
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        ebi_submission.study.ebi_submission_status = 'failed: %s' % error_msg
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': preprocessed_data_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if send:
        # step 4: sending sequences
        if action != 'MODIFY':
            old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass

            try:
                LogEntry.create('Runtime',
                                ("Submitting sequences for pre_processed_id: "
                                 "%d" % preprocessed_data_id))
                for cmd in ebi_submission.generate_send_sequences_cmd():
                    stdout, stderr, rv = system_call(cmd)
                    if rv != 0:
                        error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                                     (stdout, stderr))
                        ebi_submission.study.ebi_submission_status = (
                            'failed: %s' % error_msg)
                        raise ComputeError(error_msg)
                    with open(ebi_submission.ascp_reply, 'a') as reply:
                        reply.write(
                            'stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
            finally:
                # never leave the password in the environment, even if one
                # of the commands failed
                environ['ASPERA_SCP_PASS'] = old_ascp_pass
            LogEntry.create(
                'Runtime',
                ('Submission of sequences of pre_processed_id: '
                 '%d completed successfully' % preprocessed_data_id))

        # step 5: sending xml and parsing answer
        xmls_cmds = ebi_submission.generate_curl_command()
        LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                    "%d" % preprocessed_data_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                         (xml_content, stderr))
            ebi_submission.study.ebi_submission_status = (
                'failed: %s' % error_msg)
            raise ComputeError(error_msg)
        LogEntry.create(
            'Runtime',
            ('Submission of sequences of pre_processed_id: '
             '%d completed successfully' % preprocessed_data_id))
        with open(ebi_submission.curl_reply, 'w') as reply:
            reply.write('stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

        try:
            st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                ebi_submission.parse_EBI_reply(xml_content)
        except EBISubmissionError as e:
            le = LogEntry.create('Fatal',
                                 "Command: %s\nError: %s\n" %
                                 (xml_content, str(e)),
                                 info={'ebi_submission': preprocessed_data_id})
            ebi_submission.study.ebi_submission_status = (
                "failed: XML parsing, log id: %d" % le.id)
            raise ComputeError("EBI Submission failed! Log id: %d" % le.id)

        ebi_submission.study.ebi_submission_status = 'submitted'
        # the accessions are only stored when adding new data
        if action == 'ADD':
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc
    else:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = None, None, None, None, None

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
示例#54
0
文件: ebi.py 项目: josenavas/QiiTa
    def __init__(self, artifact_id, action):
        error_msgs = []

        if action not in self.valid_ebi_actions:
            error_msg = ("%s is not a valid EBI submission action, valid "
                         "actions are: %s" %
                         (action, ', '.join(self.valid_ebi_actions)))
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
        self.action = action
        self.artifact = Artifact(artifact_id)
        if not self.artifact.can_be_submitted_to_ebi:
            error_msg = ("Artifact %d cannot be submitted to EBI"
                         % self.artifact.id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.study = self.artifact.study
        self.sample_template = self.study.sample_template
        # If we reach this point, there should be only one prep template
        # attached to the artifact. By design, each artifact has at least one
        # prep template. Artifacts with more than one prep template cannot be
        # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
        # be set to false, which is checked in the previous if statement
        self.prep_template = self.artifact.prep_templates[0]

        if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
            error_msg = ("Cannot resubmit! Artifact %d has already "
                         "been submitted to EBI." % artifact_id)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        status = self.study.ebi_submission_status
        if status in self.valid_ebi_submission_states:
            error_msg = ("Cannot perform parallel EBI submission for the same "
                         "study. Current status of the study: %s" % status)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        self.artifact_id = artifact_id
        self.study_title = self.study.title
        self.study_abstract = self.study.info['study_abstract']

        it = self.prep_template.investigation_type
        if it in ena_ontology.terms:
            self.investigation_type = it
            self.new_investigation_type = None
        elif it in ena_ontology.user_defined_terms:
            self.investigation_type = 'Other'
            self.new_investigation_type = it
        else:
            # This should never happen
            error_msgs.append("Unrecognized investigation type: '%s'. This "
                              "term is neither one of the official terms nor "
                              "one of the user-defined terms in the ENA "
                              "ontology." % it)
        _, base_fp = get_mountpoint("preprocessed_data")[0]
        self.ebi_dir = '%d_ebi_submission' % artifact_id
        self.full_ebi_dir = join(base_fp, self.ebi_dir)
        self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
        self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
        self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
        self.study_xml_fp = None
        self.sample_xml_fp = None
        self.experiment_xml_fp = None
        self.run_xml_fp = None
        self.submission_xml_fp = None
        self.publications = self.study.publications

        # getting the restrictions
        st_restrictions = [self.sample_template.columns_restrictions['EBI']]
        pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
        if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
            # adding restictions on primer and barcode as these are
            # conditionally requiered for target gene
            pt_restrictions.append(
                PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
        st_missing = self.sample_template.check_restrictions(st_restrictions)
        pt_missing = self.prep_template.check_restrictions(pt_restrictions)
        # testing if there are any missing columns
        if st_missing:
            error_msgs.append("Missing column in the sample template: %s" %
                              ', '.join(list(st_missing)))
        if pt_missing:
            error_msgs.append("Missing column in the prep template: %s" %
                              ', '.join(list(pt_missing)))

        # generating all samples from sample template
        self.samples = {}
        self.samples_prep = {}
        self.sample_demux_fps = {}
        get_output_fp = partial(join, self.full_ebi_dir)
        nvp = []
        nvim = []
        for k, v in viewitems(self.sample_template):
            if k not in self.prep_template:
                continue
            sample_prep = self.prep_template[k]

            # validating required fields
            if ('platform' not in sample_prep or
                    sample_prep['platform'] is None):
                nvp.append(k)
            else:
                platform = sample_prep['platform'].upper()
                if platform not in self.valid_platforms:
                    nvp.append(k)
                else:
                    if ('instrument_model' not in sample_prep or
                            sample_prep['instrument_model'] is None):
                        nvim.append(k)
                    else:
                        im = sample_prep['instrument_model'].upper()
                        if im not in self.valid_platforms[platform]:
                            nvim.append(k)

            self.samples[k] = v
            self.samples_prep[k] = sample_prep
            self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

        if nvp:
            error_msgs.append("These samples do not have a valid platform "
                              "(instrumet model wasn't checked): %s" % (
                                  ', '.join(nvp)))
        if nvim:
            error_msgs.append("These samples do not have a valid instrument "
                              "model: %s" % (', '.join(nvim)))
        if error_msgs:
            error_msgs = ("Errors found during EBI submission for study #%d, "
                          "artifact #%d and prep template #%d:\n%s"
                          % (self.study.id, artifact_id,
                             self.prep_template.id, '\n'.join(error_msgs)))
            LogEntry.create('Runtime', error_msgs)
            raise EBISubmissionError(error_msgs)

        self._sample_aliases = {}
        self._experiment_aliases = {}
        self._run_aliases = {}

        self._ebi_sample_accessions = \
            self.sample_template.ebi_sample_accessions
        self._ebi_experiment_accessions = \
            self.prep_template.ebi_experiment_accessions
示例#55
0
文件: ebi.py 项目: antgonza/qiita
    def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
        """Copy/compress the per-sample FASTQ files into the EBI folder

        The forward and reverse read files of the artifact are paired up,
        matched to samples through their run prefix and written (gzipped) to
        the locations stored in self.sample_demux_fps.

        Returns
        -------
        tuple of (set, set)
            The samples for which files were written and the keys of
            self.samples for which no file was found

        Raises
        ------
        EBISubmissionError
            - If gzip fails on one of the files
            - If a filepath could not be matched to any sample
        """

        def _copy_gzipped(src_fp, dest_fp):
            # already compressed files are copied verbatim
            if src_fp.endswith('.gz'):
                copyfile(src_fp, dest_fp)
                return
            stdout, stderr, rv = system_call(
                "gzip -c %s > %s" % (src_fp, dest_fp))
            if rv != 0:
                raise EBISubmissionError(
                    "Error:\nStd output:%s\nStd error:%s" % (stdout, stderr))

        # bucket the artifact files by read direction; other types are ignored
        reads = {'raw_forward_seqs': [], 'raw_reverse_seqs': []}
        for fp_info in self.artifact.filepaths:
            bucket = reads.get(fp_info['fp_type'])
            if bucket is not None:
                bucket.append((basename(fp_info['fp']), fp_info['fp']))
        fwd_reads = reads['raw_forward_seqs']
        rev_reads = reads['raw_reverse_seqs']
        # both lists are ordered by full filepath so they line up below
        fwd_reads.sort(key=lambda pair: pair[1])
        rev_reads.sort(key=lambda pair: pair[1])
        if rev_reads:
            self.per_sample_FASTQ_reverse = True

        # pair forward and reverse files. At this stage the files have been
        # reviewed multiple times (validator on artifact creation, summary
        # generator, etc.) so if one forward has a reverse, all of them do
        pending = [
            (fwd[0], (fwd[1], None if rev is None else rev[1]))
            for fwd, rev in zip_longest(fwd_reads, rev_reads)]

        # (sample name, run prefix) pairs, ordered by run prefix
        if 'run_prefix' in self.prep_template.categories():
            prefixes = list(
                viewitems(self.prep_template.get_category('run_prefix')))
        else:
            prefixes = [(sid, sid.split('.', 1)[1])
                        for sid in self.prep_template.keys()]
        prefixes.sort(key=lambda pair: pair[1])

        demux_samples = set()
        for sample, prefix in prefixes:
            # first pending file whose basename starts with the run prefix
            match_idx = next(
                (idx for idx, (fname, _) in enumerate(pending)
                 if fname.startswith(prefix)), None)
            if match_idx is None:
                continue

            fwd_fp, rev_fp = pending[match_idx][1]
            demux_samples.add(sample)
            out_base = self.sample_demux_fps[sample]
            _copy_gzipped(fwd_fp, out_base + self.FWD_READ_SUFFIX)
            if rev_fp is not None:
                _copy_gzipped(rev_fp, out_base + self.REV_READ_SUFFIX)
            # each file can only be assigned to a single sample
            pending.pop(match_idx)

        # anything left could not be assigned to a sample
        if pending:
            leftovers = ', '.join([entry[0] for entry in pending])
            error_msg = (
                'Discrepancy between filepaths and sample names. Extra'
                ' filepaths: %s' % leftovers)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)

        missing_samples = set(self.samples.keys()).difference(
            set(demux_samples))
        return demux_samples, missing_samples
示例#56
0
文件: ebi.py 项目: antgonza/qiita
    def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
        """Creates, or re-discovers, the per-sample fastq files to submit

        Parameters
        ----------
        rewrite_fastq : bool, optional
            If True the fastq files are regenerated even when
            self.full_ebi_dir already exists
        mtime : float, optional
            Passed to self._generate_demultiplexed_fastq_demux: the time to
            use when creating the gz files. If None, the current time will be
            used by gzip.GzipFile. This is useful for testing.

        Returns
        -------
        demux_samples
            The successfully demultiplexed samples

        Notes
        -----
        - As a performance shortcut, if self.full_ebi_dir already exists (and
        rewrite_fastq is False) a previous execution is assumed to have
        generated the files correctly, so the sample names are simply read
        back from the file names in that folder
        - On init, samples, samples_prep and sample_demux_fps hold values for
        all the samples in the database. The samples for which no fastq.gz
        file was written or found are removed from those attributes here, as
        that indicates that they had no sequences and EBI does not accept
        that kind of files

        Raises
        ------
        EBISubmissionError
            - The demux file couldn't be read
            - All samples are removed
        """
        ebi_dir_exists = isdir(self.full_ebi_dir)
        missing_samples = []
        if rewrite_fastq or not ebi_dir_exists:
            # wipe any previous folder so we start from scratch
            if isdir(self.full_ebi_dir):
                rmtree(self.full_ebi_dir)

            create_nested_path(self.full_ebi_dir)

            if self.artifact.artifact_type != 'per_sample_FASTQ':
                demux_samples = self._generate_demultiplexed_fastq_demux(mtime)
            else:
                demux_samples, missing_samples = \
                    self._generate_demultiplexed_fastq_per_sample_FASTQ()
        else:
            # the raw files were generated by a previous (failed) submission
            # so there is no need to generate them again: the final list of
            # samples is built from the files present in the folder
            fwd_suffix = self.FWD_READ_SUFFIX
            suffix_len = len(fwd_suffix)
            demux_samples = set()
            other_stems = set()
            for fname in listdir(self.full_ebi_dir):
                stem = fname[:-suffix_len]
                is_forward = (isfile(join(self.full_ebi_dir, fname)) and
                              fname.endswith(fwd_suffix))
                if is_forward:
                    demux_samples.add(stem)
                else:
                    other_stems.add(stem)
            # we still don't know if there are only forward reads or also
            # reverse ones: a non-forward entry sharing its stem with a
            # forward file is taken as the sign that reverse reads exist
            if not other_stems.isdisjoint(demux_samples):
                self.per_sample_FASTQ_reverse = True

            missing_samples = set(self.samples.keys()) - demux_samples

        # forget about the samples that have no file
        for sample in missing_samples:
            for container in (self.samples, self.samples_prep,
                              self.sample_demux_fps):
                del container[sample]

        if demux_samples:
            return demux_samples

        error_msg = ("All samples were removed from the submission "
                     "because the demux file is empty or the sample names "
                     "do not match.")
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
示例#57
0
    def get(self, ignore):
        """Answers a study search request with a JSON table description

        Reads the 'user', 'query', 'search_type' and 'sEcho' request
        arguments, optionally runs the search query, builds the study info
        through _build_study_info, replaces several fields with their
        linkified versions and writes the result as compact JSON.

        Parameters
        ----------
        ignore : str
            Not used by this method

        Raises
        ------
        HTTPError
            - 403 if the 'user' argument is not the id of the current user
            - 400 if 'search_type' is neither 'user' nor 'public'

        Notes
        -----
        Errors raised while searching are not propagated: the response is
        cleared, the status is set to 400 (malformed query or incompatible
        datatypes) or 500 (anything else, which is also logged) and a plain
        text message is written instead of the JSON.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        search_type = self.get_argument('search_type')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')
        if search_type not in ['user', 'public']:
            raise HTTPError(400, 'Not a valid search type')
        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # exception objects are not iterable in Python 3, so the
                # message is built from the exception arguments instead of
                # joining the exception itself
                searchmsg = ''.join(str(arg) for arg in e.args)
                self.write(searchmsg)
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime',
                                str(e),
                                info={
                                    'User': self.current_user.id,
                                    'query': query
                                })
                return
        else:
            # no query: nothing to filter by
            study_proc = proc_samples = None
        info = _build_study_info(self.current_user, search_type, study_proc,
                                 proc_samples)
        # linkifying data
        len_info = len(info)
        for entry in info:
            entry['shared'] = ", ".join([
                study_person_linkifier(element)
                for element in entry['shared']
            ])

            # the publication ids are merged into a single 'pubs' field
            ppid = [pubmed_linkifier([p]) for p in entry['publication_pid']]
            pdoi = [doi_linkifier([p]) for p in entry['publication_doi']]
            del entry['publication_pid']
            del entry['publication_doi']
            entry['pubs'] = ', '.join(ppid + pdoi)

            entry['pi'] = study_person_linkifier(entry['pi'])

            # by default only the status is shown; if there are accessions
            # they are linkified and shown before the status
            entry['ebi_info'] = entry['ebi_submission_status']
            ebi_study_accession = entry['ebi_study_accession']
            if ebi_study_accession:
                entry['ebi_info'] = '%s (%s)' % (''.join([
                    EBI_LINKIFIER.format(a)
                    for a in ebi_study_accession.split(',')
                ]), entry['ebi_submission_status'])

        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len_info,
            "iTotalDisplayRecords": len_info,
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
示例#58
0
def submit_EBI(artifact_id, action, send, test=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in parse_EBI_reply

    Returns
    -------
    tuple
        The study, sample, biosample, experiment and run accessions as
        returned by EBISubmission.parse_EBI_reply. If `send` is False
        nothing is submitted and the five values are None.

    Raises
    ------
    ComputeError
        - If any of the ASCP commands or the curl command returns a non-zero
        status
        - If parse_EBI_reply raises an EBISubmissionError
    """
    # NOTE(review): the placeholder in the `action` entry of the docstring is
    # kept untouched as it looks like it is filled in elsewhere - confirm

    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        error_msg = format_exc()
        # do not leave a partially generated folder behind
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime',
                        error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if not send:
        return None, None, None, None, None

    # getting aspera's password
    old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
    if old_ascp_pass == '':
        environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
    ascp_passwd = environ['ASPERA_SCP_PASS']
    try:
        # NOTE(review): this entry reports success before anything has been
        # sent; kept as it was - confirm whether it is intended
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))

        # step 4: sending sequences
        if action != 'MODIFY':
            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % artifact_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s" %
                                 (stdout, stderr))
                    raise ComputeError(error_msg)
                # the context manager guarantees the reply file is flushed
                # and closed after each command
                with open(ebi_submission.ascp_reply, 'a') as ascp_reply_f:
                    ascp_reply_f.write(
                        'stdout:\n%s\n\nstderr: %s' % (stdout, stderr))
    finally:
        # restore the environment no matter how step 4 finished
        environ['ASPERA_SCP_PASS'] = old_ascp_pass

    # step 5: sending xml and parsing answer
    xmls_cmds = ebi_submission.generate_curl_command(
        ebi_seq_xfer_pass=ascp_passwd)
    LogEntry.create('Runtime', ("Submitting XMLs for pre_processed_id: "
                                "%d" % artifact_id))
    xml_content, stderr, rv = system_call(xmls_cmds)
    if rv != 0:
        error_msg = ("Error:\nStd output:%s\nStd error:%s" %
                     (xml_content, stderr))
        raise ComputeError(error_msg)
    LogEntry.create('Runtime',
                    ('Submission of sequences of pre_processed_id: '
                     '%d completed successfully' % artifact_id))
    with open(ebi_submission.curl_reply, 'w') as curl_reply_f:
        curl_reply_f.write(
            'stdout:\n%s\n\nstderr: %s' % (xml_content, stderr))

    try:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
            ebi_submission.parse_EBI_reply(xml_content, test=test)
    except EBISubmissionError as e:
        error = str(e)
        le = LogEntry.create('Fatal',
                             "Command: %s\nError: %s\n" %
                             (xml_content, error),
                             info={'ebi_submission': artifact_id})
        raise ComputeError("EBI Submission failed! Log id: %d\n%s" %
                           (le.id, error))

    # the accessions are only stored for new submissions (or when testing)
    if action == 'ADD' or test:
        if st_acc:
            ebi_submission.study.ebi_study_accession = st_acc
        if sa_acc:
            ebi_submission.sample_template.ebi_sample_accessions = sa_acc
        if bio_acc:
            ebi_submission.sample_template.biosample_accessions = bio_acc
        if ex_acc:
            ebi_submission.prep_template.ebi_experiment_accessions = ex_acc
        ebi_submission.artifact.ebi_run_accessions = run_acc

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc