Exemplo n.º 1
0
    def __init__(self, parser):
        """Prepare the rMLST run, ensure the organism config exists, then start.

        Args:
            parser: object exposing ``referencedirectory`` and ``start``.

        Exits the process with status 1 when ``bacteria.json`` cannot be
        decoded as JSON, or when the loader ends up without an
        ``organismdictionary`` attribute.  Finishes by calling ``self.main()``.
        """
        import json

        print("initialising")
        self.analysistype = "rMLST"
        # Joining with "" makes sure the directory path ends with a separator.
        self.referencefilepath = os.path.join(parser.referencedirectory, "")
        self.start = parser.start
        self.loader = SaveLoad()

        # Default contents written when no usable config could be loaded.
        seed_organisms = {
            'Escherichia': 'Escherichia coli#1',
            'Shigella': 'Escherichia coli#1',
            'Vibrio': 'Vibrio parahaemolyticus',
            'Campylobacter': 'Campylobacter jejuni',
            'Listeria': 'Listeria monocytogenes',
            'Bacillus': 'Bacillus cereus',
            'Klebsiella': 'Klebsiella pneumoniae',
        }

        try:
            # A falsy result is treated as "nothing was loaded" (presumably a
            # fresh/empty file was created - confirm against SaveLoad.load).
            loaded = self.loader.load("bacteria.json", True)
            if not loaded:
                self.loader.organismdictionary = seed_organisms
                self.loader.to_update = list(self.loader.organismdictionary.keys())
                self.loader.dump("bacteria.json")

            if "organismdictionary" not in vars(self.loader):
                # Route a config without the mapping through the handler below.
                raise NameError
        except (json.decoder.JSONDecodeError, NameError):
            print("Invalid config file, please delete or fix")
            sys.exit(1)

        self.main()
Exemplo n.º 2
0
    def __init__(self):
        """Set up logging, load persisted state and configuration, then run.

        Creates the ``runner_logs`` directory next to the script if needed,
        opens a timestamped log through ``Timer``, reads the already-answered
        issue ids and the bot settings via ``SaveLoad`` and finally calls
        ``self.main()``.  Any exception raised by ``main`` is logged with its
        traceback and re-raised.
        """
        import datetime
        import sys
        import traceback

        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Timer/logger writing into <script dir>/runner_logs/<timestamp>
        log_dir = os.path.join(self.script_dir, 'runner_logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        # NOTE(review): the format orders the time as seconds:minutes:hours -
        # confirm this is intended before relying on the file names.
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%S:%M:%H")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(30)

        # Issues the bot has already responded to
        issues_file = os.path.join(self.script_dir, 'responded_issues.json')
        self.issue_loader = SaveLoad(issues_file, create=True)
        already_answered = self.issue_loader.get(
            'responded_issues', default=[], ask=False)
        self.responded_issues = set(already_answered)

        # Configuration, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get(
            'redmine_api_key_encrypted', default='none', ask=False)

        # 'yes' until a key has been stored by main()
        self.first_run = self.loader.get('first_run', default='yes', ask=False)

        nas_mount = self.loader.get('nasmnt', default="/mnt/nas/", get_type=str)
        self.nas_mnt = os.path.normpath(nas_mount)
        self.max_histories = self.loader.get(
            'max_histories', default=6, get_type=int)
        self.seconds_between_redmine_checks = self.loader.get(
            'seconds_between_redmine_checks', default=600, get_type=int)

        # Touch the remaining settings so they are present in the config
        # (return values are deliberately unused here).
        self.loader.get('workflow_id', default="f2db41e1fa331b3e")
        self.loader.get('ip', default="http://192.168.1.3:48888/")
        self.key = 'Sixteen byte key'

        self.redmine = None

        try:
            self.main()
        except Exception:
            self.t.time_print(
                "[Error] Dumping...\n%s" % traceback.format_exc())
            raise
    def __init__(self, force):
        """Set up logging, load persisted state and configuration, then run.

        Args:
            force: forwarded unchanged to ``self.main``.

        Creates the ``runner_logs`` directory next to the script if needed,
        opens a timestamped log through ``Timer`` and reads the stored issue
        ids and settings via ``SaveLoad``.  Any exception raised by ``main``
        is logged with its traceback and re-raised.
        """
        import datetime
        import sys
        import traceback

        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Timer/logger writing into <script dir>/runner_logs/<timestamp>
        log_dir = os.path.join(self.script_dir, 'runner_logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        # NOTE(review): the format orders the time as seconds:minutes:hours -
        # confirm this is intended before relying on the file names.
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%S:%M:%H")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(30)

        # Issues the bot has already responded to
        issues_file = os.path.join(self.script_dir, 'responded_issues.json')
        self.issue_loader = SaveLoad(issues_file, create=True)
        already_answered = self.issue_loader.get(
            'responded_issues', default=[], ask=False)
        self.responded_issues = set(already_answered)

        # Configuration, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get(
            'redmine_api_key_encrypted', default='none', ask=False)

        # 'yes' until a key has been stored by main()
        self.first_run = self.loader.get('first_run', default='yes', ask=False)

        nas_mount = self.loader.get('nasmnt', default="/mnt/nas/", get_type=str)
        self.nas_mnt = os.path.normpath(nas_mount)
        self.seconds_between_redmine_checks = self.loader.get(
            'secs_between_redmine_checks', default=600, get_type=int)
        self.key = 'Sixteen byte key'

        self.redmine = None

        # Footer appended to every note the bot posts
        self.botmsg = '\n\n_I am a bot. This action was performed automatically._'

        try:
            self.main(force)
        except Exception:
            self.t.time_print(
                "[Error] Dumping...\n%s" % traceback.format_exc())
            raise
Exemplo n.º 4
0
    def __init__(self, inputs=None, args=None):
        """Configure an extraction either from a request dict or CLI arguments.

        Args:
            inputs: mapping with at least an ``'outputfolder'`` entry; when
                given, the instance runs in automated mode.
            args: parsed command-line namespace (``output_folder``, ``fastq``,
                ``fasta``, ``nas``); used only when ``inputs`` is ``None``.

        Raises:
            ValueError: if neither ``inputs`` nor ``args`` is supplied.
        """
        import datetime

        self.missing = []
        self.script_dir = sys.path[0]
        load = SaveLoad(file_name="config.json", create=True)

        if inputs is None and args is None:
            raise ValueError('No inputs to constructor!')

        if inputs is not None:
            # Automated mode: everything comes from the request dict
            self.automated = True
            self.inputs = inputs
            self.name = os.path.split(inputs['outputfolder'])[-1]
            nasmnt = load.get('nasmnt', default='/mnt/nas/')
            self.outfolder = inputs['outputfolder']
        else:
            # Manual mode: driven by command-line arguments
            self.automated = False
            self.name = os.path.normpath(args.output_folder)

            if not (args.fastq or args.fasta):
                print(
                    "Please use -q (fastq) and/or -f (fasta) to choose what filetype you want to retrieve."
                )
                # NOTE(review): `parser` is not defined in this method;
                # presumably a module-level ArgumentParser - confirm.
                parser.print_help()
                exit(1)

            # An explicit --nas style argument wins over the stored setting
            if args.nas is None:
                nasmnt = load.get('nasmnt', default='/mnt/nas/')
            else:
                nasmnt = args.nas

            # Relative output folders are placed next to the script
            if os.path.normpath(self.name).startswith('/'):
                self.outfolder = self.name
            else:
                self.outfolder = os.path.join(self.script_dir, self.name)

        self.retriever = SequenceGetter(outputfolder=self.outfolder,
                                        nasmnt=os.path.normpath(nasmnt),
                                        output=False)

        # Timer/logger writing into <script dir>/extractor_logs/<timestamp>
        log_dir = os.path.join(self.script_dir, 'extractor_logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%S:%M:%H")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(32)
Exemplo n.º 5
0
    def load(self):
        """Read the Galaxy settings from ``config.json`` into attributes.

        Sets ``API_KEY``, ``WORKFLOW_ID``, ``IP`` and ``NASMNT``.  Logs a
        message and exits with status 1 when the API key is not 32 word
        characters or the workflow id is not 16 word characters.
        """
        from pyaccessories.SaveLoad import SaveLoad as SaveLoad

        config_path = os.path.join(self.script_dir, "config.json")
        config = SaveLoad(config_path, create=True)

        self.API_KEY = config.get('api_key')
        if re.match(r"^\w{32}$", self.API_KEY) is None:
            self.t.time_print("Invalid Galaxy API key.")
            exit(1)

        # Default id is the SNVPhyl paired end workflow
        self.WORKFLOW_ID = config.get('workflow_id', default='f2db41e1fa331b3e')
        if re.match(r"^\w{16}$", self.WORKFLOW_ID) is None:
            self.t.time_print("Invalid workflow ID format.")
            exit(1)

        self.IP = config.get('ip', default="http://192.168.1.3:48888/")
        nas_mount = config.get('nasmnt', default="/mnt/nas/")
        self.NASMNT = os.path.normpath(nas_mount)
Exemplo n.º 6
0
class Run(object):
    def main(self, force):
        """Obtain the Redmine API key, connect, and enter the polling loop.

        Args:
            force: when truthy, never prompt; the stored key is decoded and
                used.  On a first run this is impossible, so an error is
                raised instead.

        Raises:
            ValueError: if ``force`` is set on the first run.
        """
        import re

        if self.first_run == 'yes':
            if force:
                raise ValueError('Need redmine API key!')
            choice = 'y'
        elif force:
            choice = 'n'
        else:
            self.t.time_print("Would you like to set the redmine api key? (y/n)")
            choice = input()

        if choice != 'y':
            # Use the key stored (encoded) in the config file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)
        else:
            self.t.time_print("Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Store the encoded key and remember that setup has been done
            encoded_key = self.encode(self.key, self.redmine_api_key)
            self.loader.redmine_api_key_encrypted = encoded_key.decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)

        if re.match(r'^[a-z0-9]{40}$', self.redmine_api_key) is None:
            self.t.time_print("Invalid Redmine API key!")
            exit(1)

        self.redmine = RedmineInterface('http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)
        # To re-run a single issue, call self.respond_to_issue(...) with
        # force=True here and exit instead of starting the loop.
        self.main_loop()

    @staticmethod
    def generate_args(inputs):
        import argparse

        args = argparse.Namespace()
        args.reference = inputs['reference']
        args.history_name = inputs['name']
        args.noextract = False
        args.manual = False  # Change this to true if you want to manually run the snvphyl

        return args

    @staticmethod
    def get_input(input_file, redmine_id):
        mode = 'none'
        regex = r"^(2\d{3}-\w{2,10}-\d{3,4}|\d{2}-\d{4})$"  # Match xxxx-xxx-xxxx or xx-xxxx
        inputs = {
            'reference': None,
            'fastqs': list(),
            'name': str(redmine_id)
        }
        import re
        for line in input_file:
            # Check for mode changes
            if line.lower().startswith('reference') and len(line) < len('reference') + 3:
                mode = 'ref'
                continue
            elif line.lower().startswith('compare') and len(line) < len('compare') + 3:
                mode = 'comp'
                continue
            elif line.lower() == '':
                # Blank line
                mode = 'none'
                continue

            if inputs['reference'] is not None and len(inputs['fastqs']) > 0 and mode == 'none':
                # Finished gathering all input
                break

            # Get seq-id
            if mode == 'ref':
                if re.match(regex, line):
                    inputs['reference'] = line
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
            elif mode == 'comp':
                if re.match(regex, line):
                    inputs['fastqs'].append(line)
                else:
                    pass
                    raise ValueError("Invalid seq-id \"%s\"" % line)

        # Check for duplicates
        l = inputs['fastqs']
        duplicates = set([x for x in l if l.count(x) > 1])
        if len(duplicates) > 0:
            msg = "Duplicate SEQ-IDs!\n"
            for duplicate in duplicates:
                msg += duplicate + '\n'
            raise ValueError(msg)

        if inputs['reference'] is None or len(inputs['fastqs']) < 1:
            raise ValueError("Invalid format for redmine request.")

        return inputs

    def completed_response(self, result_path, redmine_id):
        """Upload the result archive and hand the issue back to its author.

        Args:
            result_path: path of the zip file to attach to the issue.
            redmine_id: id of the issue being answered.

        When the upload fails with ``RedmineUploadError`` the note only
        mentions where the results are stored.  The issue is then updated
        with status 4 and assigned to its author.
        """
        from RedmineAPI.RedmineAPI import RedmineUploadError

        stored_at = os.path.join("NAS/bio_requests/%s" % redmine_id)
        upload_name = "SNVPhyl_%s_Results.zip" % redmine_id
        try:
            self.redmine.upload_file(result_path, redmine_id, 'application/zip',
                                     file_name_once_uploaded=upload_name)
        except RedmineUploadError:
            notes = "Couldn't upload your file to redmine. Results stored at %s" % stored_at
        else:
            notes = "Completed running SNVPhyl. Results stored at %s" % stored_at

        # Assign it back to the author
        issue_data = self.redmine.get_issue_data(redmine_id)
        author_id = issue_data['issue']['author']['id']
        self.redmine.update_issue(redmine_id, notes + self.botmsg, status_change=4, assign_to_id=author_id)

    def run_snvphyl(self, inputs):
        """Run SNVPhyl for a parsed request and report the outcome on Redmine.

        On success the result archive is copied to
        ``<nas_mnt>/bio_requests/<issue id>`` and ``completed_response`` is
        called.  On any failure the error is logged and the issue is set to
        status 4 and assigned back to its author: expected errors
        (``AutoSNVPhylError``/``ValueError``) are reported by their message,
        anything else with the full traceback.

        Args:
            inputs: dict produced by ``get_input`` (plus ``'rename'``);
                ``inputs['name']`` is the Redmine issue id.
        """
        import shutil
        import traceback

        from main import AutoSNVPhylError

        # Parse input
        args = self.generate_args(inputs)
        # noinspection PyBroadException
        try:
            runner = AutoSNVPhyl(args, inputs=inputs)
            result_path = runner.run()

            # SNVPhyl finished, copy the zip to the NAS
            bio_request_folder = os.path.join(self.nas_mnt, 'bio_requests', inputs['name'])
            self.t.time_print("Creating directory %s" % bio_request_folder)
            if not os.path.exists(bio_request_folder):
                os.makedirs(bio_request_folder)

            self.t.time_print("Copying %s to %s" % (result_path, bio_request_folder))
            shutil.copy(result_path, bio_request_folder)

            # Respond on redmine
            self.completed_response(result_path, inputs['name'])

        except Exception as e:
            dump = traceback.format_exc()
            self.t.time_print("[Warning] AutoSNVPhyl had a problem, continuing redmine api anyways.")
            self.t.time_print("[AutoSNVPhyl Error Dump]\n" + dump)

            # The previous `type(e) == AutoSNVPhylError or ValueError` was
            # always truthy, so unexpected errors never got their traceback.
            if isinstance(e, (AutoSNVPhylError, ValueError)):
                msg = str(e)
            else:
                msg = dump

            # Set it to feedback and assign it back to the author
            get = self.redmine.get_issue_data(inputs['name'])
            self.redmine.update_issue(
                inputs['name'],
                notes="There was a problem with your SNVPhyl. Please create a new issue on"
                      " Redmine to re-run it.\n%s" % msg + self.botmsg,
                status_change=4,
                assign_to_id=get['issue']['author']['id']
            )

    def main_loop(self):
        """Poll forever: free Galaxy space, handle new requests, then sleep.

        The pause between rounds is ``self.seconds_between_redmine_checks``
        seconds.  This method never returns normally.
        """
        from time import sleep

        pause = self.seconds_between_redmine_checks
        while True:
            self.clear_space()
            self.make_call()
            self.t.time_print("Waiting for next check.")
            sleep(pause)

    def clear_space(self):
        """Purge Galaxy histories until at most ``self.max_histories`` remain.

        Histories are removed starting from the end of the list returned by
        Galaxy.  While fetching the list, an invalid API key (HTTP 403) makes
        the stored key be discarded and requested again, and a connection
        failure is retried after one hour.  While deleting, either problem
        terminates the process with status 1.
        """
        from bioblend.galaxy import GalaxyInstance
        from bioblend import ConnectionError
        import time

        galaxy_url = self.loader.get('ip', default='http://192.168.1.3:48888/')
        gi = GalaxyInstance(galaxy_url, key=self.loader.get('api_key'))
        self.t.time_print("Clearing space on Galaxy")

        while True:
            try:
                available = gi.histories.get_histories()  # Ping galaxy
                break
            except ConnectionError as e:
                if e.status_code == 403:  # Invalid API key
                    self.t.time_print("Invalid Galaxy API Key!")
                    del self.loader.__dict__['api_key']
                    self.loader.dump()
                    # Rebuild the client with the newly supplied key; retrying
                    # with the old client would fail with 403 forever.
                    gi = GalaxyInstance(galaxy_url, key=self.loader.get('api_key'))
                elif 'Max retries exceeded' in str(e.args[0]):
                    self.t.time_print("Error: Galaxy isn't running/connection error.")
                    self.t.time_print("Waiting 1 hour...")
                    time.sleep(3600)
                else:
                    raise

        # Same condition as the deletion loop below, so the log is accurate
        if len(available) > self.max_histories:
            msg = 'Clearing data.'
        else:
            msg = 'Not clearing data.'

        self.t.time_print("Currently %d histories on Galaxy. %s" % (len(available), msg))
        while len(available) > self.max_histories:
            # Delete exactly the history that is removed from the list (the
            # old code logged one history and purged its neighbour).
            history = available.pop()
            self.t.time_print("Deleting history %s to clear space..." % history['name'])
            try:
                gi.histories.delete_history(history['id'], purge=True)
            except ConnectionError as e:
                if e.status_code == 403:  # Invalid API key
                    self.t.time_print("Invalid Galaxy API Key!")
                    exit(1)
                elif 'Max retries exceeded' in str(e.args[0]):
                    self.t.time_print("Error: Galaxy isn't running/connection error.")
                    exit(1)
                else:
                    raise

        self.t.time_print("Finished clearing space")

    def make_call(self):
        """Fetch new Redmine issues and process every pending SNVPhyl request.

        An issue qualifies when it has not been answered before, its status
        is 'New' and its subject is 'snvphyl' (case-insensitive).  Qualifying
        issues are handled from the end of the list, clearing Galaxy space
        after each one.
        """
        self.t.time_print("Checking for SNVPhyl requests...")

        data = self.redmine.get_new_issues('cfia')

        pending = [
            issue for issue in data['issues']
            if issue['id'] not in self.responded_issues
            and issue['status']['name'] == 'New'
            and issue['subject'].lower() == 'snvphyl'
        ]

        self.t.time_print("Found %d issues..." % len(pending))

        # Keep going until every found issue has been answered
        while pending:
            self.respond_to_issue(pending.pop())
            self.clear_space()

    def respond_to_issue(self, issue, force=False):
        """Validate a SNVPhyl request, answer on Redmine and start the run.

        Nothing happens unless the issue is still 'New' on Redmine or
        ``force`` is set.  The issue id is persisted as answered before any
        further work.  An invalid request is sent back to its author
        (status 4); a valid one is set to status 2 and then executed.

        Args:
            issue: issue dict with ``'id'``, ``'subject'``, ``'description'``.
            force: process the issue even if it is no longer 'New'.
        """
        status = self.redmine.get_issue_data(issue['id'])['issue']['status']['name']
        if status != 'New' and not force:
            return

        self.t.time_print("Found SNVPhyl to run. Subject: %s. ID: %s" % (issue['subject'], issue['id']))
        self.t.time_print("Adding to responded to")

        self.responded_issues.add(issue['id'])
        self.issue_loader.responded_issues = list(self.responded_issues)
        self.issue_loader.dump()

        # Turn the description into a list of lines, getting rid of \r
        input_list = [line.strip() for line in issue['description'].split('\n')]

        try:
            inputs = self.get_input(input_list, issue['id'])
        except ValueError as e:
            response = "Sorry, there was a problem with your SNVPhyl request:\n%s\n" \
                       "Please submit a new request and close this one." % e.args[0]
            self.t.time_print('\n' + response)
            # Set the status to feedback and assign the issue to its author
            get = self.redmine.get_issue_data(issue['id'])
            self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=4,
                                      assign_to_id=get['issue']['author']['id'])
            return

        response = "Running SNVPhyl with reference %s\n\nComparing to:" % inputs['reference']
        for fastq in inputs['fastqs']:
            response += '\n' + fastq
        if inputs['reference'] not in inputs['fastqs']:
            # Start on its own paragraph; previously this was glued onto the
            # last SEQ-ID of the list above.
            response += "\n\nDid you mean to not compare the reference to itself?"  # TODO ask for answer

        # Rename files if a rename .txt attachment is included in the issue
        more_msg, inputs['rename'] = self.rename_files(issue['id'])
        if more_msg is not None:
            response += '\n' + more_msg

        self.t.time_print('\n' + response)

        # Set the issue to in progress since the SNVPhyl is running
        self.redmine.update_issue(issue['id'], notes=response + self.botmsg, status_change=2)
        self.run_snvphyl(inputs)

    def rename_files(self, issue_id):
        """Read an optional rename list from the first ``.txt`` attachment.

        Args:
            issue_id: Redmine issue whose attachments are inspected.

        Returns:
            ``(message, mapping)``.  ``message`` is ``None`` when there is no
            usable attachment, otherwise text to add to the bot's reply.
            ``mapping`` is a dict of original name -> new name, or an empty
            list when nothing is to be renamed (kept for compatibility with
            existing callers).
        """
        import re

        self.t.time_print('Looking for txt')
        data = self.redmine.get_issue_data(issue_id)
        try:
            attachments = data['issue']['attachments']
        except KeyError:
            # No attachments
            return None, []
        if not attachments:
            return None, []
        self.t.time_print('Found attachment to redmine request.')

        rename = None
        for attachment in attachments:
            if attachment['filename'].endswith('.txt'):
                self.t.time_print('Found %s, downloading...' % attachment['filename'])
                rename = self.redmine.download_file(attachment['content_url'])
                break

        if rename is None:
            return None, []

        # Two fields separated by a comma, tab or line break
        pair_regex = r'([^\t\n\r]+)(?:,|\n|\r|\t)([^\t\n\r]+)'
        pairs = re.findall(pair_regex, rename)

        if not pairs:
            return 'Invalid rename.txt file.', []

        # One pass: the same original name may be repeated only with the
        # same target name.
        targets = {}
        for original, new_name in pairs:
            if targets.setdefault(original, new_name) != new_name:
                return 'Not using rename.txt because of duplicate definition in the rename.txt file', []

        name_regex = r'.+'  # TODO actual regex
        ignore = [pair for pair in pairs if not re.fullmatch(name_regex, pair[1])]

        # Feedback
        msg = "Renaming files before starting SNVPhyl."
        if ignore:
            msg += '\nSome names were invalid and were ignored:\n'
            for out in ignore:
                msg += '\n%s' % out[1]

        # Names reported as ignored must not end up in the mapping
        result = {}
        for pair in pairs:
            if pair not in ignore:
                result[pair[0]] = pair[1]
        self.t.time_print(result)
        return msg, result

    @staticmethod
    def encode(key, string):
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")

        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(str(string[i]))
                                - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)

        return decoded_string

    def __init__(self, force):
        """Set up logging, load persisted state and configuration, then run.

        Args:
            force: forwarded unchanged to ``self.main``.

        Creates the ``runner_logs`` directory next to the script if needed,
        opens a timestamped log through ``Timer`` and reads the stored issue
        ids and settings via ``SaveLoad``.  Any exception raised by ``main``
        is logged with its traceback and re-raised.
        """
        import datetime
        import sys
        import traceback

        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Timer/logger writing into <script dir>/runner_logs/<timestamp>
        log_dir = os.path.join(self.script_dir, 'runner_logs')
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        # NOTE(review): the format orders the time as seconds:minutes:hours -
        # confirm this is intended before relying on the file names.
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%S:%M:%H")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(30)

        # Issues the bot has already responded to
        issues_file = os.path.join(self.script_dir, 'responded_issues.json')
        self.issue_loader = SaveLoad(issues_file, create=True)
        already_answered = self.issue_loader.get('responded_issues', default=[], ask=False)
        self.responded_issues = set(already_answered)

        # Configuration, including the encrypted Redmine API key
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted', default='none', ask=False)

        # 'yes' until a key has been stored by main()
        self.first_run = self.loader.get('first_run', default='yes', ask=False)

        nas_mount = self.loader.get('nasmnt', default="/mnt/nas/", get_type=str)
        self.nas_mnt = os.path.normpath(nas_mount)
        self.max_histories = self.loader.get('max_histories', default=6, get_type=int)
        self.seconds_between_redmine_checks = self.loader.get(
            'seconds_between_redmine_checks', default=600, get_type=int)

        # Touch the remaining settings so they are present in the config
        # (return values are deliberately unused here).
        self.loader.get('workflow_id', default="f2db41e1fa331b3e")
        self.loader.get('ip', default="http://192.168.1.3:48888/")
        self.key = 'Sixteen byte key'

        self.redmine = None

        # Footer appended to every note the bot posts
        self.botmsg = '\n\n_I am a bot. This action was performed automatically._'

        try:
            self.main(force)
        except Exception:
            self.t.time_print("[Error] Dumping...\n%s" % traceback.format_exc())
            raise
class Run(object):
    def main(self, force):
        """Obtain the Redmine API key, connect, and enter the polling loop.

        Args:
            force: when truthy, never prompt; the stored key is decoded and
                used.  On a first run this is impossible, so an error is
                raised instead.

        Raises:
            ValueError: if ``force`` is set on the first run.
        """
        import re

        if self.first_run == 'yes':
            if force:
                raise ValueError('Need redmine API key!')
            choice = 'y'
        elif force:
            choice = 'n'
        else:
            self.t.time_print(
                "Would you like to set the redmine api key? (y/n)")
            choice = input()

        if choice != 'y':
            # Use the key stored (encoded) in the config file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)
        else:
            self.t.time_print(
                "Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Store the encoded key and remember that setup has been done
            encoded_key = self.encode(self.key, self.redmine_api_key)
            self.loader.redmine_api_key_encrypted = encoded_key.decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)

        if re.match(r'^[a-z0-9]{40}$', self.redmine_api_key) is None:
            self.t.time_print("Invalid Redmine API key!")
            exit(1)

        self.redmine = RedmineInterface(
            'http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)

        self.main_loop()

    def get_input(self, input_file, redmine_id):
        mode = 'none'
        regex = r'^(2\d{3}-\w{2,10}-\d{3,4})$'
        inputs = {
            'fastqs':
            list(),
            'fastas':
            list(),
            'outputfolder':
            os.path.join(self.nas_mnt, 'bio_requests', str(redmine_id))
        }
        import re
        for line in input_file:
            # Check for mode changes
            if line.lower().startswith(
                    'fasta') and len(line) < len('fasta') + 3:
                mode = 'fasta'
                continue
            elif line.lower().startswith(
                    'fastq') and len(line) < len('fastq') + 3:
                mode = 'fastq'
                continue
            elif line.lower() == '':
                # Blank line
                mode = 'none'
                continue

            # Get seq-id
            if mode == 'fasta':
                if re.match(regex, line):
                    inputs['fastas'].append(line)
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)
            elif mode == 'fastq':
                if re.match(regex, line):
                    inputs['fastqs'].append(line)
                else:
                    raise ValueError("Invalid seq-id \"%s\"" % line)

        if len(inputs['fastas']) < 1 and len(inputs['fastqs']) < 1:
            raise ValueError(
                "Invalid format for redmine request. Couldn't find any fastas or fastqs to extract"
            )

        return inputs

    def completed_response(self, redmine_id, missing):
        """Report a finished extraction and hand the issue back to its author.

        Args:
            redmine_id: id of the issue being answered.
            missing: names of files that could not be retrieved; listed in
                the note when not empty.

        The issue is updated with status 4 and assigned to its author.
        """
        stored_at = os.path.join("NAS/bio_requests/%s" % redmine_id)
        notes = "Completed extracting files. Results stored at %s" % stored_at
        if len(missing) > 0:
            listing = "".join(name + '\n' for name in missing)
            notes += '\nMissing some files:\n' + listing

        # Assign it back to the author
        issue_data = self.redmine.get_issue_data(redmine_id)
        author_id = issue_data['issue']['author']['id']

        self.redmine.update_issue(redmine_id,
                                  notes + self.botmsg,
                                  status_change=4,
                                  assign_to_id=author_id)

    def run_request(self, inputs):
        """Run the extraction for a parsed request and report on Redmine.

        On success ``completed_response`` is called with the files that were
        reported missing.  On any failure the traceback is logged and posted
        to the issue, which is set to status 4 and assigned to its author.

        Args:
            inputs: dict produced by ``get_input``; the last component of
                ``inputs['outputfolder']`` is the Redmine issue id.
        """
        # noinspection PyBroadException
        try:
            missing_files = MassExtractor(inputs=inputs).run()

            # Respond on redmine
            issue_id = os.path.split(inputs['outputfolder'])[-1]
            self.completed_response(issue_id, missing_files)

        except Exception:
            import traceback
            self.t.time_print(
                "[Warning] run.py had a problem, continuing redmine api anyways."
            )
            self.t.time_print("[AutoSNVPhyl Error Dump]\n" +
                              traceback.format_exc())
            # The traceback is also what gets sent back in the note
            msg = traceback.format_exc()

            # Set it to feedback and assign it back to the author
            failed_id = os.path.split(inputs['outputfolder'])[-1]
            issue_data = self.redmine.get_issue_data(failed_id)
            note = ("There was a problem with your request. Please create a new issue on"
                    " Redmine to re-run it.\n%s" % msg) + self.botmsg
            self.redmine.update_issue(
                os.path.split(inputs['outputfolder'])[-1],
                notes=note,
                status_change=4,
                assign_to_id=issue_data['issue']['author']['id'])

    def main_loop(self):
        """Poll forever: handle new requests, then sleep until the next check.

        The pause between rounds is ``self.seconds_between_redmine_checks``
        seconds.  This method never returns normally.
        """
        from time import sleep

        pause = self.seconds_between_redmine_checks
        while True:
            self.make_call()
            self.t.time_print("Waiting for next check.")
            sleep(pause)

    def make_call(self):
        """Fetch new Redmine issues and process every pending retrieval.

        An issue qualifies when it has not been answered before, its status
        is 'New' and its subject is 'retrieve' (case-insensitive).
        Qualifying issues are handled from the end of the list.
        """
        self.t.time_print("Checking for extraction requests...")

        data = self.redmine.get_new_issues('cfia')

        pending = [
            issue for issue in data['issues']
            if issue['id'] not in self.responded_issues
            and issue['status']['name'] == 'New'
            and issue['subject'].lower() == 'retrieve'
        ]

        self.t.time_print("Found %d issues..." % len(pending))

        # Keep going until every found issue has been answered
        while pending:
            self.respond_to_issue(pending.pop())

    def respond_to_issue(self, issue):
        """Validate a retrieval request, answer on Redmine and start it.

        Nothing happens unless the issue is still 'New' on Redmine.  The
        issue id is persisted as answered before any further work.  An
        invalid request is sent back to its author (status 4); a valid one
        is set to status 2 and then executed.

        Args:
            issue: issue dict with ``'id'``, ``'subject'``, ``'description'``.
        """
        current = self.redmine.get_issue_data(issue['id'])
        if current['issue']['status']['name'] != 'New':
            return

        self.t.time_print("Found extraction to run. Subject: %s. ID: %s" %
                          (issue['subject'], issue['id']))
        self.t.time_print("Adding to responded to")
        self.responded_issues.add(issue['id'])
        self.issue_loader.responded_issues = list(self.responded_issues)
        self.issue_loader.dump()

        # Turn the description into a list of lines, getting rid of \r
        input_list = [line.strip() for line in issue['description'].split('\n')]

        try:
            inputs = self.get_input(input_list, issue['id'])
            response = "Retrieving %d fastas and %d fastqs..." % (
                len(inputs['fastas']), len(inputs['fastqs']))
            error = False
        except ValueError as e:
            response = "Sorry, there was a problem with your request:\n%s\n" \
                       "Please submit a new request and close this one." % e.args[0]
            error = True

        self.t.time_print('\n' + response)

        if error:
            # Set the status to feedback and assign the issue to its author
            get = self.redmine.get_issue_data(issue['id'])
            self.redmine.update_issue(
                issue['id'],
                notes=response + self.botmsg,
                status_change=4,
                assign_to_id=get['issue']['author']['id'])
            return

        # Set the issue to in progress while the request is running
        self.redmine.update_issue(issue['id'],
                                  notes=response + self.botmsg,
                                  status_change=2)
        self.run_request(inputs)

    @staticmethod
    def encode(key, string):
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")

        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(str(string[i])) - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)

        return decoded_string

    def __init__(self, force):
        """Load persisted state and settings, then run the bot.

        Sets up the log file under ``runner_logs`` next to the script, loads
        the ids of issues already answered and the config file, and finally
        calls ``self.main(force)``.

        :param force: passed straight through to ``self.main``
        :raises Exception: anything raised by ``self.main`` is logged with its
            traceback through the timer and then re-raised unchanged
        """
        import datetime
        import sys
        import traceback

        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Set up timer/logger; exist_ok avoids the check-then-create race
        log_dir = os.path.join(self.script_dir, 'runner_logs')
        os.makedirs(log_dir, exist_ok=True)
        # Stamp is day-month-year_hour:minute:second (the time fields used to
        # be written in reverse order, as second:minute:hour)
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(30)

        # Load issues that the bot has already responded to
        self.issue_loader = SaveLoad(os.path.join(self.script_dir,
                                                  'responded_issues.json'),
                                     create=True)
        self.responded_issues = set(
            self.issue_loader.get('responded_issues', default=[], ask=False))

        # Load the config and read the stored (encoded) api key from it
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted',
                                               default='none',
                                               ask=False)

        # If it's the first run then this will be yes
        self.first_run = self.loader.get('first_run', default='yes', ask=False)

        self.nas_mnt = os.path.normpath(
            self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
        self.seconds_between_redmine_checks = self.loader.get(
            'secs_between_redmine_checks', default=600, get_type=int)
        # NOTE(review): fixed key compiled into the source - the stored api key
        # is only obfuscated by encode/decode, not protected
        self.key = 'Sixteen byte key'

        # Replaced by a live connection once main() has a valid key
        self.redmine = None

        self.botmsg = '\n\n_I am a bot. This action was performed automatically._'

        try:
            self.main(force)
        except Exception:
            # Record the failure in the log file before letting it propagate
            self.t.time_print("[Error] Dumping...\n%s" %
                              traceback.format_exc())
            raise
Exemplo n.º 8
0
class Run(object):
    def main(self, force):
        if self.first_run == 'yes':
            choice = 'y'
            if force:
                raise ValueError('Need redmine API key!')
        else:
            if force:
                choice = 'n'
            else:
                self.t.time_print(
                    "Would you like to set the redmine api key? (y/n)")
                choice = input()
        if choice == 'y':
            self.t.time_print(
                "Enter your redmine api key (will be encrypted to file)")
            self.redmine_api_key = input()
            # Encode and send to json file
            self.loader.redmine_api_key_encrypted = self.encode(
                self.key, self.redmine_api_key).decode('utf-8')
            self.loader.first_run = 'no'
            self.loader.dump(self.config_json)
        else:
            # Import and decode from file
            self.redmine_api_key = self.decode(self.key, self.redmine_api_key)

        import re
        if not re.match(r'^[a-z0-9]{40}$', self.redmine_api_key):
            self.t.time_print("Invalid Redmine API key!")
            exit(1)

        self.redmine = RedmineInterface(
            'http://redmine.biodiversity.agr.gc.ca/', self.redmine_api_key)

        self.main_loop()

    def completed_response(self, redmine_id, missing):
        notes = "Completed extracting files. Results stored at %s" % os.path.join(
            "NAS/bio_requests/%s" % redmine_id)
        if len(missing) > 0:
            notes += '\nMissing some files:\n'
            for file in missing:
                notes += file + '\n'

        # Assign it back to the author
        get = self.redmine.get_issue_data(redmine_id)

        self.redmine.update_issue(redmine_id,
                                  notes,
                                  status_change=4,
                                  assign_to_id=get['issue']['author']['id'])

    def main_loop(self):
        import time
        while True:
            self.make_call()
            self.t.time_print("Waiting for next check.")
            time.sleep(self.seconds_between_redmine_checks)

    def make_call(self):
        self.t.time_print("Checking for metadata requests...")

        data = self.redmine.get_new_issues('cfia')

        found = []
        import re
        prog = re.compile(r'^assembly&metadata-\d{2}(\d{2}-\d{1,2}-\d{1,2})$')
        for issue in data['issues']:
            if issue['status']['name'] == 'New':
                # Get rid of caps and spaces and match
                subj = ''.join(issue['subject'].lower().split())
                result = re.fullmatch(prog, subj)
                if result:
                    found.append({
                        'id': issue['id'],
                        'folder': ''.join(result.group(1).split('-'))
                    })

        self.t.time_print("Found %d issues..." % len(found))

        while len(found) > 0:  # While there are still issues to respond to
            self.respond_to_issue(found.pop(len(found) - 1))

        # Check on old jobs
        self.t.time_print("Checking on old issues: ")
        for job in self.queue:
            msg = str(job['id']) + ': '
            if self.check_assembly(job['folder']):
                'Uploading.'
                results_zip = self.retrieve_files(job)

                response = "Retrieved data. Also stored at %s." % results_zip
                self.redmine.upload_file(results_zip,
                                         job['id'],
                                         'application/zip',
                                         status_change=4,
                                         additional_notes=response +
                                         self.bottext)
                self.queue.remove(job)
            else:
                msg += 'Not ready.'
            self.t.time_print(msg)

    def respond_to_issue(self, job):
        # Run extraction
        if self.redmine.get_issue_data(
                job['id'])['issue']['status']['name'] == 'New':
            self.t.time_print(
                "Found metadata retrieving job to run. ID: %s, folder %s" %
                (str(job['id']), str(job['folder'])))

            if self.check_assembly(job['folder']):
                self.t.time_print('Uploading files...')
                # Retrieve
                results_zip = self.retrieve_files(job)

                response = "Retrieved data. Also stored at %s." % results_zip
                self.redmine.upload_file(results_zip,
                                         job['id'],
                                         'application/zip',
                                         status_change=4,
                                         additional_notes=response +
                                         self.bottext)
            else:
                # response
                response = "Waiting for assembly to complete..."
                self.t.time_print(response)
                self.t.time_print("Adding to queue")
                self.queue.append(job)
                self.queue_loader.queue = self.queue
                self.queue_loader.dump()

                # Set the issue to in progress
                self.redmine.update_issue(job['id'],
                                          notes=response + self.bottext,
                                          status_change=2)

    def check_assembly(self, datestr):
        directory = os.path.join(self.nas_mnt, 'WGSspades',
                                 datestr + '_Assembled')
        return bool(os.path.isdir(directory))

    def retrieve_files(self, job):
        results_folder = os.path.join(self.nas_mnt, 'bio_requests',
                                      str(job['id']))
        os.makedirs(results_folder)
        results_zip = os.path.join(results_folder, str(job['id']) + '.zip')
        directory = os.path.join(self.nas_mnt, 'WGSspades',
                                 job['folder'] + '_Assembled')
        self.zip_results(os.path.join(directory, 'reports'), results_zip)
        return results_zip

    def zip_results(self, r_folder, outfolder):
        import zipfile
        # Zip all the files
        self.t.time_print("Creating zip file %s" % outfolder)

        try:
            os.remove(outfolder)
        except OSError:
            pass

        zipf = zipfile.ZipFile(outfolder, 'w', zipfile.ZIP_DEFLATED)
        for file in os.listdir(r_folder):
            zipf.write(os.path.join(r_folder, file))
            self.t.time_print("Zipped %s" % file)

        zipf.close()

    @staticmethod
    def encode(key, string):
        encoded_chars = []
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(ord(string[i]) + ord(key_c) % 256)
            encoded_chars.append(encoded_c)
        encoded_string = "".join(encoded_chars)
        encoded_string = bytes(encoded_string, "utf-8")

        return base64.urlsafe_b64encode(encoded_string)

    @staticmethod
    def decode(key, string):
        decoded_chars = []
        string = base64.urlsafe_b64decode(string).decode('utf-8')
        for i in range(len(string)):
            key_c = key[i % len(key)]
            encoded_c = chr(abs(ord(str(string[i])) - ord(key_c) % 256))
            decoded_chars.append(encoded_c)
        decoded_string = "".join(decoded_chars)

        return decoded_string

    def __init__(self, force):
        """Load persisted state and settings, then run the bot.

        Sets up the log file under ``runner_logs`` next to the script, loads
        the saved job queue and the config file, and finally calls
        ``self.main(force)``.

        :param force: passed straight through to ``self.main``
        :raises Exception: anything raised by ``self.main`` is logged with its
            traceback through the timer and then re-raised unchanged
        """
        import datetime
        import sys
        import traceback

        self.script_dir = sys.path[0]
        self.config_json = os.path.join(self.script_dir, "config.json")

        # Set up timer/logger; exist_ok avoids the check-then-create race
        log_dir = os.path.join(self.script_dir, 'runner_logs')
        os.makedirs(log_dir, exist_ok=True)
        # Stamp is day-month-year_hour:minute:second (the time fields used to
        # be written in reverse order, as second:minute:hour)
        stamp = datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
        self.t = Timer(log_file=os.path.join(log_dir, stamp))
        self.t.set_colour(30)

        # Save issues found to a queue (load existing issues if bot needs to restart)
        self.queue_loader = SaveLoad(os.path.join(self.script_dir,
                                                  'queue.json'),
                                     create=True)
        self.queue = self.queue_loader.get('queue', default=[], ask=False)

        # Load the config and read the stored (encoded) api key from it
        self.loader = SaveLoad(self.config_json, create=True)
        self.redmine_api_key = self.loader.get('redmine_api_key_encrypted',
                                               default='none',
                                               ask=False)

        # If it's the first run then this will be yes
        self.first_run = self.loader.get('first_run', default='yes', ask=False)

        self.nas_mnt = os.path.normpath(
            self.loader.get('nasmnt', default="/mnt/nas/", get_type=str))
        self.seconds_between_redmine_checks = self.loader.get(
            'secs_between_redmine_checks', default=600, get_type=int)
        # NOTE(review): fixed key compiled into the source - the stored api key
        # is only obfuscated by encode/decode, not protected
        self.key = 'Sixteen byte key'

        # Replaced by a live connection once main() has a valid key
        self.redmine = None

        self.bottext = '\n\n_I am a bot. This action was performed automatically._'

        try:
            self.main(force)
        except Exception:
            # Record the failure in the log file before letting it propagate
            self.t.time_print("[Error] Dumping...\n%s" %
                              traceback.format_exc())
            raise
Exemplo n.º 9
0
class UpdateDatabase(object):
    def main(self):
        """Main Program, updates the database"""
        print("Main")
        start_time = time.time()
        self.getrmlsthelper(start_time)

        for organism in self.loader.to_update:
            self.getmlsthelper(start_time, organism)

    def getrmlsthelper(self, start):
        """
        Makes a system call to rest_auth.pl, a Perl script modified from
        https://github.com/kjolley/BIGSdb/tree/develop/scripts/test
        And downloads the most up-to-date rMLST profile and alleles
        """
        from subprocess import call
        # Folders are named based on the download date e.g 2016-04-26
        # Find all folders (with the trailing / in the glob search) and remove the trailing /
        try:
            lastfolder = sorted(glob('{}{}/2*/'.format(self.referencefilepath, self.analysistype)))[-1].rstrip('/')
        except IndexError:
            lastfolder = "2000-01-01"

        delta, foldersize, d1 = self.schemedate(lastfolder)
        # Extract the path of the current script from the full path + file name
        homepath = os.path.split(os.path.abspath(__file__))[0]
        # Set the path/name of the folder to contain the new alleles and profile
        newfolder = '{}{}/{}'.format(self.referencefilepath, self.analysistype, d1)
        # System call
        rmlstupdatecall = 'cd {} && perl {}/rest_auth.pl -a {}/secret.txt'.format(newfolder, homepath, homepath)
        if foldersize < 100:
            printtime("Last update of rMLST profile and alleles was {} days ago. Updating".format(str(delta.days)),
                      start)
            # Create the path
            make_path(newfolder)
            # Copy over the access token to be used in the authentication
            shutil.copyfile('{}/access_token'.format(homepath), '{}/access_token'.format(newfolder))
            # Run rest_auth.pl
            call(rmlstupdatecall, shell=True)
            # Get the new alleles into a list, and create the combinedAlleles file
            alleles = glob('{}/*.tfa'.format(newfolder))
            self.combinealleles(start, newfolder, alleles)
        # If the profile and alleles are up-to-date, set :newfolder to :lastfolder
        else:
            newfolder = lastfolder
        # Ensure that the profile/alleles updated successfully
        # Calculate the size of the folder by adding the sizes of all the files within the folder together
        newfoldersize = sum(os.path.getsize('{}/{}'.format(newfolder, f)) for f in os.listdir(newfolder)
                            if os.path.isfile('{}/{}'.format(newfolder, f)))
        # If the profile/allele failed, remove the folder, and use the most recent update
        if newfoldersize < 100:
            shutil.rmtree(newfolder)
            try:
                newfolder = sorted(glob('{}{}/*/'.format(self.referencefilepath, self.analysistype)))[-1].rstrip('/')
            except IndexError:
                pass
        # Return the system call and the folder containing the profile and alleles
        return rmlstupdatecall, newfolder

    def getmlsthelper(self, start, organism):
        """Prepares to run the getmlst.py script provided in SRST2"""
        # Initialise a set to for the organism(s) for which new alleles and profiles are desired
        organismset = set()
        # Allow for Shigella to use the Escherichia MLST profile/alleles
        organism = organism if organism != 'Shigella' else 'Escherichia'
        # As there are multiple profiles for certain organisms, this dictionary has the schemes I use as values

        # Allow for a genus not in the dictionary being specified
        try:
            organismset.add(self.loader.organismdictionary[organism])
        except KeyError:
            # Add the organism to the set
            organismset.add(organism)
        for scheme in organismset:
            organismpath = os.path.join(self.referencefilepath, 'MLST', organism)
            # Find all folders (with the trailing / in the glob search) and remove the trailing /
            try:
                lastfolder = sorted(glob('{}/*/'.format(organismpath)))[-1].rstrip('/')
            except IndexError:
                lastfolder = []
            # Run the method to determine the most recent folder, and how recently it was updated
            delta, foldersize, d1 = self.schemedate(lastfolder)
            # Set the path/name of the folder to contain the new alleles and profile
            newfolder = '{}/{}'.format(organismpath, d1)

            if foldersize < 100:
                printtime('Downloading {} MLST scheme from pubmlst.org'.format(organism), start)
                # Create the object to store the argument attributes to feed to getmlst
                getmlstargs = GenObject()
                getmlstargs.species = scheme
                getmlstargs.repository_url = 'http://pubmlst.org/data/dbases.xml'
                getmlstargs.force_scheme_name = False
                getmlstargs.path = newfolder
                # Create the path to store the downloaded
                make_path(getmlstargs.path)
                getmlst.main(getmlstargs)
                # Even if there is an issue contacting the database, files are created, however, they are populated
                # with XML strings indicating that the download failed
                # Read the first character in the file
                try:
                    profilestart = open(glob('{}/*.txt'.format(newfolder))[0]).readline()
                except IndexError:
                    profilestart = []
                # If it is a <, then the download failed
                if not profilestart or profilestart[0] == '<':
                    # Delete the folder, and use the previous definitions instead
                    shutil.rmtree(newfolder)
                    newfolder = lastfolder
            # If the profile and alleles are up-to-date, set :newfolder to :lastfolder
            else:
                newfolder = lastfolder
            # If update isn't specified, don't update
                # Ensure that the profile/alleles updated successfully
                # Calculate the size of the folder by adding the sizes of all the files within the folder together
            try:
                newfoldersize = sum(os.path.getsize('{}/{}'.format(newfolder, f)) for f in os.listdir(newfolder)
                                    if os.path.isfile('{}/{}'.format(newfolder, f)))
            except (OSError, TypeError):
                newfoldersize = 100
            # If the profile/allele failed, remove the folder, and use the most recent update
            if newfoldersize < 100:
                shutil.rmtree(newfolder)
                try:
                    newfolder = sorted(glob('{}/*/'.format(organismpath)))[-1].rstrip('/')
                except IndexError:
                    newfolder = organismpath
            # Return the name/path of the allele-containing folder
            return newfolder

    @staticmethod
    def combinealleles(start, allelepath, alleles):
        """
        Merge the individual allele files into a single rMLST_combined.fasta
        :param start: start time handed on to printtime
        :param allelepath: folder in which rMLST_combined.fasta is written
        :param alleles: paths of the FASTA files to merge; they are processed in sorted order
        """
        printtime('Creating combined rMLST allele file', start)
        records = []

        # Open each allele file
        for allele in sorted(alleles):
            # Plain 'r' is used: the 'U' flag was removed in Python 3.11 and text mode already
            # translates newlines. The with block closes each input once it has been parsed
            with open(allele, 'r') as fasta:
                for record in SeqIO.parse(fasta, "fasta"):
                    # Replace any dashes in the record.id with underscores
                    record.id = record.id.replace('-', '_')
                    # Remove any dashes or 'N's from the sequence data - makeblastdb can't handle sequences
                    # with gaps
                    # NOTE(review): relies on the private _data attribute holding a str - confirm against
                    # the installed Biopython version
                    # noinspection PyProtectedMember
                    record.seq._data = record.seq._data.replace('-', '').replace('N', '')
                    # Clear the name and description attributes of the record
                    record.name = ''
                    record.description = ''
                    records.append(record)
        # Write all the records to the combined file in one go
        with open('{}/rMLST_combined.fasta'.format(allelepath), 'w') as combinedfile:
            SeqIO.write(records, combinedfile, 'fasta')

    def schemedate(self, lastfolder):
        from datetime import date
        try:
            # Extract the folder name (date) from the path/name
            lastupdate = os.path.split(lastfolder)[1]
        except AttributeError:
            lastupdate = '2000-01-01'
        try:
            # Calculate the size of the folder by adding the sizes of all the files within the folder together
            foldersize = sum(os.path.getsize('{}/{}'.format(lastfolder, f)) for f in os.listdir(lastfolder)
                             if os.path.isfile('{}/{}'.format(lastfolder, f)))
        except (TypeError, FileNotFoundError):
            foldersize = 0
        # Try to figure out the year, month, and day from the folder name
        try:
            (year, month, day) = lastupdate.split("-")
            # Create a date object variable with the year, month, and day
            d0 = date(int(year), int(month), int(day))
        except ValueError:
            # Set an arbitrary date in the past to force an update
            d0 = date(2000, 1, 1)
        # Create a date object with the current date
        d1 = date(int(time.strftime("%Y")), int(time.strftime("%m")), int(time.strftime("%d")))
        # Subtract the last update date from the current date
        delta = d1 - d0

        return delta, foldersize, d1

    def __init__(self, parser):
        """
        Load (or create) the organism configuration and run the update
        :param parser: object providing the referencedirectory and start attributes
        Exits with status 1 when bacteria.json cannot be decoded or lacks an organismdictionary
        """
        import json

        print("initialising")
        self.analysistype = "rMLST"
        # Joining with "" guarantees a trailing separator on the reference path
        self.referencefilepath = os.path.join(parser.referencedirectory, "")
        self.start = parser.start
        self.loader = SaveLoad()

        try:
            loaded = self.loader.load("bacteria.json", True)
            if not loaded:
                # Fresh file: seed it with the default genus -> scheme mapping
                self.loader.organismdictionary = {
                    'Escherichia': 'Escherichia coli#1',
                    'Shigella': 'Escherichia coli#1',
                    'Vibrio': 'Vibrio parahaemolyticus',
                    'Campylobacter': 'Campylobacter jejuni',
                    'Listeria': 'Listeria monocytogenes',
                    'Bacillus': 'Bacillus cereus',
                    'Klebsiella': 'Klebsiella pneumoniae',
                }
                self.loader.to_update = list(self.loader.organismdictionary.keys())
                self.loader.dump("bacteria.json")

            # A loaded file without the mapping is treated like an undecodable one
            if "organismdictionary" not in vars(self.loader):
                raise NameError

        except (json.decoder.JSONDecodeError, NameError):
            print("Invalid config file, please delete or fix")
            sys.exit(1)

        self.main()