Пример #1
0
 def run(self):
     """
     Write the submit script and the input file, then submit the job over SSH.

     An info message naming the job and species is logged first; it also notes
     a fine optimization or the pivots when those attributes are set. If
     ``self.ess_settings['ssh']`` is truthy, the job is submitted through an
     ``SSH_Client`` and ``self.job_status[0]`` / ``self.job_id`` are set from
     the pair returned by ``submit_job``. If an ``IndexError`` is raised while
     submitting, both files are written again and submission is retried once.
     """
     # Pick the message template first, then fill in only the fields it needs.
     if self.fine:
         template = 'Running job {name} for {label} (fine opt)'
         with_pivots = False
     elif self.pivots:
         template = 'Running job {name} for {label} (pivots: {pivots})'
         with_pivots = True
     else:
         template = 'Running job {name} for {label}'
         with_pivots = False
     fields = {'name': self.job_name, 'label': self.species_name}
     if with_pivots:
         fields['pivots'] = self.pivots
     logging.info(template.format(**fields))

     logging.debug('writing submit script...')
     self.write_submit_script()
     logging.debug('writing input file...')
     self.write_input_file()

     if not self.ess_settings['ssh']:
         return

     ssh = SSH_Client(self.server)
     logging.debug('submitting job...')

     def submit():
         # submit_job returns the job server status and the job server id
         self.job_status[0], self.job_id = ssh.submit_job(remote_path=self.remote_path)

     try:
         submit()
     except IndexError:
         # if the connection broke, the files might not have been uploaded correctly
         self.write_submit_script()
         self.write_input_file()
         submit()
Пример #2
0
 def _check_job_server_status(self):
     """
     Possible statuses: `initializing`, `running`, `errored on node xx`, `done`
     """
     if self.ess_settings['ssh']:
         ssh = SSH_Client(self.server)
         return ssh.check_job_status(self.job_id)
Пример #3
0
 def delete(self):
     """
     Delete this job on the server.

     A debug message naming the job and species is always logged. The actual
     deletion (``SSH_Client.delete_job`` with ``self.job_id``) is only issued
     when ``self.settings['ssh']`` is truthy.
     """
     announcement = 'Deleting job {name} for {label}'.format(
         name=self.job_name, label=self.species_name)
     logging.debug(announcement)
     if not self.settings['ssh']:
         return
     client = SSH_Client(self.server)
     logging.debug('deleting job...')
     client.delete_job(self.job_id)
Пример #4
0
 def _download_output_file(self):
     """
     Download the job's output file from the server to ``<local_path>/output.out``.

     The remote file name is looked up in ``output_filename`` by ``self.software``.
     After the download, ``self.final_time`` is set to the remote file's last
     modified time and ``self.determine_run_time()`` is called.

     Raises:
         JobError: If the local output file does not exist after the download.
     """
     client = SSH_Client(self.server)
     source = os.path.join(self.remote_path, output_filename[self.software])
     destination = os.path.join(self.local_path, 'output.out')
     client.download_file(remote_file_path=source, local_file_path=destination)
     self.final_time = client.get_last_modified_time(remote_file_path=source)
     self.determine_run_time()
     if os.path.isfile(destination):
         return
     raise JobError('output file for {0} was not downloaded properly'.format(self.job_name))
Пример #5
0
 def _get_additional_job_info(self):
     """
     Download the additional information of stdout and stderr from the server.

     The scheduler type is read from ``servers[self.server]['cluster_soft']``
     (compared case-insensitively):

     * ``oge`` / ``sge``: ``out.txt`` and ``err.txt`` are downloaded from
       ``self.remote_path`` into ``self.local_path``; the result is the text
       of ``out.txt``, a newline, then the text of ``err.txt``.
     * ``slurm``: the remote folder is listed with ``ls -alF`` and every entry
       whose name contains both ``slurm`` and ``.out`` is downloaded; the
       result is the text of each such file followed by a newline.

     A failed download (``TypeError`` or ``IOError``) is logged as a warning
     and does not abort the method; a file that is missing locally simply
     contributes no text.

     Returns:
         str: The concatenated text described above (an empty string for any
         other scheduler type).
     """
     content = ''
     ssh = SSH_Client(self.server)
     cluster_soft = servers[self.server]['cluster_soft'].lower()
     if cluster_soft in ['oge', 'sge']:
         texts = list()
         for filename in ('out.txt', 'err.txt'):
             remote_file_path = os.path.join(self.remote_path, filename)
             local_file_path = os.path.join(self.local_path, filename)
             try:
                 ssh.download_file(remote_file_path=remote_file_path, local_file_path=local_file_path)
             except (TypeError, IOError) as e:
                 logging.warning('Got the following error when trying to download {0} for {1}:'.format(
                     filename, self.job_name))
                 # str(e) instead of e.message: the attribute does not exist on Python 3
                 logging.warning(str(e))
             text = ''
             if os.path.isfile(local_file_path):
                 with open(local_file_path, 'r') as f:
                     text = f.read()
             texts.append(text)
         content += '\n'.join(texts)
     elif cluster_soft == 'slurm':
         respond = ssh.send_command_to_server(command='ls -alF', remote_path=self.remote_path)
         # NOTE(review): only the first element of the returned stdout is parsed here;
         # confirm that send_command_to_server returns the whole listing in that element.
         for line in respond[0][0].splitlines():
             fields = line.split()
             if not fields:
                 # a blank listing line carries no file name
                 continue
             filename = fields[-1]
             if 'slurm' not in filename or '.out' not in filename:
                 continue
             remote_file_path = os.path.join(self.remote_path, filename)
             local_file_path = os.path.join(self.local_path, filename)
             try:
                 ssh.download_file(remote_file_path=remote_file_path, local_file_path=local_file_path)
             except (TypeError, IOError) as e:
                 logging.warning('Got the following error when trying to download {0} for {1}:'.format(
                     filename, self.job_name))
                 logging.warning(str(e))
             # Read each file into a fresh variable so that a failed download
             # cannot re-append the text of a previously processed file.
             if os.path.isfile(local_file_path):
                 with open(local_file_path, 'r') as f:
                     content += f.read()
             content += '\n'
     return content
Пример #6
0
 def _upload_input_file(self):
     """
     Upload the job's input file to the server.

     The remote folder ``self.remote_path`` is created with ``mkdir -p``, the
     content of ``self.input`` is uploaded under the file name registered in
     ``input_filename`` for ``self.software``, and ``self.initial_time`` is set
     to the last modified time of the uploaded file.
     """
     client = SSH_Client(self.server)
     make_folder = 'mkdir -p {0}'.format(self.remote_path)
     client.send_command_to_server(command=make_folder)
     target = os.path.join(self.remote_path, input_filename[self.software])
     client.upload_file(remote_file_path=target, file_string=self.input)
     self.initial_time = client.get_last_modified_time(remote_file_path=target)
Пример #7
0
 def _upload_submit_file(self):
     """
     Upload the job's submit script to the server.

     The remote folder ``self.remote_path`` is created with ``mkdir -p`` and the
     content of ``self.submit`` is uploaded under the file name registered in
     ``submit_filename`` for the server's ``cluster_soft`` entry.
     """
     client = SSH_Client(self.server)
     make_folder = 'mkdir -p {0}'.format(self.remote_path)
     client.send_command_to_server(command=make_folder)
     script_name = submit_filename[servers[self.server]['cluster_soft']]
     target = os.path.join(self.remote_path, script_name)
     client.upload_file(remote_file_path=target, file_string=self.submit)
Пример #8
0
 def troubleshoot_server(self):
     """
     Troubleshoot a job with a problematic server status and run it again.

     Nothing is done unless ``self.ess_settings['ssh']`` is truthy. The
     scheduler type is read from ``servers[self.server]['cluster_soft']`` and
     compared case-insensitively:

     * ``oge``: the current server run is deleted, the list of available nodes
       is queried, and the first node whose line contains ``0/0/8`` and which
       is not yet in ``self.server_nodes`` is recorded there. The remote
       submit script is then edited to request that node (the existing
       ``#$ -l h=node...`` line is replaced, or a new one is inserted at
       index 7), uploaded, and ``self.run()`` is called. If no node qualifies,
       an error is logged and the method returns without re-running the job.
     * ``slurm``: the current server run is deleted and ``self.run()`` is
       called.
     """
     if not self.ess_settings['ssh']:
         return
     cluster_soft = servers[self.server]['cluster_soft']
     if cluster_soft.lower() == 'oge':
         # delete present server run
         logging.error('Job {name} has server status "{stat}" on {server}. Troubleshooting by changing node.'.format(
             name=self.job_name, stat=self.job_status[0], server=self.server))
         ssh = SSH_Client(self.server)
         ssh.send_command_to_server(command=delete_command[cluster_soft] + ' ' + str(self.job_id))
         # find available nodes
         stdout, _ = ssh.send_command_to_server(command=list_available_nodes_command[cluster_soft])
         for line in stdout:
             fields = line.split()
             if not fields:
                 # blank line: nothing to parse
                 continue
             host = fields[0].split('.')[0]
             if 'node' not in host:
                 # not a node entry (e.g., a header line)
                 continue
             node = host.split('node')[1]
             # the scheduler type was already established by the enclosing branch
             if '0/0/8' in line and node not in self.server_nodes:
                 self.server_nodes.append(node)
                 break
         else:
             logging.error('Could not find an available node on the server')
             # TODO: continue troubleshooting; if all else fails, put job to sleep for x min and try again searching for a node
             return
         # modify submit file
         content = ssh.read_remote_file(remote_path=self.remote_path,
                                        filename=submit_filename[cluster_soft])
         # The lines are re-joined with '' below, so the directive must carry its
         # own line ending or it would fuse with the line that follows it.
         node_directive = '#$ -l h=node{0}.cluster\n'.format(node)
         for i, line in enumerate(content):
             if '#$ -l h=node' in line:
                 content[i] = node_directive
                 break
         else:
             content.insert(7, node_directive)
         content = ''.join(content)  # convert list into a single string, not to upset paramiko
         # resubmit
         ssh.upload_file(remote_file_path=os.path.join(self.remote_path, submit_filename[cluster_soft]),
                         file_string=content)
         # NOTE(review): confirm that run() does not regenerate the submit script
         # and thereby discard the node directive uploaded above.
         self.run()
     elif cluster_soft.lower() == 'slurm':
         # TODO: change node on Slurm
         # delete present server run
         logging.error('Job {name} has server status "{stat}" on {server}. Re-running job.'.format(
             name=self.job_name, stat=self.job_status[0], server=self.server))
         ssh = SSH_Client(self.server)
         ssh.send_command_to_server(command=delete_command[cluster_soft] + ' ' + str(self.job_id))
         # resubmit
         self.run()