Example #1
def make_dispatcher(mdata, mdata_resource=None, work_path=None, run_tasks=None, group_size=None):
    if 'ali_auth' in mdata:
        # ALI (Aliyun) dispatcher support is currently disabled; the original
        # implementation is kept below for reference.
        pass
        #from maptool.dispatcher.ALI import ALI
        #nchunks = len(_split_tasks(run_tasks, group_size))
        #dispatcher = ALI(mdata['ali_auth'], mdata_resource, mdata, nchunks)
        #dispatcher.init(work_path, run_tasks, group_size)
        #return dispatcher
    else:    
        try:
            hostname = mdata['hostname']
            context_type = 'ssh'
        except KeyError:
            context_type = 'local'
        try:
            batch_type = mdata['batch']
        except KeyError:
            mlog.info('cannot find key "batch" in machine file, trying the deprecated key "machine_type"')
            batch_type = mdata['machine_type']
        try:
            lazy_local = mdata['lazy_local']
        except KeyError:
            lazy_local = False
        if lazy_local and context_type == 'local':
            mlog.info('Dispatcher switches to the lazy local mode')
            context_type = 'lazy-local'
        disp = Dispatcher(mdata, context_type=context_type, batch_type=batch_type)
        return disp
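
The machine-file probing above can be shown in isolation. The following is a minimal sketch (not part of maptool) of how the same context_type/batch_type decisions fall out of a machine dict; the dict contents here are hypothetical:

# Hypothetical machine dict; the keys mirror those read by make_dispatcher.
mdata = {'hostname': 'login01', 'batch': 'slurm', 'lazy_local': False}

context_type = 'ssh' if 'hostname' in mdata else 'local'
batch_type = mdata['batch'] if 'batch' in mdata else mdata.get('machine_type')
if mdata.get('lazy_local', False) and context_type == 'local':
    context_type = 'lazy-local'

print(context_type, batch_type)  # ssh slurm
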
Example #2
 def download(self,
              job_dirs,
              remote_down_files,
              check_exists=False,
              mark_failure=True,
              back_error=False):
     cwd = os.getcwd()
     for ii in job_dirs:
         local_job = os.path.join(self.local_root, ii)
         remote_job = os.path.join(self.remote_root, ii)
         flist = list(remote_down_files)  # copy; do not modify the caller's list
         if back_error:
             os.chdir(remote_job)
             flist += glob('error*')
             os.chdir(cwd)
         for jj in flist:
             rfile = os.path.join(remote_job, jj)
             lfile = os.path.join(local_job, jj)
             if not os.path.realpath(rfile) == os.path.realpath(lfile):
                 if (not os.path.exists(rfile)) and (
                         not os.path.exists(lfile)):
                     if check_exists:
                         if mark_failure:
                             with open(
                                     os.path.join(
                                         self.local_root, ii,
                                         'tag_failure_download_%s' % jj),
                                     'w') as fp:
                                 pass
                         else:
                             pass
                     else:
                         raise RuntimeError('cannot find download file ' +
                                            rfile)
                 elif (not os.path.exists(rfile)) and (
                         os.path.exists(lfile)):
                     # already downloaded
                     pass
                 elif (os.path.exists(rfile)) and (
                         not os.path.exists(lfile)):
                     # trivial case, download happily
                     shutil.move(rfile, lfile)
                 elif (os.path.exists(rfile)) and (os.path.exists(lfile)):
                     # both exist, replace!
                     mlog.info('found existing %s, replacing with %s' %
                               (lfile, rfile))
                     if os.path.isdir(lfile):
                         shutil.rmtree(lfile)
                     elif os.path.isfile(lfile) or os.path.islink(lfile):
                         os.remove(lfile)
                     shutil.move(rfile, lfile)
                 else:
                     raise RuntimeError('should not reach here!')
             else:
                 # do nothing in the case of linked files
                 pass
     os.chdir(cwd)
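
The replacement branch above (both the remote and the local copy exist) boils down to "remove whatever is at the local path, then move the remote file over". A small standalone sketch of that step, using only the standard library and made-up file names:

import os
import shutil
import tempfile

def move_replacing(rfile, lfile):
    # Remove an existing local file/dir/link, then move the remote copy over,
    # mirroring the "both exist" branch of download().
    if os.path.isdir(lfile) and not os.path.islink(lfile):
        shutil.rmtree(lfile)
    elif os.path.isfile(lfile) or os.path.islink(lfile):
        os.remove(lfile)
    shutil.move(rfile, lfile)

# Toy usage inside a temporary directory.
with tempfile.TemporaryDirectory() as d:
    src = os.path.join(d, 'remote.log')
    dst = os.path.join(d, 'local.log')
    for path, text in ((src, 'new'), (dst, 'old')):
        with open(path, 'w') as fp:
            fp.write(text)
    move_replacing(src, dst)
    with open(dst) as fp:
        print(fp.read())  # new
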
Example #3
 def _rmtree(self, sftp, remotepath, level=0, verbose = False):
     for f in sftp.listdir_attr(remotepath):
         rpath = os.path.join(remotepath, f.filename)
         if stat.S_ISDIR(f.st_mode):
             self._rmtree(sftp, rpath, level=(level + 1), verbose=verbose)
         else:
             if verbose: mlog.info('removing %s%s' % ('    ' * level, rpath))
             sftp.remove(rpath)
     if verbose: mlog.info('removing %s%s' % ('    ' * level, remotepath))
     sftp.rmdir(remotepath)
Example #4
 def ensure_alive(self,
                  max_check = 10,
                  sleep_time = 10):
     count = 1
     while not self._check_alive():
         if count == max_check:
             raise RuntimeError('cannot connect ssh after %d failures at interval %d s' %
                                (max_check, sleep_time))
         mlog.info('connection check failed, try to reconnect to ' + self.remote_host)
         self._setup_ssh(self.remote_host,
                         self.remote_port,
                         username=self.remote_uname,
                         password=self.remote_password)
         count += 1
         time.sleep(sleep_time)
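
ensure_alive is a plain bounded retry loop. Below is a self-contained sketch of the same pattern with a generic check/reconnect pair; the function and variable names are invented for illustration:

import time

def retry_until_alive(check, reconnect, max_check=10, sleep_time=10):
    # Keep probing check(); between failed probes call reconnect(),
    # giving up after max_check attempts, as ensure_alive does.
    count = 1
    while not check():
        if count == max_check:
            raise RuntimeError('still not alive after %d attempts at interval %d s'
                               % (max_check, sleep_time))
        reconnect()
        count += 1
        time.sleep(sleep_time)

# Toy usage: the "connection" becomes alive on the third probe.
state = {'probes': 0}
def fake_check():
    state['probes'] += 1
    return state['probes'] >= 3

retry_until_alive(fake_check, reconnect=lambda: None, max_check=5, sleep_time=0)
print('alive after %d probes' % state['probes'])
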
Example #5
 def all_finished(self, 
                  job_handler, 
                  mark_failure):
     task_chunks = job_handler['task_chunks']
     task_chunks_str = ['+'.join(ii) for ii in task_chunks]
     task_hashes = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str]
     job_list = job_handler['job_list']
     job_record = job_handler['job_record']
     command = job_handler['command']
     tag_failure_list = ['tag_failure_%d' % ii for ii in range(len(command))]
     resources = job_handler['resources']
     outlog = job_handler['outlog']
     errlog = job_handler['errlog']
     backward_task_files = job_handler['backward_task_files']
     mlog.debug('checking jobs')
     nchunks = len(task_chunks)
     for idx in range(nchunks) :
         cur_hash = task_hashes[idx]
         rjob = job_list[idx]
         if not job_record.check_finished(cur_hash) :
             # chunk not finished according to record
             status = rjob['batch'].check_status()
             job_uuid = rjob['context'].job_uuid
             mlog.debug('checked job %s' % job_uuid)
             if status == JobStatus.terminated :
                 job_record.increase_nfail(cur_hash)
                 if job_record.check_nfail(cur_hash) > 3:
                     raise RuntimeError('Job %s failed for more than 3 times' % job_uuid)
                 mlog.info('job %s terminated, submitting again' % job_uuid)
                 mlog.debug('%s failures so far for %s' % (job_record.check_nfail(cur_hash), job_uuid))
                 rjob['batch'].submit(task_chunks[idx], command, res=resources, outlog=outlog, errlog=errlog, restart=True)
             elif status == JobStatus.finished :
                 mlog.info('job %s finished' % job_uuid)
                 if mark_failure:
                     rjob['context'].download(task_chunks[idx], tag_failure_list, check_exists = True, mark_failure = False)
                     rjob['context'].download(task_chunks[idx], backward_task_files, check_exists = True)
                 else:
                     rjob['context'].download(task_chunks[idx], backward_task_files)
                 rjob['context'].clean()
                 job_record.record_finish(cur_hash)
                 job_record.dump()
     job_record.dump()
     return job_record.check_all_finished()
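
Both all_finished and submit_jobs identify a chunk by the sha1 of its '+'-joined task names and use that digest as the key into the job record. A minimal standalone sketch of that hashing scheme; the chunk contents here are made up:

from hashlib import sha1

# Hypothetical chunks, as _split_tasks would produce with group_size=2.
task_chunks = [['task.000', 'task.001'], ['task.002']]

task_chunks_str = ['+'.join(chunk) for chunk in task_chunks]
task_hashes = [sha1(s.encode('utf-8')).hexdigest() for s in task_chunks_str]

for s, h in zip(task_chunks_str, task_hashes):
    print(h[:12], s)
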
Example #6
def build_operation(choice):
    assert choice in ["1", "2", "3"]
    if choice == "1":
        structs, fnames = read_structures()
        multi_structs(structs, fnames)
        wait_sep()
        tip = """
Several options are available:

a. A full 3x3 scaling matrix defining the linear combination
   of the old lattice vectors. E.g., 2 1 0  0 1 0  0 0 3
   generates a new structure with lattice vectors a' =
   2a + b, b' = b, c' = 3c, where a, b, and c are the lattice
   vectors of the original structure.
b. A sequence of three scaling factors. E.g., 2 1 1
   specifies that the supercell should have dimensions 2a x b x
   c.
c. A number, which simply scales all lattice vectors by the
   same factor.
        """
        print(tip)
        wait_sep()
        in_str = wait()
        scaling_list = [int(x) for x in in_str.split()]
        print("scaling list:")
        print(scaling_list)
        for struct, fname in zip(structs, fnames):
            if len(scaling_list) == 1:
                scales = scaling_list[0]
            elif len(scaling_list) == 3:
                scales = scaling_list
            elif len(scaling_list) == 9:
                scales = [
                    scaling_list[0:3], scaling_list[3:6], scaling_list[6:9]
                ]
            else:
                raise ValueError('expect 1, 3 or 9 scaling factors, got %d' %
                                 len(scaling_list))
            struct_cp = struct.copy()
            struct_cp.make_supercell(scales)
            fname = 'maptool_SC_' + fname + '.vasp'
            struct_cp.to(filename=fname, fmt='poscar')
        return True
    elif choice == "2":
        print('Only CNT (carbon nanotube) is supported now!')
        print('Input m and n for the tube')
        print('Parameter format, e.g.:')
        print('3 3')
        wait_sep()
        in_str = wait()
        m, n = [int(i) for i in in_str.split()]
        atoms = nanotube(m, n, vacuum=15)
        struct = ase2pmg(atoms)
        struct.to(filename='CNT_' + str(m) + '-' + str(n) + '.vasp', fmt='poscar')
        return True
    else:
        data = {
            'max_index': 2,
            'min_vacum': 20,
            'min_slab': 8,
            'repeat': [3, 3, 1]
        }

        def read_adsorb_config(filename):
            with open(filename, 'r') as f:
                datas = f.readlines()
            list_data = []
            for line in datas:
                # strip inline '#' comments, then split key = value pairs
                list_data.append(line.split('#')[0].strip().split('='))

            defined_keys = [
                'method', 'crystal', 'molecule', 'max_index', 'min_vacum',
                'min_slab', 'repeat'
            ]
            data_dict = {}
            for key in defined_keys:
                for li in list_data:
                    if key in li[0]:
                        data_dict[key] = li[1]

            data_dict['method'] = int(data_dict.get('method').strip())
            data_dict['crystal'] = data_dict.get('crystal').strip()
            data_dict['molecule'] = data_dict.get('molecule').strip()
            data_dict['max_index'] = int(
                data_dict.get('max_index', '1').strip())
            data_dict['min_vacum'] = int(
                data_dict.get('min_vacum', '15').strip())
            data_dict['min_slab'] = int(data_dict.get('min_slab', '5').strip())
            data_dict['repeat'] = [
                int(x)
                for x in data_dict.get('repeat', '1 1 1').strip().split()
            ]
            return data_dict

        def proc_adsorb(cryst, mol, data):
            if data['method'] == 1:
                asf_slab = AdsorbateSiteFinder(cryst)
                ads_sites = asf_slab.find_adsorption_sites()
                ads_structs = asf_slab.generate_adsorption_structures(
                    mol, repeat=data['repeat'])
                for i in range(len(ads_structs)):
                    ads_struct = ads_structs[i]
                    try:
                        miller_str = [str(j) for j in cryst.miller_index]
                    except AttributeError:  # a plain Structure has no miller_index
                        miller_str = ['adsorb']
                    filename = '_'.join(miller_str) + '-' + str(i) + '.vasp'
                    ads_struct.to(filename=filename, fmt='POSCAR')
            else:
                slabs = generate_all_slabs(cryst,
                                           max_index=data['max_index'],
                                           min_slab_size=data['min_slab'],
                                           min_vacuum_size=data['min_vacum'],
                                           lll_reduce=True)
                for slab in slabs:
                    asf_slab = AdsorbateSiteFinder(slab)
                    ads_sites = asf_slab.find_adsorption_sites()
                    ads_structs = asf_slab.generate_adsorption_structures(
                        mol, repeat=data['repeat'])
                    for i in range(len(ads_structs)):
                        ads_struct = ads_structs[i]
                        miller_str = [str(j) for j in slab.miller_index]
                        filename = 'adsorb' + '_'.join(miller_str) + '-' + str(
                            i) + '.vasp'
                        ads_struct.to(filename=filename, fmt='POSCAR')

        filename = 'adsorb.cfg'
        if os.path.exists(filename):
            data = read_adsorb_config(filename)
            assert data['method'] in [1, 2]
            cryst = read_structures_from_file(data['crystal'])
            mol = read_structures_from_file(data['molecule'])
            proc_adsorb(cryst, mol, data)
        else:
            print('Your choice?')
            print('{} >>> {}'.format('1', 'read slab from file'))
            print('{} >>> {}'.format('2', 'build slab by bulk'))
            wait_sep()
            in_str = wait()
            choice = int(in_str)
            assert choice in [1, 2]
            data['method'] = choice
            tips = """\
Input the structure filename of molecule and substrate
The first file should be molecule and 2nd for crystal
supported structure format: xsf .vasp POSCAR .nc .json .xyz ...
paramter format, i.e. :
mol.xyz POSCAR"""
            structs, fnames = read_structures(tips)

            mol = structs[0]
            mlog.info("read mol from %s" % (fnames[0]))
            mlog.info(mol)
            assert isinstance(mol,
                              Molecule), "the first file should be molecule"
            cryst = structs[1]
            mlog.info("read crystal from %s" % (fnames[1]))
            mlog.info(cryst)
            assert isinstance(cryst,
                              Structure), "the second file should be crystal"
            proc_adsorb(cryst, mol, data)

        return True
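
For choice "1", the user input is parsed into whatever Structure.make_supercell accepts: a single factor, three factors, or a full 3x3 matrix. A standalone sketch of that parsing step (pure standard library; the helper name is invented):

def parse_scaling(in_str):
    # Map the raw input string onto the three shapes accepted by
    # pymatgen's Structure.make_supercell.
    values = [int(x) for x in in_str.split()]
    if len(values) == 1:
        return values[0]
    if len(values) == 3:
        return values
    if len(values) == 9:
        return [values[0:3], values[3:6], values[6:9]]
    raise ValueError('expect 1, 3 or 9 integers, got %d' % len(values))

print(parse_scaling('2 1 0  0 1 0  0 0 3'))
# [[2, 1, 0], [0, 1, 0], [0, 0, 3]]
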
Example #7
 def submit_jobs(self,
                 resources,
                 command,
                 work_path,
                 tasks,
                 group_size,
                 forward_common_files,
                 forward_task_files,
                 backward_task_files,
                 forward_task_deference = True,
                 outlog = 'log',
                 errlog = 'err') :
     self.backward_task_files = backward_task_files
     # task_chunks = [
     #     [os.path.basename(j) for j in tasks[i:i + group_size]] \
     #     for i in range(0, len(tasks), group_size)
     # ]
     task_chunks = _split_tasks(tasks, group_size)    
     task_chunks_str = ['+'.join(ii) for ii in task_chunks]
     task_hashes = [sha1(ii.encode('utf-8')).hexdigest() for ii in task_chunks_str]
     job_record = JobRecord(work_path, task_chunks, fname = self.jrname)
     nchunks = len(task_chunks)
     
     job_list = []
     for ii in range(nchunks) :            
         cur_chunk = task_chunks[ii]
         cur_hash = task_hashes[ii]
         if not job_record.check_finished(cur_hash):                
             # chunk is not finished
             # check if chunk is submitted
             submitted = job_record.check_submitted(cur_hash)
             if not submitted:
                 job_uuid = None
             else :
                 job_uuid = job_record.get_uuid(cur_hash)
                 mlog.debug("load uuid %s for chunk %s" % (job_uuid, cur_hash))
             # communication context, batch system
             context = self.context(work_path, self.session, job_uuid)
             batch = self.batch(context, uuid_names = self.uuid_names)
             rjob = {'context':context, 'batch':batch}
             # upload files
             tag_upload = '%s_tag_upload' % rjob['context'].job_uuid
             if not rjob['context'].check_file_exists(tag_upload):
                 rjob['context'].upload('.',
                                        forward_common_files)
                 rjob['context'].upload(cur_chunk,
                                        forward_task_files, 
                                        dereference = forward_task_deference)
                 rjob['context'].write_file(tag_upload, '')
                 mlog.debug('uploaded files for %s' % task_chunks_str[ii])
             # submit new or recover old submission
             if not submitted:
                 rjob['batch'].submit(cur_chunk, command, res = resources, outlog=outlog, errlog=errlog)
                 job_uuid = rjob['context'].job_uuid
                 mlog.debug('assigned uuid %s for %s ' % (job_uuid, task_chunks_str[ii]))
                 mlog.info('new submission of %s for chunk %s' % (job_uuid, cur_hash))
             else:
                 rjob['batch'].submit(cur_chunk, command, res = resources, outlog=outlog, errlog=errlog, restart = True)
                 mlog.info('restart from old submission %s for chunk %s' % (job_uuid, cur_hash))
             # record job and its remote context
             job_list.append(rjob)
             ip = None
             instance_id = None
             if 'ali_auth' in self.remote_profile:
                 ip = self.remote_profile['hostname']
                 instance_id = self.remote_profile['instance_id']
             job_record.record_remote_context(cur_hash,                                                 
                                              context.local_root, 
                                              context.remote_root, 
                                              job_uuid,
                                              ip,
                                              instance_id)
         else :
             # finished job, append a None to list
             job_list.append(None)
     job_record.dump()
     assert(len(job_list) == nchunks)
     job_handler = {
         'task_chunks': task_chunks,
         'job_list': job_list,
         'job_record': job_record,
         'command': command,
         'resources': resources,
         'outlog': outlog,
         'errlog': errlog,
         'backward_task_files': backward_task_files
     }
     return job_handler
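
The commented-out list comprehension in submit_jobs hints at what _split_tasks does: group task basenames into chunks of group_size. A standalone sketch under that assumption:

import os

def split_tasks(tasks, group_size):
    # Group task paths into chunks of group_size basenames, matching the
    # commented-out comprehension above (assumed behaviour of _split_tasks).
    return [[os.path.basename(t) for t in tasks[i:i + group_size]]
            for i in range(0, len(tasks), group_size)]

tasks = ['work/task.%03d' % i for i in range(5)]
print(split_tasks(tasks, 2))
# [['task.000', 'task.001'], ['task.002', 'task.003'], ['task.004']]
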