示例#1
0
    def test_filemanager_update_local(self):
        """
        Populate the file list, write a dummy file at the local path of one
        tracked DataFile, then run update_local_status and check that the
        entry reports local_status 0 and a positive local_size.
        """
        # ---------------- SETUP ----------------
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        sta = False
        types = ['atm', 'ice', 'ocn', 'rest', 'streams.cice', 'streams.ocean']
        database = '{}.db'.format(inspect.stack()[0][3])
        simstart = 51
        simend = 60
        remote_path = '/global/homes/r/renata/ACME_simulations/20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
        experiment = '20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
        mutex = threading.Lock()
        try:
            # ---------------- TEST ----------------
            filemanager = FileManager(event_list=EventList(),
                                      mutex=mutex,
                                      sta=sta,
                                      types=types,
                                      database=database,
                                      remote_endpoint=self.remote_endpoint,
                                      remote_path=remote_path,
                                      local_endpoint=self.local_endpoint,
                                      local_path=self.local_path,
                                      experiment=experiment)
            filemanager.populate_file_list(simstart=simstart,
                                           simend=simend,
                                           experiment=experiment)

            # Index the query while the lock is held so the database read
            # actually happens inside the critical section.
            with filemanager.mutex:
                first_file = DataFile.select().limit(1)[0]

            name = first_file.name
            dummy_file_path = first_file.local_path
            parent_dir = os.path.split(dummy_file_path)[0]
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)
            print('----- writing out dummy file at {} -----'.format(
                dummy_file_path))
            with open(dummy_file_path, 'w') as fp:
                fp.write('this is a test file')

            filemanager.update_local_status()
            with filemanager.mutex:
                refreshed = DataFile.select().where(DataFile.name == name)[0]
            self.assertEqual(refreshed.local_status, 0)
            self.assertGreater(refreshed.local_size, 0)
        finally:
            # ---------------- CLEANUP ----------------
            # Remove the scratch database even when an assertion fails so a
            # failed run does not leave state behind for the next one.
            if os.path.exists(database):
                os.remove(database)
示例#2
0
    def test_filemanager_populate_no_sta(self):
        """
        Run filemanager setup and populate with sta turned off, then check
        that each of the atm/lnd/ocn/ice types has 12 files per simulated
        year and that every expected file name is present.
        """
        # ---------------- SETUP ----------------
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        sta = False
        database = '{}.db'.format(inspect.stack()[0][3])
        simstart = 1
        simend = 10
        experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
        mutex = threading.Lock()
        simlength = simend - simstart + 1
        expected_count = simlength * 12
        try:
            # ---------------- TEST ----------------
            filemanager = FileManager(event_list=EventList(),
                                      mutex=mutex,
                                      sta=sta,
                                      types=self.file_types,
                                      database=database,
                                      remote_endpoint=self.remote_endpoint,
                                      remote_path=self.remote_path,
                                      local_endpoint=self.local_endpoint,
                                      local_path=self.local_path,
                                      experiment=self.experiment)
            filemanager.populate_file_list(simstart=simstart,
                                           simend=simend,
                                           experiment=experiment)

            # The context manager releases the lock even when an assertion
            # inside the block fails.
            with filemanager.mutex:
                for _type in ['atm', 'lnd', 'ocn', 'ice']:
                    file_names = [
                        x.name for x in DataFile.select().where(
                            DataFile.datatype == _type)
                    ]
                    if len(file_names) != expected_count:
                        print(_type + ' does not have ' +
                              str(expected_count) + ' files')
                    self.assertEqual(len(file_names), expected_count)

                    # Set lookup keeps the per-name membership check cheap
                    known_names = set(file_names)
                    template = file_type_map[_type]
                    for year in range(simstart, simend + 1):
                        for month in range(1, 13):
                            name = (template
                                    .replace('EXPERIMENT', experiment)
                                    .replace('YEAR', '{:04d}'.format(year))
                                    .replace('MONTH', '{:02}'.format(month)))
                            self.assertIn(name, known_names)
        finally:
            # ---------------- CLEANUP ----------------
            # Remove the scratch database even when an assertion fails
            if os.path.exists(database):
                os.remove(database)
示例#3
0
 def test_filemanager_update_remote_yes_sta(self):
     """
     Run filemanager setup and populate, then run update_remote_status on a
     directory that has been short term archived and check that every
     DataFile reports remote_status 0 and all_data_remote() is true.
     """
     # ---------------- SETUP ----------------
     print('\n')
     print_message(
         '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
     sta = True
     types = [
         'atm', 'ice', 'ocn', 'rest', 'streams.ocean', 'streams.cice',
         'mpas-o_in', 'mpas-cice_in', 'meridionalHeatTransport'
     ]
     database = '{}.db'.format(inspect.stack()[0][3])
     simstart = 51
     source_path = '/global/cscratch1/sd/golaz/ACME_simulations/20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
     simend = 60
     experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
     mutex = threading.Lock()
     try:
         # ---------------- TEST ----------------
         filemanager = FileManager(event_list=EventList(),
                                   mutex=mutex,
                                   sta=sta,
                                   types=types,
                                   database=database,
                                   remote_endpoint=self.remote_endpoint,
                                   remote_path=source_path,
                                   local_endpoint=self.local_endpoint,
                                   local_path=self.local_path,
                                   experiment=self.experiment)
         filemanager.populate_file_list(simstart=simstart,
                                        simend=simend,
                                        experiment=experiment)
         client = get_client()
         filemanager.update_remote_status(client)

         # The lock must be free again before all_data_remote() is called;
         # the context manager guarantees that even if an assertion fails.
         with filemanager.mutex:
             for datafile in DataFile.select():
                 if datafile.remote_status != 0:
                     print('%s %s' % (datafile.name, datafile.remote_path))
                 self.assertEqual(datafile.remote_status, 0)
         self.assertTrue(filemanager.all_data_remote())
     finally:
         # ---------------- CLEANUP ----------------
         # Remove the scratch database even when an assertion fails
         if os.path.exists(database):
             os.remove(database)
示例#4
0
    def test_filemanager_update_remote_no_sta(self):
        """
        Run filemanager setup and populate, then run update_remote_status
        with 10 years of atm output, and finally run all_data_remote to show
        that all the remote data has been recognized.
        """
        # ---------------- SETUP ----------------
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        remote_path = '/global/homes/r/renata/ACME_simulations/20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
        sta = False
        types = ['atm']
        database = '{}.db'.format(inspect.stack()[0][3])
        simstart = 51
        simend = 60
        experiment = '20170926.FCT2.A_WCYCL1850S.ne30_oECv3.anvil'
        mutex = threading.Lock()
        try:
            # ---------------- TEST ----------------
            filemanager = FileManager(event_list=EventList(),
                                      mutex=mutex,
                                      sta=sta,
                                      types=types,
                                      database=database,
                                      remote_endpoint=self.remote_endpoint,
                                      remote_path=remote_path,
                                      local_endpoint=self.local_endpoint,
                                      local_path=self.local_path,
                                      experiment=experiment)
            filemanager.populate_file_list(simstart=simstart,
                                           simend=simend,
                                           experiment=experiment)

            client = get_client()
            filemanager.update_remote_status(client)

            # The lock must be free again before all_data_remote() is called;
            # the context manager guarantees that even if an assertion fails.
            with filemanager.mutex:
                for datafile in DataFile.select():
                    if datafile.remote_status != 0:
                        print('%s %s %s %s' % (datafile.name,
                                               datafile.remote_path,
                                               datafile.remote_status,
                                               datafile.datatype))
                    self.assertEqual(datafile.remote_status, 0)
            self.assertTrue(filemanager.all_data_remote())
        finally:
            # ---------------- CLEANUP ----------------
            # Remove the scratch database even when an assertion fails
            if os.path.exists(database):
                os.remove(database)
示例#5
0
    def test_cmor_valid_completed(self):
        """
        Run an 'atm' Timeseries job to completion for a valid config, then
        set up a Cmor job for the same case and check that it post-validates,
        executes, ends in JobStatus.COMPLETED and handles its completion.
        """
        print_message(
            '\n---- Starting Test: {} ----'.format(inspect.stack()[0][3]),
            'ok')
        config = ConfigObj(self.valid_config_path)
        # Wrap the configured variable_list value in a list before the
        # config is verified and handed to the jobs.
        config['post-processing']['cmor']['variable_list'] = [
            config['post-processing']['cmor']['variable_list']
        ]
        case_name = '20180129.DECKv1b_piControl.ne30_oEC.edison'
        case = config['simulations'][case_name]
        messages = verify_config(config)
        self.assertEqual(len(messages), 0)
        config['global']['resource_path'] = 'resources/'
        filemanager = FileManager(config=config, event_list=EventList())
        filemanager.populate_file_list()
        filemanager.update_local_status()

        # Run the timeseries job first, before the cmor job is set up
        timeseries = Timeseries(short_name=case['short_name'],
                                case=case_name,
                                start=config['simulations']['start_year'],
                                end=config['simulations']['end_year'],
                                config=config,
                                run_type='atm')
        timeseries.check_data_ready(filemanager=filemanager)
        timeseries.setup_data(config=config,
                              filemanager=filemanager,
                              case=case_name)
        timeseries.execute(config=config, event_list=EventList())
        timeseries.handle_completion(filemanager=filemanager,
                                     config=config,
                                     event_list=EventList())

        cmor = Cmor(short_name=case['short_name'],
                    case=case_name,
                    start=config['simulations']['start_year'],
                    end=config['simulations']['end_year'],
                    config=config)
        cmor.check_data_ready(filemanager=filemanager)
        cmor.setup_data(config=config, filemanager=filemanager, case=case_name)
        self.assertTrue(cmor.postvalidate(config=config))
        self.assertTrue(cmor.execute(config=config, event_list=EventList()))
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(cmor.status, JobStatus.COMPLETED)
        self.assertTrue(
            cmor.handle_completion(filemanager=filemanager,
                                   event_list=EventList(),
                                   config=config))
示例#6
0
    def test_filemanager_setup_valid_with_inplace_data(self):
        """
        Build a FileManager from the e3sm_diags_complete test config,
        populate its file list and refresh the local status, then check that
        the database file exists and all data is reported as local.
        """
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        config_path = 'tests/test_configs/e3sm_diags_complete.cfg'
        config = ConfigObj(config_path)
        db = '{}.db'.format(inspect.stack()[0][3])

        try:
            filemanager = FileManager(database=db,
                                      event_list=EventList(),
                                      config=config)
            filemanager.populate_file_list()
            filemanager.update_local_status()

            self.assertIsInstance(filemanager, FileManager)
            self.assertTrue(os.path.exists(db))
            self.assertTrue(filemanager.all_data_local())
        finally:
            # Remove the scratch database even when an assertion fails
            if os.path.exists(db):
                os.remove(db)
示例#7
0
    def test_filemanager_get_file_paths(self):
        """
        Build a FileManager from the filemanager_partial_data test config and
        check get_file_paths_by_year and check_data_ready for the 'atm' and
        'ocn_streams' datatypes, with and without a year range.
        """
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        config_path = 'tests/test_configs/filemanager_partial_data.cfg'
        config = ConfigObj(config_path)
        db = '{}.db'.format(inspect.stack()[0][3])
        case = '20180129.DECKv1b_piControl.ne30_oEC.edison'

        try:
            filemanager = FileManager(database=db,
                                      event_list=EventList(),
                                      config=config)
            filemanager.populate_file_list()
            self.assertIsInstance(filemanager, FileManager)
            self.assertTrue(os.path.exists(db))

            filemanager.update_local_status()
            filemanager.write_database()
            self.assertFalse(filemanager.all_data_local())

            # test that the filemanager returns correct paths
            paths = filemanager.get_file_paths_by_year(datatype='atm',
                                                       case=case,
                                                       start_year=1,
                                                       end_year=2)
            for path in paths:
                self.assertTrue(os.path.exists(path))

            # test that the filemanager returns correct paths with no year
            paths = filemanager.get_file_paths_by_year(datatype='ocn_streams',
                                                       case=case)
            for path in paths:
                self.assertTrue(os.path.exists(path))

            # test nothing is returned for incorrect years
            paths = filemanager.get_file_paths_by_year(datatype='ocn_streams',
                                                       case=case,
                                                       start_year=1,
                                                       end_year=100)
            self.assertIsNone(paths)

            # test the filemanager knows when data is ready
            ready = filemanager.check_data_ready(data_required=['atm'],
                                                 case=case,
                                                 start_year=1,
                                                 end_year=2)
            self.assertTrue(ready)

            # test the filemanager knows when data is NOT ready
            ready = filemanager.check_data_ready(data_required=['atm'],
                                                 case=case,
                                                 start_year=1,
                                                 end_year=3)
            self.assertFalse(ready)

            ready = filemanager.check_data_ready(
                data_required=['ocn_streams'], case=case)
            self.assertTrue(ready)
        finally:
            # Remove the scratch database even when an assertion fails
            if os.path.exists(db):
                os.remove(db)
示例#8
0
    def test_filemanager_all_data_local(self):
        """
        Create a dummy project, check that all_data_local() is false before
        any file exists, then write a small dummy file for every tracked
        DataFile and check that all_data_local() becomes true.
        """
        # ---------------- SETUP ----------------
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        sta = True
        database = '{}.db'.format(inspect.stack()[0][3])
        simstart = 1
        simend = 10
        event_list = EventList()
        remote_path = '/dummy/remote/20180215.DECKv1b_1pctCO2.ne30_oEC.edison/run/something'
        local_path = '/p/user_pub/e3sm/baldwin32/E3SM_test_data/dummyproject'
        experiment = '20180215.DECKv1b_1pctCO2.ne30_oEC.edison'
        types = ['atm', 'ocn', 'lnd', 'ice']
        mutex = threading.Lock()
        # Start from a clean project directory so the first all_data_local()
        # check sees no files.
        if os.path.exists(local_path):
            shutil.rmtree(local_path)
        try:
            # ---------------- TEST ----------------
            filemanager = FileManager(event_list=event_list,
                                      mutex=mutex,
                                      sta=sta,
                                      types=types,
                                      database=database,
                                      remote_endpoint=self.remote_endpoint,
                                      remote_path=remote_path,
                                      local_endpoint=self.local_endpoint,
                                      local_path=local_path,
                                      experiment=self.experiment)
            self.assertEqual(
                filemanager.remote_path,
                '/dummy/remote/20180215.DECKv1b_1pctCO2.ne30_oEC.edison')
            filemanager.populate_file_list(simstart=simstart,
                                           simend=simend,
                                           experiment=experiment)
            filemanager.update_local_status()
            self.assertFalse(filemanager.all_data_local())

            # The context manager releases the lock even if writing one of
            # the dummy files raises.
            with filemanager.mutex:
                for df in DataFile.select():
                    parent_dir = os.path.split(df.local_path)[0]
                    if not os.path.exists(parent_dir):
                        os.makedirs(parent_dir)
                    with open(df.local_path, 'w') as fp:
                        fp.write('this is a test file')
                    # Record matching sizes on both sides of the entry
                    size = os.path.getsize(df.local_path)
                    df.remote_size = size
                    df.local_size = size
                    df.save()
            filemanager.update_local_status()
            self.assertTrue(filemanager.all_data_local())
        finally:
            # ---------------- CLEANUP ----------------
            # Remove the scratch database even when an assertion fails
            if os.path.exists(database):
                os.remove(database)
示例#9
0
def setup(argv, display_event, **kwargs):
    """
    Parse the commandline arguments, and setup the master config dict

    Parameters:
        argv (list): The command line arguments, handed to parse_args
        display_event (threading event): The event to turn the display on
            and off; passed to setup_globus when not in no-ui mode
        **kwargs: 'event_list', 'thread_list' and 'mutex' are read up front,
            'kill_event' is read when the RunManager is created
    Returns:
        (config, filemanager, runmanager) on success, or
        (False, False, False) when setup cannot be completed
    """
    print("entering setup")
    # Setup the parser
    args = parse_args(argv=argv)
    if not args.config:
        parse_args(print_help=True)
        return False, False, False

    event_list = kwargs['event_list']
    thread_list = kwargs['thread_list']
    mutex = kwargs['mutex']

    # check if globus config is valid, else remove it
    globus_config = os.path.join(os.path.expanduser('~'), '.globus.cfg')
    if os.path.exists(globus_config):
        try:
            ConfigObj(globus_config)
        except Exception:
            # Any parse failure means the file is unusable; a bare except
            # would also have swallowed KeyboardInterrupt/SystemExit.
            os.remove(globus_config)

    if not os.path.exists(args.config):
        print("Invalid config, {} does not exist".format(args.config))
        return False, False, False

    # Check that there are no white space errors in the config file
    line_index = check_config_white_space(args.config)
    if line_index != 0:
        print('''
ERROR: line {num} does not have a space after the \'=\', white space is required.
Please add a space and run again.'''.format(num=line_index))
        return False, False, False

    # read the config file and setup the config dict
    try:
        config = ConfigObj(args.config)
    except Exception:
        print("Error parsing config file {}".format(args.config))
        parse_args(print_help=True)
        return False, False, False

    # run validator for config file
    if not config.get('global'):
        # Every other failure path reports why it gave up; do the same here
        print("Invalid config, the 'global' section is missing or empty")
        return False, False, False
    if args.resource_dir:
        config['global']['resource_dir'] = args.resource_dir
    else:
        config['global']['resource_dir'] = os.path.join(
            sys.prefix, 'share', 'processflow', 'resources')

    template_path = os.path.join(config['global']['resource_dir'],
                                 'config_template.json')

    with open(template_path, 'r') as template_file:
        template = json.load(template_file)

    valid, messages = verify_config(config, template)
    if not valid:
        for message in messages:
            print(message)
        return False, False, False

    config['global']['input_path'] = os.path.join(
        config['global']['project_path'], 'input')
    config['global']['output_path'] = os.path.join(
        config['global']['project_path'], 'output')

    # setup output and cache directories
    if not os.path.exists(config['global']['input_path']):
        os.makedirs(config['global']['input_path'])
    if not os.path.exists(config['global']['output_path']):
        os.makedirs(config['global']['output_path'])

    # Copy the config into the input directory for safe keeping
    input_config_path = os.path.join(config['global']['input_path'], 'run.cfg')
    try:
        copy(args.config, input_config_path)
    except Exception as error:
        # Best effort only: a failed copy must not abort the run. This is
        # printed rather than logged because logging is configured below.
        print('Warning: unable to copy {} to {}: {}'.format(
            args.config, input_config_path, error))

    # setup logging
    if args.log:
        log_path = args.log
    else:
        log_path = os.path.join(
            config.get('global').get('output_path'), 'workflow.log')
    config['global']['log_path'] = log_path
    logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        filename=log_path,
                        filemode='w',
                        level=logging.INFO)
    logging.getLogger('globus_sdk').setLevel(logging.WARNING)

    # Make sure the set_frequency is a list of ints
    # (the values are sometimes strings which break things later)
    set_frequency = config['global']['set_frequency']
    if not isinstance(set_frequency, list):
        set_frequency = [set_frequency]
    config['global']['set_frequency'] = [int(freq) for freq in set_frequency]

    # setup config for file type directories
    if not isinstance(config['global']['file_types'], list):
        config['global']['file_types'] = [config['global']['file_types']]

    # setup run_scipts_path
    run_script_path = os.path.join(config['global']['output_path'],
                                   'run_scripts')
    config['global']['run_scripts_path'] = run_script_path
    if not os.path.exists(run_script_path):
        os.makedirs(run_script_path)

    # setup tmp_path, always starting from an empty directory
    tmp_path = os.path.join(config['global']['output_path'], 'tmp')
    config['global']['tmp_path'] = tmp_path
    if os.path.exists(tmp_path):
        rmtree(tmp_path)
    os.makedirs(tmp_path)

    # setup the year_set list
    sim_start_year = int(config['global']['simulation_start_year'])
    config['global']['simulation_start_year'] = sim_start_year
    sim_end_year = int(config['global']['simulation_end_year'])
    config['global']['simulation_end_year'] = sim_end_year

    config['global']['short_term_archive'] = int(
        config['global']['short_term_archive'])

    # initialize the filemanager
    event_list.push(message='Initializing file manager')
    # Drop a trailing 'run' component from the source path
    head, tail = os.path.split(config['global']['source_path'])
    if tail == 'run':
        config['global']['source_path'] = head

    filemanager = FileManager(
        database=os.path.join(config['global']['project_path'], 'input',
                              'workflow.db'),
        types=config['global']['file_types'],
        sta=config['global']['short_term_archive'],
        remote_path=config['global']['source_path'],
        remote_endpoint=config['transfer']['source_endpoint'],
        local_path=os.path.join(config['global']['project_path'], 'input'),
        local_endpoint=config['transfer']['destination_endpoint'],
        mutex=mutex)
    filemanager.populate_file_list(
        simstart=config['global']['simulation_start_year'],
        simend=config['global']['simulation_end_year'],
        experiment=config['global']['experiment'])
    print('Updating local status')
    filemanager.update_local_status()
    print('Local status update complete')
    all_data = filemanager.all_data_local()
    if all_data:
        print('All data is local')
    else:
        print('Additional data needed')

    logging.info("FileManager setup complete")
    logging.info(str(filemanager))

    if all_data or args.no_monitor:
        print("skipping globus setup")
    else:
        endpoints = list(config['transfer'].values())
        if args.no_ui:
            print('Running in no-ui mode')
            addr = config.get('global').get('email')
            if not addr:
                print('When running in no-ui mode, you must enter an email address.')
                return False, False, False
            setup_success = setup_globus(endpoints=endpoints,
                                         no_ui=True,
                                         src=addr,
                                         dst=addr,
                                         event_list=event_list)
        else:
            output_path = config.get('global').get('output_path')
            error_output = os.path.join(output_path, 'workflow.error')
            config['global']['error_path'] = error_output
            if not os.path.exists(output_path):
                os.makedirs(output_path)
            # NOTE(review): this handle is never closed here; presumably
            # intentional since stderr stays redirected -- confirm.
            sys.stderr = open(error_output, 'w')
            msg = 'Activating endpoints {}'.format(' '.join(endpoints))
            logging.info(msg)
            setup_success = setup_globus(endpoints=endpoints,
                                         display_event=display_event,
                                         no_ui=False)
        if not setup_success:
            print("Globus setup error")
            return False, False, False
        print('Globus authentication complete')
        print('Checking file access on globus transfer nodes')
        setup_success, endpoint = check_globus(
            source_endpoint=config['transfer']['source_endpoint'],
            source_path=config['global']['source_path'],
            destination_endpoint=config['transfer']['destination_endpoint'],
            destination_path=config['global']['input_path'])
        if not setup_success:
            print('ERROR! Unable to access {} globus node'.format(
                endpoint['type']))
            print('The node may be down, or you may not have access to the requested directory')
            return False, False, False

    # setup the runmanager
    runmanager = RunManager(event_list=event_list,
                            output_path=config['global']['output_path'],
                            caseID=config['global']['experiment'],
                            scripts_path=run_script_path,
                            thread_list=thread_list,
                            event=kwargs['kill_event'])
    runmanager.setup_job_sets(set_frequency=config['global']['set_frequency'],
                              sim_start_year=sim_start_year,
                              sim_end_year=sim_end_year,
                              config=config,
                              filemanager=filemanager)

    # Turning off the GUI for the time being
    # config['global']['ui'] = False if args.no_ui else True
    config['global']['ui'] = False
    config['global']['no_cleanup'] = bool(args.no_cleanup)
    config['global']['no_monitor'] = bool(args.no_monitor)
    config['global']['print_file_list'] = bool(args.file_list)

    logging.info('Starting run with config')
    logging.info(pformat(config))
    return config, filemanager, runmanager