Пример #1
0
class TestRepair2(TestRepair):
    """Repair tests run on a tree that already holds two versions.

    ``genfuncs`` holds the data generators called before the first and
    second version change made in ``setUp``.
    """

    genfuncs = (gen_drs.write_eg3_1, gen_drs.write_eg3_2)

    def setUp(self):
        # Call the TestEg fixture explicitly instead of going through super().
        TestEg.setUp(self)

        # Ingest each batch and publish it under a fixed version number.
        for ingest, version in ((self._cmor1, 20100101),
                                (self._cmor2, 20100102)):
            ingest()
            self.pt.do_version(version)

        assert self.pt.state == self.pt.STATE_VERSIONED

        self.breakme()

    def _cmor1(self):
        """Generate the first batch, discover it and keep the single tree."""
        self.genfuncs[0](self.tmpdir)

        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, **components)

        # Unpacking raises unless exactly one publish tree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second batch and discover it as incoming data."""
        self.genfuncs[1](self.tmpdir)

        components = dict(activity='cmip5', product='output1')
        self.dt.discover_incoming(self.incoming, **components)
Пример #2
0
class TestListing(TestEg):
    """Base class for tests whose data is generated from a listing file.

    Subclasses must set ``listing_file``; it is joined onto ``test_dir``.
    """

    def setUp(self):
        super(TestListing, self).setUp()

        gen_drs.write_listing(self.tmpdir,
                              os.path.join(test_dir, self.listing_file))

        self._init_drs_fs()
        self.dt = DRSTree(self.drs_fs)

    def _init_drs_fs(self):
        """Create the filesystem object that the DRSTree is built on."""
        self.drs_fs = CMIP5FileSystem(self.tmpdir)

    def _discover(self, institute, model):
        """Run discovery for the given institute and model."""
        components = dict(activity='cmip5', product='output1',
                          institute=institute, model=model)
        self.dt.discover(self.incoming, **components)

    def _do_version(self, pt):
        """Version *pt* and check it moves from INITIAL to VERSIONED."""
        assert pt.state == pt.STATE_INITIAL
        pt.do_version()
        assert pt.state == pt.STATE_VERSIONED

        # The only version present should be the one stored in self.today.
        published = pt.versions.keys()
        assert published == [self.today]
Пример #3
0
class TestEg4(TestEg3):
    """TestEg3 scenario run against the eg4 generated data."""
    __test__ = True

    def _cmor1(self):
        """Generate the first eg4 batch and keep the single publish tree."""
        gen_drs.write_eg4_1(self.tmpdir)

        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, **components)

        # Unpacking raises unless exactly one publish tree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg4 batch and discover it as incoming data."""
        gen_drs.write_eg4_2(self.tmpdir)

        components = dict(activity='cmip5', product='output1')
        self.dt.discover_incoming(self.incoming, **components)

    def test_1(self):
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        for relpath in ('files', 'files/tas_20100102', 'v20100102/tas'):
            assert self._exists(relpath)

    def test_2(self):
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        # Expected number of directory entries after both version changes.
        expected_counts = (
            ('files/tas_20100101', 3),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 3),
            ('v20100102/tas', 5),
        )
        for relpath, count in expected_counts:
            assert len(self._listdir(relpath)) == count

    # Do test_3 from superclass

    # Do test_4 from superclass

    def test_6(self):
        # Test differencing 2 versions

        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        changes = list(self.pt.diff_version(20100101))
        only_v1 = [old for state, old, new in changes
                   if state == self.pt.DIFF_V1_ONLY]
        only_v2 = [new for state, old, new in changes
                   if state == self.pt.DIFF_V2_ONLY]

        assert len(only_v1) == 3
        assert len(only_v2) == 2
Пример #4
0
    def test_1(self):
        # Expect two publish trees, covering the atmos and ocean realms.
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **components)

        assert len(dt.pub_trees) == 2
        realms = set(tree.drs.realm for tree in dt.pub_trees.values())
        assert realms == set(('atmos', 'ocean'))
Пример #5
0
    def setUp(self):
        super(TestRepair, self).setUp()

        # Generate the test data described by the class's listing file.
        listing_path = op.join(test_dir, self.listing)
        gen_drs.write_listing(self.tmpdir, listing_path)

        tree = DRSTree(self.tmpdir)
        tree.discover(self.incoming, **self.drs_components)

        # Keep the first discovered publish tree for the tests.
        self.pt = list(tree.pub_trees.values())[0]
Пример #6
0
    def test_2(self):
        # Expect three publish trees; the first one returned is atmos.
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **components)

        assert len(dt.pub_trees) == 3
        first_tree = list(dt.pub_trees.values())[0]
        assert first_tree.drs.realm == 'atmos'
Пример #7
0
class TestEg5(TestEg4):
    """TestEg4 scenario run against the eg5 data (``__test__`` is False)."""
    __test__ = False

    def _cmor1(self):
        """Generate the first eg5 batch and keep the single publish tree."""
        gen_drs.write_eg5_1(self.tmpdir)

        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, **components)

        # Unpacking raises unless exactly one publish tree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg5 batch and discover it as incoming data."""
        gen_drs.write_eg5_2(self.tmpdir)

        components = dict(activity='cmip5', product='output1')
        self.dt.discover_incoming(self.incoming, **components)

    # Do test1 from superclass

    def test_2(self):
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        # Expected number of directory entries after both version changes.
        expected_counts = (
            ('files/tas_20100101', 5),
            ('files/tas_20100102', 2),
            ('v20100101/tas', 5),
            ('v20100102/tas', 5),
        )
        for relpath, count in expected_counts:
            assert len(self._listdir(relpath)) == count

    # Do test_3 from superclass

    # Do test_4 from superclass

    def test_6(self):
        # Test differencing 2 versions

        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        # Sort every reported difference into a bucket by its state.
        only_v1, only_v2, resized, unchanged = [], [], [], []
        for state, old_path, new_path in self.pt.diff_version(20100101):
            if state == self.pt.DIFF_V1_ONLY:
                only_v1.append(old_path)
            elif state == self.pt.DIFF_V2_ONLY:
                only_v2.append(new_path)
            elif state == self.pt.DIFF_SIZE:
                resized.append(old_path)
            elif state == self.pt.DIFF_NONE:
                unchanged.append(old_path)

        #!TODO: not same?  This test needs reviewing.
        assert len(only_v1) == 3
        assert len(unchanged) == 2
Пример #8
0
    def setUp(self):
        super(TestMapfile, self).setUp()

        gen_drs.write_eg1(self.tmpdir)

        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        tree = DRSTree(self.tmpdir)
        tree.discover(self.incoming, **components)

        # Version the first discovered publish tree so tests start from
        # the VERSIONED state.
        self.pt = list(tree.pub_trees.values())[0]
        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED
Пример #9
0
    def test_1(self):
        # Every path in the latest version must be a relative symlink.
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **components)

        pt = list(dt.pub_trees.values())[0]
        assert pt.state == pt.STATE_INITIAL

        pt.do_version()

        for path, _drs in pt.versions[pt.latest]:
            assert not os.path.isabs(os.readlink(path))
Пример #10
0
    def test_3(self):
        # Versioning a fresh tree should create one version, named after
        # self.today.
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **components)

        pt = list(dt.pub_trees.values())[0]
        assert pt.state == pt.STATE_INITIAL

        pt.do_version()
        assert pt.state == pt.STATE_VERSIONED

        version_ids = pt.versions.keys()
        assert len(version_ids) == 1
        assert self.today in version_ids
Пример #11
0
class TestEmptyPubdir(TestEg):
    # Regression test for a bug where drs_tool crashed if the PublishTree
    # directory existed but was empty.
    __test__ = True

    def setUp(self):
        super(TestEmptyPubdir, self).setUp()

        # Create a publication directory with nothing inside it.
        empty_pubdir = op.join(
            self.tmpdir,
            'output2/MOHC/HadGEM2-ES/esmControl/day/seaIce/day/r1i1p1')
        os.makedirs(empty_pubdir)

        self.dt = DRSTree(self.tmpdir)

    def test_1(self):
        # Passes as long as discovery does not raise.
        self.dt.discover()
Пример #12
0
    def test_1(self):
        # Discover a single experiment/realm and inspect the third tree
        # (by sorted key) before any version has been made.
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3',
                          experiment='1pctto4x', realm='atmos')
        dt = DRSTree(self.tmpdir)
        dt.discover(self.incoming, **components)

        assert len(dt.pub_trees) == 3
        key = sorted(dt.pub_trees.keys())[2]
        assert key == 'cmip5.output1.MOHC.HadCM3.1pctto4x.day.atmos.day.r3i1p1'
        pt = dt.pub_trees[key]

        assert pt.versions == {}
        assert len(pt._todo) == 15

        # The second element of each todo entry carries the variable name.
        variables = set(entry[1].variable for entry in pt._todo)
        assert variables == set(('pr', 'rsus', 'tas'))
        assert pt.state == pt.STATE_INITIAL
Пример #13
0
    def test_1(self):
        """Test incremental discovery"""
        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        dt = DRSTree(self.tmpdir)

        # With no incoming directory, nothing should be discovered.
        dt.discover(None, **components)
        assert len(dt.pub_trees) == 0

        # Each realm discovered afterwards adds one publish tree.
        for expected_total, realm in enumerate(('ocean', 'atmos'), 1):
            dt.discover_incoming(self.tmpdir, realm=realm, **components)
            assert len(dt.pub_trees) == expected_total

        realms = set(tree.drs.realm for tree in dt.pub_trees.values())
        assert realms == set(('atmos', 'ocean'))
Пример #14
0
class TestDups(TestEg):
    """Fixture for duplicate-file tests.

    ``setUp`` generates the eg1 data, discovers it and versions the first
    publish tree.  The ``_make_incoming*`` helpers then populate
    ``self.incoming`` from listing files for the individual scenarios.
    """

    def setUp(self):
        # Was ``.setup()``: every other fixture in this file chains to
        # ``setUp``, which is the hook name used by unittest.
        super(TestDups, self).setUp()

        # Create test data
        gen_drs.write_eg1(self.tmpdir)

        # Do initial version change
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, activity='cmip5',
                         product='output1', institute='MOHC', model='HadCM3')
        # Was ``dt.pub_trees``: no local ``dt`` exists in this method, the
        # tree is stored on ``self.dt``.
        self.pt = list(self.dt.pub_trees.values())[0]
        self.pt.do_version()

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def _make_incoming1(self):
        # Original ingest
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups1.ls'))

    def _make_incoming2(self):
        # Ingest with some new files and 2 duplicates
        gen_drs.write_listing(self.incoming, os.path.join(test_dir, 'dups2.ls'))

    def _make_incoming3(self):
        # As incoming2 except one of the dups differs in size
        self._make_incoming2()

        # Append a line so the file grows.  The context manager closes the
        # handle, so the data is flushed before the tests read the file.
        with open(os.path.join(self.incoming, CHANGE_FILE), 'a') as fh:
            fh.write('File has grown\n')

    def _make_incoming4(self):
        # As incoming2 except one of the dups only differs by contents
        self._make_incoming2()

        # Overwrite the first three bytes in place so the size is unchanged.
        with open(os.path.join(self.incoming, CHANGE_FILE), 'r+') as fh:
            fh.seek(0)
            fh.write('XXX')
Пример #15
0
class Command(object):
    def __init__(self, opts, args):
        """Store *opts* and *args*, reset cached state and build the tree."""
        self.opts, self.args = opts, args

        # Filled in later by _config_p_cmip5 / _setup_p_cmip5 / make_drs_tree.
        self.shelve_dir = self.p_cmip5_config = None
        self.drs_root = self.drs_tree = None

        self.make_drs_tree()

    def _config_p_cmip5(self):
        """
        Ensure self.shelve_dir is set.  This is required for InitCommand
        and any command that uses p_cmip5.

        The value of opts.shelve_dir is used when it is not None; otherwise
        the 'shelve-dir' option of the 'p_cmip5' config section is read.

        :raises Exception: if neither source provides a shelve directory.

        """
        self.shelve_dir = self.opts.shelve_dir
        if self.shelve_dir is None:
            try:
                self.shelve_dir = config.config.get('p_cmip5', 'shelve-dir')
            # A missing option is reported the same way as a missing section,
            # matching the handling in _setup_p_cmip5.
            except (NoSectionError, NoOptionError):
                raise Exception("Shelve directory not specified.  Please use --shelve-dir or set shelve_dir via metaconfig")

    def _setup_p_cmip5(self):
        """
        Create the p_cmip5.cmip5_product object used to deduce the product
        component and attach it to self.drs_tree.

        Raises Exception when no p_cmip5 configuration file is given by
        either the options or the config.

        """
        shelves = p_cmip5.init._find_shelves(self.shelve_dir)

        # Option value first, config file as fallback.
        self.p_cmip5_config = self.opts.p_cmip5_config
        if self.p_cmip5_config is None:
            try:
                self.p_cmip5_config = config.config.get('p_cmip5', 'config')
            except (NoSectionError, NoOptionError):
                raise Exception("p_cmip5 configuration file not specified.  Please use --p-cmip5-config or set via metaconfig")

        product_args = dict(
            mip_table_shelve=shelves['stdo_mip'],
            template=shelves['template'],
            stdo=shelves['stdo'],
            config=self.p_cmip5_config,
            not_ok_excpt=True,
        )
        self.drs_tree.set_p_cmip5(
            p_cmip5.product.cmip5_product(**product_args))


    def make_drs_tree(self):
        """
        Create self.drs_tree and run discovery on it.

        The DRS root and incoming directory come from self.opts, falling
        back to config.drs_defaults.  DRS components come from self.opts
        (when present and not None) or config.drs_defaults, and the first
        positional argument, if any, is used as a dataset id.

        Raises Exception when no DRS root can be determined.

        """
        # DRS root: option value, else the configured default.
        root = self.opts.root
        if not root:
            try:
                root = config.drs_defaults['root']
            except KeyError:
                raise Exception('drs-root not defined')
        self.drs_root = root

        # Incoming directory: option, configured default, or a directory
        # beneath the DRS root.
        incoming = self.opts.incoming
        if not incoming:
            try:
                incoming = config.drs_defaults['incoming']
            except KeyError:
                incoming = os.path.join(self.drs_root, config.DEFAULT_INCOMING)

        self.drs_tree = DRSTree(self.drs_root)

        move_cmd = self.opts.move_cmd
        if move_cmd:
            self.drs_tree.set_move_cmd(move_cmd)

        # Collect component values; an attribute that is absent or None
        # falls back to the configured default.
        kwargs = {}
        for attr in ['activity', 'product', 'institute', 'model', 'experiment',
                     'frequency', 'realm', 'ensemble']:
            val = getattr(self.opts, attr, None)
            if val is None:
                val = config.drs_defaults.get(attr)
            kwargs[attr] = val

        # Get the template DRS from args
        if self.args:
            drs = DRS.from_dataset_id(self.args[0], **kwargs)
        else:
            drs = DRS(**kwargs)

        # Product detection
        if self.opts.detect_product:
            self._config_p_cmip5()
            self._setup_p_cmip5()

        self.drs_tree.discover(incoming, **drs)

    def do(self):
        """Run the command; this base implementation always raises."""
        message = "Unimplemented command"
        raise NotImplementedError(message)
    

    def print_header(self):
        print """\
==============================================================================
DRS Tree at %s
------------------------------------------------------------------------------\
""" % self.drs_root

    def print_sep(self):
        print """\
------------------------------------------------------------------------------\
"""

    def print_footer(self):
        print """\
Пример #16
0
class TestEg3(TestEg):
    """Versioning tests driven by the two eg3 data batches."""
    __test__ = True

    def _cmor1(self):
        """Generate the first eg3 batch and keep the single publish tree."""
        gen_drs.write_eg3_1(self.tmpdir)

        components = dict(activity='cmip5', product='output1',
                          institute='MOHC', model='HadCM3')
        self.dt = DRSTree(self.tmpdir)
        self.dt.discover(self.incoming, **components)

        # Unpacking raises unless exactly one publish tree was found.
        [self.pt] = self.dt.pub_trees.values()

    def _cmor2(self):
        """Generate the second eg3 batch and discover it as incoming data."""
        gen_drs.write_eg3_2(self.tmpdir)

        components = dict(activity='cmip5', product='output1')
        self.dt.discover_incoming(self.incoming, **components)

    def _exists(self, x):
        """Return whether *x* exists beneath the publish tree directory."""
        target = os.path.join(self.pt.pub_dir, x)
        return os.path.exists(target)

    def _listdir(self, x):
        """List the entries of directory *x* beneath the publish tree."""
        target = os.path.join(self.pt.pub_dir, x)
        return os.listdir(target)

    def _listlinks(self, x):
        """Return the targets of the symlinks found directly inside *x*."""
        pattern = '%s/*' % os.path.join(self.pt.pub_dir, x)
        targets = []
        for entry in glob(pattern):
            if os.path.islink(entry):
                targets.append(os.readlink(entry))
        return targets

    def test_01(self):
        self._cmor1()
        assert len(self.pt.drs_tree.incoming) > 0

        # Versioning should consume everything that was incoming.
        self.pt.do_version()
        assert len(self.pt.drs_tree.incoming) == 0
        assert self.pt.count_todo() == 0
        assert list(self.pt.list_todo()) == []

    def test_1(self):
        self._cmor1()
        self.pt.do_version(20100101)

        self._cmor2()
        self.pt.do_version(20100102)

        assert len(self.pt.drs_tree.incoming) == 0

        # (relative path, whether it should exist) after both versions.
        expectations = (
            ('files', True),
            ('files/rsus_20100102', True),
            ('files/rsus_20100101', False),
            ('v20100101/tas', True),
            ('v20100101/pr', True),
            ('v20100101/rsus', False),
            ('v20100102/rsus', True),
        )
        for relpath, present in expectations:
            assert self._exists(relpath) == present

    def test_2(self):
        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()
        self.pt.do_version(20100102)

        carried_over = ('v20100102/pr/'
                        'pr_day_HadCM3_1pctto4x_r1i1p1_2000010100-2001123114.nc')
        assert self._exists(carried_over)

    def test_3(self):
        # Follow the state through ingest -> version -> ingest -> version.
        self._cmor1()
        assert self.pt.state == self.pt.STATE_INITIAL

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

        self._cmor2()
        assert self.pt.state == self.pt.STATE_VERSIONED_TRANS

        self.pt.do_version()
        assert self.pt.state == self.pt.STATE_VERSIONED

    def test_4(self):
        # Check all links are to the "files" branch
        self._cmor1()
        self.pt.do_version()
        self._cmor2()
        self.pt.do_version()

        # NOTE(review): if nothing matches 'v2/tas/r1i1p1' the loop body
        # never runs and this test passes vacuously - confirm the path.
        for target in self._listlinks('v2/tas/r1i1p1'):
            assert '/files/' in target

    def test_5(self):
        # After each version change 'latest' must point at that version.
        for ingest, version in ((self._cmor1, 20100101),
                                (self._cmor2, 20100102)):
            ingest()
            self.pt.do_version(version)

            latest = os.readlink(os.path.join(self.pt.pub_dir, 'latest'))
            assert latest == 'v%d' % version

    def test_6(self):
        # Test differencing 2 versions

        self._cmor1()
        self.pt.do_version(20100101)
        self._cmor2()

        only_v1, only_v2 = [], []
        for state, old_path, new_path in self.pt.diff_version(20100101):
            if state == self.pt.DIFF_V1_ONLY:
                assert 'rsus' not in old_path
                only_v1.append(old_path)
            elif state == self.pt.DIFF_V2_ONLY:
                assert 'rsus' in new_path
                only_v2.append(new_path)

        assert len(only_v1) == 10
        assert len(only_v2) == 5
Пример #17
0
class Command(object):
    def __init__(self, op, opts, args):
        """Store *op*, *opts* and *args*, reset state and build the tree."""
        self.op, self.opts, self.args = op, opts, args

        # Filled in later by _config_p_cmip5 / _setup_p_cmip5 / make_drs_tree.
        self.shelve_dir = self.p_cmip5_config = None
        self.drs_root = self.drs_tree = None

        self.make_drs_tree()

    def _config_p_cmip5(self):
        """
        Ensure self.shelve_dir is set.  This is required for InitCommand
        and any command that uses p_cmip5.

        The value of opts.shelve_dir is used when it is not None; otherwise
        the "shelve-dir" option of the "p_cmip5" config section is read.

        :raises Exception: if neither source provides a shelve directory.

        """
        self.shelve_dir = self.opts.shelve_dir
        if self.shelve_dir is None:
            try:
                self.shelve_dir = config.config.get("p_cmip5", "shelve-dir")
            # A missing option is reported the same way as a missing section,
            # matching the handling in _setup_p_cmip5.
            except (NoSectionError, NoOptionError):
                raise Exception(
                    "Shelve directory not specified.  Please use --shelve-dir or set shelve_dir via metaconfig"
                )

    def _setup_p_cmip5(self):
        """
        Create the p_cmip5.cmip5_product object used to deduce the product
        component and attach it to self.drs_tree.

        Raises Exception when no p_cmip5 configuration file is given by
        either the options or the config.

        """
        shelves = p_cmip5.init._find_shelves(self.shelve_dir)

        # Option value first, config file as fallback.
        self.p_cmip5_config = self.opts.p_cmip5_config
        if self.p_cmip5_config is None:
            try:
                self.p_cmip5_config = config.config.get("p_cmip5", "config")
            except (NoSectionError, NoOptionError):
                raise Exception(
                    "p_cmip5 configuration file not specified.  Please use --p-cmip5-config or set via metaconfig"
                )

        product_args = dict(
            mip_table_shelve=shelves["stdo_mip"],
            template=shelves["template"],
            stdo=shelves["stdo"],
            config=self.p_cmip5_config,
            not_ok_excpt=True,
        )
        self.drs_tree.set_p_cmip5(p_cmip5.product.cmip5_product(**product_args))

    def make_drs_tree(self):
        """
        Create self.drs_fs and self.drs_tree, then run discovery.

        The DRS root, incoming directory and scheme come from self.opts,
        falling back to values held in config.  DRS components are gathered
        from the deprecated per-component options, from
        opts.component_dict and, if a positional argument is present, from
        a dataset id.  Discovery reads the JSON file named by opts.json_drs
        when given, otherwise the incoming directory.

        :raises Exception: if no DRS root can be determined.
        :raises ValueError: if the DRS scheme is not recognised.

        """
        if self.opts.root:
            self.drs_root = self.opts.root
        else:
            try:
                self.drs_root = config.drs_defaults["root"]
            except KeyError:
                raise Exception("drs-root not defined")

        if self.opts.incoming:
            incoming = self.opts.incoming
        else:
            try:
                incoming = config.drs_defaults["incoming"]
            except KeyError:
                incoming = os.path.join(self.drs_root, config.DEFAULT_INCOMING)

        if self.opts.json_drs:
            json_drs = self.opts.json_drs
        else:
            json_drs = None

        # The filesystem object gets a normalised absolute path;
        # self.drs_root keeps the value as given.
        drs_root = os.path.normpath(os.path.abspath(self.drs_root))

        if self.opts.scheme:
            scheme = self.opts.scheme
        else:
            scheme = config.default_drs_scheme

        try:
            fs_cls = config.get_drs_scheme(scheme)
        except KeyError:
            raise ValueError("Unrecognised DRS scheme %s" % scheme)

        self.drs_fs = fs_cls(drs_root)
        self.drs_tree = DRSTree(self.drs_fs)

        if self.opts.move_cmd:
            self.drs_tree.set_move_cmd(self.opts.move_cmd)

        # This code is specifically for the deprecated DRS setting options
        # Generic DRS component setting is handled below
        kwargs = {}
        for attr in ["activity", "product", "institute", "model", "experiment", "frequency", "realm", "ensemble"]:
            try:
                val = getattr(self.opts, attr)
                # val may be there but None
                if val is None:
                    raise AttributeError
                # Only reached when the deprecated option was really given.
                warn("Option --%s is deprecated.  Use --component instead" % attr)

            except AttributeError:
                val = config.drs_defaults.get(attr)

            # Only add this component if it is valid for the DRS scheme
            if attr in self.drs_fs.drs_cls.DRS_ATTRS:
                log.info("Setting DRS component %s=%s" % (attr, val))
                kwargs[attr] = val

        try:
            component_dict = self.opts.component_dict
        except AttributeError:
            component_dict = {}

        # Move every recognised component into kwargs; whatever is left in
        # component_dict afterwards is unknown to this scheme.
        for component in self.drs_fs.drs_cls._iter_components(to_publish_level=True):
            if component in component_dict:
                val = component_dict.get(component)
                log.info("Setting DRS component %s=%s" % (component, val))

                kwargs[component] = self.drs_fs.drs_cls._decode_component(component, val)
                del component_dict[component]

        # Error for any components not valid
        for component in component_dict:
            self.op.error("Unrecognised component %s for scheme %s" % (component, scheme))

        # Get the template DRS from args
        if self.args:
            dataset_id = self.args[0]
            drs = self.drs_fs.drs_cls.from_dataset_id(dataset_id, **kwargs)
        else:
            drs = self.drs_fs.drs_cls(**kwargs)

        # Product detection
        if self.opts.detect_product:
            self._config_p_cmip5()
            self._setup_p_cmip5()

        # If JSON file selected use that, otherwise discover from filesystem
        if json_drs:
            with open(json_drs) as fh:
                #!TODO: Remove json-array case
                # This is a work-around until we have a stable json format
                # The file might be a json array or it might be a series
                # of json files, 1 per line
                json_str = fh.readline()
                # startswith() is safe on an empty file, where readline()
                # returns '' and indexing [0] would raise IndexError.
                if json_str.startswith("["):
                    json_obj = json.loads(json_str)
                else:
                    json_obj = []
                    while json_str:
                        # Skip blank lines instead of letting json.loads
                        # fail on them.
                        if json_str.strip():
                            json_obj.append(json.loads(json_str))
                        json_str = fh.readline()

            self.drs_tree.discover_incoming_fromjson(json_obj, **drs)
        else:
            self.drs_tree.discover(incoming, **drs)

    def do(self):
        """Run the command; this base implementation always raises."""
        message = "Unimplemented command"
        raise NotImplementedError(message)

    def print_header(self):
        print """\
==============================================================================
DRS Tree at %s
------------------------------------------------------------------------------\
""" % self.drs_root

    def print_sep(self):
        print """\
------------------------------------------------------------------------------\
"""

    def print_footer(self):
        print """\