Exemplo n.º 1
0
def s3BuildOps(conf):
    """
    Compare a local folder with what is already in S3 and queue one
    S3Operation for every file found on either side.

    :param conf: dict of settings. This function reads 'bucket', 'keyprefix',
                 'localroot' and 'direction'; the whole dict is also passed on
                 to every S3Operation.
    :return: dict mapping each relative file name to its S3Operation
    """
    s3 = Transfer(conf['bucket'])
    opstore = {}
    log = Logger("s3BuildOps")
    # Normalise the key prefix so that it ends with a single slash
    prefix = "{0}/".format(conf['keyprefix']).replace("//", "/")

    log.title('The following locations were found:')
    remotestr = 's3://{0}/{1}'.format(conf['bucket'], conf['keyprefix'])
    if conf['direction'] == S3Operation.Direction.UP:
        fromstr, tostr = conf['localroot'], remotestr
    else:
        fromstr, tostr = remotestr, conf['localroot']
    log.info('FROM: {0}'.format(fromstr))
    log.info('TO  : {0}'.format(tostr))

    log.title('The following operations are queued:')

    response = s3.list(prefix)

    # Get all the files we have locally
    # NOTE(review): localProductWalker presumably fills `files` in place,
    # keyed by relative name -- its return value is not used here.
    files = {}
    if os.path.isdir(conf['localroot']):
        localProductWalker(conf['localroot'], files)

    # Fill in any files we find on the remote
    if 'Contents' in response:
        for result in response['Contents']:
            key = result['Key']
            # Strip the prefix from the front of the key only. str.replace()
            # would also delete any later occurrence of the prefix text.
            if key.startswith(prefix):
                dstkey = key[len(prefix):]
            else:
                dstkey = key
            files.setdefault(dstkey, {})['dst'] = result

    for relname in files:
        opstore[relname] = S3Operation(relname, files[relname], conf)

    if not opstore:
        log.info("-- NO Operations Queued --")

    return opstore
Exemplo n.º 2
0
class Project():
    """
    A project XML file plus the logic that works out where the project
    belongs in the remote repository.
    """

    def __init__(self, projectRoot, projXMLFile):
        """
        Parse the project XML found inside the project folder.

        :param projectRoot: folder that contains the project
        :param projXMLFile: name of the project XML file inside projectRoot
        """
        self.log = Logger('Project')
        self.DOM = None
        self.getProgramFromXML(path.join(projectRoot, projXMLFile))
        self.LocalRoot = projectRoot

    def getProgramFromXML(self, progXMLpath):
        """
        Parse an XML file and keep its root element in self.DOM.

        :param progXMLpath: full path of the XML file to parse
        :raises AssertionError: if the file does not exist
        """
        assert path.isfile(
            progXMLpath), "ERROR: could not find file called: {}".format(
                progXMLpath)
        self.DOM = ET.parse(progXMLpath).getroot()

    def getPath(self, program):
        """
        Figure out what the repository path should be

        :param program: object providing findprojpath(), testAllowedCollection()
                        and getAllowedLookup() for the program XML
        :return: '/'-joined remote path without a leading slash
        :raises AssertionError: if <ProjectType> is missing/empty or the product
                                cannot be found in the program
        """
        self.log.title('Getting remote path...')

        # First let's get the project type. A missing or empty tag becomes ''
        # so the assert below can report it, instead of an AttributeError
        # being raised on None.
        typeNode = self.DOM.find('./ProjectType')
        projType = ''
        if typeNode is not None and typeNode.text:
            projType = typeNode.text.strip()
        assert not _strnullorempty(
            projType), "ERROR: <ProjectType> not found in project XML."
        self.log.info("Project Type Detected: {0}".format(projType))

        # Now go get the product node from the program XML
        patharr = program.findprojpath(projType)
        assert patharr is not None, "ERROR: Product '{0}' not found anywhere in the program XML".format(
            projType)
        self.log.title("Building Path to Product: {0}".format(projType))

        parts = []
        for idx, level in enumerate(patharr):
            indent = idx * '  '
            if level['type'] == 'collection':
                col = self.getcollection(level['name'])
                self.log.info("{0}/collection:{1} => {2}".format(
                    indent, level['name'], col))
                name = col
                # Translate the project's value into the folder name the
                # program defines for it
                if program.testAllowedCollection(level['id'], col):
                    name = program.getAllowedLookup(level['id'], col)
                parts.append(name)
            elif level['type'] in ('group', 'product'):
                # Groups and products both contribute their fixed folder name
                self.log.info("{0}/{1}:{2}".format(indent, level['type'],
                                                   level['name']))
                parts.append(level['folder'])

        # Joining without a leading separator keeps the path free of a first
        # slash, for consistency elsewhere
        extpath = '/'.join(parts)
        self.log.info("Final remote path to product: {0}".format(extpath))

        return extpath

    def getcollection(self, colname):
        """
        Try to pull the Collection out of the project file

        :param colname: string with the Collection we're looking for
        :return: stripped text of the matching <Meta name='...'> tag
        :raises ValueError: if the tag is missing or has no text
        """
        try:
            val = self.DOM.find(
                "MetaData/Meta[@name='{0}']".format(colname)).text.strip()
        except AttributeError:
            # find() returned None, or the tag exists but has no text
            raise ValueError(
                "ERROR: Could not find <Meta name='{0}'>########</Meta> tag in project XML"
                .format(colname))
        return val
Exemplo n.º 3
0
class Program():
    def __init__(self, programpath):
        """
        Load the program XML and parse all of its definitions.

        :param programpath: local file path or http(s) URL of the program XML
        """
        # The logger has to exist before anything else runs: getProgram()
        # reports download failures through self.log
        self.log = Logger('Program')
        self.DOM = None
        self.Collections = {}
        self.Groups = {}
        self.Products = {}
        self.Hierarchy = {}
        self.Bucket = None
        self.ProjectFile = None

        self.getProgram(programpath)

        # Populate everything
        self.getBucket()
        self.getProjectFile()
        self.parseCollections()
        self.parseGroups()
        self.parseProducts()
        self.parseTree(self.DOM.find('Hierarchy/*'))

    def parseCollections(self):
        """
        Pull all the collections out of the program XML
        :return:
        """
        for col in self.DOM.findall('Definitions/Collections/Collection'):
            self.Collections[col.attrib['id']] = {
                'id': col.attrib['id'],
                'type': 'collection',
                'name': col.attrib['name'],
                'allows': self.parseCollectionAllowed(col.findall('Allow'))
            }
            allowType = 'fixed'
            allows = self.Collections[col.attrib['id']]['allows']
            if len(allows) > 0:
                allowType = allows[0]['type']
            self.Collections[col.attrib['id']]['allowtype'] = allowType

    def getProgram(self, progpath):
        """
        Either uses a local path or downloads an online version of the program XML
        :param path:
        :return:
        """
        if re.match('^https*:\/\/.*', progpath) is not None:
            try:
                request = urllib2.Request(progpath)
                request.add_header('Pragma', 'no-cache')
                file = urllib2.build_opener().open(request)
                data = file.read()
                file.close()
                self.DOM = ET.fromstring(data)
            except:
                err = "ERROR: Could not download <{0}>".format(progpath)
                self.log.error(err)
                raise ValueError(err)
        else:
            self.DOM = ET.parse(progpath).getroot()

    def parseCollectionAllowed(self, allowETs):
        allows = []
        for allow in allowETs:
            if 'pattern' in allow.attrib:
                allows.append({
                    'type': 'pattern',
                    'pattern': allow.attrib['pattern'],
                })
            else:
                attrs = allow.attrib
                attrs['type'] = 'fixed'
                allows.append(attrs)
        return allows

    def testAllowedCollection(self, colName, desiredName):
        """
        Test if this is a valid collection to ask for
        :param collection:
        :param colName:
        :return:
        """
        collection = self.Collections[colName]
        if len(collection['allows']) == 0:
            return True

        assert len(
            desiredName
        ) > 0, "ERROR: Desired collection name for collection {0} is empty.".format(
            collection['name'])

        bGood = False
        for allow in collection['allows']:
            if allow['type'] == 'pattern':
                try:
                    matchObj = re.match(allow['pattern'], desiredName)
                    if matchObj:
                        bGood = True
                        continue
                except Exception as e:
                    self.log.error(
                        "Something went wrong with the allow RegEx in the Program XML file",
                        e)
            else:
                if allow['name'] == desiredName:
                    bGood = True
                    continue
                elif 'aliases' in allow and desiredName in allow[
                        'aliases'].split(','):
                    bGood = True
                    continue

        assert bGood, "ERROR: Desired Collection: {0} did not pass the allowed values test for collection: {1}".format(
            desiredName, collection['name'])
        return bGood

    def getAllowedLookup(self, colName, desiredName):
        """
        Get the actual allowed name. Most of the time this is just what you pass in
        but in the case of non-pattern allows this will do a lookup
        :param collection:
        :param colName:
        :return:
        """
        if len(self.Collections[colName]['allows']) == 0:
            return desiredName

        name = desiredName
        for allow in self.Collections[colName]['allows']:
            if allow['type'] == 'fixed' and allow['name'] == desiredName:
                name = allow['folder']
                continue
        return name

    def parseGroups(self):
        for grp in self.DOM.findall('Definitions/Groups/Group'):
            self.Groups[grp.attrib['id']] = {
                'id': grp.attrib['id'],
                'type': 'group',
                'name': grp.attrib['name'],
                'folder': grp.attrib['folder']
            }

    def parseProducts(self):
        for prod in self.DOM.findall('Definitions/Products/Product'):
            self.Products[prod.attrib['id']] = {
                'id': prod.attrib['id'],
                'type': 'product',
                'name': prod.attrib['name'],
                'folder': prod.attrib['folder']
            }

    def parseTree(self, etNode, treeNode=None):

        obj = {}

        if etNode.tag == 'Product' and 'ref' in etNode.attrib:
            obj['type'] = 'product'
            obj['node'] = self.Products[etNode.attrib['ref']]

        elif etNode.tag in ['Group', 'Collection']:
            obj['children'] = []
            if etNode.tag == 'Group':
                obj['type'] = 'group'
                obj['node'] = self.Groups[etNode.attrib['ref']]
            else:
                obj['type'] = 'collection'
                obj['node'] = self.Collections[etNode.attrib['ref']]

            for child in etNode.getchildren():
                obj['children'].append(self.parseTree(child, obj['children']))

        if treeNode is None:
            self.Hierarchy = obj

        return obj

    def getProjectFile(self):
        try:
            self.ProjectFile = self.DOM.find(
                "MetaData/Meta[@name='projectfile']").text.strip()
            self.log.info("Project File we're looking for: {0}".format(
                self.ProjectFile))
        except:
            msg = "ERROR: No <Meta Name='projectfile'>project.rs.xml</Meta> tag found in program XML"
            self.log.error(msg)
            raise ValueError(msg)

    def getBucket(self):
        try:
            self.Bucket = self.DOM.find(
                "MetaData/Meta[@name='s3bucket']").text.strip()
            self.log.info("S3 Bucket Detected: {0}".format(self.Bucket))
        except:
            msg = "ERROR: No <Meta Name='s3bucket'>riverscapes</Meta> tag found in program XML"
            self.log.error(msg)
            raise ValueError(msg)

    def getProdPath(self, prodName):
        """
        Look up the chain of hierarchy nodes that leads to a product.

        :param prodName: name of the product to find
        :return: the list returned by findprojpath() for that product
        :raises AssertionError: if prodName is empty or the product is not in
                                the program hierarchy
        """
        self.log.title('Getting remote path structure...')

        # First let's get the project type
        assert not _strnullorempty(
            prodName), "ERROR: <ProjectType> not found in project XML."
        self.log.info("Project Type Detected: {0}".format(prodName))

        # Now go get the product node from the program XML
        patharr = self.findprojpath(prodName)
        assert patharr is not None, "ERROR: Product '{0}' not found anywhere in the program XML".format(
            prodName)
        # The placeholder was missing, so the product name was never logged
        self.log.title("Building Path to Product: {0}".format(prodName))

        return patharr

    def findprojpath(self, prodname, node=None, path=[]):
        """
        Find the path to the desired project
        :param prodname:
        :param node:
        :param path:
        :return:
        """
        if node is None:
            node = self.Hierarchy
        if node['type'] == 'product' and node['node']['name'] == prodname:
            path.append(node['node'])
            return path
        elif node['type'] in ['group', 'collection']:

            newpath = path[:]
            newpath.append(node['node'])

            for child in node['children']:
                result = self.findprojpath(prodname, child, newpath)
                if result is not None:
                    return result

    def progtos3path(self, progpath, level=0, currpath=[], paths=[]):
        """
        A program path to a series of real S3 paths

        Walks progpath level by level; on the terminal branch the segments
        collected in currpath are joined with '/' and appended to paths.

        NOTE(review): this method looks unfinished -- see the inline notes
        below before relying on it.
        :param progpath: indexable by level; each entry has a length and is
                         iterated when that length is not 1
        :param level: index into progpath currently being processed
        :param currpath: list of path segments collected so far
        :param paths: list that finished '/'-joined paths are appended to
        :return: paths from the terminal branch; the other branches fall
                 through and return None
        """
        # NOTE(review): currpath and paths default to shared mutable lists and
        # are appended to below, so values persist between calls that rely on
        # the defaults.

        # Are we at the end yet? last level must be a product
        # NOTE(review): when this condition holds, level == len(progpath) + 1,
        # so progpath[level] is past the end if progpath is a list -- confirm
        # the intended termination test.
        if (level - 1) == len(progpath):
            currpath.append(progpath[level])
            paths.append('/'.join(currpath))
            return paths

        # One choice. Just move on:
        if len(progpath[level]) == 1:
            currpath.append(progpath[level])
            # NOTE(review): `paths` is passed positionally into the `currpath`
            # slot here, and the result of the recursive call is discarded.
            self.progtos3path(progpath, level + 1, paths)
        else:
            for el in progpath[level]:
                # NOTE(review): list.append() returns None, so newpath is
                # always None; it is also never used.
                newpath = currpath[:].append(el)
                # NOTE(review): same positional mix-up as above; el does not
                # reach the recursive call.
                self.progtos3path(progpath, level + 1, paths)