Exemplos de RequestManager.process em Python, exemplos de SiteDB.HTTPRequest.RequestManager.process em Python

Exemplo n.º 1

0

Exibir arquivo

class RebusFetchThread(Thread):
    """A task thread to synchronise federation pledges from REBUS.

    The thread periodically downloads the yearly pledge tables from REBUS,
    flattens them into one row per federation and year, and PUTs the rows
    to the server's own ``rebusfetch`` URL.  It subscribes itself to the
    cherrypy engine "start" and "stop" channels.  This runs on a single
    node only in the cluster.
    """

    # Default REBUS location; overridden per instance when the constructor
    # receives a `str` base URL.
    _baseurl = "http://gstat-wlcg.cern.ch/apps/pledges/resources/"
    # User agent string handed to the internal request manager.
    _ident = "SiteDB/%s Python/%s" % \
             (os.environ["SITEDB_VERSION"], ".".join(map(str, sys.version_info[:3])))

    def __init__(self,
                 app,
                 baseurl,
                 mount,
                 cacertdir="/etc/grid-security/certificates",
                 minreq=1000,
                 interval=300,
                 instance="test"):
        """Set up the thread state and hook it to the cherrypy engine.

        Parameters:
          app       -- server application; `app.srvconfig` provides the
                       listening port and the cms_auth key file path.
          baseurl   -- REBUS base URL; only used when it is a `str`.
          mount     -- URL mount point used to build the internal URL.
          cacertdir -- stored on the instance; not used further here.
          minreq    -- stored on the instance; not used further here.
          interval  -- seconds between two synchronisation attempts.
          instance  -- instance name used to build the internal URL.
        """
        Thread.__init__(self, name="RebusFetch")
        # The token serves as login for the internal requests, so draw it
        # from the operating system's entropy source instead of the
        # seedable default generator.
        self.sectoken = "".join(
            random.SystemRandom().sample(string.letters, 30))
        self._inturl = "http://localhost:%d%s/%s/rebusfetch" % \
                       (app.srvconfig.port, mount, instance)
        # Read the key through a context manager so the file is closed.
        with open(app.srvconfig.tools.cms_auth.key_file) as keyfile:
            authkey = keyfile.read()
        self._headers = \
          fake_authz_headers(authkey,
                             method = "Internal", login = self.sectoken,
                             name = self.__class__.__name__, dn = None,
                             roles = {"Global Admin": {"group": ["global"]}}) \
          + [("Accept", "application/json")]
        self._cv = Condition()
        if isinstance(baseurl, str):
            self._baseurl = baseurl

        self._cacertdir = cacertdir
        self._minreq = minreq
        self._interval = interval
        self._stopme = False
        # (time of last successful sync, rows sent, raw REBUS data)
        self._full = (0, [], [])
        # Maps rejected input -> time the warning was logged.
        self._warnings = {}

        self._intreq = RequestManager(num_connections=2,
                                      user_agent=self._ident,
                                      handle_init=self._handle_init,
                                      request_respond=self._respond,
                                      request_init=self._int_init)
        cherrypy.engine.subscribe("stop", self.stop)
        cherrypy.engine.subscribe("start", self.start)

    def status(self):
        """Get the processing status.

        Returns a copy of the status tuple, whose first item is the time of
        the last successful full synchronisation with REBUS (0 if none).
        """
        with self._cv:
            return self._full[:]

    def stop(self, *args):
        """Tell the task thread to quit.  Extra arguments are ignored."""
        with self._cv:
            self._stopme = True
            self._cv.notifyAll()

    def run(self):
        """Run synchronisation thread.

        Attempts one synchronisation per interval until `stop()` has been
        called.  A failed attempt is logged with its traceback and does not
        end the loop.
        """
        while True:
            now = time.time()
            until = now + self._interval

            try:
                self._sync(now)
            except Exception as e:
                cherrypy.log("SYNC ERROR %s.%s REBUS sync failed %s" %
                             (getattr(e, "__module__", "__builtins__"),
                              e.__class__.__name__, str(e)))
                for line in format_exc().rstrip().split("\n"):
                    cherrypy.log("  " + line)

            # Sleep out the rest of the interval, waking early on stop().
            with self._cv:
                while not self._stopme and now < until:
                    self._cv.wait(until - now)
                    now = time.time()

                if self._stopme:
                    return

    def _validate(self, input, type, regexp, now):
        """Convenience method to validate REBUS data.

        Returns the match object when `input` is an instance of `type` and
        `regexp` matches it, otherwise None.  A rejected input is logged
        once and remembered in the warnings table with the time `now`.
        """
        if isinstance(input, type):
            m = regexp.match(input)
            if m:
                return m
        if input not in self._warnings:
            cherrypy.log("WARNING: REBUS data failed validation: '%s'" % input)
            self._warnings[input] = now
        return None

    def _sync(self, now):
        """Perform full synchronisation.

        Fetches the pledges, builds one row per federation and year (all
        values as strings, CPU divided by 1000, missing pledge types as
        "0"), sends the rows in a single internal PUT and records the
        successful run for `status()`.
        """
        # Delete warnings older than 24 hours.  Loop over a snapshot so
        # entries can be removed safely while iterating.
        for key, stamp in list(self._warnings.items()):
            if stamp < now - 86400:
                del self._warnings[key]

        # Get the pledge information from REBUS.
        pledges = self._ldget(self._baseurl)

        # Flatten it into one row per federation and year.
        rows = []
        for name, values in pledges.iteritems():
            for year, amounts in values["pledges"].iteritems():
                row = {
                    'name': name,
                    'country': values["country"],
                    'year': str(year),
                    'cpu': str(0),
                    'disk': str(0),
                    'tape': str(0)
                }
                if 'CPU' in amounts:
                    row['cpu'] = str(amounts['CPU'] / float(1000))
                if 'Disk' in amounts:
                    row['disk'] = str(amounts['Disk'])
                if 'Tape' in amounts:
                    row['tape'] = str(amounts['Tape'])
                rows.append(row)

        # Do the internal api call for the bulk update.
        result = []
        self._intreq.put(("PUT", rows, result))
        self._intreq.process()

        # Record the run so that status() reports the time of the last
        # successful synchronisation instead of staying at its initial 0.
        with self._cv:
            self._full = (now, rows, pledges)

    def _ldget(self, url):
        """Get data from REBUS.

        Returns the pledges from 2008 up to and including next year.
        NOTE(review): `url` is accepted but not used; the download address
        is fixed inside `_read_rebus_data` -- confirm this is intended.
        """
        year_next = date.today().year + 2
        return self._read_rebus_data(2008, year_next)

    def _handle_init(self, c):
        """Initialise curl handle `c`."""
        # Turns off TLS peer and host verification on the handle.  The
        # requests prepared by _int_init go to an http://localhost URL.
        c.setopt(pycurl.SSL_VERIFYPEER, 0)
        c.setopt(pycurl.SSL_VERIFYHOST, 0)

    def _respond(self, c):
        """Respond to data on curl handle `c`.

        Appends the response body to the request's result list.  Raises
        RuntimeError when the HTTP status is not 200.
        """
        code = c.getinfo(pycurl.HTTP_CODE)
        if code != 200:
            raise RuntimeError("HTTP status %d for %s" %
                               (code, c.getinfo(pycurl.EFFECTIVE_URL)))
        c.result.append(c.buffer.getvalue())

    def _int_init(self, c, method, rows, result):
        """Initialise curl handle `c` for an internal REST API request.

        `method` is "PUT" (uploads `rows` as a form body) or "GET"; any
        other value raises AssertionError.  `result` is attached to the
        handle so that `_respond` can append the response to it.
        """
        # Raised explicitly rather than with an `assert` statement, which
        # would be stripped when python runs with -O.
        if method not in ("PUT", "GET"):
            raise AssertionError("Unsupported method")

        headers = self._headers[:]
        if method == "PUT":
            ctype, body = self._encode(rows)
            headers += [("Content-Type", ctype),
                        ("Content-Length", str(len(body)))]
            c.setopt(pycurl.POST, 0)
            c.setopt(pycurl.UPLOAD, 1)
        c.setopt(pycurl.URL, self._inturl)
        c.setopt(pycurl.HTTPHEADER, ["%s: %s" % h for h in headers])
        if method == "PUT":
            c.setopt(pycurl.READFUNCTION, StringIO(body).read)
        c.result = result

    def _encode(self, rows):
        """Encode dictionaries in `rows` for POST/PUT body as a HTML form.

        Returns a (content type, body) pair.  The key/value pairs of all
        rows are joined with "&" into one body; values are URL-quoted.
        """
        pairs = []
        for obj in rows:
            for key, value in obj.iteritems():
                # Only unicode needs encoding; calling encode() on a byte
                # string makes python 2 decode it as ascii first, which
                # fails on non-ascii bytes.
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                pairs.append("%s=%s" % (key, urllib2.quote(value)))
        return ("application/x-www-form-urlencoded", "&".join(pairs))

    def _read_rebus_data(self, year, yearTo):
        """Fetch the REBUS json data for the years `year` .. `yearTo` - 1.

        Returns a dictionary of the form
          {federation: {"country": country,
                        "pledges": {year: {pledge type: CMS value}}}}
        containing only records with a true CMS value and pledge type.
        """
        data = {}
        for x in range(year, yearTo):
            # NOTE(review): this address is fixed and differs from the
            # `_baseurl` attribute -- confirm which one is meant.
            url = "http://wlcg-rebus.cern.ch/apps/pledges/resources/" + str(
                x) + "/all/json"
            req = urllib2.Request(url)
            opener = urllib2.build_opener()
            f = opener.open(req)
            # Close the response even when reading or parsing fails.
            try:
                pledges = json.loads(f.read())
            finally:
                f.close()
            for item in pledges:
                federname = item["Federation"]
                cms = item["CMS"]
                pledgetype = item["PledgeType"]
                country = item["Country"]
                if not (cms and pledgetype):
                    continue
                # The country of the first record seen for a federation
                # is kept; later records only add pledges.
                federation = data.setdefault(
                    federname, {"country": country, "pledges": {}})
                federation["pledges"].setdefault(x, {})[pledgetype] = cms
        return data

Exemplo n.º 2

0

Exibir arquivo

Arquivo: DataLdapSync.py Projeto: dmwm/sitedb

class LdapSyncThread(Thread):
  """A task thread to synchronise SiteDB from CERN/LDAP.

  The thread periodically reads the CMS users from LDAP, validates them and
  PUTs the enabled accounts to the server's own ``ldapsync`` URL.  It
  subscribes itself to the cherrypy engine "start" and "stop" channels.
  This runs on a single node only in the cluster.
  """

  # Default LDAP server; overridden per instance when the constructor
  # receives a `str` base URL.
  _baseurl = "ldaps://xldap.cern.ch:636"
  # User agent string handed to the internal request manager.
  _ident = "SiteDB/%s Python/%s" % \
           (os.environ["SITEDB_VERSION"], ".".join(map(str, sys.version_info[:3])))

  # The buggy ca.cern.ch user interface allows to put anything, so some users
  # have uploaded CA certificates or even SSH keys. So try to ignore them.
  RX_ALTDN = re.compile(r"(?iu)^X509:.*<S>(([A-Z]+=([-\w _@'.()/]+),?)*(?<!berosservice|CERN Root CA|on Authority|s,CN=lsfcert|s,CN=acronmc))$")

  def __init__(self, app, baseurl, mount, cacertdir = "/etc/grid-security/certificates", minreq = 1000, interval = 300, instance = "test"):
    """Set up the thread state and hook it to the cherrypy engine.

    Parameters:
      app       -- server application; `app.srvconfig` provides the
                   listening port and the cms_auth key file path.
      baseurl   -- LDAP server URL; only used when it is a `str`.
      mount     -- URL mount point used to build the internal URL.
      cacertdir -- CA certificate directory given to the ldap library.
      minreq    -- minimum number of valid users needed to synchronise.
      interval  -- seconds between two synchronisation attempts.
      instance  -- instance name used to build the internal URL.
    """
    Thread.__init__(self, name = "LdapSync")
    # The token serves as login for the internal requests, so draw it from
    # the operating system's entropy source instead of the seedable default
    # generator.
    self.sectoken = "".join(random.SystemRandom().sample(string.letters, 30))
    self._inturl = "http://localhost:%d%s/%s/ldapsync" % \
                   (app.srvconfig.port, mount, instance)
    # Read the key through a context manager so the file is closed.
    with open(app.srvconfig.tools.cms_auth.key_file) as keyfile:
      authkey = keyfile.read()
    self._headers = \
      fake_authz_headers(authkey,
                         method = "Internal", login = self.sectoken,
                         name = self.__class__.__name__, dn = None,
                         roles = {"Global Admin": {"group": ["global"]}}) \
      + [("Accept", "application/json")]

    self._cv = Condition()
    if isinstance(baseurl, str):
      self._baseurl = baseurl

    self._cacertdir = cacertdir
    self._minreq = minreq
    self._interval = interval
    self._stopme = False
    # Status tuple; its first item is the time of the last successful sync.
    self._full = (0, [], [])
    # Maps rejected input -> time the warning was logged.
    self._warnings = {}

    self._intreq = RequestManager(num_connections = 2,
                                  user_agent = self._ident,
                                  handle_init = self._handle_init,
                                  request_respond = self._respond,
                                  request_init = self._int_init)
    cherrypy.engine.subscribe("stop", self.stop)
    cherrypy.engine.subscribe("start", self.start)

  def status(self):
    """Get the processing status.

    Returns a copy of the status tuple, whose first item is the time of the
    last successful full synchronisation with CERN/LDAP (0 if none).
    """
    with self._cv:
      return self._full[:]

  def stop(self, *args):
    """Tell the task thread to quit.  Extra arguments are ignored."""
    with self._cv:
      self._stopme = True
      self._cv.notifyAll()

  def run(self):
    """Run synchronisation thread.

    Attempts one synchronisation per interval until `stop()` has been
    called.  A failed attempt is logged with its traceback and does not end
    the loop.
    """
    while True:
      now = time.time()
      until = now + self._interval

      try:
        self._sync(now)
      except Exception as e:
        cherrypy.log("SYNC ERROR %s.%s LDAP sync failed %s"
                     % (getattr(e, "__module__", "__builtins__"),
                        e.__class__.__name__, str(e)))
        for line in format_exc().rstrip().split("\n"):
          cherrypy.log("  " + line)

      # Sleep out the rest of the interval, waking early on stop().
      with self._cv:
        while not self._stopme and now < until:
          self._cv.wait(until - now)
          now = time.time()

        if self._stopme:
          return

  def _validate(self, input, type, regexp, now):
    """Convenience method to validate ldap data.

    Returns the match object when `input` is an instance of `type` and
    `regexp` matches it, otherwise None.  A rejected input is logged once
    and remembered in the warnings table with the time `now`.
    """
    if isinstance(input, type):
      m = regexp.match(input)
      if m:
        return m
    if input not in self._warnings:
      # Printing to logs only ascii encoded string.  UnicodeError covers
      # both the decode failure of a byte string and the implicit encode
      # failure python 2 raises for a non-ascii unicode string.
      try:
        input.decode('ascii')
      except UnicodeError:
        cherrypy.log("WARNING: ldap data failed validation, binary input, skipped printing.")
      else:
        cherrypy.log("WARNING: ldap data failed validation: '%s'" % input[:1000])
      self._warnings[input] = now
    return None

  def _sync(self, now):
    """Perform full synchronisation.

    Reads all users from LDAP, drops entries with missing or invalid
    mandatory data, keeps the enabled accounts and sends them in a single
    internal PUT.  Nothing is sent when fewer than `minreq` users remain.
    """
    # Delete warnings older than 24 hours.  Loop over a snapshot so entries
    # can be removed safely while iterating.
    for key, stamp in list(self._warnings.items()):
      if stamp < now - 86400:
        del self._warnings[key]

    # Get the user information from CERN/LDAP
    ldresult = self._ldget(self._baseurl)

    # Process each user record returned
    rows = []
    for (dn, attrs) in ldresult:
      # An entry lacking a mandatory attribute is skipped on its own instead
      # of raising and aborting the synchronisation of every user.
      try:
        u = { 'username': attrs['sAMAccountName'][0],
              'passwd'  : 'NeedsToBeUpdated',
              'dn'      : dn,
              'name'    : attrs['displayName'][0],
              'email'   : attrs['mail'][0] }
        perid = attrs['employeeID'][0]
        accstatus = attrs['userAccountControl'][0]
      except (KeyError, IndexError, TypeError):
        cherrypy.log('WARNING: ignoring ldap entry with missing'
                     ' non-optional attributes: %s' % (dn,))
        continue

      # Do the input validation
      if not ( self._validate(u['username'], str,        RX_USER,   now) and \
               self._validate(u['name'],     basestring, RX_NAME,   now) and \
               self._validate(perid,         str,        RX_UID,    now) and \
               self._validate(u['dn'],       basestring, RX_LDAPDN, now) and \
               self._validate(u['email'],    str,        RX_EMAIL,  now) and \
               self._validate(accstatus,     str,        RX_UID,    now) ):
        cherrypy.log('WARNING: ignoring user with invalid non-optional ldap' \
                     ' data: %s' % u['username'])
        continue

      # Only process normal accounts (aka enabled user accounts).
      if accstatus != '512':
        continue

      # newdn is the reversed elements from full name + personid + dn
      newdn = ','.join(('CN='+ u['name'] +',CN='+ perid +','+ u['dn']).split(',')[::-1])
      # in case non-Cern DN was mapped to the account, use it as newdn instead
      for altdn in attrs.get('altSecurityIdentities', ()):
        m = self._validate(altdn, basestring, self.RX_ALTDN, now)
        # get the last mapped DN not matching the Kerberosservice|CAs
        if m:
          newdn = m.group(1)
      u['dn'] = '/'+newdn.replace(',','/')

      # add this user to the bulk data to be updated
      rows.append(u)

    # check number of rows is sane
    if len(rows) < self._minreq:
      cherrypy.log("ERROR: cowardly refusing full ldap synchronisation"
                   " with only %d users received, fewer than required %d"
                   % (len(rows), self._minreq))
      return
    cherrypy.log("INFO: found %d valid users in ldap" % len(rows))

    # do the internal api call for the bulk update
    result = []
    self._intreq.put(("PUT", rows, result))
    self._intreq.process()
    # status() reads the tuple under the same condition lock.
    with self._cv:
      self._full = (now, rows, ldresult, result and result[0])

  def _ldget(self, url):
    """Get data from LDAP.

    Returns the list of (dn, attributes) entries found for the CMS users
    group.  Raises RuntimeError when the server returned no entry at all.
    """
    result = []

    ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, self._cacertdir)
    l = ldap.initialize(url)
    # Release the connection whatever happens during the search.
    try:
      l.protocol_version = ldap.VERSION3

      # Fetch paged results from ldap server.
      # This is needed because there is a size limit on the CERN ldap server
      # side to return at most 1000 entries per request.
      # For more information, see http://tools.ietf.org/html/rfc2696.html
      srv_ctrls = [ldap.controls.SimplePagedResultsControl(criticality=False, cookie="")]
      while True:
        # The controls returned with the previous page are sent back for
        # the next one, so the page size has to be set on every round.
        srv_ctrls[0].size = 1000 # dont necessarily need to match the server limit
        s = l.search_ext('OU=Users,OU=Organic Units,DC=cern,DC=ch',
                         ldap.SCOPE_SUBTREE,
                         '(memberOf:1.2.840.113556.1.4.1941:=CN=cms-authorized-users,OU=e-groups,OU=Workgroups,DC=cern,DC=ch)',
                         ['sAMAccountName','displayName','employeeID','mail','altSecurityIdentities','userAccountControl'],
                         serverctrls=srv_ctrls,
                         sizelimit=0)
        _, res_data, _, srv_ctrls = l.result3(s, timeout=100)
        result.extend(res_data)
        # An empty cookie marks the last page.
        if not srv_ctrls[0].cookie:
          break
    finally:
      l.unbind_s()

    if not result:
      raise RuntimeError("Ldap returned no data for %s" % url)
    return result

  def _handle_init(self, c):
    """Initialise curl handle `c`."""
    # Turns off TLS peer and host verification on the handle.  The requests
    # prepared by _int_init go to an http://localhost URL.
    c.setopt(pycurl.SSL_VERIFYPEER, 0)
    c.setopt(pycurl.SSL_VERIFYHOST, 0)

  def _respond(self, c):
    """Respond to data on curl handle `c`.

    Appends the response body to the request's result list.  Raises
    RuntimeError when the HTTP status is not 200.
    """
    code = c.getinfo(pycurl.HTTP_CODE)
    if code != 200:
      raise RuntimeError("HTTP status %d for %s" % (code, c.getinfo(pycurl.EFFECTIVE_URL)))
    c.result.append(c.buffer.getvalue())

  def _int_init(self, c, method, rows, result):
    """Initialise curl handle `c` for an internal REST API request.

    Only the "PUT" `method` is supported; any other value raises
    AssertionError.  `rows` are uploaded as a form body and `result` is
    attached to the handle so that `_respond` can append the response.
    """
    # Raised explicitly rather than with an `assert` statement, which would
    # be stripped when python runs with -O.
    if method != "PUT":
      raise AssertionError("Unsupported method")

    ctype, body = self._encode(rows)
    headers = self._headers[:] + [("Content-Type", ctype),
                                  ("Content-Length", str(len(body)))]
    c.setopt(pycurl.POST, 0)
    c.setopt(pycurl.UPLOAD, 1)
    c.setopt(pycurl.URL, self._inturl)
    c.setopt(pycurl.HTTPHEADER, ["%s: %s" % h for h in headers])
    c.setopt(pycurl.READFUNCTION, StringIO(body).read)
    c.result = result

  def _encode(self, rows):
    """Encode dictionaries in `rows` for POST/PUT body as a HTML form.

    Returns a (content type, body) pair.  The key/value pairs of all rows
    are joined with "&" into one body; values are URL-quoted.
    """
    pairs = []
    for obj in rows:
      for key, value in obj.iteritems():
        # Only unicode needs encoding; calling encode() on a byte string
        # makes python 2 decode it as ascii first, which fails on non-ascii
        # bytes.
        if isinstance(value, unicode):
          value = value.encode("utf-8")
        pairs.append("%s=%s" % (key, urllib.quote(value)))
    return ("application/x-www-form-urlencoded", "&".join(pairs))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: DataLdapSync.py Projeto: nikmagini/sitedb

class LdapSyncThread(Thread):
    """A task thread to synchronise SiteDB from CERN/LDAP.

    The thread periodically reads the CMS users from LDAP, validates them
    and PUTs the enabled accounts to the server's own ``ldapsync`` URL.  It
    subscribes itself to the cherrypy engine "start" and "stop" channels.
    This runs on a single node only in the cluster.
    """

    # Default LDAP server; overridden per instance when the constructor
    # receives a `str` base URL.
    _baseurl = "ldaps://xldap.cern.ch:636"
    # User agent string handed to the internal request manager.
    _ident = "SiteDB/%s Python/%s" % \
             (os.environ["SITEDB_VERSION"], ".".join(map(str, sys.version_info[:3])))

    # The buggy ca.cern.ch user interface allows to put anything, so some users
    # have uploaded CA certificates or even SSH keys. So try to ignore them.
    RX_ALTDN = re.compile(
        r"(?iu)^X509:.*<S>(([A-Z]+=([-\w _@'.()/]+),?)*(?<!berosservice|CERN Root CA|on Authority|s,CN=lsfcert|s,CN=acronmc))$"
    )

    def __init__(self,
                 app,
                 baseurl,
                 mount,
                 cacertdir="/etc/grid-security/certificates",
                 minreq=1000,
                 interval=300,
                 instance="test"):
        """Set up the thread state and hook it to the cherrypy engine.

        Parameters:
          app       -- server application; `app.srvconfig` provides the
                       listening port and the cms_auth key file path.
          baseurl   -- LDAP server URL; only used when it is a `str`.
          mount     -- URL mount point used to build the internal URL.
          cacertdir -- CA certificate directory given to the ldap library.
          minreq    -- minimum number of valid users needed to synchronise.
          interval  -- seconds between two synchronisation attempts.
          instance  -- instance name used to build the internal URL.
        """
        Thread.__init__(self, name="LdapSync")
        # The token serves as login for the internal requests, so draw it
        # from the operating system's entropy source instead of the
        # seedable default generator.
        self.sectoken = "".join(
            random.SystemRandom().sample(string.letters, 30))
        self._inturl = "http://localhost:%d%s/%s/ldapsync" % \
                       (app.srvconfig.port, mount, instance)
        # Read the key through a context manager so the file is closed.
        with open(app.srvconfig.tools.cms_auth.key_file) as keyfile:
            authkey = keyfile.read()
        self._headers = \
          fake_authz_headers(authkey,
                             method = "Internal", login = self.sectoken,
                             name = self.__class__.__name__, dn = None,
                             roles = {"Global Admin": {"group": ["global"]}}) \
          + [("Accept", "application/json")]

        self._cv = Condition()
        if isinstance(baseurl, str):
            self._baseurl = baseurl

        self._cacertdir = cacertdir
        self._minreq = minreq
        self._interval = interval
        self._stopme = False
        # Status tuple; its first item is the time of the last successful
        # synchronisation.
        self._full = (0, [], [])
        # Maps rejected input -> time the warning was logged.
        self._warnings = {}

        self._intreq = RequestManager(num_connections=2,
                                      user_agent=self._ident,
                                      handle_init=self._handle_init,
                                      request_respond=self._respond,
                                      request_init=self._int_init)
        cherrypy.engine.subscribe("stop", self.stop)
        cherrypy.engine.subscribe("start", self.start)

    def status(self):
        """Get the processing status.

        Returns a copy of the status tuple, whose first item is the time of
        the last successful full synchronisation with CERN/LDAP (0 if none).
        """
        with self._cv:
            return self._full[:]

    def stop(self, *args):
        """Tell the task thread to quit.  Extra arguments are ignored."""
        with self._cv:
            self._stopme = True
            self._cv.notifyAll()

    def run(self):
        """Run synchronisation thread.

        Attempts one synchronisation per interval until `stop()` has been
        called.  A failed attempt is logged with its traceback and does not
        end the loop.
        """
        while True:
            now = time.time()
            until = now + self._interval

            try:
                self._sync(now)
            except Exception as e:
                cherrypy.log("SYNC ERROR %s.%s LDAP sync failed %s" %
                             (getattr(e, "__module__", "__builtins__"),
                              e.__class__.__name__, str(e)))
                for line in format_exc().rstrip().split("\n"):
                    cherrypy.log("  " + line)

            # Sleep out the rest of the interval, waking early on stop().
            with self._cv:
                while not self._stopme and now < until:
                    self._cv.wait(until - now)
                    now = time.time()

                if self._stopme:
                    return

    def _validate(self, input, type, regexp, now):
        """Convenience method to validate ldap data.

        Returns the match object when `input` is an instance of `type` and
        `regexp` matches it, otherwise None.  A rejected input is logged
        once and remembered in the warnings table with the time `now`.
        """
        if isinstance(input, type):
            m = regexp.match(input)
            if m:
                return m
        if input not in self._warnings:
            # Printing to logs only ascii encoded string.  UnicodeError
            # covers both the decode failure of a byte string and the
            # implicit encode failure python 2 raises for a non-ascii
            # unicode string.
            try:
                input.decode('ascii')
            except UnicodeError:
                cherrypy.log(
                    "WARNING: ldap data failed validation, binary input, skipped printing."
                )
            else:
                cherrypy.log("WARNING: ldap data failed validation: '%s'" %
                             input[:1000])
            self._warnings[input] = now
        return None

    def _sync(self, now):
        """Perform full synchronisation.

        Reads all users from LDAP, drops entries with missing or invalid
        mandatory data, keeps the enabled accounts and sends them in a
        single internal PUT.  Nothing is sent when fewer than `minreq`
        users remain.
        """
        # Delete warnings older than 24 hours.  Loop over a snapshot so
        # entries can be removed safely while iterating.
        for key, stamp in list(self._warnings.items()):
            if stamp < now - 86400:
                del self._warnings[key]

        # Get the user information from CERN/LDAP
        ldresult = self._ldget(self._baseurl)

        # Process each user record returned
        rows = []
        for (dn, attrs) in ldresult:
            # An entry lacking a mandatory attribute is skipped on its own
            # instead of raising and aborting the synchronisation of every
            # user.
            try:
                u = {
                    'username': attrs['sAMAccountName'][0],
                    'passwd': 'NeedsToBeUpdated',
                    'dn': dn,
                    'name': attrs['displayName'][0],
                    'email': attrs['mail'][0]
                }
                perid = attrs['employeeID'][0]
                accstatus = attrs['userAccountControl'][0]
            except (KeyError, IndexError, TypeError):
                cherrypy.log('WARNING: ignoring ldap entry with missing'
                             ' non-optional attributes: %s' % (dn,))
                continue

            # Do the input validation
            if not ( self._validate(u['username'], str,        RX_USER,   now) and \
                     self._validate(u['name'],     basestring, RX_NAME,   now) and \
                     self._validate(perid,         str,        RX_UID,    now) and \
                     self._validate(u['dn'],       basestring, RX_LDAPDN, now) and \
                     self._validate(u['email'],    str,        RX_EMAIL,  now) and \
                     self._validate(accstatus,     str,        RX_UID,    now) ):
                cherrypy.log('WARNING: ignoring user with invalid non-optional ldap' \
                             ' data: %s' % u['username'])
                continue

            # Only process normal accounts (aka enabled user accounts).
            if accstatus != '512':
                continue

            # newdn is the reversed elements from full name + personid + dn
            newdn = ','.join(('CN=' + u['name'] + ',CN=' + perid + ',' +
                              u['dn']).split(',')[::-1])
            # in case non-Cern DN was mapped to the account, use it as newdn instead
            for altdn in attrs.get('altSecurityIdentities', ()):
                m = self._validate(altdn, basestring, self.RX_ALTDN, now)
                # get the last mapped DN not matching the Kerberosservice|CAs
                if m:
                    newdn = m.group(1)
            u['dn'] = '/' + newdn.replace(',', '/')

            # add this user to the bulk data to be updated
            rows.append(u)

        # check number of rows is sane
        if len(rows) < self._minreq:
            cherrypy.log(
                "ERROR: cowardly refusing full ldap synchronisation"
                " with only %d users received, fewer than required %d" %
                (len(rows), self._minreq))
            return
        cherrypy.log("INFO: found %d valid users in ldap" % len(rows))

        # do the internal api call for the bulk update
        result = []
        self._intreq.put(("PUT", rows, result))
        self._intreq.process()
        # status() reads the tuple under the same condition lock.
        with self._cv:
            self._full = (now, rows, ldresult, result and result[0])

    def _ldget(self, url):
        """Get data from LDAP.

        Returns the list of (dn, attributes) entries found for the CMS
        users group.  Raises RuntimeError when the server returned no entry
        at all.
        """
        result = []

        ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, self._cacertdir)
        l = ldap.initialize(url)
        # Release the connection whatever happens during the search.
        try:
            l.protocol_version = ldap.VERSION3

            # Fetch paged results from ldap server.
            # This is needed because there is a size limit on the CERN ldap
            # server side to return at most 1000 entries per request.
            # For more information, see http://tools.ietf.org/html/rfc2696.html
            srv_ctrls = [
                ldap.controls.SimplePagedResultsControl(criticality=False,
                                                        cookie="")
            ]
            while True:
                # The controls returned with the previous page are sent
                # back for the next one, so the page size has to be set on
                # every round.  It does not necessarily need to match the
                # server limit.
                srv_ctrls[0].size = 1000
                s = l.search_ext(
                    'OU=Users,OU=Organic Units,DC=cern,DC=ch',
                    ldap.SCOPE_SUBTREE,
                    '(memberOf:1.2.840.113556.1.4.1941:=CN=cms-authorized-users,OU=e-groups,OU=Workgroups,DC=cern,DC=ch)',
                    [
                        'sAMAccountName', 'displayName', 'employeeID', 'mail',
                        'altSecurityIdentities', 'userAccountControl'
                    ],
                    serverctrls=srv_ctrls,
                    sizelimit=0)
                _, res_data, _, srv_ctrls = l.result3(s, timeout=100)
                result.extend(res_data)
                # An empty cookie marks the last page.
                if not srv_ctrls[0].cookie:
                    break
        finally:
            l.unbind_s()

        if not result:
            raise RuntimeError("Ldap returned no data for %s" % url)
        return result

    def _handle_init(self, c):
        """Initialise curl handle `c`."""
        # Turns off TLS peer and host verification on the handle.  The
        # requests prepared by _int_init go to an http://localhost URL.
        c.setopt(pycurl.SSL_VERIFYPEER, 0)
        c.setopt(pycurl.SSL_VERIFYHOST, 0)

    def _respond(self, c):
        """Respond to data on curl handle `c`.

        Appends the response body to the request's result list.  Raises
        RuntimeError when the HTTP status is not 200.
        """
        code = c.getinfo(pycurl.HTTP_CODE)
        if code != 200:
            raise RuntimeError("HTTP status %d for %s" %
                               (code, c.getinfo(pycurl.EFFECTIVE_URL)))
        c.result.append(c.buffer.getvalue())

    def _int_init(self, c, method, rows, result):
        """Initialise curl handle `c` for an internal REST API request.

        Only the "PUT" `method` is supported; any other value raises
        AssertionError.  `rows` are uploaded as a form body and `result` is
        attached to the handle so that `_respond` can append the response.
        """
        # Raised explicitly rather than with an `assert` statement, which
        # would be stripped when python runs with -O.
        if method != "PUT":
            raise AssertionError("Unsupported method")

        ctype, body = self._encode(rows)
        headers = self._headers[:] + [("Content-Type", ctype),
                                      ("Content-Length", str(len(body)))]
        c.setopt(pycurl.POST, 0)
        c.setopt(pycurl.UPLOAD, 1)
        c.setopt(pycurl.URL, self._inturl)
        c.setopt(pycurl.HTTPHEADER, ["%s: %s" % h for h in headers])
        c.setopt(pycurl.READFUNCTION, StringIO(body).read)
        c.result = result

    def _encode(self, rows):
        """Encode dictionaries in `rows` for POST/PUT body as a HTML form.

        Returns a (content type, body) pair.  The key/value pairs of all
        rows are joined with "&" into one body; values are URL-quoted.
        """
        pairs = []
        for obj in rows:
            for key, value in obj.iteritems():
                # Only unicode needs encoding; calling encode() on a byte
                # string makes python 2 decode it as ascii first, which
                # fails on non-ascii bytes.
                if isinstance(value, unicode):
                    value = value.encode("utf-8")
                pairs.append("%s=%s" % (key, urllib.quote(value)))
        return ("application/x-www-form-urlencoded", "&".join(pairs))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: DataRebusFetch.py Projeto: dmwm/sitedb

class RebusFetchThread(Thread):
  """A task thread to synchronise federation pledges from REBUS.

  The thread periodically downloads the yearly pledge tables from REBUS,
  flattens them into one row per federation and year, and PUTs the rows to
  the server's own ``rebusfetch`` URL.  It subscribes itself to the cherrypy
  engine "start" and "stop" channels.  This runs on a single node only in
  the cluster.
  """

  # Default REBUS location; overridden per instance when the constructor
  # receives a `str` base URL.
  _baseurl = "http://gstat-wlcg.cern.ch/apps/pledges/resources/"
  # User agent string handed to the internal request manager.
  _ident = "SiteDB/%s Python/%s" % \
           (os.environ["SITEDB_VERSION"], ".".join(map(str, sys.version_info[:3])))

  def __init__(self, app, baseurl, mount, cacertdir = "/etc/grid-security/certificates", minreq = 1000, interval = 300, instance = "test"):
    """Set up the thread state and hook it to the cherrypy engine.

    Parameters:
      app       -- server application; `app.srvconfig` provides the
                   listening port and the cms_auth key file path.
      baseurl   -- REBUS base URL; only used when it is a `str`.
      mount     -- URL mount point used to build the internal URL.
      cacertdir -- stored on the instance; not used further here.
      minreq    -- stored on the instance; not used further here.
      interval  -- seconds between two synchronisation attempts.
      instance  -- instance name used to build the internal URL.
    """
    Thread.__init__(self, name = "RebusFetch")
    # The token serves as login for the internal requests, so draw it from
    # the operating system's entropy source instead of the seedable default
    # generator.
    self.sectoken = "".join(random.SystemRandom().sample(string.letters, 30))
    self._inturl = "http://localhost:%d%s/%s/rebusfetch" % \
                   (app.srvconfig.port, mount, instance)
    # Read the key through a context manager so the file is closed.
    with open(app.srvconfig.tools.cms_auth.key_file) as keyfile:
      authkey = keyfile.read()
    self._headers = \
      fake_authz_headers(authkey,
                         method = "Internal", login = self.sectoken,
                         name = self.__class__.__name__, dn = None,
                         roles = {"Global Admin": {"group": ["global"]}}) \
      + [("Accept", "application/json")]
    self._cv = Condition()
    if isinstance(baseurl, str):
      self._baseurl = baseurl

    self._cacertdir = cacertdir
    self._minreq = minreq
    self._interval = interval
    self._stopme = False
    # (time of last successful sync, rows sent, raw REBUS data)
    self._full = (0, [], [])
    # Maps rejected input -> time the warning was logged.
    self._warnings = {}

    self._intreq = RequestManager(num_connections = 2,
                                  user_agent = self._ident,
                                  handle_init = self._handle_init,
                                  request_respond = self._respond,
                                  request_init = self._int_init)
    cherrypy.engine.subscribe("stop", self.stop)
    cherrypy.engine.subscribe("start", self.start)

  def status(self):
    """Get the processing status.

    Returns a copy of the status tuple, whose first item is the time of the
    last successful full synchronisation with REBUS (0 if none).
    """
    with self._cv:
      return self._full[:]

  def stop(self, *args):
    """Tell the task thread to quit.  Extra arguments are ignored."""
    with self._cv:
      self._stopme = True
      self._cv.notifyAll()

  def run(self):
    """Run synchronisation thread.

    Attempts one synchronisation per interval until `stop()` has been
    called.  A failed attempt is logged with its traceback and does not end
    the loop.
    """
    while True:
      now = time.time()
      until = now + self._interval

      try:
        self._sync(now)
      except Exception as e:
        cherrypy.log("SYNC ERROR %s.%s REBUS sync failed %s"
                     % (getattr(e, "__module__", "__builtins__"),
                        e.__class__.__name__, str(e)))
        for line in format_exc().rstrip().split("\n"):
          cherrypy.log("  " + line)

      # Sleep out the rest of the interval, waking early on stop().
      with self._cv:
        while not self._stopme and now < until:
          self._cv.wait(until - now)
          now = time.time()

        if self._stopme:
          return

  def _validate(self, input, type, regexp, now):
    """Convenience method to validate REBUS data.

    Returns the match object when `input` is an instance of `type` and
    `regexp` matches it, otherwise None.  A rejected input is logged once
    and remembered in the warnings table with the time `now`.
    """
    if isinstance(input, type):
      m = regexp.match(input)
      if m:
        return m
    if input not in self._warnings:
      cherrypy.log("WARNING: REBUS data failed validation: '%s'" % input)
      self._warnings[input] = now
    return None

  def _sync(self, now):
    """Perform full synchronisation.

    Fetches the pledges, builds one row per federation and year (all values
    as strings, CPU divided by 1000, missing pledge types as "0"), sends the
    rows in a single internal PUT and records the successful run for
    `status()`.
    """
    # Delete warnings older than 24 hours.  Loop over a snapshot so entries
    # can be removed safely while iterating.
    for key, stamp in list(self._warnings.items()):
      if stamp < now - 86400:
        del self._warnings[key]

    # Get the pledge information from REBUS.
    pledges = self._ldget(self._baseurl)

    # Flatten it into one row per federation and year.
    rows = []
    for name, values in pledges.iteritems():
      for year, amounts in values["pledges"].iteritems():
        row = { 'name' : name, 'country' : values["country"],
                'year' : str(year), 'cpu' : str(0), 'disk' : str(0),
                'tape' : str(0) }
        if 'CPU' in amounts:
          row['cpu'] = str(amounts['CPU']/float(1000))
        if 'Disk' in amounts:
          row['disk'] = str(amounts['Disk'])
        if 'Tape' in amounts:
          row['tape'] = str(amounts['Tape'])
        rows.append(row)

    # Do the internal api call for the bulk update.
    result = []
    self._intreq.put(("PUT", rows, result))
    self._intreq.process()

    # Record the run so that status() reports the time of the last
    # successful synchronisation instead of staying at its initial 0.
    with self._cv:
      self._full = (now, rows, pledges)

  def _ldget(self, url):
    """Get data from REBUS.

    Returns the pledges from 2008 up to and including next year.
    NOTE(review): `url` is accepted but not used; the download address is
    fixed inside `_read_rebus_data` -- confirm this is intended.
    """
    year_next = date.today().year + 2
    return self._read_rebus_data(2008, year_next)

  def _handle_init(self, c):
    """Initialise curl handle `c`."""
    # Turns off TLS peer and host verification on the handle.  The requests
    # prepared by _int_init go to an http://localhost URL.
    c.setopt(pycurl.SSL_VERIFYPEER, 0)
    c.setopt(pycurl.SSL_VERIFYHOST, 0)

  def _respond(self, c):
    """Respond to data on curl handle `c`.

    Appends the response body to the request's result list.  Raises
    RuntimeError when the HTTP status is not 200.
    """
    code = c.getinfo(pycurl.HTTP_CODE)
    if code != 200:
      raise RuntimeError("HTTP status %d for %s" % (code, c.getinfo(pycurl.EFFECTIVE_URL)))
    c.result.append(c.buffer.getvalue())

  def _int_init(self, c, method, rows, result):
    """Initialise curl handle `c` for an internal REST API request.

    `method` is "PUT" (uploads `rows` as a form body) or "GET"; any other
    value raises AssertionError.  `result` is attached to the handle so that
    `_respond` can append the response to it.
    """
    # Raised explicitly rather than with an `assert` statement, which would
    # be stripped when python runs with -O.
    if method not in ("PUT", "GET"):
      raise AssertionError("Unsupported method")

    headers = self._headers[:]
    if method == "PUT":
      ctype, body = self._encode(rows)
      headers += [("Content-Type", ctype),
                  ("Content-Length", str(len(body)))]
      c.setopt(pycurl.POST, 0)
      c.setopt(pycurl.UPLOAD, 1)
    c.setopt(pycurl.URL, self._inturl)
    c.setopt(pycurl.HTTPHEADER, ["%s: %s" % h for h in headers])
    if method == "PUT":
      c.setopt(pycurl.READFUNCTION, StringIO(body).read)
    c.result = result

  def _encode(self, rows):
    """Encode dictionaries in `rows` for POST/PUT body as a HTML form.

    Returns a (content type, body) pair.  The key/value pairs of all rows
    are joined with "&" into one body; values are URL-quoted.
    """
    pairs = []
    for obj in rows:
      for key, value in obj.iteritems():
        # Only unicode needs encoding; calling encode() on a byte string
        # makes python 2 decode it as ascii first, which fails on non-ascii
        # bytes.
        if isinstance(value, unicode):
          value = value.encode("utf-8")
        pairs.append("%s=%s" % (key, urllib2.quote(value)))
    return ("application/x-www-form-urlencoded", "&".join(pairs))

  def _read_rebus_data(self, year, yearTo):
    """Fetch the REBUS json data for the years `year` .. `yearTo` - 1.

    Returns a dictionary of the form
      {federation: {"country": country,
                    "pledges": {year: {pledge type: CMS value}}}}
    containing only records with a true CMS value and pledge type.
    """
    data = {}
    for x in range(year, yearTo):
      # NOTE(review): this address is fixed and differs from the `_baseurl`
      # attribute -- confirm which one is meant.
      url = "http://wlcg-rebus.cern.ch/apps/pledges/resources/"+str(x)+"/all/json"
      req = urllib2.Request(url)
      opener = urllib2.build_opener()
      f = opener.open(req)
      # Close the response even when reading or parsing fails.
      try:
        pledges = json.loads(f.read())
      finally:
        f.close()
      for item in pledges:
        federname = item["Federation"]
        cms = item["CMS"]
        pledgetype = item["PledgeType"]
        country = item["Country"]
        if not (cms and pledgetype):
          continue
        # The country of the first record seen for a federation is kept;
        # later records only add pledges.
        federation = data.setdefault(federname,
                                     {"country" : country, "pledges" : {}})
        federation["pledges"].setdefault(x, {})[pledgetype] = cms
    return data