def command(cls, config_ini):
    '''Create/refresh a publisher for every published Drupal organisation.

    Loads the CKAN config found at ``config_ini``, sets up logging and
    the model, then calls ``cls.add_publisher`` with the ``nid`` of each
    organisation returned by the Drupal client's
    ``get_organisation_list``.  Organisations whose ``status`` is not
    ``'1'`` are skipped, with the single exception of nid ``'16248'``.
    Finally logs how many groups of type 'organization' exist and the
    value of ``warnings`` (defined outside this function).

    :param config_ini: path to the CKAN config (.ini) file
    '''
    config_ini_filepath = os.path.abspath(config_ini)
    cls.load_config(config_ini_filepath)
    engine = engine_from_config(config, 'sqlalchemy.')

    # Imported here rather than at module level (presumably so that the
    # config is loaded first - confirm before moving).
    from ckan import model
    from ckanext.dgu.drupalclient import DrupalClient

    logging.config.fileConfig(config_ini_filepath)
    # The logger is also published as a module global.
    global global_log
    log = global_log = logging.getLogger(os.path.basename(__file__))

    model.init_model(engine)
    model.repo.new_revision()

    xmlrpc_settings = {
        'xmlrpc_domain': 'data.gov.uk',
        'xmlrpc_username': '******',
        'xmlrpc_password': config.get('dgu.xmlrpc_password'),
    }
    cls.drupal_client = DrupalClient(xmlrpc_settings)

    for publisher in cls.drupal_client.get_organisation_list():
        # Make an exception for 16248 - Met Office under BIS is correct
        if publisher['status'] != '1' and publisher['nid'] != '16248':
            log.info('Ignoring unpublished publisher with status %r: %r',
                     publisher['status'], publisher)
            continue
        cls.add_publisher(publisher['nid'])

    organisations = model.Session.query(model.Group)
    organisations = organisations.filter(
        model.Group.type == 'organization')
    all_groups = organisations.order_by('title').all()
    log.info('Total number of groups: %i', len(all_groups))
    log.info('Warnings: %r', warnings)
def test_sync_one(self):
    '''Syncing organisation 16203 (HESA) creates it and its department.

    Starts from an empty group table, syncs the one organisation and
    checks that exactly two groups result: HESA itself and BIS, which
    HESA records as its department.
    '''
    assert model.Session.query(model.Group).count() == 0

    revision = model.repo.new_revision()
    revision.author = 'okfn_maintenance'
    revision.message = 'Syncing organisations.'

    sync.sync_organisation(DrupalClient(), '16203')  # HESA
    model.repo.commit_and_remove()

    assert_equal(model.Session.query(model.Group).count(), 2)
    hesa = model.Group.get(u'higher-education-statistics-agency')
    bis = model.Group.get(
        u'department-for-business-innovation-and-skills')
    assert hesa
    assert bis

    assert_equal(hesa.title, 'Higher Education Statistics Agency')
    assert_equal(hesa.extras['drupal_id'], '16203')
    assert_equal(hesa.extras['department_id'], bis.id)

    assert_equal(bis.title,
                 'Department for Business, Innovation and Skills')
    assert_equal(bis.extras['drupal_id'], '11399')
    # The department's own department_id is expected to be itself.
    assert_equal(bis.extras['department_id'], bis.id)
def department_or_agency_to_organisation(cls, dept_or_agency,
                                         include_id=True,
                                         organisation_cache=None,
                                         drupal_client_cache=None,
                                         ):
    '''Map a department/agency name to its Drupal organisation.

    The name is first matched as given; if Drupal raises
    ``DrupalKeyError`` it is retried after passing it through
    ``canonise_organisation_name``.  The outcome (including a failed
    lookup) is stored in ``organisation_cache`` under the original
    ``dept_or_agency`` key, so repeated calls with the same cache do not
    query Drupal again.

    :param dept_or_agency: name to look up
    :param include_id: when true return ``'<name> [<id>]'``, otherwise
        just ``'<name>'``
    :param organisation_cache: dict of
        ``{dept_or_agency: (name, id) or None}``; it is updated in place.
        A fresh dict is used when None.
    :param drupal_client_cache: client object providing
        ``match_organisation`` and ``get_organisation_name``.  A new
        ``DrupalClient`` is created only when this is None.
    :returns: the formatted organisation string, or None if not found.
    '''
    if organisation_cache is None:
        organisation_cache = {}
    # Compare against None explicitly: a truthiness test would silently
    # throw away a caller-supplied client that happens to evaluate false.
    if drupal_client_cache is None:
        drupal_client_cache = DrupalClient()

    if dept_or_agency not in organisation_cache:
        try:
            organisation_id = drupal_client_cache.match_organisation(
                dept_or_agency)
        except DrupalKeyError:
            # Second attempt with the canonical spelling of the name.
            name = canonise_organisation_name(dept_or_agency)
            try:
                organisation_id = drupal_client_cache.match_organisation(
                    name)
            except DrupalKeyError:
                organisation_id = None
        if organisation_id:
            organisation_name = drupal_client_cache.get_organisation_name(
                organisation_id)
            organisation_cache[dept_or_agency] = (organisation_name,
                                                  organisation_id)
        else:
            # Remember the miss too, so it is not looked up again.
            organisation_cache[dept_or_agency] = None

    cached = organisation_cache[dept_or_agency]
    if not cached:
        return None
    if include_id:
        return '%s [%s]' % cached
    return '%s' % cached[0]
def generate(cls, xmlrpc_settings):
    '''Write a JSON file describing a fixed set of Drupal organisations.

    Each name is canonised and matched against Drupal; for every match
    the organisation's proper name and parent department id are stored
    under its id.  The resulting dict is dumped as JSON to
    ``cls.lots_of_orgs_filepath``.  The process exits with status 1 if
    any name could not be matched.

    :param xmlrpc_settings: settings passed straight to ``DrupalClient``
    '''
    drupal = DrupalClient(xmlrpc_settings)
    orgs = {}
    has_errors = False
    names_to_look_up = set(['Northern Ireland Executive'])

    for raw_name in names_to_look_up:
        org_name = canonise_organisation_name(raw_name)
        org_id = drupal.match_organisation(org_name)
        # NOTE(review): this relies on match_organisation returning False
        # for an unknown name - confirm it does not raise instead.
        if org_id == False:
            log.error('Could not find organisation %r', org_name)
            has_errors = True
            continue
        proper_org_name = drupal.get_organisation_name(org_id)
        parent_department_id = drupal.get_department_from_organisation(
            org_id)
        orgs[org_id] = {
            'name': proper_org_name,
            'parent_department_id': parent_department_id,
        }

    # The file is written even when some lookups failed.
    with open(cls.lots_of_orgs_filepath, 'w') as f:
        f.write(json.dumps(orgs))

    if has_errors:
        print('Finished with ERRORS')
        sys.exit(1)
    print('Finished with SUCCESS')
def get_user_realname(user):
    '''Fetch the first name and surname Drupal holds for a CKAN user.

    Only users whose name starts with ``user_d`` are looked up; the rest
    of the name is used as the Drupal user id.  If Drupal cannot be
    queried, ``user.fullname`` is returned instead.  A name part that is
    missing from the Drupal properties becomes the empty string.

    NOTE(review): the visible body stops after ``first_name`` and
    ``surname`` have been computed and never returns them - confirm
    whether a final return statement was lost.

    :param user: object with ``name`` and ``fullname`` attributes
    '''
    from ckanext.dgu.drupalclient import DrupalClient
    from HTMLParser import HTMLParser

    if user.name.startswith('user_d'):
        user_id = user.name[len('user_d'):]
        html_parser = HTMLParser()
        try:
            dc = DrupalClient()
            properties = dc.get_user_properties(user_id)
        except Exception:
            # Best effort: any failure talking to Drupal falls back to
            # the name CKAN already has.
            return user.fullname

        def _unescaped_field(field_name):
            # Value of one Drupal field with HTML entities decoded, or ''
            # when the field is absent or malformed.  ``Exception``
            # rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            try:
                safe_value = properties[field_name]['und'][0]['safe_value']
                return html_parser.unescape(safe_value)
            except Exception:
                return ''

        first_name = _unescaped_field('field_first_name')
        surname = _unescaped_field('field_surname')
def test_match_organisation(self):
    '''match_organisation returns the id for a known name and raises
    DrupalKeyError for an empty or missing name.'''
    # Return value is not needed here; the call itself is kept in case
    # it has set-up side effects.
    get_mock_drupal_config()
    client = DrupalClient()

    assert_equal(client.match_organisation('Ealing PCT'), '2')

    for unmatched_name in ('', None):
        assert_raises(DrupalKeyError, client.match_organisation,
                      unmatched_name)
def command(cls, config_ini):
    '''Export Drupal users and their publishers to ``users.csv``.

    Loads the CKAN config found at ``config_ini``, then asks Drupal for
    the properties of every node id from 28 up to (not including) 35000.
    Ids for which Drupal answers with an error containing '404' are
    skipped.  One CSV row ``[uid, name, mail, publisher]`` is written per
    publisher of each user; users with more than 100 publishers are
    reported through ``warn`` and left out.

    :param config_ini: path to the CKAN config (.ini) file
    :raises DrupalRequestError: for any Drupal error that is not a 404
    '''
    config_ini_filepath = os.path.abspath(config_ini)
    cls.load_config(config_ini_filepath)
    engine = engine_from_config(config, 'sqlalchemy.')

    from ckan import model
    from ckanext.dgu.drupalclient import DrupalClient, DrupalRequestError
    import ckanext.dgu.drupalclient

    logging.config.fileConfig(config_ini_filepath)
    log = logging.getLogger(os.path.basename(__file__))
    # The logger is also published as a module global.
    global global_log
    global_log = log

    model.init_model(engine)
    model.repo.new_revision()

    # disable xmlrpc logs
    ckanext.dgu.drupalclient.log.disabled = True

    cls.drupal_client = DrupalClient({
        'xmlrpc_domain': 'data.gov.uk',
        'xmlrpc_username': '******',
        'xmlrpc_password': config.get('dgu.xmlrpc_password'),
    })

    # ``with`` guarantees the CSV file is closed even when an unexpected
    # Drupal error is re-raised part-way through the export.
    with open('users.csv', 'wb') as f:
        users = csv.writer(f, quoting=csv.QUOTE_ALL)
        rows = []  # every row written; not read again in the visible code
        for nid in range(28, 35000):
            try:
                user = cls.drupal_client.get_user_properties(nid)
            except DrupalRequestError as e:
                if '404' in str(e):
                    # node not a user
                    continue
                raise

            publishers = user['publishers']
            if len(publishers) > 1:
                log.info('Multiple publishers for user %s [%s]!: %r',
                         user['name'], user['uid'],
                         repr(publishers)[:100])
            if len(publishers) > 100:
                warn('Ignoring user %s [%s] with %i publishers!',
                     user['name'], user['uid'], len(publishers))
                continue

            for publisher in publishers:
                row = [user['uid'], user['name'], user['mail'], publisher]
                rows.append(row)
                log.info('User: %r', row)
                users.writerow(row)
            # Flush after each user so rows already fetched reach the
            # file even if a later request raises.
            f.flush()
def _do_drupal_login(self, environ, drupal_session_id, new_headers):
    '''Ask Drupal which user owns ``drupal_session_id``.

    A ``DrupalClient`` is created on first use and kept on
    ``self.drupal_client``.  If Drupal rejects the request the error is
    logged and the method returns None.

    NOTE(review): the visible body ends once the user id has been
    fetched; ``environ`` and ``new_headers`` are not used in it.
    '''
    client = self.drupal_client
    if client is None:
        client = self.drupal_client = DrupalClient()

    # ask drupal for the drupal_user_id for this session
    try:
        drupal_user_id = client.get_user_id_from_session_id(
            drupal_session_id)
    except DrupalRequestError as e:
        log.error('Error checking session with Drupal: %s', e)
        return
def test_get_user_properties(self):
    '''get_user_properties returns a dict whose name and publishers
    match the mock Drupal configuration for user 62.'''
    test_user_id = '62'
    expected_user = get_mock_drupal_config()['test_users'][test_user_id]

    user = DrupalClient().get_user_properties(test_user_id)

    assert user
    assert isinstance(user, dict)
    for key in ('name', 'publishers'):
        assert_equal(user[key], expected_user[key])
def test_get_organisation(self):
    '''get_organisation_name accepts the id as str or int and raises
    DrupalKeyError for unknown, empty or missing ids.'''
    # Return value is not needed here; the call itself is kept in case
    # it has set-up side effects.
    get_mock_drupal_config()
    client = DrupalClient()

    for org_id in ('2', 2):
        assert_equal(client.get_organisation_name(org_id), 'Ealing PCT')

    for unknown_id in ('999', '', None):
        assert_raises(DrupalKeyError, client.get_organisation_name,
                      unknown_id)
def sync(self, write, user):
    '''Bring CKAN's copies of Drupal users in line with Drupal.

    Looks at every active CKAN user whose name matches ``user_d%``
    (optionally only those whose fullname equals ``user``) and asks
    Drupal for that user's properties:

    * if Drupal answers that there is no user with that id, or that
      access is denied for the user, the CKAN user is deleted;
    * otherwise the ``email`` and ``fullname`` fields are copied across
      wherever they differ.

    Every outcome is recorded with ``stats.add`` and logged.

    :param write: when false nothing is modified; the outcomes are only
        recorded and logged (dry run)
    :param user: fullname to restrict the sync to; falsy means all users
    :raises DrupalRequestError: any other Drupal error, re-raised as is
    '''
    from ckan import model
    from ckanext.dgu.drupalclient import DrupalClient, DrupalRequestError
    from ckanext.dgu.authentication.drupal_auth import DrupalUserMapping

    log = self.log
    update_keys = set(('email', 'fullname'))
    drupal = DrupalClient()

    users = model.Session.query(model.User)\
        .filter_by(state='active')\
        .filter(model.User.name.like('user_d%'))
    if user:
        users = users.filter(model.User.fullname == user)
    users = users.all()
    log.info('Drupal users in CKAN: %s', len(users))

    # Loop variable deliberately differs from the ``user`` parameter.
    for ckan_user in users:
        drupal_user_id = DrupalUserMapping.ckan_user_name_to_drupal_id(
            ckan_user.name)
        try:
            drupal_user = drupal.get_user_properties(drupal_user_id)
        except DrupalRequestError as e:
            error_text = str(e)
            if 'There is no user with ID' in error_text:
                removal_category = 'Removed deleted user'
            elif 'Access denied for user' in error_text:
                removal_category = 'Removed blocked user'
            else:
                # Unexpected failure: re-raise the caught exception so
                # its message and traceback are kept, rather than
                # raising a new, empty DrupalRequestError.
                raise
            log.info(
                stats.add(removal_category,
                          '%s %s' % (drupal_user_id, ckan_user.fullname)))
            if write:
                ckan_user.delete()
            continue

        user_dict = DrupalUserMapping.drupal_user_to_ckan_user(drupal_user)
        user_changed = False
        for key in update_keys:
            if getattr(ckan_user, key) != user_dict[key]:
                log.info(
                    stats.add(
                        'Updating field %s' % key,
                        '%s %s %s->%s' % (drupal_user_id,
                                          ckan_user.fullname,
                                          getattr(ckan_user, key),
                                          user_dict[key])))
                if write:
                    setattr(ckan_user, key, user_dict[key])
                # Counted as changed in dry-run mode too, so the user is
                # not additionally reported as unchanged.
                user_changed = True
        if not user_changed:
            log.info(
                stats.add('Unchanged user',
                          '%s %s' % (drupal_user_id, ckan_user.fullname)))
def _do_drupal_login(self, environ, drupal_session_id, new_headers):
    '''Given the session ID from a Drupal cookie, check it with Drupal,
    create/modify the equivalent CKAN user with properties copied from
    Drupal and log the person in with auth_tkt and its cookie.

    NOTE(review): only the first step - checking the session with
    Drupal - is present in the visible body.  On a Drupal request error
    the problem is logged and None is returned.
    '''
    client = self.drupal_client
    if client is None:
        # Created on first use and kept for later calls.
        client = self.drupal_client = DrupalClient()

    # ask drupal for the drupal_user_id for this session
    try:
        drupal_user_id = client.get_user_id_from_session_id(
            drupal_session_id)
    except DrupalRequestError as e:
        log.error('Error checking session with Drupal: %s', e)
        return
def test_get_department_from_organisation(self):
    '''get_department_from_organisation accepts the id as str or int
    and raises DrupalKeyError for unknown, empty or missing ids.'''
    # Return value is not needed here; the call itself is kept in case
    # it has set-up side effects.
    get_mock_drupal_config()
    client = DrupalClient()

    for org_id in ('2', 2):
        assert_equal(client.get_department_from_organisation(org_id), '7')

    for unknown_id in ('999', '', None):
        assert_raises(DrupalKeyError,
                      client.get_department_from_organisation,
                      unknown_id)
load_config(config_ini_filepath) register_translator() logging.config.fileConfig(config_ini_filepath) log = logging.getLogger(os.path.basename(__file__)) # import CKAN here, rather than earlier, else their logging wont work from ckan import model from ckanext.dgu.lib import publisher as publisher_lib from ckanext.dgu.drupalclient import DrupalClient, DrupalRequestError from pylons import config from running_stats import StatsList # Drupal API drupal = DrupalClient({ 'xmlrpc_domain': 'data.gov.uk', 'xmlrpc_username': config.get('dgu.xmlrpc_username'), 'xmlrpc_password': config.get('dgu.xmlrpc_password') }) user_emails = {} # name:email def get_email_for_user(user): if user.name not in user_emails: if 'user_d' in user.name: user_drupal_id = user.name.replace('user_d', '') try: user_properties = drupal.get_user_properties(user_drupal_id) except DrupalRequestError, e: user_emails[user.name] = user.email else:
def _drupal_client(cls):
    '''Return the DrupalClient shared through ``cls``.

    The client is created the first time it is asked for and stored on
    ``cls._drupal_client_cache``; later calls return that same object.
    '''
    if hasattr(cls, '_drupal_client_cache'):
        return cls._drupal_client_cache
    cls._drupal_client_cache = DrupalClient()
    return cls._drupal_client_cache
def __call__(self, environ, start_response):
    '''WSGI entry point: sign a Drupal-authenticated visitor into CKAN.

    The request's cookie header is inspected with ``is_ckan_signed_in``
    and ``drupal_extract_cookie``.  When the visitor has a Drupal session
    but is not signed in to CKAN:

    * the user's properties are fetched from Drupal and stored in
      ``environ`` under ``drupal.uid``, ``drupal.publishers`` and
      ``drupal.name``;
    * a CKAN user named after the Drupal uid is created if there is none;
    * auth_tkt is asked for its Set-Cookie headers and three more
      (``ckan_apikey``, ``ckan_display_name``, ``ckan_user``) are derived
      from the first of them; all are added to the response;
    * the same three cookies are appended to the request's own
      ``HTTP_COOKIE`` so the rest of the stack sees them on this request.

    In every other case the request is passed through untouched apart
    from ``drupal.uid`` and ``drupal.publishers`` being set to None.

    :param environ: the WSGI environment (modified in place)
    :param start_response: the WSGI start_response callable
    :returns: whatever the wrapped ``self.app`` returns
    '''
    if self.drupal_client is None:
        self.drupal_client = DrupalClient()

    # establish from the cookie whether ckan and drupal are signed in
    ckan_signed_in = False
    drupal_signed_in = False
    for key, value in environ.items():
        if key.lower() == 'http_cookie':
            ckan_signed_in = is_ckan_signed_in(value)
            drupal_signed_in = drupal_extract_cookie(value)

    environ['drupal.uid'] = None
    environ['drupal.publishers'] = None

    if not drupal_signed_in or ckan_signed_in:
        # Nothing to do: no Drupal session, or CKAN is signed in already.
        return self.app(environ, start_response)

    # get info about the user from drupal and store in environ for
    # use by main CKAN app
    user_id = self.drupal_client.get_user_id_from_session_id(
        drupal_signed_in)
    res = self.drupal_client.get_user_properties(user_id)
    environ['drupal.uid'] = res['uid']
    environ['drupal.publishers'] = res['publishers']
    environ['drupal.name'] = res['name']

    from ckan import model
    from ckan.model.meta import Session

    def munge(username):
        # Lower-case the name and replace spaces with underscores.  The
        # result must be returned: str methods do not modify in place.
        return username.lower().replace(' ', '_')

    # The same munged name is used for both the lookup and the creation,
    # so a user created here is always found again on the next login.
    ckan_user_name = munge(unicode(environ['drupal.uid']))

    # Add the new Drupal user if they don't already exist.
    query = Session.query(model.User).filter_by(name=ckan_user_name)
    if not query.count():
        user = model.User(
            name=ckan_user_name,
            fullname=unicode(environ['drupal.name']),
            about=u'Drupal auto-generated user',
        )
        Session.add(user)
        Session.commit()
    else:
        user = query.one()

    # We want to store values in the user's cookie, so
    # prepare the response header with this value,
    # using auth_tkt to sign it.
    new_header = environ['repoze.who.plugins']['auth_tkt'].remember(
        environ,
        {
            'repoze.who.userid': environ['drupal.uid'],
            'tokens': '',
            'userdata': '',
        })
    # e.g. new_header = [('Set-Cookie', 'bob=ab48fe; Path=/;')]
    cookie_template = new_header[0][1].split('; ')

    # Also need these cookies to work too (example values):
    #   ckan_apikey        "3a51edc6-6461-46b8-bfe2-57445cbdeb2b"
    #   ckan_display_name  "James Gardner"
    #   ckan_user          "4466"
    # each with Path / and expiring at end of session.
    cookie_string = ''
    for name, value in [
        ('ckan_apikey', user.apikey),
        ('ckan_display_name', user.fullname),
        ('ckan_user', user.name),
    ]:
        cookie_pair = '%s="%s"' % (name, value)
        cookie_string += '; ' + cookie_pair
        # Reuse the attributes (path etc.) of the auth_tkt cookie,
        # swapping in this cookie's name=value as the first element.
        new_cookie = cookie_template[:]
        new_cookie[0] = cookie_pair
        new_header.append(('Set-Cookie', str('; '.join(new_cookie))))

    # @@@ Need to add the headers to the request too so that the rest of
    # the stack can sign the user in.
    #
    # There is a bug(/feature?) in line 628 of Cookie.py that means
    # it can't load from unicode strings. This causes Beaker to fail
    # unless the value here is a string
    # NOTE(review): str() of a non-ASCII fullname would raise on
    # Python 2 - confirm how such names should be encoded.
    if environ.get('HTTP_COOKIE'):
        # Keep the cookies the browser sent and append ours.
        environ['HTTP_COOKIE'] += str(cookie_string)
    else:
        # No cookie header yet: drop the leading '; ' separator.
        environ['HTTP_COOKIE'] = str(cookie_string[2:])

    def cookie_setting_start_response(status, headers, exc_info=None):
        headers += new_header
        return start_response(status, headers, exc_info)

    return self.app(environ, cookie_setting_start_response)
def __init__(self, xmlrpc_settings=None):
    '''Create the Drupal client and an empty organisation cache.

    :param xmlrpc_settings: passed straight to ``DrupalClient``
    '''
    drupal_client = DrupalClient(xmlrpc_settings)
    self._drupal_client_cache = drupal_client
    # {dept_or_agency:('name', 'id')}
    self._organisation_cache = dict()
def sync(xmlrpc_settings):
    '''Call ``sync_organisation`` for every id from 10000 to 19999.

    A single ``DrupalClient`` built from ``xmlrpc_settings`` is shared
    by all the calls.
    '''
    client = DrupalClient(xmlrpc_settings)
    first_id, end_id = 10000, 20000  # end_id itself is not synced
    for candidate_id in range(first_id, end_id):
        sync_organisation(client, candidate_id)