def export(cls, csv_filepath):
    """Export all active publishers to a CSV file.

    Walks every publisher tree top-down, writes one quoted CSV row per
    publisher (id, name, title, parent name/title and selected extras),
    then sanity-checks that exactly the set of active publishers in the
    database was written.

    :param csv_filepath: path of the CSV file to create (overwritten).
    """
    import csv  # local import, matching the file's in-function import style
    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log
    from ckan import model

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='publisher')
        .all())
    written_publishers = set()

    # newline='' lets the csv module control line endings (per csv docs);
    # 'with' guarantees the file is closed even if a row raises.
    with open(csv_filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(cls.header)
        # QUOTE_ALL reproduces the original "every cell quoted" layout but
        # also escapes embedded quotes, which the hand-rolled join did not.
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        for top_level_pub in publisher.get_top_level():
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                parent_publishers = publisher.get_parents(pub)
                if len(parent_publishers) > 1:
                    warn('Publisher has multiple parents. Just using first: %s %s',
                         pub.name, parent_publishers)
                parent_pub_name = parent_publishers[0].name if parent_publishers else ''
                parent_pub_title = parent_publishers[0].title if parent_publishers else ''

                # Hoisted: build the extras dict once per row, not once per cell.
                extras = dict(pub.extras)
                csv_row_values = (
                    pub.id,
                    pub.name,
                    pub.title,
                    parent_pub_name,
                    parent_pub_title,
                    extras.get('abbreviation', ''),
                    extras.get('wdtk-title', ''),
                    extras.get('website-url', ''),
                    extras.get('contact-email', ''),
                    extras.get('foi-email', ''),
                    extras.get('category', ''),
                    extras.get('spending_published_by', ''),
                )  # assume they are all strings
                log.info(','.join('"%s"' % cell for cell in csv_row_values))
                writer.writerow(csv_row_values)
            f.flush()

    # Check: every active publisher was written (written_publishers is a
    # set, so duplicates were impossible to record; the double-write case
    # is already reported via warn above).
    assert_equal(expected_publishers, written_publishers)
def export(cls, csv_filepath):
    """Export all active publishers to a CSV file.

    Walks every publisher tree top-down, writes one quoted CSV row per
    publisher (id, name, title, parent name/title and selected extras),
    then sanity-checks that exactly the set of active publishers in the
    database was written.

    :param csv_filepath: path of the CSV file to create (overwritten).
    """
    import csv  # local import, matching the file's in-function import style
    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log
    from ckan import model

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='publisher')
        .all())
    written_publishers = set()

    # newline='' lets the csv module control line endings (per csv docs);
    # 'with' guarantees the file is closed even if a row raises.
    with open(csv_filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(cls.header)
        # QUOTE_ALL reproduces the original "every cell quoted" layout but
        # also escapes embedded quotes, which the hand-rolled join did not.
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        for top_level_pub in publisher.get_top_level():
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                parent_publishers = publisher.get_parents(pub)
                if len(parent_publishers) > 1:
                    warn('Publisher has multiple parents. Just using first: %s %s',
                         pub.name, parent_publishers)
                parent_pub_name = parent_publishers[0].name if parent_publishers else ''
                parent_pub_title = parent_publishers[0].title if parent_publishers else ''

                # Hoisted: build the extras dict once per row, not once per cell.
                extras = dict(pub.extras)
                csv_row_values = (
                    pub.id,
                    pub.name,
                    pub.title,
                    parent_pub_name,
                    parent_pub_title,
                    extras.get('abbreviation', ''),
                    extras.get('wdtk-title', ''),
                    extras.get('website-url', ''),
                    extras.get('contact-email', ''),
                    extras.get('foi-email', ''),
                    extras.get('category', ''),
                    extras.get('spending_published_by', ''),
                )  # assume they are all strings
                log.info(','.join('"%s"' % cell for cell in csv_row_values))
                writer.writerow(csv_row_values)
            f.flush()

    # Check: every active publisher was written (written_publishers is a
    # set, so duplicates were impossible to record; the double-write case
    # is already reported via warn above).
    assert_equal(expected_publishers, written_publishers)
def export(cls, csv_filepath):
    """Export all active publishers to a CSV file (compact five-column form).

    Walks every publisher tree top-down and writes one quoted row per
    publisher: id, title, top-level parent title (blank for the top-level
    publisher itself), category and spending_published_by extras. Finally
    checks that exactly the set of active publishers was written.

    :param csv_filepath: path of the CSV file to create (overwritten).
    """
    import csv  # local import, matching the file's in-function import style
    csv_filepath = os.path.abspath(csv_filepath)
    log = global_log
    from ckan import model

    expected_publishers = set(
        model.Session.query(model.Group)
        .filter_by(state='active')
        .filter_by(type='publisher')
        .all())
    written_publishers = set()

    # newline='' lets the csv module control line endings (per csv docs);
    # 'with' guarantees the file is closed even if a row raises.
    with open(csv_filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(cls.header)
        # QUOTE_ALL keeps the original "every cell quoted" layout while
        # correctly escaping embedded quotes, unlike the hand-built line.
        writer = csv.writer(f, quoting=csv.QUOTE_ALL)
        for top_level_pub in publisher.get_top_level():
            for pub in publisher.go_down_tree(top_level_pub):
                if pub in written_publishers:
                    warn('publisher written twice: %s %s', pub.name, pub.id)
                written_publishers.add(pub)

                # NOTE(review): only the tree root's title is recorded as the
                # parent, even for deeper descendants — preserved as-is.
                parent_pub_title = (top_level_pub.title
                                    if top_level_pub.id != pub.id else '')

                # Hoisted: build the extras dict once per row, not twice.
                extras = dict(pub.extras)
                csv_row_values = (
                    pub.id,
                    pub.title,
                    parent_pub_title,
                    extras.get('category', ''),
                    extras.get('spending_published_by', ''),
                )
                log.info(','.join('"%s"' % cell for cell in csv_row_values))
                writer.writerow(csv_row_values)
            f.flush()

    # Check: every active publisher was written (written_publishers is a
    # set, so duplicates were impossible to record; the double-write case
    # is already reported via warn above).
    assert_equal(expected_publishers, written_publishers)