def test_latest(self):
    """Check that downloading the latest analytics completes without error.

    Builds a service from "token.dat" / "credentials.json", then runs
    ``DownloadAnalytics.latest()``. Any exception raised while building the
    downloader or running it is turned into an assertion failure carrying
    the original exception as its message.
    """
    service = init_service("token.dat", "credentials.json")
    try:
        profile = get_profile_id(service)
        DownloadAnalytics(service, profile_id=profile).latest()
    except Exception as err:
        assert False, err
def command(config_file):
    """Run the opening stages of the daily script for one CKAN instance.

    Steps performed by the code in this block:

    1. Resolve directory and filename settings from the pylons ``config``,
       falling back to defaults derived from the instance name.
    2. Count the active packages in the database; exit if there are fewer
       than 2, warn if there are fewer than 2500.
    3. If a Google Analytics token file is configured and the 'analytics'
       task is enabled (per ``run_task``), download the latest analytics.

    :param config_file: path to the CKAN .ini file. Its basename, with
        '.ini' removed, is used as the CKAN instance name.

    Calls ``sys.exit(1)`` when fewer than 2 active packages are found, or
    when ``init_service`` raises ``TypeError`` (authorization could not be
    completed). Any other exception raised by the analytics step is logged
    and swallowed.
    """
    # Import ckan as it changes the dependent packages imported
    from dump_analysis import (get_run_info, TxtAnalysisFile,
                               CsvAnalysisFile, DumpAnalysisOptions,
                               DumpAnalysis)
    from pylons import config

    # settings
    # NOTE(review): several of these settings are not referenced in the
    # part of the function visible here; they are kept untouched - confirm
    # against the rest of the script before removing any of them.
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name not in ['development', 'dgutest']:
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_analysis_dir = '~/dump_analysis'
        default_backup_dir = '~/backups'
        default_openspending_reports_dir = '~/openspending_reports'
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    analysis_dir = os.path.expanduser(
        config.get('ckan.dump_analysis_dir', default_analysis_dir))
    backup_dir = os.path.expanduser(
        config.get('ckan.backup_dir', default_backup_dir))
    openspending_reports_dir = os.path.expanduser(
        config.get('dgu.openspending_reports_dir',
                   default_openspending_reports_dir))
    # An empty default means "no token configured"; expanduser('') is ''.
    ga_token_filepath = os.path.expanduser(
        config.get('googleanalytics.token.filepath', ''))
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                                        'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    openspending_reports_url = config.get(
        'ckan.openspending_reports_url',
        'http://data.etl.openspending.org/uk25k/report/')

    log = logging.getLogger('ckanext.dgu.bin.gov_daily')
    log.info('----------------------------')
    log.info('Starting daily script')
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper
    from ckanext.dgu.lib.inventory import inventory_dumper

    # Check database looks right
    num_packages_before = model.Session.query(
        model.Package).filter_by(state='active').count()
    log.info('Number of existing active packages: %i', num_packages_before)
    if num_packages_before < 2:
        log.error('Expected more packages.')
        sys.exit(1)
    elif num_packages_before < 2500:
        log.warning('Expected more packages.')

    # Analytics
    try:
        if ga_token_filepath:
            if run_task('analytics'):
                log.info('Getting analytics for this month')
                from ckanext.ga_report.download_analytics import DownloadAnalytics
                from ckanext.ga_report.ga_auth import (init_service,
                                                       get_profile_id)
                try:
                    token, svc = init_service(ga_token_filepath, None)
                except TypeError:
                    log.error(
                        'Could not complete authorization for Google '
                        'Analytics. Have you correctly run the '
                        'getauthtoken task and specified the correct '
                        'token file?')
                    # Exit non-zero so the failure is visible to whatever
                    # invoked the script. SystemExit is not a subclass of
                    # Exception, so the handler below does not swallow it.
                    sys.exit(1)
                downloader = DownloadAnalytics(
                    svc, token=token, profile_id=get_profile_id(svc),
                    delete_first=False, skip_url_stats=False)
                downloader.latest()
        else:
            log.info(
                'No token specified, so not downloading Google Analytics data')
    except Exception as exc_analytics:
        # Best effort: an analytics failure is logged, not re-raised.
        log.error("Failed to process Google Analytics data")
        log.exception(exc_analytics)
def command(config_file): # Import ckan as it changes the dependent packages imported from dump_analysis import (get_run_info, TxtAnalysisFile, CsvAnalysisFile, DumpAnalysisOptions, DumpAnalysis) from pylons import config # settings ckan_instance_name = os.path.basename(config_file).replace('.ini', '') if ckan_instance_name not in ['development', 'dgutest']: default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name else: # test purposes default_dump_dir = '~/dump' default_analysis_dir = '~/dump_analysis' default_backup_dir = '~/backups' default_openspending_reports_dir = '~/openspending_reports' dump_dir = os.path.expanduser(config.get('ckan.dump_dir', default_dump_dir)) analysis_dir = os.path.expanduser(config.get('ckan.dump_analysis_dir', default_analysis_dir)) backup_dir = os.path.expanduser(config.get('ckan.backup_dir', default_backup_dir)) openspending_reports_dir = os.path.expanduser(config.get('dgu.openspending_reports_dir', default_openspending_reports_dir)) ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) dump_filebase = config.get('ckan.dump_filename_base', 'data.gov.uk-ckan-meta-data-%Y-%m-%d') dump_analysis_filebase = config.get('ckan.dump_analysis_base', 'data.gov.uk-analysis') backup_filebase = config.get('ckan.backup_filename_base', ckan_instance_name + '.%Y-%m-%d.pg_dump') tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp') openspending_reports_url = config.get('ckan.openspending_reports_url', 'http://data.etl.openspending.org/uk25k/report/') log = logging.getLogger('ckanext.dgu.bin.gov_daily') log.info('----------------------------') log.info('Starting daily script') start_time = datetime.datetime.today() import ckan.model as model import 
ckan.lib.dumper as dumper from ckanext.dgu.lib.inventory import inventory_dumper # Check database looks right num_packages_before = model.Session.query(model.Package).filter_by(state='active').count() log.info('Number of existing active packages: %i' % num_packages_before) if num_packages_before < 2: log.error('Expected more packages.') sys.exit(1) elif num_packages_before < 2500: log.warn('Expected more packages.') # Analytics try: if ga_token_filepath: if run_task('analytics'): log.info('Getting analytics for this month') from ckanext.ga_report.download_analytics import DownloadAnalytics from ckanext.ga_report.ga_auth import (init_service, get_profile_id) if not os.path.exists(ga_token_filepath): log.error('GA Token does not exist: %s - not downloading ' 'analytics' % ga_token_filepath) else: try: token, svc = init_service(ga_token_filepath, None) except TypeError, e: log.error('Could not complete authorization for Google ' 'Analytics. Have you correctly run the ' 'getauthtoken task and specified the correct ' 'token file?\nError: %s', e) sys.exit(1) downloader = DownloadAnalytics(svc, token=token, profile_id=get_profile_id(svc), delete_first=False) downloader.latest() else:
def command(config_file): # Import ckan as it changes the dependent packages imported from dump_analysis import (get_run_info, TxtAnalysisFile, CsvAnalysisFile, DumpAnalysisOptions, DumpAnalysis) from pylons import config # settings ckan_instance_name = os.path.basename(config_file).replace('.ini', '') if ckan_instance_name not in ['development', 'dgutest']: default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name default_analysis_dir = '/var/lib/ckan/%s/static/dump_analysis' % ckan_instance_name default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name default_openspending_reports_dir = '/var/lib/ckan/%s/openspending_reports' % ckan_instance_name else: # test purposes default_dump_dir = '~/dump' default_analysis_dir = '~/dump_analysis' default_backup_dir = '~/backups' default_openspending_reports_dir = '~/openspending_reports' dump_dir = os.path.expanduser(config.get('ckan.dump_dir', default_dump_dir)) analysis_dir = os.path.expanduser(config.get('ckan.dump_analysis_dir', default_analysis_dir)) backup_dir = os.path.expanduser(config.get('ckan.backup_dir', default_backup_dir)) openspending_reports_dir = os.path.expanduser(config.get('dgu.openspending_reports_dir', default_openspending_reports_dir)) ga_token_filepath = os.path.expanduser(config.get('googleanalytics.token.filepath', '')) dump_filebase = config.get('ckan.dump_filename_base', 'data.gov.uk-ckan-meta-data-%Y-%m-%d') dump_analysis_filebase = config.get('ckan.dump_analysis_base', 'data.gov.uk-analysis') backup_filebase = config.get('ckan.backup_filename_base', ckan_instance_name + '.%Y-%m-%d.pg_dump') tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp') openspending_reports_url = config.get('ckan.openspending_reports_url', 'http://data.etl.openspending.org/uk25k/report/') log = logging.getLogger('ckanext.dgu.bin.gov_daily') log.info('----------------------------') log.info('Starting daily script') start_time = datetime.datetime.today() import ckan.model as model import 
ckan.lib.dumper as dumper # Check database looks right num_packages_before = model.Session.query(model.Package).count() log.info('Number of existing packages: %i' % num_packages_before) if num_packages_before < 2: log.error('Expected more packages.') sys.exit(1) elif num_packages_before < 2500: log.warn('Expected more packages.') # Analytics if ga_token_filepath: if run_task('analytics'): log.info('Getting analytics for this month') from ckanext.ga_report.download_analytics import DownloadAnalytics from ckanext.ga_report.ga_auth import (init_service, get_profile_id) try: svc = init_service(ga_token_filepath, None) except TypeError: log.error('Could not complete authorization for Google Analytics.' 'Have you correctly run the getauthtoken task and ' 'specified the correct token file?') sys.exit(0) downloader = DownloadAnalytics(svc, profile_id=get_profile_id(svc), delete_first=False, skip_url_stats=False) downloader.latest() else: log.info('No token specified, so not downloading Google Analytics data') # Copy openspending reports if run_task('openspending'): log.info('Copying in OpenSpending reports') if not os.path.exists(openspending_reports_dir): log.info('Creating dump dir: %s' % openspending_reports_dir) os.makedirs(openspending_reports_dir) try: publisher_response = urllib2.urlopen('http://data.gov.uk/api/rest/group').read() except urllib2.HTTPError, e: log.error('Could not get list of publishers for OpenSpending reports: %s', e) else: try: publishers = json.loads(publisher_response) assert isinstance(publishers, list), publishers assert len(publishers) > 500, len(publishers) log.info('Got list of %i publishers starting: %r', len(publishers), publishers[:3]) except Exception, e: log.error('Could not decode list of publishers for OpenSpending reports: %s', e) else:
def command(config_file):
    """Run the opening stages of the daily script for one CKAN instance.

    Steps performed by the code in this block:

    1. Resolve directory and filename settings from the pylons ``config``,
       falling back to defaults derived from the instance name.
    2. Count the packages in the database; exit if there are fewer than 2,
       warn if there are fewer than 2500.
    3. If a Google Analytics token file is configured and the "analytics"
       task is enabled (per ``run_task``), download the latest analytics.

    :param config_file: path to the CKAN .ini file. Its basename, with
        ".ini" removed, is used as the CKAN instance name.

    Calls ``sys.exit(1)`` when fewer than 2 packages are found, or when
    ``init_service`` raises ``TypeError`` (authorization could not be
    completed). Any other exception raised by the analytics step is logged
    and swallowed.
    """
    # Import ckan as it changes the dependent packages imported
    from dump_analysis import (
        get_run_info,
        TxtAnalysisFile,
        CsvAnalysisFile,
        DumpAnalysisOptions,
        DumpAnalysis,
    )
    from pylons import config

    # settings
    # NOTE(review): several of these settings are not referenced in the
    # part of the function visible here; they are kept untouched - confirm
    # against the rest of the script before removing any of them.
    ckan_instance_name = os.path.basename(config_file).replace(".ini", "")
    if ckan_instance_name not in ["development", "dgutest"]:
        default_dump_dir = "/var/lib/ckan/%s/static/dump" % ckan_instance_name
        default_analysis_dir = "/var/lib/ckan/%s/static/dump_analysis" % ckan_instance_name
        default_backup_dir = "/var/backups/ckan/%s" % ckan_instance_name
        default_openspending_reports_dir = "/var/lib/ckan/%s/openspending_reports" % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = "~/dump"
        default_analysis_dir = "~/dump_analysis"
        default_backup_dir = "~/backups"
        default_openspending_reports_dir = "~/openspending_reports"
    dump_dir = os.path.expanduser(config.get("ckan.dump_dir", default_dump_dir))
    analysis_dir = os.path.expanduser(
        config.get("ckan.dump_analysis_dir", default_analysis_dir)
    )
    backup_dir = os.path.expanduser(
        config.get("ckan.backup_dir", default_backup_dir)
    )
    openspending_reports_dir = os.path.expanduser(
        config.get("dgu.openspending_reports_dir", default_openspending_reports_dir)
    )
    # An empty default means "no token configured"; expanduser("") is "".
    ga_token_filepath = os.path.expanduser(
        config.get("googleanalytics.token.filepath", "")
    )
    dump_filebase = config.get(
        "ckan.dump_filename_base", "data.gov.uk-ckan-meta-data-%Y-%m-%d"
    )
    dump_analysis_filebase = config.get(
        "ckan.dump_analysis_base", "data.gov.uk-analysis"
    )
    backup_filebase = config.get(
        "ckan.backup_filename_base", ckan_instance_name + ".%Y-%m-%d.pg_dump"
    )
    tmp_filepath = config.get("ckan.temp_filepath", "/tmp/dump.tmp")
    openspending_reports_url = config.get(
        "ckan.openspending_reports_url",
        "http://data.etl.openspending.org/uk25k/report/",
    )

    log = logging.getLogger("ckanext.dgu.bin.gov_daily")
    log.info("----------------------------")
    log.info("Starting daily script")
    start_time = datetime.datetime.today()

    import ckan.model as model
    import ckan.lib.dumper as dumper
    from ckanext.dgu.lib.inventory import inventory_dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).count()
    log.info("Number of existing packages: %i", num_packages_before)
    if num_packages_before < 2:
        log.error("Expected more packages.")
        sys.exit(1)
    elif num_packages_before < 2500:
        log.warning("Expected more packages.")

    # Analytics
    try:
        if ga_token_filepath:
            if run_task("analytics"):
                log.info("Getting analytics for this month")
                from ckanext.ga_report.download_analytics import DownloadAnalytics
                from ckanext.ga_report.ga_auth import init_service, get_profile_id

                try:
                    token, svc = init_service(ga_token_filepath, None)
                except TypeError:
                    log.error(
                        "Could not complete authorization for Google "
                        "Analytics. Have you correctly run the "
                        "getauthtoken task and specified the correct "
                        "token file?"
                    )
                    # Exit non-zero so the failure is visible to whatever
                    # invoked the script. SystemExit is not a subclass of
                    # Exception, so the handler below does not swallow it.
                    sys.exit(1)
                downloader = DownloadAnalytics(
                    svc,
                    token=token,
                    profile_id=get_profile_id(svc),
                    delete_first=False,
                    skip_url_stats=False,
                )
                downloader.latest()
        else:
            log.info("No token specified, so not downloading Google Analytics data")
    except Exception as exc_analytics:
        # Best effort: an analytics failure is logged, not re-raised.
        log.error("Failed to process Google Analytics data")
        log.exception(exc_analytics)