Example #1
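A daily maintenance script for a CKAN instance (data.gov.uk): it loads the given .ini config, dumps all package metadata to zipped CSV and JSON files, runs an analysis over the JSON dump, and finishes with a gzipped pg_dump backup of the database. The code is Python 2 / Pylons-era CKAN (print statements, from pylons import config).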
import datetime
import logging
import os
import sys
import zipfile


def command():
    USAGE = '''Daily script for government
    Usage: python %s [config.ini]
    ''' % sys.argv[0]
    if len(sys.argv) < 2 or sys.argv[1] in ('--help', '-h'):
        err = 'Error: Please specify config file.'
        print USAGE, err
        logging.error('%s\n%s' % (USAGE, err))
        sys.exit(1)
    config_file = sys.argv[1]
    path = os.path.abspath(config_file)

    load_config(path)

    # The Pylons config object is only populated once load_config() has run
    from pylons import config

    # settings
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name != 'development':
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_log_dir = '/var/log/ckan/%s' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_backup_dir = '~/backups'
        default_log_dir = '~'
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    backup_dir = os.path.expanduser(config.get('ckan.backup_dir',
                                               default_backup_dir))
    log_dir = os.path.expanduser(config.get('ckan.log_dir',
                                            default_log_dir))
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                                        'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    log_filepath = os.path.join(log_dir, 'gov-daily.log')
    print 'Logging to: %s' % log_filepath
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    logging.basicConfig(filename=log_filepath, level=logging.INFO)
    logging.info('----------------------------')
    logging.info('Starting daily script')
    start_time = datetime.datetime.today()
    logging.info(start_time.strftime('%H:%M %d-%m-%Y'))

    # Deferred imports: these need the config to be loaded first
    import ckan.model as model
    import ckan.lib.dumper as dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).count()
    logging.info('Number of existing packages: %i' % num_packages_before)
    if num_packages_before < 2:
        logging.error('Expected at least 2 packages in the database, '
                      'but found only %i. Aborting.' % num_packages_before)
        sys.exit(1)
    elif num_packages_before < 2500:
        logging.warn('Expected at least 2500 packages in the database, '
                     'but found only %i.' % num_packages_before)

    # Create dumps for users
    logging.info('Creating database dump')
    if not os.path.exists(dump_dir):
        logging.info('Creating dump dir: %s' % dump_dir)
        os.makedirs(dump_dir)
    query = model.Session.query(model.Package)
    dump_file_base = start_time.strftime(dump_filebase)
    # Quieten the noisy "MARKDOWN" logger during the dump
    logging.getLogger("MARKDOWN").setLevel(logging.WARN)
    simple_dumper = dumper.SimpleDumper()
    for file_type, dumper_ in (('csv', simple_dumper.dump_csv),
                               ('json', simple_dumper.dump_json)):
        dump_filename = '%s.%s' % (dump_file_base, file_type)
        dump_filepath = os.path.join(dump_dir, dump_filename + '.zip')
        tmp_file = open(tmp_filepath, 'w')
        logging.info('Creating %s file: %s' % (file_type, dump_filepath))
        dumper_(tmp_file, query)
        tmp_file.close()
        dump_file = zipfile.ZipFile(dump_filepath, 'w', zipfile.ZIP_DEFLATED)
        dump_file.write(tmp_filepath, dump_filename)
        dump_file.close()
    report_time_taken()

    # Dump analysis
    logging.info('Creating dump analysis')
    json_dump_filepath = os.path.join(dump_dir, '%s.json.zip' % dump_file_base)
    txt_filepath = os.path.join(dump_dir, dump_analysis_filebase + '.txt')
    csv_filepath = os.path.join(dump_dir, dump_analysis_filebase + '.csv')
    run_info = get_run_info()
    options = DumpAnalysisOptions(analyse_by_source=True)
    analysis = DumpAnalysis(json_dump_filepath, options)
    logging.info('Saving dump analysis')
    output_types = (
        # (output_filepath, analysis_file_class)
        (txt_filepath, TxtAnalysisFile),
        (csv_filepath, CsvAnalysisFile),
        )
    for output_filepath, analysis_file_class in output_types:
        logging.info('Saving dump analysis to: %s' % output_filepath)
        analysis_file = analysis_file_class(output_filepath, run_info)
        analysis_file.add_analysis(analysis.date, analysis.analysis_dict)
        analysis_file.save()
    report_time_taken()

    # Create complete backup
    logging.info('Creating database backup')
    if not os.path.exists(backup_dir):
        logging.info('Creating backup dir: %s' % backup_dir)
        os.makedirs(backup_dir)

    db_details = get_db_config(config)
    pg_dump_filename = start_time.strftime(backup_filebase)
    pg_dump_filepath = os.path.join(backup_dir, pg_dump_filename)
    cmd = 'export PGPASSWORD=%(db_pass)s && pg_dump ' % db_details
    for pg_dump_option, db_details_key in (('U', 'db_user'),
                                           ('h', 'db_host'),
                                           ('p', 'db_port')):
        if db_details.get(db_details_key):
            cmd += '-%s %s ' % (pg_dump_option, db_details[db_details_key])
    cmd += '%s > %s' % (db_details['db_name'], pg_dump_filepath)
    logging.info('Backup command: %s' % cmd)
    ret = os.system(cmd)
    if ret == 0:
        logging.info('Backup successful: %s' % pg_dump_filepath)
        logging.info('Zipping up backup')
        pg_dump_zipped_filepath = pg_dump_filepath + '.gz'
        cmd = 'gzip %s' % pg_dump_filepath
        logging.info('Zip command: %s' % cmd)
        ret = os.system(cmd)
        if ret == 0:
            logging.info('Backup gzip successful: %s' % pg_dump_zipped_filepath)
        else:
            logging.error('Backup gzip error: %s' % ret)
    else:
        logging.error('Backup error: %s' % ret)

    # Log footer
    report_time_taken()
    logging.info('Finished daily script')
    logging.info('----------------------------')
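Example #1 calls several helpers that live elsewhere in the original module and are not shown here (load_config, report_time_taken, get_run_info, get_db_config, and the DumpAnalysis classes). A minimal sketch of what the three simplest helpers might look like; the bodies are assumptions for illustration, not the originals:

import datetime
import logging
import socket

from sqlalchemy.engine.url import make_url

START_TIME = datetime.datetime.today()


def report_time_taken():
    # Log elapsed wall-clock time since the script started.
    logging.info('Time taken: %s', datetime.datetime.today() - START_TIME)


def get_run_info():
    # A short provenance string stamped into the analysis files.
    return 'Ran at %s on host %s' % (datetime.datetime.now().isoformat(),
                                     socket.gethostname())


def get_db_config(config):
    # Assumption: CKAN keeps its connection string in sqlalchemy.url;
    # split it into the pieces the pg_dump command line needs.
    url = make_url(config['sqlalchemy.url'])
    return {'db_user': url.username, 'db_pass': url.password,
            'db_host': url.host, 'db_port': url.port,
            'db_name': url.database}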
Example #2
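A later variant of the same script, refactored so that each step is gated behind a run_task(...) check and can be run selectively; logging now goes through a module-level log object and the analysis files get their own analysis_dir. The snippet covers only the dump-analysis task and is cut off at the start of the backup task.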
    # Dump analysis
    if run_task('dump_analysis'):
        log.info('Doing dump analysis')
        dump_file_base = start_time.strftime(dump_filebase)
        json_dump_filepath = os.path.join(dump_dir,
                                          '%s.json.zip' % dump_file_base)
        txt_filepath = os.path.join(analysis_dir,
                                    dump_analysis_filebase + '.txt')
        csv_filepath = os.path.join(analysis_dir,
                                    dump_analysis_filebase + '.csv')
        log.info('Input: %s', json_dump_filepath)
        log.info('Output: %s & %s', txt_filepath, csv_filepath)
        if not os.path.exists(analysis_dir):
            log.info('Creating dump analysis dir: %s' % analysis_dir)
            os.makedirs(analysis_dir)
        run_info = get_run_info()
        options = DumpAnalysisOptions(analyse_by_source=True)
        analysis = DumpAnalysis(json_dump_filepath, options)
        output_types = (
            # (output_filepath, analysis_file_class)
            (txt_filepath, TxtAnalysisFile),
            (csv_filepath, CsvAnalysisFile),
        )
        for output_filepath, analysis_file_class in output_types:
            log.info('Saving dump analysis to: %s' % output_filepath)
            analysis_file = analysis_file_class(output_filepath, run_info)
            analysis_file.add_analysis(analysis.date, analysis.analysis_dict)
            analysis_file.save()
        report_time_taken(log)

    if run_task('backup'):
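The run_task() gate is also defined outside the snippet. A plausible minimal sketch, assuming the tasks to run are named on the command line after the config file (this selection mechanism is an assumption, not shown in the source):

import sys

# Assumption: run everything by default; otherwise run only the task
# names listed after the config file on the command line.
TASKS_TO_RUN = sys.argv[2:] or ['dump', 'dump_analysis', 'backup']


def run_task(task_name):
    return task_name in TASKS_TO_RUN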