示例#1
0
文件: cli.py 项目: arkka/ckan
 def simple_dump_json(self):
     """Dump all packages as JSON to the file path in ``self.args[1]``.

     Prints a usage hint and returns early when no path was supplied.
     """
     # NOTE(review): model is never referenced here; kept in case the
     # import has required side effects — confirm before removing.
     import ckan.model as model
     if len(self.args) < 2:
         # single-argument print() behaves identically on Python 2 and 3
         print('Need json file path')
         return
     dump_filepath = self.args[1]
     import ckan.lib.dumper as dumper
     # with-block closes the file deterministically (original leaked the fd)
     with open(dump_filepath, 'w') as dump_file:
         dumper.SimpleDumper().dump(dump_file, format='json')
示例#2
0
 def simple_dump_csv(self):
     """Dump all packages as CSV to the file path in ``self.args[1]``.

     Prints a usage hint and returns early when no path was supplied.
     """
     # NOTE(review): model is never referenced here; kept in case the
     # import has required side effects — confirm before removing.
     from ckan import model
     if len(self.args) < 2:
         # single-argument print() behaves identically on Python 2 and 3
         print('Need csv file path')
         return
     dump_filepath = self.args[1]
     import ckan.lib.dumper as dumper
     # with-block closes the file deterministically (original leaked the fd)
     with open(dump_filepath, 'w') as dump_file:
         dumper.SimpleDumper().dump(dump_file, format='csv')
示例#3
0
def exportPackages(query):
    """Export the packages in *query* to a zip archive.

    Writes a JSON dump, a CSV dump and copies of any uploaded resource
    files into a temporary directory, zips the directory, removes it,
    and returns the path of the created ``<tmpdir>.zip`` file.
    """
    # create temporary directory
    tmp_dir = tempfile.mkdtemp()

    # dump packages to json; the with-block closes (and therefore
    # flushes) the file — the original flushed but leaked the handle
    with open('%s/package.json' % tmp_dir, 'w') as file_json:
        dumper.SimpleDumper().dump_json(file_json, query)

    # dump packages to csv
    with open('%s/package.csv' % tmp_dir, 'w') as file_csv:
        dumper.SimpleDumper().dump_csv(file_csv, query)

    # copy uploaded resource files into the tmp directory
    for pkg in query:
        pkg_dict = pkg.as_dict()
        for resource in pkg_dict['resources']:
            # .get() avoids a KeyError when a resource has no url_type
            if resource.get('url_type') != 'upload':
                continue
            # best effort: a missing/unreadable upload must not abort
            # the whole export — but don't swallow KeyboardInterrupt
            # and SystemExit like the original bare except did
            try:
                upload = uploader.ResourceUpload(resource)
                filepath = upload.get_path(resource['id'])
                shutil.copyfile(
                    filepath,
                    '%s/%s_%s' % (tmp_dir, resource['id'],
                                  resource['url'].split('/')[-1]))
            except Exception:
                pass

    # zip directory up; ensure the archive is closed even if zipdir fails
    file_zip_path = '%s.zip' % tmp_dir
    file_zip = zipfile.ZipFile(file_zip_path, 'w')
    try:
        zipdir(tmp_dir, file_zip)
    finally:
        file_zip.close()

    # remove tmp directory
    shutil.rmtree(tmp_dir)

    return file_zip_path
示例#4
0
 def simple_dump_json(self):
     """Dump every package in the database as JSON to the file path
     given in ``self.args[1]``.

     Prints a usage hint and returns early when no path was supplied.
     """
     from ckan import model
     if len(self.args) < 2:
         # single-argument print() behaves identically on Python 2 and 3
         print('Need json file path')
         return
     dump_filepath = self.args[1]
     import ckan.lib.dumper as dumper
     query = model.Session.query(model.Package)
     # with-block closes the file deterministically (original leaked the fd)
     with open(dump_filepath, 'w') as dump_file:
         dumper.SimpleDumper().dump_json(dump_file, query)
示例#5
0
                        finally:
                            f.close()
                        log.info('Wrote openspending report %s', filepath)

    # Create dump for users
    if run_task('dump'):
        log.info('Creating database dump')
        if not os.path.exists(dump_dir):
            log.info('Creating dump dir: %s' % dump_dir)
            os.makedirs(dump_dir)
        # only active (non-deleted) datasets are included in the dump
        query = model.Session.query(
            model.Package).filter(model.Package.state == 'active')
        dump_file_base = start_time.strftime(dump_filebase)
        # presumably quietens markdown's logging during rendering — verify
        logging.getLogger("MARKDOWN").setLevel(logging.WARN)
        # each entry: (file-name suffix, callable that writes that format)
        for file_type, dumper_ in (
            ('csv', dumper.SimpleDumper().dump_csv),
            ('json', dumper.SimpleDumper().dump_json),
            ('unpublished.csv', inventory_dumper),
        ):
            dump_filename = '%s.%s' % (dump_file_base, file_type)
            dump_filepath = os.path.join(dump_dir, dump_filename + '.zip')
            # dump to a shared temp path first, then zip into place
            tmp_file = open(tmp_filepath, 'w+b')
            log.info('Creating %s file: %s' % (file_type, dump_filepath))
            dumper_(tmp_file, query)
            tmp_file.close()
            log.info('Dumped data file is %dMb in size' %
                     (os.path.getsize(tmp_filepath) / (1024 * 1024)))
            dump_file = zipfile.ZipFile(dump_filepath, 'w',
                                        zipfile.ZIP_DEFLATED)
            dump_file.write(tmp_filepath, dump_filename)
            dump_file.close()
示例#6
0
import tempfile
import os
from time import time

import ckan
from ckan.tests.legacy import *
import ckan.model as model
import ckan.lib.dumper as dumper
from ckan.common import json
from ckan.lib.dumper import Dumper
# Shared SimpleDumper instance used by the test class below.
simple_dumper = dumper.SimpleDumper()


class TestSimpleDump(TestController):
    """Exercise SimpleDumper's CSV output against the standard test data."""

    @classmethod
    def setup_class(cls):
        # start from a clean database populated with the fixture data
        model.repo.rebuild_db()
        CreateTestData.create()

    @classmethod
    def teardown_class(cls):
        # drop the session and wipe the database again
        model.Session.remove()
        model.repo.rebuild_db()

    def test_simple_dump_csv(self):
        out = tempfile.TemporaryFile()
        simple_dumper.dump(out, 'csv')
        # rewind and read back what the dumper wrote
        out.seek(0)
        content = out.read()
        # fixture dataset name and author must both appear in the CSV
        assert 'annakarenina' in content, content
        assert 'tolstoy' in content, content
示例#7
0
def command():
    """Daily maintenance script for a government CKAN instance.

    Reads the config file named on the command line, then in order:
    dumps all packages to zipped CSV and JSON files, analyses the JSON
    dump, and takes a gzipped ``pg_dump`` backup of the database.
    Exits via ``sys.exit(1)`` on usage errors or a suspiciously empty DB.
    """
    USAGE = '''Daily script for government
    Usage: python %s [config.ini]
    ''' % sys.argv[0]
    if len(sys.argv) < 2 or sys.argv[1] in ('--help', '-h'):
        err = 'Error: Please specify config file.'
        print USAGE, err
        logging.error('%s\n%s' % (USAGE, err))
        sys.exit(1)
    config_file = sys.argv[1]
    path = os.path.abspath(config_file)

    load_config(path)

    # pylons config is only populated after load_config() has run
    from pylons import config

    # settings
    # the instance name comes from the ini filename; 'development'
    # switches to home-directory paths for local testing
    ckan_instance_name = os.path.basename(config_file).replace('.ini', '')
    if ckan_instance_name != 'development':
        default_dump_dir = '/var/lib/ckan/%s/static/dump' % ckan_instance_name
        default_backup_dir = '/var/backups/ckan/%s' % ckan_instance_name
        default_log_dir = '/var/log/ckan/%s' % ckan_instance_name
    else:
        # test purposes
        default_dump_dir = '~/dump'
        default_backup_dir = '~/backups'
        default_log_dir = '~'
    dump_dir = os.path.expanduser(config.get('ckan.dump_dir',
                                             default_dump_dir))
    backup_dir = os.path.expanduser(config.get('ckan.backup_dir',
                                               default_backup_dir))
    log_dir = os.path.expanduser(config.get('ckan.log_dir',
                                            default_log_dir))
    # filename bases are strftime patterns expanded with today's date
    dump_filebase = config.get('ckan.dump_filename_base',
                               'data.gov.uk-ckan-meta-data-%Y-%m-%d')
    dump_analysis_filebase = config.get('ckan.dump_analysis_base',
                               'data.gov.uk-analysis')
    backup_filebase = config.get('ckan.backup_filename_base',
                                 ckan_instance_name + '.%Y-%m-%d.pg_dump')
    log_filepath = os.path.join(log_dir, 'gov-daily.log')
    print 'Logging to: %s' % log_filepath
    tmp_filepath = config.get('ckan.temp_filepath', '/tmp/dump.tmp')
    logging.basicConfig(filename=log_filepath, level=logging.INFO)
    logging.info('----------------------------')
    logging.info('Starting daily script')
    start_time = datetime.datetime.today()
    logging.info(start_time.strftime('%H:%M %d-%m-%Y'))

    # ckan model must be imported after the config has been loaded
    import ckan.model as model
    import ckan.lib.dumper as dumper

    # Check database looks right
    num_packages_before = model.Session.query(model.Package).count()
    logging.info('Number of existing packages: %i' % num_packages_before)
    if num_packages_before < 2:
        logging.error('Expected more packages.')
        sys.exit(1)
    elif num_packages_before < 2500:
        logging.warn('Expected more packages.')

    # Create dumps for users
    logging.info('Creating database dump')
    if not os.path.exists(dump_dir):
        logging.info('Creating dump dir: %s' % dump_dir)
        os.makedirs(dump_dir)
    query = model.Session.query(model.Package)
    dump_file_base = start_time.strftime(dump_filebase)
    # presumably quietens markdown's logging during rendering — verify
    logging.getLogger("MARKDOWN").setLevel(logging.WARN)
    # each entry: (file-name suffix, callable that writes that format)
    for file_type, dumper_ in (('csv', dumper.SimpleDumper().dump_csv),
                              ('json', dumper.SimpleDumper().dump_json),
                             ):
        dump_filename = '%s.%s' % (dump_file_base, file_type)
        dump_filepath = os.path.join(dump_dir, dump_filename + '.zip')
        # dump to a shared temp path first, then zip into place
        tmp_file = open(tmp_filepath, 'w')
        logging.info('Creating %s file: %s' % (file_type, dump_filepath))
        dumper_(tmp_file, query)
        tmp_file.close()
        dump_file = zipfile.ZipFile(dump_filepath, 'w', zipfile.ZIP_DEFLATED)
        dump_file.write(tmp_filepath, dump_filename)
        dump_file.close()
    report_time_taken()

    # Dump analysis
    logging.info('Creating dump analysis')
    json_dump_filepath = os.path.join(dump_dir, '%s.json.zip' % dump_file_base)
    txt_filepath = os.path.join(dump_dir, dump_analysis_filebase + '.txt')
    csv_filepath = os.path.join(dump_dir, dump_analysis_filebase + '.csv')
    run_info = get_run_info()
    options = DumpAnalysisOptions(analyse_by_source=True)
    analysis = DumpAnalysis(json_dump_filepath, options)
    logging.info('Saving dump analysis')
    output_types = (
        # (output_filepath, analysis_file_class)
        (txt_filepath, TxtAnalysisFile),
        (csv_filepath, CsvAnalysisFile),
        )
    analysis_files = {} # analysis_file_class, analysis_file
    for output_filepath, analysis_file_class in output_types:
        logging.info('Saving dump analysis to: %s' % output_filepath)
        analysis_file = analysis_file_class(output_filepath, run_info)
        analysis_file.add_analysis(analysis.date, analysis.analysis_dict)
        analysis_file.save()
    report_time_taken()

    # Create complete backup
    logging.info('Creating database backup')
    if not os.path.exists(backup_dir):
        logging.info('Creating backup dir: %s' % backup_dir)
        os.makedirs(backup_dir)

    db_details = get_db_config(config)
    pg_dump_filename = start_time.strftime(backup_filebase)
    pg_dump_filepath = os.path.join(backup_dir, pg_dump_filename)
    # NOTE(review): db_pass is interpolated straight into a shell command;
    # a password containing shell metacharacters would break the command
    # (or leak) — consider subprocess.run with env=... instead.
    cmd = 'export PGPASSWORD=%(db_pass)s&&pg_dump ' % db_details
    for pg_dump_option, db_details_key in (('U', 'db_user'),
                                           ('h', 'db_host'),
                                           ('p', 'db_port')):
        if db_details.get(db_details_key):
            cmd += '-%s %s ' % (pg_dump_option, db_details[db_details_key])
    cmd += '%(db_name)s' % db_details + ' > %s' % pg_dump_filepath
    logging.info('Backup command: %s' % cmd)
    ret = os.system(cmd)
    # os.system returns 0 on success
    if ret == 0:
        logging.info('Backup successful: %s' % pg_dump_filepath)
        logging.info('Zipping up backup')
        pg_dump_zipped_filepath = pg_dump_filepath + '.gz'
        cmd = 'gzip %s' % pg_dump_filepath
        logging.info('Zip command: %s' % cmd)
        ret = os.system(cmd)
        if ret == 0:
            logging.info('Backup gzip successful: %s' % pg_dump_zipped_filepath)
        else:
            logging.error('Backup gzip error: %s' % ret)
    else:
        logging.error('Backup error: %s' % ret)

    # Log footer
    report_time_taken()
    logging.info('Finished daily script')
    logging.info('----------------------------')
示例#8
0
        dump_file.write(dataset_file, "datasets.csv")
        dump_file.write(resource_file, "resources.csv")
        dump_file.close()

        # maintain a stable "-latest" symlink pointing at today's dump
        link_filepath = os.path.join(
            dump_dir, "data.gov.uk-ckan-meta-data-latest.csv.zip")

        if os.path.exists(link_filepath):
            os.unlink(link_filepath)
        os.symlink(dump_filepath, link_filepath)
        # source csv files are now inside the zip; remove the originals
        os.remove(dataset_file)
        os.remove(resource_file)

        # Dump the json and unpublished csv to the usual place.
        # each entry: (file-name suffix, callable that writes that format)
        for file_type, dumper_ in (
            ('json', dumper.SimpleDumper().dump_json),
            ('unpublished.csv', inventory_dumper),
        ):
            dump_filename = '%s.%s' % (dump_file_base, file_type)
            dump_filepath = os.path.join(dump_dir, dump_filename + '.zip')
            # dump to a shared temp path first, then zip into place
            tmp_file = open(tmp_filepath, 'w+b')
            log.info('Creating %s file: %s' % (file_type, dump_filepath))
            dumper_(tmp_file, query)
            tmp_file.close()
            log.info('Dumped data file is %dMb in size' %
                     (os.path.getsize(tmp_filepath) / (1024 * 1024)))
            dump_file = zipfile.ZipFile(dump_filepath, 'w',
                                        zipfile.ZIP_DEFLATED)
            dump_file.write(tmp_filepath, dump_filename)
            dump_file.close()
示例#9
0
                os.symlink(dump_filepath, link_filepath)
        finally:
            # the temp dump file is no longer needed once zipped/linked
            os.remove(tmp_filepath)

    if run_task('dump-csv-unpublished'):
        log.info('Creating database dumps - CSV unpublished')
        create_dump_dir_if_necessary(dump_dir)

        dump_datasets('unpublished.csv', unpublished_dumper, 1, dump_dir)
        report_time_taken(log)

    if run_task('dump-json'):
        log.info('Creating database dumps - JSON')
        create_dump_dir_if_necessary(dump_dir)

        dump_datasets('json', dumper.SimpleDumper().dump_json, 1, dump_dir)
        report_time_taken(log)

    if run_task('dump-json2'):
        # since gov_daily.py is run with sudo, and a path to python in the venv
        # rather than in an activated environment, and ckanapi creates
        # subprocesses, we need to activate the environment. The same one as
        # our current python interpreter.
        bin_dir = os.path.dirname(sys.executable)
        activate_this = os.path.join(bin_dir, 'activate_this.py')
        # execfile is Python 2 only: runs activate_this in this process
        execfile(activate_this, dict(__file__=activate_this))
        import ckanapi.cli.dump
        log.info('Creating database dumps - JSON 2')
        create_dump_dir_if_necessary(dump_dir)
        ckan = ckanapi.RemoteCKAN('http://localhost',
                                  user_agent='daily dump',