Exemplo n.º 1
0
def rebuild_all_dbs(Session):
    '''Clear the database through ``clear_db`` and rebuild the CKAN repo.

    When the datastore write URL and the CKAN URL name the same database,
    the repo is first flagged as not initialised so that the CKAN tables
    are recreated.
    '''
    datastore_parts = cli.parse_db_config('ckan.datastore.write_url')
    ckan_parts = cli.parse_db_config('sqlalchemy.url')

    # Only the database names are compared, not hosts or ports.
    if datastore_parts['db_name'] == ckan_parts['db_name']:
        model.repo.tables_created_and_initialised = False

    clear_db(Session)
    model.repo.rebuild_db()
Exemplo n.º 2
0
def rebuild_all_dbs(Session):
    '''Clear the database through ``clear_db`` and rebuild the CKAN repo.

    If the tests share one database between the datastore and CKAN (the
    two configured URLs carry the same database name), the repo is marked
    as uninitialised beforehand so the CKAN tables get recreated.
    '''
    write_parts = cli.parse_db_config('ckan.datastore.write_url')
    main_parts = cli.parse_db_config('sqlalchemy.url')
    shares_database = write_parts['db_name'] == main_parts['db_name']

    if shares_database:
        model.repo.tables_created_and_initialised = False

    clear_db(Session)
    model.repo.rebuild_db()
Exemplo n.º 3
0
def get_microservice_metadata():
    """Collect the SpatialIngestor connection settings from the CKAN config.

    Returns:
        dict with the keys ``postgis`` and ``geoserver`` (parsed connection
        URLs), ``geoserver_public_url`` and ``target_spatial_formats``
        (upper-cased, de-duplicated format names in no particular order).

    Raises:
        Exception: if ``ckan.spatialingestor.postgis_url`` or
            ``ckan.spatialingestor.internal_geoserver_url`` is not set.
    """
    required_options = (
        'ckan.spatialingestor.postgis_url',
        'ckan.spatialingestor.internal_geoserver_url',
    )
    for config_option in required_options:
        if not config.get(config_option):
            raise Exception(
                'Config option `{0}` must be set to use the SpatialIngestor.'.format(config_option))

    # Drop any trailing slash so the default public URL does not end up as
    # '...//geoserver' (the fallback site URL itself ends with '/').
    core_url = config.get('ckan.site_url', 'http://localhost:8000/').rstrip('/')
    target_formats = toolkit.aslist(
        config.get('ckan.spatialingestor.target_formats', []))

    return {'postgis': cli.parse_db_config('ckan.spatialingestor.postgis_url'),
            'geoserver': cli.parse_db_config('ckan.spatialingestor.internal_geoserver_url'),
            'geoserver_public_url': config.get('ckan.spatialingestor.public_geoserver_url',
                                               core_url + '/geoserver'),
            'target_spatial_formats': list(set(x.upper() for x in target_formats))
            }
Exemplo n.º 4
0
    def load_packages(self):

        #Get our CKAN and Drupal connection string

        dbc = parse_db_config('sqlalchemy.url')
        ckan_conn_string = "host='%s' dbname='%s' user='******' password='******'" % (dbc['db_host'], dbc['db_name'], dbc['db_user'], dbc['db_pass'])

        dbd = parse_db_config('ckan.drupal.url')
        drupal_conn_string = "host='%s' dbname='%s' user='******' password='******'" % (dbd['db_host'], dbd['db_name'], dbd['db_user'], dbd['db_pass'])

        # get a connection, if a connect cannot be made an exception will be raised here
        ckan_conn = psycopg2.connect(ckan_conn_string)
        drupal_conn = psycopg2.connect(drupal_conn_string)

        # ckan_conn.ckan_cursor will return a ckan_cursor object, you can use this ckan_cursor to perform queries
        ckan_cursor = ckan_conn.cursor()
        drupal_cursor = drupal_conn.cursor()

        # execute our Query
        ckan_cursor.execute("""select p.id,
       p.name, 
       p.title, 
       case when pe1.value is null then '' else pe1.value end, 
       case when p.notes   is null then '' else p.notes   end, 
       case when pe2.value is null then '' else pe2.value end 
      from package p 
      left join package_extra pe1 on p.id = pe1.package_id and pe1.key = 'title_fra'
      left join package_extra pe2 on p.id = pe2.package_id and pe2.key = 'notes_fra'""")


        # retrieve the records from the CKAN database and insert into the Drupal database
        for rec in ckan_cursor:
            drupal_cursor.execute("""select count(*) from opendata_package where pkg_id = %s""", (rec[0],))
            row = drupal_cursor.fetchone()
            if row[0] == 0:
                print "Inserting package %s" % (rec[0],)
                try:
                    drupal_cursor.execute("""insert into opendata_package (
  pkg_id,
  pkg_name,
  pkg_title_en,
  pkg_title_fr,
  pkg_description_en,
  pkg_description_fr
) values (%s, %s, %s, %s, %s, %s)""", (rec[0], self.format_drupal_string(rec[1]), self.format_drupal_string(rec[2]),
                                           self.format_drupal_string(rec[3]), self.format_drupal_string(rec[4]),
                                           self.format_drupal_string(rec[5])))
                except psycopg2.DataError, e:
                    self.logger.warn('Postgresql Database Exception %s', e.message)
Exemplo n.º 5
0
def dataset_comment_count(pkg_id):
    """Return the number of Drupal comments linked to the package ``pkg_id``.

    The Drupal database is located through the ``ckan.drupal.url`` config
    option.  Returns 0 when the parsed config is empty or when a
    ``KeyError`` is raised while reading it (presumably because the option
    is not configured -- verify against ``parse_db_config``).
    """
    count = 0

    try:
        dbd = parse_db_config('ckan.drupal.url')
        if dbd:
            # Keyword arguments instead of a hand-formatted DSN string.
            drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                      user=dbd['db_user'], password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor()

                # add this to the SQL statement to limit comments to those that are published  'and status = 0'
                drupal_cursor.execute(
                    """select count(c.*) from comment c inner join opendata_package o on o.pkg_node_id = c.nid where o.pkg_id = %s""",
                    (pkg_id, ))
                count = drupal_cursor.fetchone()[0]
                drupal_cursor.close()
            finally:
                # Release the connection even when the query fails.
                drupal_conn.close()

    except KeyError:
        # Best effort: fall back to the default count.
        pass
    return count
Exemplo n.º 6
0
def dataset_comments(pkg_id):
    """Return the Drupal comments linked to the package ``pkg_id``.

    Each comment is a dict with the keys ``subject``, ``date``
    (``YYYY-MM-DD``), ``thread``, ``comment_body`` (passed through
    ``clean_html``) and ``user``.  The Drupal database is located through the
    ``ckan.drupal.url`` config option.  An empty list is returned when the
    parsed config is empty or a ``KeyError`` is raised while reading it
    (presumably because the option is not configured -- verify against
    ``parse_db_config``).
    """
    comment_list = []
    try:
        dbd = parse_db_config('ckan.drupal.url')
        if dbd:
            # Keyword arguments instead of a hand-formatted DSN string.
            drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                      user=dbd['db_user'], password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor()

                # add this to the SQL statement to limit comments to those that are published  'and status = 0'
                drupal_cursor.execute(
                    """select c.subject, to_char(to_timestamp(c.changed), 'YYYY-MM-DD'), c.name, c.thread, f.comment_body_value from comment c 
inner join field_data_comment_body f on c.cid = f.entity_id
inner join opendata_package o on o.pkg_node_id = c.nid
where o.pkg_id = %s""", (pkg_id,))

                for comment in drupal_cursor:
                    comment_list.append({
                        'subject': comment[0],
                        'date': comment[1],
                        'thread': comment[3],
                        'comment_body': clean_html(comment[4]),
                        'user': comment[2],
                    })
                drupal_cursor.close()
            finally:
                # Release the connection even when the query fails.
                drupal_conn.close()

    except KeyError:
        # Best effort: fall back to the comments gathered so far.
        pass

    return comment_list
Exemplo n.º 7
0
def dataset_rating(pkg_id):
    """Return the average Drupal rating of the package ``pkg_id``.

    The value is ``avg(vote) / 25 + 1`` as computed by the query.  -1 is
    returned when the parsed ``ckan.drupal.url`` config is empty or a
    ``KeyError`` is raised while reading it (presumably because the option
    is not configured -- verify against ``parse_db_config``).

    NOTE(review): SQL ``avg`` yields NULL when no vote rows match, so the
    result can also be ``None``; confirm that callers cope with that.
    """
    rating = -1
    try:
        dbd = parse_db_config('ckan.drupal.url')
        if dbd:
            # Keyword arguments instead of a hand-formatted DSN string.
            drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                      user=dbd['db_user'], password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor()

                # retrieve the average dataset rating from Drupal  -- NB the parameter must be in the form (x,)
                drupal_cursor.execute(
                    """select avg(v.value)/25+1 as rating from opendata_package p 
             inner join votingapi_vote v on p.pkg_node_id = v.entity_id 
             where p.pkg_id = %s""", (pkg_id, ))
                rating = drupal_cursor.fetchone()[0]
                drupal_cursor.close()
            finally:
                # Release the connection even when the query fails.
                drupal_conn.close()
    except KeyError:
        # Best effort: fall back to the default rating.
        pass
    return rating
Exemplo n.º 8
0
    def command(self):
        '''
        Entry point: interpret ``self.args`` and run the requested action.

        Prints the class docstring when no argument or a help flag is given.
        Otherwise loads the config, parses the three database URLs, checks
        that the datastore read and write URLs name the same database and
        runs ``set-permissions`` with the user given as second argument.
        '''
        wants_help = not self.args or self.args[0] in ['--help', '-h', 'help']
        if wants_help:
            print(SetupDatastoreCommand.__doc__)
            return

        cmd = self.args[0]
        self._load_config()

        self.db_write_url_parts = cli.parse_db_config(
            'ckan.datastore.write_url')
        self.db_read_url_parts = cli.parse_db_config(
            'ckan.datastore.read_url')
        self.db_ckan_url_parts = cli.parse_db_config(
            'sqlalchemy.url')

        assert self.db_write_url_parts['db_name'] == \
            self.db_read_url_parts['db_name'], \
            "write and read db have to be the same"

        # Exactly one argument must follow the command name.
        if len(self.args) != 2:
            print(self.usage)
            return

        if cmd != 'set-permissions':
            print(self.usage)
            log.error('Command "%s" not recognized' % (cmd,))
            return

        ckan_parts = self.db_ckan_url_parts
        write_parts = self.db_write_url_parts
        read_parts = self.db_read_url_parts
        setup.set_permissions(
            pguser=self.args[1],
            pgport=ckan_parts['db_port'],
            ckandb=ckan_parts['db_name'],
            datastoredb=write_parts['db_name'],
            ckanuser=ckan_parts['db_user'],
            writeuser=write_parts['db_user'],
            readonlyuser=read_parts['db_user']
        )
        if self.verbose:
            print('Set permissions for read-only user: SUCCESS')
Exemplo n.º 9
0
def set_permissions(ctx, config):
    """Print the SQL produced by ``permissions_sql`` for the configured DBs.

    Loads the config given by ``config`` (falling back to
    ``ctx.obj['config']``), parses the datastore write/read URLs and the
    main ``sqlalchemy.url`` and exits with a message when the two datastore
    URLs name different databases.
    """
    load_config(config or ctx.obj['config'])

    datastore_write = parse_db_config(u'ckan.datastore.write_url')
    datastore_read = parse_db_config(u'ckan.datastore.read_url')
    main_db = parse_db_config(u'sqlalchemy.url')

    # Only the database names are compared; hosts and ports may still
    # differ, so this is a sanity check rather than a guarantee.
    same_database = datastore_write['db_name'] == datastore_read['db_name']
    if not same_database:
        exit(u"The datastore write_url and read_url must refer to the same "
             u"database!")

    sql_arguments = {
        'maindb': main_db['db_name'],
        'datastoredb': datastore_write['db_name'],
        'mainuser': main_db['db_user'],
        'writeuser': datastore_write['db_user'],
        'readuser': datastore_read['db_user'],
    }
    print(permissions_sql(**sql_arguments))
Exemplo n.º 10
0
    def command(self):
        '''
        Interpret ``self.args`` and run the matching action.

        A missing argument or a help flag prints the class docstring.
        Otherwise the config is loaded, the three database URLs are parsed,
        the datastore read and write database names are checked for equality
        and ``set-permissions`` is run for the user given as second argument.
        '''
        if not self.args or self.args[0] in ['--help', '-h', 'help']:
            print(SetupDatastoreCommand.__doc__)
            return

        cmd = self.args[0]
        self._load_config()

        self.db_write_url_parts = cli.parse_db_config(
            'ckan.datastore.write_url')
        self.db_read_url_parts = cli.parse_db_config('ckan.datastore.read_url')
        self.db_ckan_url_parts = cli.parse_db_config('sqlalchemy.url')

        datastore_write_name = self.db_write_url_parts['db_name']
        datastore_read_name = self.db_read_url_parts['db_name']
        assert datastore_write_name == datastore_read_name, \
            "write and read db have to be the same"

        # The command name must be followed by exactly one argument.
        if len(self.args) != 2:
            print(self.usage)
            return

        if cmd != 'set-permissions':
            print(self.usage)
            log.error('Command "%s" not recognized' % (cmd, ))
            return

        permission_args = dict(
            pguser=self.args[1],
            pgport=self.db_ckan_url_parts['db_port'],
            ckandb=self.db_ckan_url_parts['db_name'],
            datastoredb=self.db_write_url_parts['db_name'],
            ckanuser=self.db_ckan_url_parts['db_user'],
            writeuser=self.db_write_url_parts['db_user'],
            readonlyuser=self.db_read_url_parts['db_user'])
        setup.set_permissions(**permission_args)
        if self.verbose:
            print('Set permissions for read-only user: SUCCESS')
Exemplo n.º 11
0
    def command(self):
        '''
        Interpret ``self.args`` and run ``create-db`` or
        ``create-read-only-user``.

        A missing argument or a help flag prints the class docstring.  Both
        commands take the SQL superuser as their single extra argument, which
        is stored on ``self.sql_superuser`` before the action runs.
        '''
        if not self.args or self.args[0] in ['--help', '-h', 'help']:
            print(SetupDatastoreCommand.__doc__)
            return

        cmd = self.args[0]
        self._load_config()

        self.db_write_url_parts = cli.parse_db_config(
            'ckan.datastore.write_url')
        self.db_read_url_parts = cli.parse_db_config('ckan.datastore.read_url')
        self.db_ckan_url_parts = cli.parse_db_config('sqlalchemy.url')

        assert self.db_write_url_parts['db_name'] == self.db_read_url_parts[
            'db_name'], "write and read db should be the same"

        # command name -> (method to call, message printed in verbose mode)
        actions = {
            'create-db': ('create_db', 'Creating DB: SUCCESS'),
            'create-read-only-user': ('create_read_only_user',
                                      'Creating read-only user: SUCCESS'),
        }

        if cmd not in actions:
            print(self.usage)
            log.error('Command "%s" not recognized' % (cmd, ))
            return

        if len(self.args) != 2:
            print(self.usage)
            return

        method_name, success_message = actions[cmd]
        self.sql_superuser = self.args[1]
        getattr(self, method_name)()
        if self.verbose:
            print(success_message)
Exemplo n.º 12
0
    def _purge_legacy_all(self):
        """Prepare GeoServer and PostGIS connection details.

        As visible here the method only builds local values and defines the
        nested ``get_db_cursor`` helper; nothing in this span uses them.
        """
        # NOTE(review): this reads the PostGIS URL although the values are
        # used as GeoServer settings -- confirm whether a GeoServer config
        # option was intended.
        geoserver_info = cli.parse_db_config('ckan.spatialingestor.postgis_url')

        geoserver_credentials = (geoserver_info['db_user'], geoserver_info['db_pass'])
        # NOTE(review): there is no '/' between the host and 'rest/workspaces',
        # and the port is not included -- confirm the intended URL.
        geoserver_wsurl = 'http://' + geoserver_info['db_host'] + 'rest/workspaces'

        postgist_info = cli.parse_db_config('ckan.spatialingestor.postgis_url')

        def get_db_cursor():
            # Returns a (cursor, connection) pair in autocommit mode, or None
            # when connecting fails (the error is logged).
            # NOTE(review): a caller that unpacks the result must handle None.
            try:
                connection = psycopg2.connect(dbname=postgist_info['db_name'],
                                              user=postgist_info['db_user'],
                                              password=postgist_info['db_pass'],
                                              host=postgist_info['db_host'],
                                              port=postgist_info.get('db_port', None))

                connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)

                return connection.cursor(), connection
            except Exception, e:
                log.error("Failed to connect with PostGIS with error {0}".format(str(e)))
                return None
Exemplo n.º 13
0
def _get_db_settings():
    """Return the PostGIS connection settings as a keyword dictionary.

    The values come from the parsed
    ``ckanext.datagovau.spatialingestor.postgis.url`` option; an empty port
    is reported as ``None``.  Keys: ``dbname``, ``user``, ``password``,
    ``host`` and ``port``.
    """
    parsed = cli.parse_db_config(
        'ckanext.datagovau.spatialingestor.postgis.url')

    port = parsed.get('db_port', '')
    # Treat an empty port string as "not specified".
    port = None if port == '' else port

    return {
        'dbname': parsed.get('db_name'),
        'user': parsed.get('db_user'),
        'password': parsed.get('db_pass'),
        'host': parsed.get('db_host'),
        'port': port,
    }
Exemplo n.º 14
0
def set_permissions(ctx, config):
    """Print the permissions SQL for the configured CKAN and datastore DBs.

    The config is loaded from ``config`` or, when that is falsy, from
    ``ctx.obj['config']``.  Exits with a message if the datastore write and
    read URLs carry different database names.
    """
    load_config(config or ctx.obj['config'])

    write_parts = parse_db_config(u'ckan.datastore.write_url')
    read_parts = parse_db_config(u'ckan.datastore.read_url')
    ckan_parts = parse_db_config(u'sqlalchemy.url')

    # Sanity check on the database names only -- the URLs could still point
    # at different hosts or ports.
    if not write_parts['db_name'] == read_parts['db_name']:
        exit(u"The datastore write_url and read_url must refer to the same "
             u"database!")

    statement = permissions_sql(
        maindb=ckan_parts['db_name'],
        datastoredb=write_parts['db_name'],
        mainuser=ckan_parts['db_user'],
        writeuser=write_parts['db_user'],
        readuser=read_parts['db_user'])
    print(statement)
Exemplo n.º 15
0
 def saveUser(self, user_id, provider, user_key, user_secret):
     """Insert a provider key/secret pair for a user into Drupal.

     Adds one row to ``opendata_tokens_provider_user`` in the database
     configured through ``ckan.drupal.url`` and commits it.  The connection
     is closed afterwards, also when the insert fails.
     """
     pprint.pprint("save user")
     dbd = parse_db_config('ckan.drupal.url')
     # Keyword arguments instead of a hand-formatted DSN string; an empty
     # port is passed as None so the driver default applies.
     drupal_conn = psycopg2.connect(
         host=dbd['db_host'], dbname=dbd['db_name'],
         port=dbd['db_port'] or None,
         user=dbd['db_user'], password=dbd['db_pass'])
     try:
         drupal_cursor = drupal_conn.cursor(
             cursor_factory=psycopg2.extras.DictCursor)
         drupal_cursor.execute(
             """INSERT INTO opendata_tokens_provider_user (id_usuario, provider, key, secret) VALUES (%s, %s, %s, %s)""",
             (user_id, provider, user_key, user_secret))
         drupal_conn.commit()
     finally:
         drupal_conn.close()
Exemplo n.º 16
0
def _set_permissions(args):
    """Print the SQL built by ``_permissions_sql`` for the configured DBs.

    Aborts through ``_abort`` when the datastore write and read URLs carry
    different database names.  ``args`` is not used by this function.
    """
    datastore_write = cli.parse_db_config('ckan.datastore.write_url')
    datastore_read = cli.parse_db_config('ckan.datastore.read_url')
    main_db = cli.parse_db_config('sqlalchemy.url')

    # Name comparison only: hosts and ports are not checked, so this is a
    # basic sanity check rather than proof the databases are identical.
    names_match = datastore_write['db_name'] == datastore_read['db_name']
    if not names_match:
        _abort("The datastore write_url and read_url must refer to the same "
               "database!")

    context = dict(
        maindb=main_db['db_name'],
        datastoredb=datastore_write['db_name'],
        mainuser=main_db['db_user'],
        writeuser=datastore_write['db_user'],
        readuser=datastore_read['db_user'],
    )

    print(_permissions_sql(context))
Exemplo n.º 17
0
 def saveUserToken(self, user_token, user_id, provider):
     """Store ``user_token`` for the given user and provider in Drupal.

     Updates the ``token`` column of the matching
     ``opendata_tokens_provider_user`` row in the database configured through
     ``ckan.drupal.url`` and commits.  The connection is closed afterwards,
     also when the update fails.

     NOTE(review): the token is printed to stdout below; confirm that this
     debug output is acceptable for a credential.
     """
     pprint.pprint("store user token")
     pprint.pprint(str(user_token))
     dbd = parse_db_config('ckan.drupal.url')
     # Keyword arguments instead of a hand-formatted DSN string; an empty
     # port is passed as None so the driver default applies.
     drupal_conn = psycopg2.connect(
         host=dbd['db_host'], dbname=dbd['db_name'],
         port=dbd['db_port'] or None,
         user=dbd['db_user'], password=dbd['db_pass'])
     try:
         drupal_cursor = drupal_conn.cursor(
             cursor_factory=psycopg2.extras.DictCursor)
         drupal_cursor.execute(
             """UPDATE opendata_tokens_provider_user SET token=%s WHERE id_usuario=%s AND provider=%s""",
             (str(user_token), user_id, provider))
         drupal_conn.commit()
     finally:
         drupal_conn.close()
Exemplo n.º 18
0
    def command(self):
        '''
        Interpret ``self.args`` and run the requested setup action.

        Supported commands are ``create-db`` and ``create-read-only-user``;
        each expects the SQL superuser as its only extra argument.  Without
        arguments, or with a help flag, the class docstring is printed.
        '''
        help_requested = (not self.args
                          or self.args[0] in ['--help', '-h', 'help'])
        if help_requested:
            print(SetupDatastoreCommand.__doc__)
            return

        cmd = self.args[0]
        self._load_config()

        self.db_write_url_parts = cli.parse_db_config('ckan.datastore.write_url')
        self.db_read_url_parts = cli.parse_db_config('ckan.datastore.read_url')
        self.db_ckan_url_parts = cli.parse_db_config('sqlalchemy.url')

        write_name = self.db_write_url_parts['db_name']
        read_name = self.db_read_url_parts['db_name']
        assert write_name == read_name, "write and read db should be the same"

        if cmd not in ('create-db', 'create-read-only-user'):
            print(self.usage)
            log.error('Command "%s" not recognized' % (cmd,))
            return

        # Both commands take exactly one extra argument: the SQL superuser.
        if len(self.args) != 2:
            print(self.usage)
            return

        self.sql_superuser = self.args[1]
        if cmd == 'create-db':
            self.create_db()
            success_message = 'Creating DB: SUCCESS'
        else:
            self.create_read_only_user()
            success_message = 'Creating read-only user: SUCCESS'

        if self.verbose:
            print(success_message)
Exemplo n.º 19
0
    def addexception(self, id):
        """Flag the ``status`` rows of package ``id`` as excepted.

        Runs an ``UPDATE ... SET excepted = TRUE`` on the ``status`` table in
        the schema named by ``ckan.cprvalidation.cprvalidation_db`` and
        prints how many rows were changed.

        Exits the process with status 1 when the password or port option is
        missing or when the database connection cannot be established.
        """
        d_port = config.get('ckan.cprvalidation.postgres_port', None)
        d_pass = config.get('ckan.cprvalidation.cprvalidation_password', None)
        db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)

        add_exception = ''' UPDATE {0}.status SET excepted = TRUE
                            WHERE package_id = %s
                            returning *
        ;'''

        if d_pass is None:
            print(
                "Setup cprvalidation_password in /etc/ckan/default/production.ini"
            )
            sys.exit(1)
        if d_port is None:
            print("Setup postgres_port in /etc/ckan/default/production.ini")
            sys.exit(1)

        try:
            db_config = parse_db_config()
            host = db_config.get('db_host')
            conn = psycopg2.connect(database=db_name,
                                    host=host,
                                    user="******",
                                    password=d_pass,
                                    port=d_port)
            conn.autocommit = True
            print(" ")
        except Exception as e:
            print(e)
            # Report the failure through a non-zero exit status, like the
            # configuration checks above.
            sys.exit(1)

        try:
            cur = conn.cursor()
            # The schema name comes from the config; the package id is passed
            # as a bound parameter.
            cur.execute(add_exception.format(db_name), (id, ))

            count = len(cur.fetchall())
            if count == 0:
                print("Could not find relation %s " % id)
            else:
                print(
                    "Added exception for %d resources in dataset with package_id: %s "
                    % (count, id))

            conn.commit()
        finally:
            # Release the connection even when the statement fails.
            conn.close()
Exemplo n.º 20
0
def _get_geoserver_data():
    """Return ``(base_url, user, password, public_url)`` for GeoServer.

    The base URL is assembled from the parsed
    ``ckanext.datagovau.spatialingestor.geoserver.url`` option: ``https://``
    when its type is ``sslgeoserver`` (``http://`` otherwise), then the host,
    an optional ``:port`` and ``/<name>/``.  The public URL is read from
    ``ckanext.datagovau.spatialingestor.geoserver.public_url``.
    """
    info = cli.parse_db_config(
        'ckanext.datagovau.spatialingestor.geoserver.url')

    if info.get('db_type') == 'sslgeoserver':
        scheme = "https://"
    else:
        scheme = "http://"

    base_url = scheme + info.get('db_host')

    port = info.get('db_port', '')
    # Only append the port when one was given in the URL.
    port_suffix = ':' + port if port != '' else ''

    base_url = base_url + port_suffix + '/' + info.get('db_name') + '/'

    user = info.get('db_user')
    password = info.get('db_pass')
    public_url = config.get(
        'ckanext.datagovau.spatialingestor.geoserver.public_url')
    return (base_url, user, password, public_url)
Exemplo n.º 21
0
    def download(self):
        """Return the whole ``status`` table as CSV text.

        Copies ``<schema>.status`` (schema named by
        ``ckan.cprvalidation.cprvalidation_db``) into an in-memory buffer with
        ``COPY ... TO STDOUT WITH CSV HEADER`` and sets the response headers
        so the result is served as the attachment ``cpr_report.csv``.

        Calls ``sys.exit()`` when the port or password option is missing or
        the connection fails.
        NOTE(review): exiting the process from what looks like a request
        handler is drastic -- confirm this is intended.
        """
        port = config.get('ckan.cprvalidation.postgres_port', None)
        password = config.get('ckan.cprvalidation.cprvalidation_password',
                              None)
        db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)
        db_config = parse_db_config()
        host = db_config.get('db_host')

        if port is None or password is None:
            log.warn(
                "Config not setup properly! Missing either postgres_port or cprvalidation_password"
            )
            sys.exit()

        try:
            conn = psycopg2.connect(database=db_name,
                                    host=host,
                                    user="******",
                                    password=password,
                                    port=port)
        except Exception as e:
            log.warn(e)
            sys.exit()

        select = """COPY (SELECT * FROM {0}.status) to STDOUT WITH CSV HEADER"""

        # Instead of using an actual file, we use a file-like string buffer
        text_stream = StringIO.StringIO()
        try:
            cur = conn.cursor()
            cur.copy_expert(select.format(db_name), text_stream)
            output = text_stream.getvalue()
            conn.commit()
        finally:
            # Clean up after ourselves, also when the copy fails.
            text_stream.close()
            conn.close()

        pylons.response.headers['Content-Type'] = 'text/csv;charset=utf-8'
        pylons.response.headers[
            'Content-Disposition'] = 'attachment; filename="cpr_report.csv"'
        return output
Exemplo n.º 22
0
def _migrate_autogen_timestamp(old_name, new_name):
    """Rename column ``old_name`` to ``new_name`` in every table that has it.

    Connects to the database described by ``ckan.datastore.write_url``,
    looks up all tables listing a column called ``old_name`` in
    ``INFORMATION_SCHEMA.COLUMNS`` and issues one ``ALTER TABLE ... RENAME``
    per table.

    NOTE(review): the column names are interpolated into the ALTER statement
    unquoted, and the password is not URL-escaped; both are presumably
    trusted, simple values -- confirm.
    """
    write_url_obj = cli.parse_db_config('ckan.datastore.write_url')

    # Host and port must be separated by ':'; the port part is optional.
    host = write_url_obj['db_host']
    if write_url_obj['db_port']:
        host = host + ':' + write_url_obj['db_port']

    write_url = ('postgres://' + write_url_obj['db_user'] + ':' +
                 write_url_obj['db_pass'] + '@' + host + '/' +
                 write_url_obj['db_name'])

    conn = create_engine(write_url)

    sql_autogen_res = 'select table_name \
        from INFORMATION_SCHEMA.COLUMNS where column_name = %s'
    sql_rename_column = 'ALTER TABLE "{table_name}" RENAME {old_name} TO {new_name}'

    autogen_res = conn.execute(sql_autogen_res, old_name).fetchall()
    for ar in autogen_res:
        conn.execute(sql_rename_column.format(
            table_name=ar[0],
            old_name=old_name,
            new_name=new_name))
Exemplo n.º 23
0
def dataset_rating(pkg_id):
  """Return the average Drupal rating of the package ``pkg_id`` as an int.

  The value is ``avg(vote) / 25 + 1`` as computed by the query, truncated
  with ``int``.  0 is returned when there is no rating, when the parsed
  ``ckan.drupal.url`` config is empty or when a ``KeyError`` is raised while
  reading it (presumably because the option is not configured -- verify
  against ``parse_db_config``).
  """
  rating = None
  try:
    dbd = parse_db_config('ckan.drupal.url')
    if dbd:
      # Keyword arguments instead of a hand-formatted DSN string.
      drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                user=dbd['db_user'], password=dbd['db_pass'])
      try:
        drupal_cursor = drupal_conn.cursor()

        # retrieve the average dataset rating from Drupal  -- NB the parameter must be in the form (x,)
        drupal_cursor.execute(
          """select avg(v.value)/25+1 as rating from opendata_package p 
             inner join votingapi_vote v on p.pkg_node_id = v.entity_id 
             where p.pkg_id = %s""", (pkg_id,))
        rating = drupal_cursor.fetchone()[0]
        drupal_cursor.close()
      finally:
        # Release the connection even when the query fails.
        drupal_conn.close()
  except KeyError:
    # Best effort: fall back to "no rating".
    pass
  return int(0 if rating is None else rating)
Exemplo n.º 24
0
def dataset_comment_count(pkg_id):
    """Return how many Drupal comments are linked to the package ``pkg_id``.

    The Drupal database is located through the ``ckan.drupal.url`` config
    option.  0 is returned when the parsed config is empty or when a
    ``KeyError`` is raised while reading it (presumably because the option
    is not configured -- verify against ``parse_db_config``).
    """
    count = 0

    try:
        dbd = parse_db_config('ckan.drupal.url')
        if dbd:
            # Keyword arguments instead of a hand-formatted DSN string.
            drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                      user=dbd['db_user'], password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor()

                # add this to the SQL statement to limit comments to those that are published  'and status = 0'
                drupal_cursor.execute(
                    """select count(c.*) from comment c inner join opendata_package o on o.pkg_node_id = c.nid where o.pkg_id = %s""", (pkg_id,))
                count = drupal_cursor.fetchone()[0]
                drupal_cursor.close()
            finally:
                # Release the connection even when the query fails.
                drupal_conn.close()

    except KeyError:
        # Best effort: fall back to the default count.
        pass
    return count
Exemplo n.º 25
0
def dataset_comments(pkg_id):
    """Return the Drupal comments linked to the package ``pkg_id``.

    Each entry is a dict with the keys ``subject``, ``date``
    (``YYYY-MM-DD``), ``thread``, ``comment_body`` (passed through
    ``clean_html``) and ``user``.  An empty list is returned when the parsed
    ``ckan.drupal.url`` config is empty or a ``KeyError`` is raised while
    reading it (presumably because the option is not configured -- verify
    against ``parse_db_config``).
    """
    comment_list = []
    try:
        dbd = parse_db_config('ckan.drupal.url')
        if dbd:
            # Keyword arguments instead of a hand-formatted DSN string.
            drupal_conn = pg2.connect(host=dbd['db_host'], dbname=dbd['db_name'],
                                      user=dbd['db_user'], password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor()

                # add this to the SQL statement to limit comments to those that are published  'and status = 0'
                drupal_cursor.execute(
                    """select c.subject, to_char(to_timestamp(c.changed), 'YYYY-MM-DD'), c.name, c.thread, f.comment_body_value from comment c 
inner join field_data_comment_body f on c.cid = f.entity_id
inner join opendata_package o on o.pkg_node_id = c.nid
where o.pkg_id = %s""", (pkg_id, ))

                for subject, date, user, thread, body in drupal_cursor:
                    comment_list.append({
                        'subject': subject,
                        'date': date,
                        'thread': thread,
                        'comment_body': clean_html(body),
                        'user': user
                    })
                drupal_cursor.close()
            finally:
                # Release the connection even when the query fails.
                drupal_conn.close()

    except KeyError:
        # Best effort: fall back to the comments gathered so far.
        pass

    return comment_list
Exemplo n.º 26
0
def scanDB():
    """Return the ``status`` rows of resources that need (re)validation.

    Selects rows from ``<schema>.status`` (schema named by
    ``ckan.cprvalidation.cprvalidation_db``) whose format is one of csv,
    xlsx, json, geojson, ods or docx, that were updated since the last check
    (or were never checked), that have a URL type or an active datastore,
    and that are not excepted.

    Returns:
        The list of rows as returned by ``cursor.fetchall()``.

    Exits the process with status 1 when the database connection cannot be
    established.
    """
    d_port = config.get('ckan.cprvalidation.postgres_port', None)
    d_pass = config.get('ckan.cprvalidation.cprvalidation_password', None)
    db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)

    try:
        db_config = parse_db_config()
        host = db_config.get('db_host')
        conn = psycopg2.connect(database=db_name,
                                host=host,
                                user="******",
                                password=d_pass,
                                port=d_port)
    except Exception as e:
        print(e)
        # Signal the failure to the calling process.
        sys.exit(1)

    # TODO: PDF is really slow, so we need to fix that, removed for now
    select = """
                   SELECT * FROM """ + db_name + """.status
                   WHERE format = ANY('{csv,xlsx,json,geojson,ods,docx}')
                   AND (last_updated::timestamp >= last_checked::timestamp OR last_checked IS NULL)
                   AND (url_type IS NOT NULL OR datastore_active = 'true')
                   AND excepted IS NULL;
       """
    print("Scanning for updates...")
    try:
        cur = conn.cursor()
        # Get the datasets we have to validate
        cur.execute(select)
        tmp_return = cur.fetchall()
        conn.commit()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Return them
    print("Found %d updated resources to validate \n" % len(tmp_return))
    return tmp_return
Exemplo n.º 27
0
def setup_protected_resources(**kwargs):
    """Print the protected-resource SQL template filled in with the DB settings.

    The template ``set_protected_resource_table.sql`` is read from the
    directory containing this module and its ``{placeholders}`` are
    substituted with the mapping returned by ``parse_db_config()``.
    ``kwargs`` is accepted but not used.
    """
    module_dir = os.path.dirname(__file__)
    sql_path = os.path.join(module_dir, u'set_protected_resource_table.sql')
    with open(sql_path) as sql_file:
        template = sql_file.read()
    db_settings = parse_db_config()
    print(template.format(**db_settings))
Exemplo n.º 28
0
    def load_docs(self):
        '''
        Load the Virtual Library datasets into the same Drupal table as the Open Data datasets.
        @return: nothing
        '''

        #Get our CKAN and Drupal connection string

        dbc = parse_db_config('sqlalchemy.url')
        ckan_conn_string = "host='%s' dbname='%s' user='******' password='******'" % (dbc['db_host'], dbc['db_name'], dbc['db_user'], dbc['db_pass'])

        dbd = parse_db_config('ckan.drupal.url')
        drupal_conn_string = "host='%s' dbname='%s' user='******' password='******'" % (dbd['db_host'], dbd['db_name'], dbd['db_user'], dbd['db_pass'])

        # get a connection, if a connect cannot be made an exception will be raised here
        ckan_conn = psycopg2.connect(ckan_conn_string)
        drupal_conn = psycopg2.connect(drupal_conn_string)

        # ckan_conn.ckan_cursor will return a ckan_cursor object, you can use this ckan_cursor to perform queries
        ckan_cursor = ckan_conn.cursor()
        drupal_cursor = drupal_conn.cursor()

        # execute our Query
        ckan_cursor.execute("""select p.id,
       p.name,
       case when pe1.value is null then '' else pe1.value end,
       case when pe2.value is null then '' else pe2.value end
      from package p
      left join package_extra pe1 on p.id = pe1.package_id and pe1.key = 'title_ml'
      left join package_extra pe2 on p.id = pe2.package_id and pe2.key = 'description_ml'
      where p.type = 'doc' and p.state = 'active'""")

        # retrieve the records from the CKAN database and insert into the Drupal database
        for rec in ckan_cursor:
            drupal_cursor.execute("""select count(*) from opendata_package where pkg_id = %s""", (rec[0],))
            row = drupal_cursor.fetchone()
            if row[0] == 0:
                titles = json.loads(rec[2])
                descriptions = json.loads(rec[3])
                title_en = ''
                if 'en' in titles:
                    title_en = titles['en']
                title_fr = ''
                if 'fr' in titles:
                    title_fr = titles['fr']
                desc_en = ''
                if 'en' in descriptions:
                    desc_en = descriptions['en']
                desc_fr = ''
                if 'fr' in descriptions:
                    desc_fr = descriptions['fr']
                print "Inserting package %s: %s %s %s: %s %s" % (rec[0], rec[1], title_en, title_fr, desc_en, desc_fr)
                try:
                    drupal_cursor.execute("""insert into opendata_package (
  pkg_id,
  pkg_name,
  pkg_title_en,
  pkg_title_fr,
  pkg_description_en,
  pkg_description_fr
) values (%s, %s, %s, %s, %s, %s)""", (rec[0], rec[1], title_en, title_fr, desc_en, desc_fr))
                    drupal_conn.commit()
                except psycopg2.DataError, e:
                    self.logger.warn('Postgresql Database Exception %s', e.message)
    def notify(self, entity, operation=None):
        """Insert a newly created package into the Drupal ``opendata_package`` table.

        Does nothing unless ``operation`` is ``DomainObjectOperation.new`` and
        ``entity`` is a ``_package.Package``. For such an entity the English,
        Spanish and Catalan titles and descriptions are read from
        ``entity.title_translated`` / ``entity.notes_translated`` (JSON
        strings) and written to Drupal using the 'ckan.drupal.url' connection
        settings. A ``psycopg2.DataError`` during the insert is logged and
        swallowed.

        Returns nothing.
        """
        is_new_package = (
            operation == model.domain_object.DomainObjectOperation.new
            and isinstance(entity, _package.Package))
        if not is_new_package:
            return

        # Python 2 hack kept from the original: forces the interpreter-wide
        # default string encoding to UTF-8.
        reload(sys)
        sys.setdefaultencoding('utf-8')

        dbc = parse_db_config('ckan.drupal.url')
        # Keyword arguments let psycopg2 build and quote the DSN itself
        drupal_conn = psycopg2.connect(host=dbc['db_host'],
                                       port=dbc['db_port'],
                                       dbname=dbc['db_name'],
                                       user=dbc['db_user'],
                                       password=dbc['db_pass'])
        try:
            drupal_cursor = drupal_conn.cursor()

            titles = json.loads(entity.title_translated)
            descriptions = json.loads(entity.notes_translated)
            title_en = titles.get('en', '')
            title_es = titles.get('es', '')
            title_ca = titles.get('ca', '')
            desc_en = descriptions.get('en', '')
            desc_es = descriptions.get('es', '')
            desc_ca = descriptions.get('ca', '')

            log.debug("Inserting package %s: %s %s %s: %s %s %s %s",
                      entity.id, entity.name, title_en, title_es, title_ca,
                      desc_en, desc_es, desc_ca)

            try:
                drupal_cursor.execute(
                    """insert into opendata_package (pkg_id,pkg_name,pkg_title_en,pkg_title_es,pkg_title_ca,pkg_description_en,pkg_description_es,pkg_description_ca) values (%s, %s, %s, %s, %s, %s, %s, %s)""",
                    (entity.id,
                     self.format_drupal_string(entity.name),
                     self.format_drupal_string(title_en),
                     self.format_drupal_string(title_es),
                     self.format_drupal_string(title_ca),
                     self.format_drupal_string(desc_en),
                     self.format_drupal_string(desc_es),
                     self.format_drupal_string(desc_ca)))
                drupal_conn.commit()
            except psycopg2.DataError as e:
                # Discard the aborted transaction explicitly
                drupal_conn.rollback()
                log.warn('Postgresql Database Exception %s', e)

            drupal_cursor.close()
        finally:
            # Release the connection on every path, including unexpected errors
            drupal_conn.close()

        # Enable once the Drupal schema has been imported into CKAN.
        # NOTE(review): the disabled block this comment introduced is cut off
        # in this copy of the file.
Exemplo n.º 30
0
    def resource_download(self, environ, id, resource_id, filename=None):
        """Serve a resource download, enforcing token access where required.

        Steps:
          * look the resource up with ``resource_show`` (404 when it is
            missing or the user is not authorised);
          * when the resource has ``token_required == 'Yes'``, check the
            ``Authorization`` header against the Drupal ``opendata_tokens``
            table and redirect to the site's ``/tokens`` page when the header
            is empty or no row matches; for ``token_type == 'bsm'`` the
            content is fetched through ``bsm.BsmApi``;
          * record the download with ``CustomTrackingController.update``;
          * return the uploaded file, the API content, or a redirect to the
            resource URL (404 when the resource has no URL).

        ``id`` and ``filename`` are accepted but not used here.
        """
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user,
            'auth_user_obj': c.userobj
        }

        try:
            rsc = t.get_action('resource_show')(context, {'id': resource_id})
        except (logic.NotFound, logic.NotAuthorized):
            base.abort(404, _('Resource not found'))

        # These stay unset unless a token provider API serves the content, so
        # the dispatch at the end never reads an unbound name.
        api_content = None
        api_status = None
        api_headers = None

        if rsc.get('token_required') == 'Yes':
            authentication = environ.get('HTTP_AUTHORIZATION', '')
            url_redirect = "%s/tokens?resource_id=%s&package_id=%s" % (
                config.get('ckan.site_url'), resource_id, rsc['package_id'])

            if authentication == '':
                return redirect(url_redirect.encode('utf-8'))

            token_type = rsc.get('token_type')
            dbd = parse_db_config('ckan.drupal.url')
            # Keyword arguments let psycopg2 build and quote the DSN itself
            drupal_conn = psycopg2.connect(host=dbd['db_host'],
                                           dbname=dbd['db_name'],
                                           port=dbd['db_port'],
                                           user=dbd['db_user'],
                                           password=dbd['db_pass'])
            try:
                drupal_cursor = drupal_conn.cursor(
                    cursor_factory=psycopg2.extras.DictCursor)
                if not token_type:
                    drupal_cursor.execute(
                        """select id_usuario from opendata_tokens where tkn_usuario=%s""",
                        (authentication, ))
                else:
                    drupal_cursor.execute(
                        """SELECT t.*, pu.*, p.*, u.name, u.mail, u.uid FROM opendata_tokens t
                        LEFT JOIN opendata_tokens_provider_user pu ON pu.id_usuario=t.id_usuario
                        LEFT JOIN opendata_tokens_provider p ON (pu.provider = p.id  OR p.id='bsm')
                        LEFT JOIN users u ON t.id_usuario = u.uid
                        WHERE t.tkn_usuario = %s AND (p.id IS NULL OR p.id = %s)""",
                        (authentication, token_type))

                record = None
                if drupal_cursor.rowcount >= 1:
                    record = drupal_cursor.fetchone()
            finally:
                # The fetched row stays usable after the connection is closed
                drupal_conn.close()

            if record is None:
                # Unknown token: send the user to the token page
                return redirect(url_redirect.encode('utf-8'))

            if token_type == 'bsm':
                api = bsm.BsmApi(rsc,
                                 app_token=record['app_token'],
                                 consumer_key=record['consumer_key'],
                                 consumer_secret=record['consumer_secret'],
                                 user_token=record['token'],
                                 user_id=record['uid'],
                                 user_key=record['key'],
                                 user_secret=record['secret'],
                                 username=record['name'],
                                 email=record['mail'])
                api_content, api_status, api_headers = api.execute()
            # NOTE(review): for any other token type the token has been
            # validated but no provider API is known, so the request falls
            # through to the regular download below — confirm this is the
            # desired policy.

        # Save download to tracking_raw
        CustomTrackingController.update(environ['REQUEST_URI'], 'resource',
                                        environ)

        if rsc.get('url_type') == 'upload':
            # Internal redirect
            upload = uploader.get_resource_uploader(rsc)
            filepath = upload.get_path(rsc['id'])
            fileapp = paste.fileapp.FileApp(filepath)

            try:
                status, headers, app_iter = request.call_application(fileapp)
            except OSError:
                base.abort(404, _('Resource data not found'))

            response.headers.update(dict(headers))

            content_type, content_enc = m.guess_type(rsc.get('url', ''))

            # XML uploads are served as a generic binary download
            if content_type == 'application/xml':
                response.headers['Content-Type'] = 'application/octet-stream'
            elif content_type:
                response.headers['Content-Type'] = content_type

            response.status = status

            return app_iter

        if api_content:
            response.headers['Content-Type'] = api_headers['content-type']
            response.status = api_status
            return api_content

        if 'url' not in rsc:
            base.abort(404, _('No download is available'))

        # External redirect
        return redirect(rsc['url'].encode('utf-8'))
Exemplo n.º 31
0
    def initdb(self):
        """Drop and recreate the cprvalidation database, schema and ``status`` table.

        WARNING: this CLEANS the database. The existing database named by
        ``ckan.cprvalidation.cprvalidation_db`` is dropped on every run (the
        original author did this for debugging purposes).

        Two connections are used because the work is done as two different
        users: the first connects to the ``postgres`` database to drop and
        create the target database, the second connects to the new database to
        create the schema and table.

        Exits the process with status 1 when a required config option is
        missing or when any database step fails.
        """
        d_port = config.get('ckan.cprvalidation.postgres_port', None)
        d_pass = config.get('ckan.cprvalidation.cprvalidation_password', None)
        db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)
        postgres_pass = config.get('ckan.cprvalidation.postgres_password',
                                   None)

        # db_name is interpolated into every statement below, so it is
        # required just like the credentials.
        required_options = (
            ('cprvalidation_password', d_pass),
            ('postgres_port', d_port),
            ('postgres_password', postgres_pass),
            ('cprvalidation_db', db_name),
        )
        missing = [name for name, value in required_options if value is None]
        for name in missing:
            print("Setup %s in /etc/ckan/default/production.ini" % name)
        if missing:
            print("Exiting..")
            sys.exit(1)

        # The cprvalidation role is not created here; it has to exist already
        # because CREATE DATABASE below names it as the owner.
        drop_db = '''DROP DATABASE IF EXISTS {0};'''
        create_db = '''
            CREATE DATABASE {0}
            WITH OWNER = cprvalidation
            ENCODING = 'UTF8'
            TABLESPACE = pg_default
            CONNECTION LIMIT = -1;
        '''

        create_schema = '''
            DROP SCHEMA IF EXISTS {0};
            CREATE SCHEMA {0}
            AUTHORIZATION cprvalidation;
        '''

        create_table = '''
            DROP TABLE IF EXISTS {0}.status;
            CREATE TABLE {0}.status
            (
              package_id character varying NOT NULL,
              resource_id character varying NOT NULL,
              status character varying, -- valid, invalid, pending
              format character varying NOT NULL,
              url character varying,
              url_type character varying,
              datastore_active character varying,
              last_checked timestamp,
              last_updated timestamp,
              cpr_number character varying,
              excepted boolean,
              error character varying,
              CONSTRAINT status_pkey PRIMARY KEY (resource_id)
            )
            WITH (
              OIDS=FALSE
            );
            ALTER TABLE {0}.status
              OWNER TO cprvalidation;
            COMMENT ON COLUMN {0}.status.status IS 'valid, invalid, pending';

        '''

        try:
            host = parse_db_config().get('db_host')
            conn = psycopg2.connect(database="postgres",
                                    host=host,
                                    user="******",
                                    password=postgres_pass,
                                    port=d_port)
            # DROP/CREATE DATABASE cannot run inside a transaction block
            conn.autocommit = True
            print("Connected as postgres user.")
        except Exception as e:
            print(e)
            sys.exit(1)

        try:
            cur = conn.cursor()
            cur.execute(drop_db.format(db_name))
            cur.execute(create_db.format(db_name))
            print("Initialized Database")
        except Exception as e:
            # TODO: Handle this sort of errors more gracefully
            print("Unexpected error")
            print(e)
            sys.exit(1)
        finally:
            conn.close()

        #
        # We need two different sessions to the database as we are changing user
        #
        try:
            conn = psycopg2.connect(database=db_name,
                                    host=host,
                                    user="******",
                                    password=d_pass,
                                    port=d_port)
            conn.autocommit = True
            # Nothing has been created yet at this point, only the connection
            print("Connected to database %s." % db_name)
        except Exception as e:
            print("Woops")
            print(e)
            sys.exit(1)

        try:
            cur = conn.cursor()
            cur.execute(create_schema.format(db_name))
            cur.execute(create_table.format(db_name))
            print("Created schema and table")
            print("Done.")
        except Exception as e:
            # TODO: Handle this sort of errors more gracefully
            print("Unexpected error")
            print(e)
            sys.exit(1)
        finally:
            conn.close()
Exemplo n.º 32
0
def _open_status_connection():
    """Open a connection to the cprvalidation database; exit the process on failure."""
    try:
        host = parse_db_config().get('db_host')
        return psycopg2.connect(
            database=config.get('ckan.cprvalidation.cprvalidation_db', None),
            host=host,
            user="******",
            password=config.get('ckan.cprvalidation.cprvalidation_password',
                                None),
            port=config.get('ckan.cprvalidation.postgres_port', None))
    except Exception as e:
        print(e)
        sys.exit(1)


def _run_status_update(statement, params):
    """Run one statement against the status database, commit, and always close."""
    conn = _open_status_connection()
    try:
        conn.cursor().execute(statement, params)
        conn.commit()
    finally:
        conn.close()


def validateResource(resource):
    '''Scan one resource for CPR numbers and record the outcome.

    The resource file is read (from the local filestore, the datastore dump
    or its URL, depending on the row), converted to text by the matching
    process* helper and checked with validcpr(). The row in the status table
    is then set to 'error', 'valid' or 'invalid'. When a CPR number is found
    and no exception exists for the package, the dataset is made private and
    a notification mail is sent to the address in ckan.cprvalidation.email.

    Returns None in every case. Exits the process with status 1 when the
    database is unreachable or the package cannot be shown/updated.

           Overview of the tuple
         (
                 0 package_id character varying NOT NULL,
                 1 resource_id character varying NOT NULL,
                 2 status character varying, -- valid, invalid, pending
                 3 format character varying NOT NULL,
                 4 url character varying,
                 5 url_type character varying,
                 6 datastore_active character varying,
                 7 last_checked character varying,
                 8 last_updated character varying,
                 9 cpr_number character varying,
                 10 excepted bool,
                 11 error character varying
                )
        '''
    siteurl = config.get('ckan.site_url')
    db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)

    package_id = resource[0]
    resource_id = resource[1]
    file_format = str(resource[3]).lower()
    datastore = str(resource[6]).lower() == "true"
    filestore = resource[5] == "upload"
    file_url = resource[4]

    file_path = None
    local = False

    print("DEBUG INFO: ")
    print("Resource: " + str(resource))
    print("Datastore: " + str(datastore))
    print("Filestore: " + str(filestore))

    storage_path = config.get('ckan.storage_path')

    # Get the filepath, locally or externally, it should not matter
    if filestore:
        file_path = os.path.join(storage_path, 'resources', resource_id[0:3],
                                 resource_id[3:6], resource_id[6:])
        local = True
    elif datastore:
        file_path = siteurl + "/datastore/dump/" + resource_id + "?format=csv"
        file_format = "csv"  # Datastore will always be CSV, so this makes it easier

    print("Format: " + str(file_format))
    print("File_path: " + str(file_path))

    if file_path is None:
        print("Could not construct file_path")
        return None

    # If the s3filestore plugin is enabled, always retrieve files from HTTP
    if ckan.plugins.plugin_loaded('s3filestore'):
        local = False

    if file_format == "csv":
        output = processCSV(file_path, file_url, local)
    elif file_format == "docx":
        output = processDOCX(file_url)
    elif file_format == "ods":
        output = processODS(file_url)
    elif file_format == "xlsx":
        output = processXLSX(file_url)
    elif file_format == "pdf":
        output = processPDF(file_url)
    elif file_format == "geojson" or file_format == "json":
        output = processJSON(file_url)
    else:
        print("Format %s can't be processed" % file_format)
        return

    error = output[0]
    file_string = output[1]

    if file_string is None or error is not None:
        print(error)
        # Timestamp is UTC as CKAN stores metadata_modified as UTC
        _run_status_update(
            """
                    UPDATE {0}.status
                    SET status='error', last_checked = %s,error = %s
                    WHERE resource_id= %s
                    returning *
                ;""".format(db_name),
            [datetime.datetime.utcnow(), error, resource_id])
        return

    iscpr = validcpr(file_string)

    if not iscpr[0]:  # If we dont have a CPR in the resource
        _run_status_update(
            """
                        UPDATE {0}.status
                        SET status='valid', last_checked= %s
                        WHERE resource_id= %s
                        returning *
                    ;""".format(db_name),
            [datetime.datetime.utcnow(), resource_id])
        return

    # We have a CPR-number!
    print("Detected a CPR number, if an exception is made nothing will happen")

    select = """
                        SELECT * FROM {0}.status
                        WHERE package_id = %s AND excepted IS NOT NULL;
                    """
    update = """
                        UPDATE {0}.status
                        SET status='invalid', last_checked= %s,cpr_number=%s
                        WHERE resource_id= %s
                        returning *
            ;"""

    conn = _open_status_connection()
    try:
        current_time = datetime.datetime.utcnow()
        cur = conn.cursor()
        cur.execute(select.format(db_name), [package_id])
        if len(cur.fetchall()) > 0:
            # There was an exception made for this resource
            print("Exception was made for package with id: %s ignoring." %
                  package_id)
            return

        cur.execute(update.format(db_name),
                    [current_time, iscpr[1], resource_id])
        conn.commit()
    finally:
        # Also covers the early return above, which used to leak the connection
        conn.close()

    try:
        print("Making dataset private")
        package = get_action('package_show')({}, {'id': package_id})
    except Exception as e:
        print("Could not show package")
        print(e)
        sys.exit(1)

    try:
        # If the dataset is already private, we do not need to send an email
        # otherwise we spam
        if package["private"]:
            return
        package["private"] = True
        get_action('package_update')({}, package)
        print(
            "Made dataset with package id: " + package_id +
            " private as it contains CPR data. Either add an exception or remove it from the site"
        )
        print(
            "When an exception has been made or data altered, kindly mark data as public again"
        )

        recipient = config.get('ckan.cprvalidation.email', None)
        subject = "CPR fundet i datasæt: %s" % resource[0]
        body = "CPR data er fundet i datasættet med id: %s specifikt resourcen med id: %s \n Data er gjort privat, tjek data igennem og " \
               "publicer igen eller tilføj en exception hvis du mener data ikke indeholder CPR og kan stå" \
               " inde for dette." % (resource[0], resource_id)
        # The pipe expects bytes; a Python 2 str already is bytes
        if not isinstance(body, bytes):
            body = body.encode('utf-8')
        try:
            process = subprocess.Popen([
                'mail', '-s', subject, '-r', "*****@*****.**",
                recipient
            ],
                                       stdin=subprocess.PIPE)
            # Kept inside the try: there is no process to talk to when Popen
            # itself failed
            process.communicate(body)
        except Exception as mail_error:
            print(mail_error)

    except Exception as e:
        print("Could not update package")
        print(e)
        sys.exit(1)
Exemplo n.º 33
0
    def view(self):
        """Render the dataset catalogue as a downloadable CSV.

        Collects up to 1000 packages visible to the requester (public only for
        anonymous users or when ``?public=true``; everything for sysadmins;
        otherwise the packages of the organisations where the user may create
        datasets), enriches each package with flattened tags, a per-format and
        per-year 'S'/'N' table, the openness score, an "automatic" flag,
        Drupal comment counts and tracking statistics, and renders the
        ``cataleg.csv`` template as an attachment.

        Returns:
            The rendered ``cataleg.csv`` template.
        """
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user,
            'for_view': True,
            'auth_user_obj': c.userobj
        }

        # Read configuration; ckan.root_path may be unset, so fall back to ''
        root_path = config.get('ckan.root_path') or ''
        site_url = config.get('ckan.site_url') + root_path.replace(
            '{{LANG}}', '')

        # Initialise variables
        now = d.datetime.now()
        year_from = 1989
        year_to = now.year

        # Check whether the user is logged in
        logged_in = bool(context.get('user'))

        # When the public catalogue is requested, ignore the logged-in user
        if logged_in and request.params.get('public') == 'true':
            logged_in = False

        search_params = {'rows': 1000, 'sort': 'name asc'}
        if not logged_in:
            # Catalogue for anonymous users
            search_params['include_private'] = False
        elif c.userobj.sysadmin:
            # Catalogue for sysadmins
            search_params['include_private'] = True
        else:
            # Catalogue for logged-in users: only their organisations' datasets
            user_org = t.get_action('organization_list_for_user')(
                context, {
                    'permission': 'create_dataset'
                })
            org_facets = ' OR '.join(org['name'] for org in user_org)
            search_params['fq'] = 'organization:(' + org_facets + ')'
            search_params['include_private'] = True

        packages = t.get_action('package_search')(context, search_params)
        packages = packages['results']

        # Get the known formats
        formats = t.get_action('format_autocomplete')(context, {
            'q': '',
            'limit': 50
        })
        # The action may return duplicates, so de-duplicate through a set, and
        # drop blank entries while building a new list (removing from a list
        # during iteration skips elements).
        formats = [fmt for fmt in set(formats) if fmt.strip()]

        # Query the Drupal database for the number of comments per dataset
        dbc = parse_db_config('ckan.drupal.url')
        # Keyword arguments let psycopg2 build and quote the DSN itself
        drupal_conn = psycopg2.connect(host=dbc['db_host'],
                                       port=dbc['db_port'],
                                       dbname=dbc['db_name'],
                                       user=dbc['db_user'],
                                       password=dbc['db_pass'])
        try:
            drupal_cursor = drupal_conn.cursor()
            drupal_cursor.execute(
                """SELECT OP.pkg_name, COUNT(*) FROM opendata_package OP INNER JOIN node N ON N.tnid = OP.pkg_node_id INNER JOIN comment C ON C.nid = N.nid WHERE N.tnid != 0 GROUP BY OP.pkg_name;"""
            )

            comments = {}
            for row in drupal_cursor:
                comments[row[0]] = row[1]
            drupal_cursor.close()
        finally:
            drupal_conn.close()

        sql_downloads = '''select sum(count) AS downloads, sum(count_absolute) AS downloads_absolute, t.tracking_type, p.name from tracking_summary t
                                inner join resource r ON r.id = t.resource_id
                                inner join package p ON p.id = r.package_id
                                GROUP BY p.name, t.tracking_type;'''
        results_downloads = model.Session.execute(sql_downloads)

        downloads = {}
        downloads_absolute = {}
        api_access_number = {}
        api_access_number_absolute = {}

        for row in results_downloads:
            if row.tracking_type == 'resource':
                downloads[row.name] = row.downloads
                downloads_absolute[row.name] = row.downloads_absolute
            else:
                api_access_number[row.name] = row.downloads
                api_access_number_absolute[row.name] = row.downloads_absolute

        sql_views = '''SELECT t.tracking_date, t.running_total, t.recent_views, t.package_id
                            FROM tracking_summary t
                            INNER JOIN
                                (SELECT package_id, MAX(tracking_date) AS tracking_date
                                FROM tracking_summary 
                                GROUP BY package_id) t2
                                ON t.package_id = t2.package_id
                            INNER JOIN package p ON p.id = t.package_id
                            AND t.tracking_date = t2.tracking_date;'''
        results_views = model.Session.execute(sql_views)

        tracking_total = {}
        tracking_recent = {}

        for row in results_views:
            tracking_total[row.package_id] = row.running_total
            tracking_recent[row.package_id] = row.recent_views

        # Add the information to show for each dataset
        for package in packages:
            notes = package['notes_translated']
            for key in notes:
                if notes[key]:
                    notes[key] = notes[key].replace('\n', ' ').replace(
                        '\r', ' ')

            # Build a single string with all tags
            package['flattened_tags'] = ''.join(
                ' ' + tag['display_name'] for tag in package['tags'])

            # Collect the resource formats, the best openness score and
            # whether the dataset is automated
            flattened_formats = ','
            qa = 0
            automatic = 'N'

            if 'update_string' in package and package['update_string']:
                automatic = 'S'

            for resource in package['resources']:
                res_format = resource['format'].lower()
                # Formats are wrapped in commas so that one contained in
                # another (e.g. XLS and XLSX) does not match; the membership
                # test uses the wrapped form for the same reason.
                if res_format and (',' + res_format +
                                   ',') not in flattened_formats:
                    flattened_formats = flattened_formats + res_format + ','

                if automatic == 'N':
                    if (resource['url_type'] != 'upload'
                            and '/resources/opendata/' not in resource['url']
                            and '/resource/' + resource['id'] +
                            '/download/' not in resource['url']):
                        automatic = 'S'

                if 'qa' in resource:
                    resource_qa = ast.literal_eval(resource['qa'])
                    if resource_qa['openness_score'] > qa:
                        qa = int(resource_qa['openness_score'])

            package['flattened_formats'] = flattened_formats
            package['automatic'] = automatic
            package['qa'] = qa

            # Build the format table for the dataset
            package['formats'] = OrderedDict()
            for fmt in formats:
                present = ',' + fmt + ',' in flattened_formats
                package['formats'][fmt] = 'S' if present else 'N'

            # Build the year table for the dataset
            package['years'] = OrderedDict()
            for year in range(year_from, year_to + 1):
                tagged = 'Any ' + str(year) in package['flattened_tags']
                package['years'][year] = 'S' if tagged else 'N'

            # Escape the text fields
            self.escape_text(package)
            self.escape_translated_text(package)

            # Attach the counters, defaulting to 0 when there is no data
            name = package['name']
            package['comments'] = comments.get(name, 0)
            package['downloads'] = downloads.get(name, 0)
            package['downloads_absolute'] = downloads_absolute.get(name, 0)
            package['api_access_number'] = api_access_number.get(name, 0)
            package['api_access_number_absolute'] = \
                api_access_number_absolute.get(name, 0)
            package['tracking_total'] = tracking_total.get(package['id'], 0)
            package['tracking_recent'] = tracking_recent.get(
                package['id'], 0)

        curdate = d.datetime.now().strftime('%Y-%m-%d_%H-%M')
        t.response.headers['Content-Type'] = 'application/csv; charset=utf-8'
        t.response.headers[
            'Content-Disposition'] = 'attachment; filename=catalegBCN_' + curdate + '.csv'
        return t.render('cataleg.csv',
                        extra_vars={
                            'site_url': site_url,
                            'packages': packages,
                            'logged_in': logged_in,
                            'formats': formats,
                            'year_from': year_from,
                            'year_to': year_to,
                            'user': c.user,
                            'auth_user_obj': c.userobj,
                            'request': request
                        })
Exemplo n.º 34
0
Arquivo: db.py Projeto: hasadna/ckan
def _get_read_only_user(data_dict):
    """Return the ``db_user`` entry parsed from ``ckan.datastore.read_url``.

    ``data_dict`` is accepted but not read by this function.
    """
    return cli.parse_db_config("ckan.datastore.read_url")["db_user"]
Exemplo n.º 35
0
def _get_read_only_user(data_dict):
    """Look up the ``db_user`` part of the ``ckan.datastore.read_url`` setting.

    The ``data_dict`` argument is not used here.
    """
    read_url_parts = cli.parse_db_config('ckan.datastore.read_url')
    user_name = read_url_parts['db_user']
    return user_name
Exemplo n.º 36
0
def _get_read_only_user(data_dict):
    """Return ``db_user`` from the parsed ``ckan.datastore.read_url`` config.

    ``data_dict`` is part of the signature but is ignored.
    """
    return cli.parse_db_config('ckan.datastore.read_url')['db_user']
Exemplo n.º 37
0
def updateSchema(resources):
    """Synchronise the ``<db_name>.status`` table with ``resources``.

    Resources whose ``id`` is not yet present in the table are inserted with
    status ``"pending"``.  Resources whose ``metadata_modified`` value is not
    among the stored ``last_updated`` values get ``last_updated`` refreshed.
    Progress and errors are printed to stdout.

    :param resources: re-iterable collection of resource dicts.  The keys read
        here are ``id``, ``package_id``, ``format``, ``url``, ``url_type``,
        ``datastore_active`` and ``metadata_modified``.

    Calls ``sys.exit()`` if the database connection cannot be established.
    Errors from individual UPDATE statements and from the final commit/close
    are printed rather than raised; any other error propagates after the
    connection has been closed.
    """
    # Connect to the database
    d_port = config.get('ckan.cprvalidation.postgres_port', None)
    d_pass = config.get('ckan.cprvalidation.cprvalidation_password', None)
    db_name = config.get('ckan.cprvalidation.cprvalidation_db', None)

    try:
        db_config = parse_db_config()
        host = db_config.get('db_host')
        conn = psycopg2.connect(database=db_name,
                                host=host,
                                user="******",
                                password=d_pass,
                                port=d_port)
    except Exception as e:
        print(e)
        sys.exit()

    insert = """
                INSERT INTO {0}.status values %s
                ON CONFLICT (resource_id) DO
                  UPDATE SET last_updated = %s
                returning *
            ;"""
    update = """
                    UPDATE {0}.status SET last_updated = %s
                    WHERE resource_id = %s
                    returning *
                ;"""
    # The schema name is the same for every statement, so format only once.
    insert_sql = insert.format(db_name)
    update_sql = update.format(db_name)

    # Overview of the table order
    #  (
    #    package_id character varying NOT NULL,
    #    resource_id character varying NOT NULL,
    #    status character varying, -- valid, invalid, pending
    #    format character varying NOT NULL,
    #    url character varying,
    #    url_type character varying,
    #    datastore_active character varying,
    #    last_checked character varying,
    #    last_updated character varying,
    #    excepted BOOLEAN
    #    CONSTRAINT status_pkey PRIMARY KEY (resource_id)
    #  )

    try:
        # Fetch all resources from the database
        print("Looking for new resources..")
        cur = conn.cursor()
        print(db_name)
        cur.execute("""SELECT resource_id, last_updated FROM {0}.status;
        """.format(db_name))
        database_resources = cur.fetchall()

        known_ids = {row[0] for row in database_resources}
        known_dates = {str(row[1]) for row in database_resources}
        # These are new resources
        difference_insert = {str(r['id']) for r in resources} - known_ids
        # Stored dates use a space where metadata_modified uses "T"
        difference_update = {
            str(r['metadata_modified']).replace("T", " ") for r in resources
        } - known_dates

        # For each new resource, add them to the schema and set their status
        # to pending
        count = 0
        for resource_id in difference_insert:
            resource = find(resources, "id", resource_id)
            row = (
                resource["package_id"],
                resource["id"],
                "pending",
                str(resource["format"]).lower(),
                resource["url"],
                resource["url_type"],
                resource["datastore_active"],
                None,
                resource["metadata_modified"],
            )
            cur.execute(insert_sql, (row, resource["metadata_modified"]))
            count += 1
        print("Inserted %d new resources to the database \n" % count)

        # # #
        # Update the information for last_updated
        # # #
        count = 0
        for modified in difference_update:
            # Multiple resources can share the same metadata_modified, so
            # check them all
            matches = findall(resources, "metadata_modified",
                              modified.replace(" ", "T"))
            for resource in matches:
                last_updated = resource["metadata_modified"]
                try:
                    cur.execute(update_sql, (last_updated, resource["id"]))
                except Exception as e:
                    # NOTE(review): psycopg2 leaves the transaction aborted
                    # after a failed statement until rollback() is called, so
                    # later statements are likely to fail too -- confirm
                    # whether a savepoint/rollback is wanted here.
                    print(e)
                else:
                    # Only count statements that actually succeeded.
                    count += 1
        print("Updated %d new resources to the database \n" % count)

        try:
            conn.commit()
        except Exception as e:
            print(e)
    finally:
        # Always release the connection, even when a statement above raised.
        try:
            conn.close()
        except Exception as e:
            print(e)