Example #1
    def hash_control(self):
        """This method will create a table that keeps control about all the
        system uploaded data
        """
        import hashlib
        from datetime import datetime
        import shutil

        # Generate hash from the file contents
        h = hashlib.md5()
        f = open(self.response, 'rb')
        h.update(f.read())
        f.close()

        # Database session
        session = model.Session

        # Create the table if it doesn't exist
        setup_model()

        # First check whether the hash is already in the database
        results = session.query(DataRepository.hash).filter_by(hash=h.hexdigest()).all()

        if len(results) > 0:
            self.log('File %s has the same hash as a file already in the '
                     'database. Aborting' % self.response)
            os.remove(self.response)
            return True

        # Today's date
        file_date = datetime.today()

        # Filename hash to store
        filename, extension = os.path.splitext(os.path.basename(self.response))
        h2 = hashlib.md5()
        h2.update(str(file_date) + filename)
        filename = h2.hexdigest() + extension

        # Now add full repository path to filename
        filename2 = os.path.join(self.repository, filename)

        # Now insert the data and copy the file to the repository
        self.log('Inserting file %s in repository' % self.response)

        # Copy the file to the repository
        shutil.copy2(self.response, filename2)

        # Insert info in the database
        repository = DataRepository(hash=h.hexdigest(),
                                    creation_date=file_date,
                                    original_file=filename2,
                                    package_file=self.response)
        session.add(repository)
        session.commit()

        self.log('File inserted')

        # Remove the original file now that it is in the repository
        os.remove(self.response)

        self.response = filename2

        return False
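
The core pattern in Example #1 is content-hash de-duplication: the MD5 of the file's bytes is the key used to detect re-uploads, and a second hash of the timestamp plus the original name gives the stored copy a collision-free filename. Below is a minimal standalone sketch of that pattern, with no CKAN model involved; the helper names content_hash and repository_name are illustrative, not part of the original code.

import hashlib
import os
from datetime import datetime


def content_hash(path, chunk_size=65536):
    """Return the MD5 hex digest of a file's contents, read in chunks."""
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()


def repository_name(path, when=None):
    """Build a hashed filename that keeps the original extension."""
    when = when or datetime.today()
    name, ext = os.path.splitext(os.path.basename(path))
    return hashlib.md5((str(when) + name).encode('utf-8')).hexdigest() + ext

With these helpers the duplicate check reduces to comparing content_hash(path) against the hashes already stored in DataRepository before copying the file into the repository.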
Example #2
    def log_error(self, file_dict):
        """
        Keep a record of import errors in the error repository
        """
        import hashlib
        from datetime import datetime
        import shutil

        # Today's date
        file_date = datetime.today()

        # Generate hash from the file contents plus the date
        h = hashlib.md5()
        f = open(file_dict['tmpfile'], 'rb')
        h.update(f.read() + str(file_date))
        f.close()

        # Database session
        session = model.Session

        # Create the table if it doesn't exist
        setup_model()

        # First check whether the hash is already in the database
        results = session.query(ErrorRepository.hash).filter_by(hash=h.hexdigest()).all()

        if len(results) > 0:
            self.log('File %s has the same hash as a file already in the '
                     'database. Aborting' % file_dict['filename'])
            os.remove(file_dict['tmpfile'])
            return

        # Date-prefixed filename to store
        filename3, extension = os.path.splitext(os.path.basename(file_dict['filename']))
        filename3 = str(file_date) + '-' + filename3 + extension

        # Now add the full repository path to the filename
        filename2 = os.path.join(self.repository, 'import_errors',
                                 filename3.replace(' ', '-'))

        # Now insert the data and copy the file to the repository
        self.log('Error in file %s. Inserting in repository with message:\n%s'
                 % (file_dict['filename'], file_dict.get('errmsg')))

        # Create base dir if it doesn't exist
        if not os.path.exists(os.path.join(self.repository,'import_errors')):
            os.mkdir(os.path.join(self.repository,'import_errors'), 0770)

        # Copy file to repository
        shutil.copy2(file_dict['tmpfile'], filename2)

        # Insert info in the database
        repository = ErrorRepository(
          hash=h.hexdigest(),
          creation_date=file_date,
          original_file=filename2,
          errmsg=file_dict.get('errmsg'),
          error_type=file_dict.get('error_type'),
          package_file=file_dict.get('package_file')
        )
        session.add(repository)
        session.commit()

        self.log('File inserted')

        # Remove the temporary file
        os.remove(file_dict['tmpfile'])
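
The filing step in Example #2 (create the import_errors directory if needed, prefix the filename with the current date, and strip spaces before copying) can be isolated into a small helper. A sketch under the same assumptions as the example; the function name archive_error_file and its signature are hypothetical.

import os
import shutil
from datetime import datetime


def archive_error_file(tmpfile, original_name, repository):
    """Copy a failed import file into <repository>/import_errors using a
    date-prefixed, space-free name, and return the new path."""
    target_dir = os.path.join(repository, 'import_errors')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    name, ext = os.path.splitext(os.path.basename(original_name))
    stored = ('%s-%s%s' % (datetime.today(), name, ext)).replace(' ', '-')

    target = os.path.join(target_dir, stored)
    shutil.copy2(tmpfile, target)
    return target

The caller would then record the returned path together with the hash and error message in ErrorRepository, as the example does.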
Example #3
class lightbaseAdminController(AdminController):
    """
    Add controller to log page
    """
    def log(self):

        import sqlalchemy
        from ckan import plugins, model
        from ckanext.datadaemon.model import setup as setup_model
        from ckanext.datadaemon.model import ErrorRepository

        # Adding pagination
        q = c.q = request.params.get('q',
                                     u'')  # unicode format (decoded from utf8)
        try:
            page = int(request.params.get('page', 1))
        except ValueError:
            abort(400, '"page" parameter must be an integer')
        limit = 20

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        def search_url(params):
            url = '/ckan-admin/log'
            params = [(k, v.encode('utf-8') if isinstance(v, basestring) else str(v))
                      for k, v in params]
            return url + u'?' + urlencode(params)

        def drill_down_url(**by):
            params = list(params_nopage)
            params.extend(by.items())
            return search_url(set(params))

        c.drill_down_url = drill_down_url

        def remove_field(key, value):
            params = list(params_nopage)
            params.remove((key, value))
            return search_url(params)

        c.remove_field = remove_field

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params)

        params = request.params
        setup_model()
        session = model.Session

        # Input variables:
        # data: posting age in hours. Values: 24, 168, 720
        # data_inicio: start date
        # data_final: end date
        # tipo: error type. Values: None, ParsingError, FileRetrievalError, FileCollectionError

        # Build a query based on the input variables

        # 1 - Create a query that returns all values
        query = "SELECT e.* FROM dt_errors e WHERE 1 = 1"

        # 2 - Filter by the 'data' variable: either by period or by start
        # and end dates
        value_inicio = ""
        value_final = ""
        if params.get('data'):
            query = query + " AND creation_date >= (now() - interval '%s hours')" % params.get(
                'data')
        else:
            if params.get('data_inicio'):
                query = query + " AND creation_date >= '%s'" % params.get(
                    'data_inicio')
                value_inicio = params.get('data_inicio')
            if params.get('data_final'):
                query = query + " AND creation_date <= '%s'" % params.get(
                    'data_final')
                value_final = params.get('data_final')

        # 3 - Filter by error type
        if params.get('tipo') == 'None':
            query = query + " AND error_type is NULL"
        elif params.get('tipo'):
            query = query + " AND error_type = '%s'" % params.get('tipo')
        # 4 - Add pagination to the query
        query_pagination = query + " ORDER BY creation_date DESC LIMIT %s OFFSET %s" % (
            limit, (page - 1) * limit)

        error_list = session.query(ErrorRepository).from_statement(
            query_pagination).all()
        error_list2 = session.query(ErrorRepository).from_statement(
            query).all()
        from os.path import basename
        retorno = list()
        for lista in error_list:
            lista.original_file = basename(lista.original_file)
            retorno.append(lista)

        tipos_de_erros = session.query(
            ErrorRepository.error_type).distinct().all()
        data = params.get('data')
        tipo = params.get('tipo')
        x = {
            'valor': retorno,
            'valor2': tipos_de_erros,
            'valor3': value_inicio,
            'valor4': value_final,
            'valor5': data,
            'valor6': tipo
        }

        c.page = h.Page(collection=error_list2,
                        page=page,
                        url=pager_url,
                        item_count=len(error_list2),
                        items_per_page=limit)
        # -------------- File download --------------
        from paste.fileapp import FileApp
        import mimetypes
        if params.get('arquivo'):
            filepath = session.query(ErrorRepository.original_file).filter(
                ErrorRepository.hash == params.get('arquivo')).all()
            content_type, encoding = mimetypes.guess_type(str(filepath[0][0]))
            if content_type:
                headers = [
                    ('Content-Disposition',
                     'attachment; filename="' + str(filepath[0][0]) + '"'),
                    ('Content-Type', str(content_type))
                ]
            else:
                headers = [
                    ('Content-Disposition',
                     'attachment; filename="' + str(filepath[0][0]) + '"'),
                    ('Content-Type', 'application/octet-stream')
                ]

            fapp = FileApp(str(filepath[0][0]), headers)
            return fapp(request.environ, self.start_response)
        else:
            return render('admin/log.html', extra_vars=x)
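
Example #3 builds the dt_errors filter by interpolating request parameters directly into the SQL string. The same filter can be expressed with bound parameters and passed to from_statement(); the sketch below assumes PostgreSQL (the original query already relies on now() and interval) and a SQLAlchemy version whose Query supports .params(). The function name build_error_query is illustrative, not part of the original code.

from sqlalchemy import text


def build_error_query(params, limit, page):
    """Build the dt_errors filter with bound parameters instead of
    string interpolation (sketch)."""
    sql = "SELECT e.* FROM dt_errors e WHERE 1 = 1"
    bind = {}

    if params.get('data'):
        # Period filter, in hours
        sql += " AND creation_date >= now() - interval '1 hour' * :hours"
        bind['hours'] = int(params.get('data'))
    else:
        if params.get('data_inicio'):
            sql += " AND creation_date >= :start"
            bind['start'] = params.get('data_inicio')
        if params.get('data_final'):
            sql += " AND creation_date <= :end"
            bind['end'] = params.get('data_final')

    if params.get('tipo') == 'None':
        sql += " AND error_type IS NULL"
    elif params.get('tipo'):
        sql += " AND error_type = :tipo"
        bind['tipo'] = params.get('tipo')

    sql += " ORDER BY creation_date DESC LIMIT :limit OFFSET :offset"
    bind['limit'] = limit
    bind['offset'] = (page - 1) * limit

    return text(sql), bind

It would replace the string-built query like this:

stmt, bind = build_error_query(params, limit, page)
error_list = session.query(ErrorRepository).from_statement(stmt).params(**bind).all()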