def check_project_indexing_finished(project_name):
    """Check if we can find lopq_model and lopd_codes.
    """
    if data['projects'][project_name]['status'] == 'indexing' or data['projects'][project_name]['status'] == 'rerunning':
        # look for columns lopq_model and lopd_codes in hbase update table row of this ingestion
        ingestion_id = data['projects'][project_name]['ingestion_id']
        logger.info('[check_project_indexing_finished: log] checking if ingestion %s has completed.' % (ingestion_id))
        try:
            from happybase.connection import Connection
            conn = Connection(config['image']['hbase_host'])
            table = conn.table(config['image']['hbase_table_updates'])
            columns = [config['image']['lopq_model_column'], config['image']['lopq_codes_column']]
            row = table.row(ingestion_id, columns=columns)
            # if found, copy to domain data folder
            if len(row) == len(columns):
                logger.info('[check_project_indexing_finished: log] ingestion %s looks complete' % (ingestion_id))
                # copy codes first
                local_codes_path = os.path.join(_get_domain_dir_path(data['projects'][project_name]['domain']), config['image']['lopq_codes_local_suffix'])
                _copy_from_hdfs(row[config['image']['lopq_codes_column']], local_codes_path)
                local_model_path = os.path.join(_get_domain_dir_path(data['projects'][project_name]['domain']), config['image']['lopq_model_local_suffix'])
                _copy_from_hdfs(row[config['image']['lopq_model_column']], local_model_path)
                if os.path.exists(local_codes_path) and os.path.exists(local_model_path):
                    logger.info('[check_project_indexing_finished: log] ingestion %s has completed and should be ready now...' % (ingestion_id))
                    data['projects'][project_name]['status'] = 'ready'
                else:
                    data['projects'][project_name]['status'] = 'failed'
                    logger.error('[check_project_indexing_finished: log] ingestion %s has completed but local copy failed...' % (ingestion_id))
                    # for debugging, store: row[config['image']['lopq_codes_column']], row[config['image']['lopq_model_column']]
            else:
                # not all columns are there yet: check whether the indexing job is still running
                job_id = data['projects'][project_name]['job_id']
                output = get_job_info(job_id)
                if output['status'] == 'RUNNING':
                    pass # we just have to wait for the job to end
                else:
                    # the job is no longer running, so it failed: retry once, then mark the project as failed
                    if data['projects'][project_name]['status'] == 'indexing':
                        # TODO: check why resubmission does not work.
                        # try to rerun once
                        logger.info('[check_project_indexing_finished: log] rerunning ingestion %s which has failed once...' % (ingestion_id))
                        endpt = "/cu_imgsearch_manager/projects/{}".format(project_name)
                        #pingback_url = config['image']['base_service_url']+endpt
                        pingback_url = config['image']['base_service_url_vpn']+endpt
                        domain_name = data['projects'][project_name]['domain']
                        rerun_output = rerun_job(job_id, data['projects'][project_name]['ingestion_id'], data['domains'][domain_name]['table_sha1infos'], pingback_url)
                        data['projects'][project_name]['status'] = 'rerunning'
                        logger.info('[check_project_indexing_finished: log] resubmission output was: {}'.format(rerun_output))
                    elif data['projects'][project_name]['status'] == 'rerunning':
                        logger.info('[check_project_indexing_finished: log] ingestion %s has failed twice...' % (ingestion_id))
                        logger.info('[check_project_indexing_finished: log] job info output was: {}'.format(output))
                        data['projects'][project_name]['status'] = 'failed'
                        # for debugging store info: output
        except Exception as inst:
            logger.error('[check_project_indexing_finished: error] {}'.format(inst))
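For reference, here is a minimal sketch of what the `_copy_from_hdfs` helper used above could look like, assuming the `hadoop` CLI is available on the host and `os` and `shutil` are imported at module level; the actual implementation may differ:

def _copy_from_hdfs(hdfs_path, local_path):
    """Copy a file or directory from HDFS to the local filesystem (illustrative sketch)."""
    import subprocess
    # remove any stale local copy so `hadoop fs -get` does not fail
    if os.path.isdir(local_path):
        shutil.rmtree(local_path)
    elif os.path.exists(local_path):
        os.remove(local_path)
    # `-get` copies from HDFS to the local filesystem
    subprocess.check_call(['hadoop', 'fs', '-get', hdfs_path, local_path])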
    def delete(self, project_name):
        if project_name not in data['projects']:
            return rest.not_found()
        try:
            project_lock.acquire(project_name)
            # - get corresponding domain
            domain_name = data['projects'][project_name]['domain']
            # - delete ingestion_id row from hbase updates table
            # should we delete corresponding files on HDFS?
            # delete hbase table sha1_infos?
            ingestion_id = data['projects'][project_name]['ingestion_id']
            from happybase.connection import Connection
            conn = Connection(config['image']['hbase_host'])
            table = conn.table(config['image']['hbase_table_updates'])
            table.delete(ingestion_id, columns=['info:lopq_codes_path', 'info:lopq_model_pkl'])
            # remove project:
            # - from current data dict
            del data['projects'][project_name]
            # - files associated with project
            shutil.rmtree(os.path.join(_get_project_dir_path(project_name)))
            # - from mongodb
            db_projects.delete_one({'project_name':project_name})
            msg = 'project {} has been deleted'.format(project_name)
            logger.info(msg)
            # if it's the last project from this domain, should we remove the domain?
            # for now assume one project per domain and delete too
            # stop and remove docker container
            docker_name = data['domains'][domain_name]['docker_name']
            subproc = sub.Popen("sudo docker stop {}; sudo docker rm {}".format(docker_name, docker_name), shell=True)
            # cleanup ports list
            data['ports'].remove(data['domains'][domain_name]['port'])
            # remove domain:
            # - from current data dict
            del data['domains'][domain_name]
            # - files associated with the domain
            shutil.rmtree(os.path.join(_get_domain_dir_path(domain_name)))
            # - from mongodb
            db_domains.delete_one({'domain_name':domain_name})
            # should we also clean up things in HDFS?...
            msg2 = 'domain {} has been deleted'.format(domain_name)
            logger.info(msg2)
            # regenerate apache conf from scratch for domains that are still active.
            reset_apache_conf()

            return rest.deleted(msg+' '+msg2)
        except Exception as e:
            logger.error('deleting project %s: %s' % (project_name, e))
            return rest.internal_error('deleting project %s error, halted.' % project_name)
        finally:
            project_lock.remove(project_name)
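Note that the `sub.Popen` call above returns immediately and ignores the exit status of the docker commands. A hedged alternative that avoids shell=True and waits for each command to finish (assuming the same `sudo docker` setup) could look like:

def _remove_docker_container(docker_name):
    """Stop and remove the docker container of a domain (illustrative sketch)."""
    import subprocess
    for action in ('stop', 'rm'):
        # check_call blocks and raises CalledProcessError if docker exits non-zero
        subprocess.check_call(['sudo', 'docker', action, docker_name])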
def set_connection():
    client = client_mod.Client(admin=True)
    instance = client.instance(INSTANCE_ID, LOCATION_ID)
    operation = instance.create()
    if not _wait_until_complete(operation):
        raise RuntimeError('Instance creation exceeded 5 seconds.')
    Config.CONNECTION = Connection(instance=instance)
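The `_wait_until_complete` helper is not shown here; one possible shape, assuming the operation object exposes a `finished()` poll method (an illustrative sketch, not the actual helper):

def _wait_until_complete(operation, max_seconds=5):
    """Poll `operation` once a second for up to `max_seconds` (illustrative sketch)."""
    import time
    for _ in range(max_seconds):
        if operation.finished():
            return True
        time.sleep(1)
    return operation.finished()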
def get_create_table(table_name, options, families=None):
    # NB: avoid a mutable dict as default argument; default to a single 'info' column family
    if families is None:
        families = {'info': dict()}
    try:
        from happybase.connection import Connection
        conn = Connection(options.hbase_ip)
        try:
            # conn.table() raises no exception if the table does not exist;
            # accessing families() is what forces the error
            table = conn.table(table_name)
            _ = table.families()
            return table
        except Exception:
            print("[get_create_table: info] table {} does not exist (yet)".format(table_name))
            conn.create_table(table_name, families)
            table = conn.table(table_name)
            print("[get_create_table: info] created table {}".format(table_name))
            return table
    except Exception as inst:
        print(inst)
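A hypothetical call site, assuming `options` is an argparse-style namespace carrying the HBase host (the host and table name below are illustrative):

import argparse
options = argparse.Namespace(hbase_ip='localhost')
table = get_create_table('images_sha1_infos_test', options)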
    def get_create_table(self,
                         table_name,
                         conn=None,
                         families=None):
        # NB: avoid a mutable dict as default argument; default to a single 'info' column family
        if families is None:
            families = {'info': dict()}
        try:
            if conn is None:
                from happybase.connection import Connection
                conn = Connection(self.hbase_host)
            try:
                # conn.table() raises no exception if the table does not exist;
                # accessing families() is what forces the error
                table = conn.table(table_name)
                _ = table.families()
                return table
            except Exception as inst:
                # act differently based on error type: re-raise connection issues,
                # create the table if it is actually missing
                if isinstance(inst, TTransportException):
                    raise inst
                else:
                    print("[{}.get_create_table: info] table {} does not exist (yet): {}{}".format(
                        self.pp, table_name, type(inst), inst))
                    conn.create_table(table_name, families)
                    table = conn.table(table_name)
                    print("[{}.get_create_table: info] created table {}".format(
                        self.pp, table_name))
                    return table
        except Exception as inst:
            # may fail if the families do not match those of an existing table, or on connection issues
            raise inst
    def __init__(self, size, **kwargs):
        if not isinstance(size, six.integer_types):
            raise TypeError('Pool size arg must be an integer')

        if size < _MIN_POOL_SIZE:
            raise ValueError('Pool size must be positive')

        self._lock = threading.Lock()
        self._queue = six.moves.queue.LifoQueue(maxsize=size)
        self._thread_connections = threading.local()

        connection_kwargs = kwargs
        connection_kwargs['autoconnect'] = False
        if 'instance' not in connection_kwargs:
            connection_kwargs['instance'] = _get_instance(
                timeout=kwargs.get('timeout'))

        for _ in six.moves.range(size):
            connection = Connection(**connection_kwargs)
            self._queue.put(connection)
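The LIFO queue here is a deliberate choice: connections checked back in go on top, so the most recently used (and most likely still warm) connection is handed out first. A checkout helper in the spirit of happybase's ConnectionPool.connection() could be sketched as a method on this pool class (illustrative, not the library's actual implementation):

import contextlib

@contextlib.contextmanager
def connection(self, timeout=None):
    """Borrow a connection from the pool for the duration of a with-block (sketch)."""
    # block until a connection is free; queue.Empty is raised after `timeout`
    conn = self._queue.get(block=True, timeout=timeout)
    try:
        yield conn
    finally:
        # always return the connection, even if the block raised
        self._queue.put(conn)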
    def get_create_table(self, table_name, conn=None, families=None):
        """Get HBase table "table_name", creating it if it does not exist yet.

        :param table_name: name of the table to get or create.
        :type table_name: string
        :param conn: happybase connection
        :type conn: :class:`happybase.Connection`
        :param families: dictionary of column families (see ``get_dictcf_sha1_table`` and ``get_dictcf_update_table``)
        :type families: dict
        :return: table
        :rtype: :class:`happybase.Table`
        """
        if conn is None:
            from happybase.connection import Connection
            conn = Connection(host=self.hbase_host, port=self.hbase_port)
        try:
            # as no exception is raised if the table does not exist...
            table = conn.table(table_name)
            # ...try to access families to get an error if the table does not exist yet
            _ = table.families()
            # table exists, return it
            return table
        except Exception as inst:
            # act differently based on error type (connection issue or table actually missing)
            if isinstance(inst, TTransportException):
                raise inst
            else:
                # we need to create the table
                msg = "[{}.get_create_table: info] table {} does not exist (yet): {}{}"
                print(msg.format(self.pp, table_name, type(inst), inst))
                # but we need to know which column families it should contain
                if families is None:
                    msg = "[{}.get_create_table: ERROR] table {} does not exist and 'families' not provided"
                    raise ValueError(msg.format(self.pp, table_name))
                # create the table...
                conn.create_table(table_name, families)
                table = conn.table(table_name)
                msg = "[{}.get_create_table: info] created table {}"
                print(msg.format(self.pp, table_name))
                # ...and return it
                return table
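A hypothetical call site, assuming the surrounding class provides `hbase_host`, `hbase_port` and `pp`, and with illustrative object, table, and family names:

families = {'info': dict()}  # one 'info' column family with default options
table = indexer.get_create_table('update_infos_test', families=families)
for key, row in table.scan(limit=10):
    print(key, row)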