Example #1
    def _backend_setup(self, server=True, *args, **kwargs):
        """
    Args:
      server (bool): Whether this is the client or a server

    Raises:
      TurbiniaException: When there are errors creating PSQ Queue
    """

        log.debug(
            'Setting up PSQ Task Manager requirements on project {0:s}'.format(
                config.TURBINIA_PROJECT))
        self.server_pubsub = turbinia_pubsub.TurbiniaPubSub(
            config.PUBSUB_TOPIC)
        if server:
            self.server_pubsub.setup_subscriber()
        else:
            self.server_pubsub.setup_publisher()
        psq_publisher = pubsub.PublisherClient()
        psq_subscriber = pubsub.SubscriberClient()
        datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
        try:
            self.psq = psq.Queue(
                psq_publisher,
                psq_subscriber,
                config.TURBINIA_PROJECT,
                name=config.PSQ_TOPIC,
                storage=psq.DatastoreStorage(datastore_client))
        except exceptions.GoogleCloudError as e:
            msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
            log.error(msg)
            raise turbinia.TurbiniaException(msg)
Example #2
def get_visitors_queue():
    client = pubsub.Client(project=current_app.config['PROJECT_ID'])

    # Create a queue specifically for processing books and pass in the
    # Flask application context. This ensures that tasks will have access
    # to any extensions / configuration specified to the app, such as
    # models.
    return psq.Queue(client, 'books', extra_context=current_app.app_context)
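
Because the queue is created with extra_context=current_app.app_context, the worker enters that application context before executing each task, so task code can use current_app and app-bound extensions. A minimal sketch of such a task (the process_book name and the CLOUD_STORAGE_BUCKET config key are hypothetical, not taken from the sample):

from flask import current_app


def process_book(book_id):
    # Executed on the worker inside the Flask app context supplied via
    # extra_context, so current_app resolves to the web application.
    bucket = current_app.config['CLOUD_STORAGE_BUCKET']  # hypothetical key
    current_app.logger.info(
        'Processing book %s into bucket %s', book_id, bucket)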
Example #3
def get_daily_fetch_queue():
    # Uses current_app, so this must be called inside "with app.app_context():".
    project = current_app.config['PROJECT_ID']
    return psq.Queue(publisher_client,
                     subscriber_client,
                     project,
                     'daily_fetch_queue',
                     extra_context=current_app.app_context)
Example #4
    def __init__(self, jobs_denylist=None, jobs_allowlist=None):
        """Initialization for PSQ Worker.

    Args:
      jobs_denylist (Optional[list[str]]): Jobs we will exclude from running
      jobs_allowlist (Optional[list[str]]): The only Jobs we will include to run
    """
        setup()
        psq_publisher = pubsub.PublisherClient()
        psq_subscriber = pubsub.SubscriberClient()
        datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
        try:
            self.psq = psq.Queue(
                psq_publisher,
                psq_subscriber,
                config.TURBINIA_PROJECT,
                name=config.PSQ_TOPIC,
                storage=psq.DatastoreStorage(datastore_client))
        except exceptions.GoogleCloudError as e:
            msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
            log.error(msg)
            raise TurbiniaException(msg)

        # Deregister jobs from denylist/allowlist.
        job_manager.JobsManager.DeregisterJobs(jobs_denylist, jobs_allowlist)
        disabled_jobs = list(
            config.DISABLED_JOBS) if config.DISABLED_JOBS else []
        disabled_jobs = [j.lower() for j in disabled_jobs]
        # Only actually disable jobs that have not been allowlisted.
        if jobs_allowlist:
            disabled_jobs = list(set(disabled_jobs) - set(jobs_allowlist))
        if disabled_jobs:
            log.info(
                'Disabling non-allowlisted jobs configured to be disabled in the '
                'config file: {0:s}'.format(', '.join(disabled_jobs)))
            job_manager.JobsManager.DeregisterJobs(jobs_denylist=disabled_jobs)

        # Check for valid dependencies/directories.
        dependencies = config.ParseDependencies()
        if config.DOCKER_ENABLED:
            try:
                check_docker_dependencies(dependencies)
            except TurbiniaException as e:
                log.warning(
                    'DOCKER_ENABLED=True is set in the config, but there is an '
                    'error checking for the docker daemon: {0:s}'.format(
                        str(e)))
        check_system_dependencies(dependencies)
        check_directory(config.MOUNT_DIR_PREFIX)
        check_directory(config.OUTPUT_DIR)
        check_directory(config.TMP_DIR)
        register_job_timeouts(dependencies)

        jobs = job_manager.JobsManager.GetJobNames()
        log.info('Dependency check complete. The following jobs are enabled '
                 'for this worker: {0:s}'.format(','.join(jobs)))
        log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
        self.worker = psq.Worker(queue=self.psq)
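
The allowlist handling above is plain set arithmetic: a configured-disabled job survives only if it is also allowlisted. A minimal sketch with hypothetical job names:

# Hypothetical values illustrating the set difference above.
disabled_jobs = ['jenkinsanalysisjob', 'photorecjob']  # lowercased from config
jobs_allowlist = ['photorecjob']

# Allowlisted jobs are kept; only the remainder is deregistered.
print(list(set(disabled_jobs) - set(jobs_allowlist)))  # ['jenkinsanalysisjob']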
Example #5
def get_moviess_queue():
    project = current_app.config['PROJECT_ID']

    # Create a queue specifically for processing movies and pass in the
    # Flask application context. This ensures that tasks will have access
    # to any extensions / configuration specified to the app, such as
    # models.
    return psq.Queue(publisher_client,
                     subscriber_client,
                     project,
                     'moviess',
                     extra_context=current_app.app_context)
Example #6
    def __init__(self, jobs_blacklist=None, jobs_whitelist=None):
        """Initialization for PSQ Worker.

    Args:
      jobs_blacklist (Optional[list[str]]): Jobs we will exclude from running
      jobs_whitelist (Optional[list[str]]): The only Jobs we will include to run
    """
        config.LoadConfig()
        psq_publisher = pubsub.PublisherClient()
        psq_subscriber = pubsub.SubscriberClient()
        datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
        try:
            self.psq = psq.Queue(
                psq_publisher,
                psq_subscriber,
                config.TURBINIA_PROJECT,
                name=config.PSQ_TOPIC,
                storage=psq.DatastoreStorage(datastore_client))
        except exceptions.GoogleCloudError as e:
            msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
            log.error(msg)
            raise TurbiniaException(msg)

        # Deregister jobs from blacklist/whitelist.
        disabled_jobs = list(
            config.DISABLED_JOBS) if config.DISABLED_JOBS else []
        job_manager.JobsManager.DeregisterJobs(jobs_blacklist, jobs_whitelist)
        if disabled_jobs:
            log.info(
                'Disabling jobs that were configured to be disabled in the '
                'config file: {0:s}'.format(', '.join(disabled_jobs)))
            job_manager.JobsManager.DeregisterJobs(
                jobs_blacklist=disabled_jobs)

        # Check for valid dependencies/directories.
        dependencies = config.ParseDependencies()
        if config.DOCKER_ENABLED:
            check_docker_dependencies(dependencies)
        check_system_dependencies(dependencies)
        check_directory(config.MOUNT_DIR_PREFIX)
        check_directory(config.OUTPUT_DIR)
        check_directory(config.TMP_DIR)

        jobs = job_manager.JobsManager.GetJobNames()
        log.info(
            'Dependency check complete. The following jobs will be enabled '
            'for this worker: {0:s}'.format(','.join(jobs)))
        log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
        self.worker = psq.Worker(queue=self.psq)
Example #7
  def _backend_setup(self):
    log.debug(
        'Setting up PSQ Task Manager requirements on project {0:s}'.format(
            config.PROJECT))
    self.server_pubsub = turbinia_pubsub.TurbiniaPubSub(config.PUBSUB_TOPIC)
    self.server_pubsub.setup()
    psq_pubsub_client = pubsub.Client(project=config.PROJECT)
    datastore_client = datastore.Client(project=config.PROJECT)
    try:
      self.psq = psq.Queue(
          psq_pubsub_client,
          config.PSQ_TOPIC,
          storage=psq.DatastoreStorage(datastore_client))
    except GaxError as e:
      msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
      log.error(msg)
      raise turbinia.TurbiniaException(msg)
Example #8
  def _backend_setup(self):
    psq_publisher = pubsub.PublisherClient()
    psq_subscriber = pubsub.SubscriberClient()
    datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
    try:
      self.psq = psq.Queue(
          psq_publisher,
          psq_subscriber,
          config.TURBINIA_PROJECT,
          name=config.PSQ_TOPIC,
          storage=psq.DatastoreStorage(datastore_client))
    except exceptions.GoogleCloudError as e:
      msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
      log.error(msg)
      raise TurbiniaException(msg)
    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
Example #9
  def __init__(self, *_, **__):
    """Initialization for PSQ Worker."""
    config.LoadConfig()
    psq_publisher = pubsub.PublisherClient()
    psq_subscriber = pubsub.SubscriberClient()
    datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
    try:
      self.psq = psq.Queue(
          psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
          name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
    except exceptions.GoogleCloudError as e:
      msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
      log.error(msg)
      raise TurbiniaException(msg)

    check_directory(config.MOUNT_DIR_PREFIX)
    check_directory(config.OUTPUT_DIR)
    check_directory(config.TMP_DIR)

    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
Example #10
def scrape_reddit_task(subreddit, pages=20):
    # Create the client and queue once, outside the loop, instead of
    # re-creating them on every iteration.
    q = psq.Queue(pubsub.Client(), 'images')
    for image_urls in scrape_reddit(subreddit, pages):
        q.enqueue('main.label_images_task', image_urls)
        print("Enqueued {} images".format(len(image_urls)))
Example #11
def label_images(vision, storage, image_urls):
    # The source snippet begins mid-function here; image_contents is
    # prepared from image_urls earlier in the original code.
    response = vision.detect_labels(image_contents)

    for image_url, labels in zip(image_urls, response):
        storage.add_labels(labels)
        storage.add_image(image_url, labels)


def label_images_task(image_urls):
    vision = VisionApi()
    storage = Storage()

    label_images(vision, storage, image_urls)


def scrape_reddit(subreddit, pages=10):
    after = None

    for _ in range(pages):
        posts, after = reddit.get_hot(subreddit, after=after)
        yield reddit.get_previews(posts)


def scrape_reddit_task(subreddit, pages=20):
    # Create the client and queue once, outside the loop, instead of
    # re-creating them on every iteration.
    q = psq.Queue(pubsub.Client(), 'images')
    for image_urls in scrape_reddit(subreddit, pages):
        q.enqueue('main.label_images_task', image_urls)
        print("Enqueued {} images".format(len(image_urls)))


q = psq.Queue(pubsub.Client(), 'images')
Example #12
from gcloud import datastore, pubsub
import psq
import tasks

PROJECT_ID = 'your-project-id'

pubsub_client = pubsub.Client(project=PROJECT_ID)
datastore_client = datastore.Client(project=PROJECT_ID)

q = psq.Queue(
    pubsub_client,
    storage=psq.DatastoreStorage(datastore_client))


def main():
    q.enqueue(tasks.slow_task)
    q.enqueue(tasks.print_task, "Hello, World")
    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))


if __name__ == '__main__':
    main()
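
The tasks module referenced here is not shown; a minimal sketch of what its three functions might look like, assuming they simply sleep, print, and add as their names suggest:

# tasks.py -- hypothetical implementations matching the calls above.
import time


def slow_task():
    # Stand-in for long-running work.
    time.sleep(10)


def print_task(message):
    print(message)


def adder(a, b):
    # The return value is persisted by DatastoreStorage and read back
    # through r.result() in main().
    return a + b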
Example #13
from google.cloud import datastore, pubsub
import psq
import tasks

PROJECT_ID = 'junyiacademytest1'

pubsub_client = pubsub.Client(project=PROJECT_ID)
datastore_client = datastore.Client(project=PROJECT_ID)
storage = psq.DatastoreStorage(datastore_client)

q = psq.Queue(pubsub_client, storage=storage)


def main():
    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))
    storage.delete_task(r.task_id)


if __name__ == '__main__':
    main()
Example #14
import logging

from google.cloud import datastore
from google.cloud import pubsub_v1
import psq
import tasks

PROJECT_ID = 'your-project-id'  # CHANGE ME

publisher_client = pubsub_v1.PublisherClient()
subscriber_client = pubsub_v1.SubscriberClient()
datastore_client = datastore.Client(project=PROJECT_ID)

q = psq.Queue(publisher_client,
              subscriber_client,
              PROJECT_ID,
              storage=psq.DatastoreStorage(datastore_client))


def main():
    q.enqueue(tasks.slow_task)
    q.enqueue(tasks.print_task, "Hello, World")
    r = q.enqueue(tasks.adder, 1, 5)
    print(r.result(timeout=10))


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()
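
These enqueue calls only publish work to Pub/Sub; a separate worker process has to consume the queue, as the Turbinia examples above do with psq.Worker. A minimal sketch of that consuming side, assuming the module above is importable as main:

# worker.py -- hypothetical worker entry point for the queue above.
import logging

import psq

from main import q

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    worker = psq.Worker(queue=q)
    # Blocks, pulling tasks from the Pub/Sub subscription and executing them.
    worker.listen()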
Example #15
def get_books_queue():
    ps_client = pubsub.Client(project=current_app.config['PROJECT_ID'])
    return psq.Queue(ps_client, extra_context=current_app.app_context)
Example #16
def get_scraper_queue():
    project = current_app.config['PROJECT_ID']

    return psq.Queue(
        publisher_client, subscriber_client, project,
        'nogizaka_scraper', extra_context=current_app.app_context)