import logging

from sqs_wrapper import SQSWrapper

logger = logging.getLogger('sqs_reader')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

sqs = SQSWrapper('default')

for work_item in sqs.work_items():
    logger.info(f'Got work item "{work_item}"')
    sqs.report_status(work_item,
                      f'Reporting status for item {work_item}',
                      is_fatal=False,
                      is_complete=True)
import subprocess
import glob
import csv
import sys
import os
import requests
import shutil
import json
from pathlib import Path

from sqs_wrapper import SQSWrapper

sqs = SQSWrapper('default', 'igv')


def run(cmd: str) -> None:
    subprocess.run(cmd, shell=True, check=True, stdout=sys.stdout)


def get_bam(job_info: str, sample: str, token_file: str, chrom: str,
            bam_window_start: int, bam_window_end: int,
            variant_junction: str) -> None:
    print('Obtaining bam manifest file')
    # URL-encoded GDC files-endpoint query: TCGA RNA-Seq BAM files filtered to
    # the given sample submitter_id, returned as a download manifest.
    bam_url = 'https://api.gdc.cancer.gov/files?filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.data_format%22%2C%22value%22%3A%5B%22BAM%22%5D%7D%7D%2C%7B%22op%22%3A%22AND%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.experimental_strategy%22%2C%22value%22%3A%5B%22RNA-Seq%22%5D%7D%7D%2C%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.project.program.name%22%2C%22value%22%3A%5B%22TCGA%22%5D%7D%7D%2C%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.samples.submitter_id%22%2C%22value%22%3A%5B%22{0}%22%5D%7D%7D%5D%7D%5D%7D%5D%7D%5D%7D&query=files.data_format%20in%20%5B%22BAM%22%5D%20and%20files.experimental_strategy%20in%20%5B%22RNA-Seq%22%5D%20AND%20cases.project.program.name%20IN%20%5BTCGA%5D%20and%20cases.samples.submitter_id%20IN%20%5B%22P{0}&return_type=manifest'.format(
        sample)
    response = requests.get(bam_url)
    if response.status_code != 200:
        sqs.report_status(
            job_info,
            f'Failed to download bam manifest file: HTTP Status Code: {response.status_code}'
        )
        sqs.queue_error_item(job_info)
            subprocess.run(f'aws s3 cp {self.sample}.tar.gz {self.s3_archive_upload_url}/{self.cohort}/{self.sample}.tar.gz',
                           shell=True, check=True)
        else:
            self.logger.error('S3 Archive Upload URL is not set - archive file not uploaded')


logger = logging.getLogger('regtools_reader')
logger.setLevel(logging.DEBUG)
log_formatter = logging.Formatter('%(asctime)s:%(levelname)7s:%(name)s:%(message)s')
log_formatter.default_msec_format = '%s.%03d'
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(log_formatter)
stream_handler.setLevel(logging.DEBUG)
logger.addHandler(stream_handler)

sqs = SQSWrapper('default')

try:
    logger.info('Waiting for work item')
    for sample_id in sqs.work_items():
        filesystem_path = 'regtools_wd_' + sample_id.split(';')[1]
        try:
            workflow = RegtoolsWorkflow(sample_id=sample_id,
                                        filesystem_path=filesystem_path,
                                        logger=logger,
                                        s3_token_download_url='s3://regtools-cwl-sharedfiles/gdc-user-token.txt',
                                        s3_archive_upload_url='s3://regtools-results-unstranded')
            file_handler = logging.FileHandler(f'{filesystem_path}/{sample_id}.log')
            file_handler.setFormatter(log_formatter)
            file_handler.setLevel(logging.DEBUG)
import logging
import sys
from pathlib import Path

from sqs_wrapper import SQSWrapper

logger = logging.getLogger('sqs_writer')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

sqs = SQSWrapper('default', 'igv')

if len(sys.argv) != 2:
    print('Input file required')
    sys.exit(-1)

inputfile = Path(sys.argv[1])
# if not inputfile.exists():
#     print('Input file does not exist')
#     sys.exit(-1)

logger.info('Attempting to queue items')
# with open(inputfile, 'r') as f:
#     items = f.readlines()
#     for item in items:
    face_locations = face_recognition.face_locations(
        test_image, number_of_times_to_upsample=0, model="cnn")
    no = len(face_locations)
    for i in range(no):
        test_image_enc = face_recognition.face_encodings(
            test_image, face_locations, model="large")[i]
        np_name = clf.predict([test_image_enc])
        name = np_name.tolist()[0]
        if name == 'kusudaaina':
            # Save a copy of the matching image, named by tweet id and original filename.
            pil_image = Image.fromarray(test_image)
            split_path = os.path.split(image_url)
            pil_image.save(work_dir_root + 'save/' + tweet_id + "_" + split_path[1], quality=95)
            del pil_image
            return True
    return False


work_dir_root = "/home/hogesako/poc_face_recognition/work/"
with open(work_dir_root + 'model.pickle', mode='rb') as fp:
    clf = pickle.load(fp)

sqs = SQSWrapper()
while True:
    tweets = sqs.fetch_tweet()
    print('tweet count:' + str(len(tweets)))
    for tweet in tweets:
        analyze(tweet)
import datetime
import json
import logging

from sqs_wrapper import SQSWrapper

logger = logging.getLogger('sqs_status')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

sqs = SQSWrapper('default')

logger.info('Awaiting status messages')
for status_item in sqs.status_items():
    work_item = status_item['work_item']
    message = status_item['message']
    is_fatal = status_item['is_fatal']
    is_complete = status_item['is_complete']
    hostname = status_item['hostname']
    logger.info(f'Got status item "{status_item}"')
    if is_fatal:
        # Open in append mode so earlier failures are not overwritten on each fatal status.
        with open('failures_v5.tsv', 'a') as f:
            f.write(f'{datetime.datetime.now()}\t{work_item}\t{message}\n')
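# ------------------------------------------------------------------------------
# Hedged sketch (not part of the original scripts): roughly what the SQSWrapper
# calls used above could look like if expressed directly against boto3. The
# queue names ('work-default', 'status-default') and the JSON message layout
# are assumptions for illustration; only the status-dict keys read by the
# sqs_status script (work_item, message, is_fatal, is_complete, hostname) and
# the method names are taken from the scripts themselves.
# ------------------------------------------------------------------------------
import json
import socket

import boto3

sqs = boto3.resource('sqs')
work_queue = sqs.get_queue_by_name(QueueName='work-default')      # hypothetical name
status_queue = sqs.get_queue_by_name(QueueName='status-default')  # hypothetical name


def work_items():
    """Long-poll the work queue and yield message bodies (cf. SQSWrapper.work_items)."""
    while True:
        for message in work_queue.receive_messages(MaxNumberOfMessages=1,
                                                   WaitTimeSeconds=20):
            body = message.body
            message.delete()
            yield body


def report_status(work_item, message, is_fatal=False, is_complete=False):
    """Publish a JSON status dict (cf. SQSWrapper.report_status)."""
    status_queue.send_message(MessageBody=json.dumps({
        'work_item': work_item,
        'message': message,
        'is_fatal': is_fatal,
        'is_complete': is_complete,
        'hostname': socket.gethostname(),
    }))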