示例#1
0
import logging

from sqs_wrapper import SQSWrapper

# Console logging for the SQS reader: INFO level, timestamps carrying
# millisecond precision (dot-separated via default_msec_format).
logger = logging.getLogger('sqs_reader')
logger.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

sqs = SQSWrapper('default')

# Drain work items indefinitely; every item is acknowledged back to the
# queue as non-fatal and complete.
for work_item in sqs.work_items():
    logger.info(f'Got work item "{work_item}"')
    status_message = f'Reporting status for item {work_item}'
    sqs.report_status(work_item, status_message,
                      is_complete=True, is_fatal=False)
示例#2
0
import subprocess
import glob
import csv
import sys
import os
import requests
import shutil
import json
from pathlib import Path
from sqs_wrapper import SQSWrapper

# Project-local SQS wrapper bound to the 'default' queue, tagged 'igv'.
sqs = SQSWrapper('default', 'igv')


def run(cmd: str) -> None:
    """Execute *cmd* through the shell, echoing output to this process's stdout.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    # NOTE(review): shell=True hands cmd to the shell verbatim — assumes
    # cmd is built from trusted code, never from external input.
    subprocess.run(cmd, check=True, shell=True, stdout=sys.stdout)


def get_bam(job_info: str, sample: str, token_file: str, chrom: str,
            bam_window_start: int, bam_window_end: int,
            variant_junction: str) -> None:
    """Download the GDC BAM manifest for *sample*, reporting failures to SQS.

    Only the manifest request is implemented in this portion; token_file,
    chrom, bam_window_start/bam_window_end and variant_junction are accepted
    for the wider workflow but are unused here.
    """
    print('Obtaining bam manifest file')
    # URL-encoded GDC /files query: data_format == BAM, experimental_strategy
    # == RNA-Seq, program == TCGA, samples.submitter_id == {sample};
    # return_type=manifest requests a manifest file instead of JSON metadata.
    # NOTE(review): the trailing '&query=' fragment contains '%5B%22P{0}' —
    # the stray 'P' before the sample id looks like a typo relative to the
    # filter section; confirm against the GDC API before changing.
    bam_url = 'https://api.gdc.cancer.gov/files?filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.data_format%22%2C%22value%22%3A%5B%22BAM%22%5D%7D%7D%2C%7B%22op%22%3A%22AND%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.experimental_strategy%22%2C%22value%22%3A%5B%22RNA-Seq%22%5D%7D%7D%2C%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.project.program.name%22%2C%22value%22%3A%5B%22TCGA%22%5D%7D%7D%2C%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.samples.submitter_id%22%2C%22value%22%3A%5B%22{0}%22%5D%7D%7D%5D%7D%5D%7D%5D%7D%5D%7D&query=files.data_format%20in%20%5B%22BAM%22%5D%20and%20files.experimental_strategy%20in%20%5B%22RNA-Seq%22%5D%20AND%20cases.project.program.name%20IN%20%5BTCGA%5D%20and%20cases.samples.submitter_id%20IN%20%5B%22P{0}&return_type=manifest'.format(
        sample)
    # BUG FIX: requests.get without a timeout waits indefinitely, so a stalled
    # GDC endpoint would hang this worker forever. A generous timeout lets the
    # error surface instead.
    response = requests.get(bam_url, timeout=300)
    if response.status_code != 200:
        sqs.report_status(
            job_info,
            f'Failed to download bam manifest file: HTTP Status Code: {response.status_code}'
        )
        sqs.queue_error_item(job_info)
示例#3
0
import subprocess
import glob
import csv
import sys
import os
import requests
import shutil
import json
from pathlib import Path
from sqs_wrapper import SQSWrapper

# Project-local SQS wrapper bound to the 'default' queue, tagged 'igv'.
sqs = SQSWrapper('default', 'igv')


def run(cmd: str) -> None:
    """Execute *cmd* through the shell, echoing output to this process's stdout.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    # NOTE(review): shell=True hands cmd to the shell verbatim — assumes
    # cmd is built from trusted code, never from external input.
    subprocess.run(cmd, check=True, shell=True, stdout=sys.stdout)


def get_bam(job_info: str, sample: str, token_file: str, chrom: str,
            bam_window_start: int, bam_window_end: int,
            variant_junction: str) -> None:
    """Download the GDC BAM manifest for *sample*, reporting failures to SQS.

    Only the manifest request is implemented in this portion; token_file,
    chrom, bam_window_start/bam_window_end and variant_junction are accepted
    for the wider workflow but are unused here.
    """
    print('Obtaining bam manifest file')
    # URL-encoded GDC /files query: data_format == BAM, experimental_strategy
    # == RNA-Seq, program == TCGA, samples.submitter_id == {sample};
    # return_type=manifest requests a manifest file instead of JSON metadata.
    # NOTE(review): the trailing '&query=' fragment contains '%5B%22P{0}' —
    # the stray 'P' before the sample id looks like a typo relative to the
    # filter section; confirm against the GDC API before changing.
    bam_url = 'https://api.gdc.cancer.gov/files?filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.data_format%22%2C%22value%22%3A%5B%22BAM%22%5D%7D%7D%2C%7B%22op%22%3A%22AND%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22files.experimental_strategy%22%2C%22value%22%3A%5B%22RNA-Seq%22%5D%7D%7D%2C%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.project.program.name%22%2C%22value%22%3A%5B%22TCGA%22%5D%7D%7D%2C%7B%22op%22%3A%22IN%22%2C%22content%22%3A%7B%22field%22%3A%22cases.samples.submitter_id%22%2C%22value%22%3A%5B%22{0}%22%5D%7D%7D%5D%7D%5D%7D%5D%7D%5D%7D&query=files.data_format%20in%20%5B%22BAM%22%5D%20and%20files.experimental_strategy%20in%20%5B%22RNA-Seq%22%5D%20AND%20cases.project.program.name%20IN%20%5BTCGA%5D%20and%20cases.samples.submitter_id%20IN%20%5B%22P{0}&return_type=manifest'.format(
        sample)
    # BUG FIX: requests.get without a timeout waits indefinitely, so a stalled
    # GDC endpoint would hang this worker forever. A generous timeout lets the
    # error surface instead.
    response = requests.get(bam_url, timeout=300)
    if response.status_code != 200:
        sqs.report_status(
            job_info,
            f'Failed to download bam manifest file: HTTP Status Code: {response.status_code}'
        )
        sqs.queue_error_item(job_info)
示例#4
0
            subprocess.run(f'aws s3 cp {self.sample}.tar.gz {self.s3_archive_upload_url}/{self.cohort}/{self.sample}.tar.gz', shell=True,
                           check=True)
        else:
            self.logger.error('S3 Archive Upload URL is not set - archive file not uploaded')


# Console logging for the regtools reader: DEBUG level end-to-end,
# millisecond timestamps, right-aligned 7-character level names.
logger = logging.getLogger('regtools_reader')
logger.setLevel(logging.DEBUG)

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
log_formatter = logging.Formatter('%(asctime)s:%(levelname)7s:%(name)s:%(message)s')
log_formatter.default_msec_format = '%s.%03d'
stream_handler.setFormatter(log_formatter)
logger.addHandler(stream_handler)

# Project-local SQS wrapper bound to the 'default' queue.
sqs = SQSWrapper('default')

try:
    logger.info('Waiting for work item')
    for sample_id in sqs.work_items():
        filesystem_path = 'regtools_wd_' + sample_id.split(';')[1]
        try:
            workflow = RegtoolsWorkflow(sample_id=sample_id,
                                        filesystem_path=filesystem_path,
                                        logger=logger,
                                        s3_token_download_url='s3://regtools-cwl-sharedfiles/gdc-user-token.txt',
                                        s3_archive_upload_url='s3://regtools-results-unstranded')

            file_handler = logging.FileHandler(f'{filesystem_path}/{sample_id}.log')
            file_handler.setFormatter(log_formatter)
            file_handler.setLevel(logging.DEBUG)
示例#5
0
import logging
import sys
from pathlib import Path

from sqs_wrapper import SQSWrapper

# Console logging for the SQS writer: INFO level, timestamps carrying
# millisecond precision (dot-separated via default_msec_format).
logger = logging.getLogger('sqs_writer')
logger.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# Project-local SQS wrapper bound to the 'default' queue, tagged 'igv'.
sqs = SQSWrapper('default', 'igv')

# Exactly one CLI argument (the input file path) is required.
# NOTE(review): sys.exit(-1) maps to exit status 255 on POSIX; the
# conventional failure code is 1 — confirm nothing depends on 255.
if len(sys.argv) != 2:
    print('Input file required')
    sys.exit(-1)

inputfile = Path(sys.argv[1])

# NOTE(review): input-file existence check is currently disabled.
# if not inputfile.exists():
#     print('Input file does not exist')
#     sys.exit(-1)

logger.info('Attempting to queue items')

# NOTE(review): the queueing loop below is commented out (and appears
# truncated here), so the script currently queues nothing after logging.
# with open(inputfile, 'r') as f:
#     items = f.readlines()
#     for item in items:
示例#6
0
        test_image, number_of_times_to_upsample=0, model="cnn")
    no = len(face_locations)
    for i in range(no):
        test_image_enc = face_recognition.face_encodings(test_image,
                                                         face_locations,
                                                         model="large")[i]
        np_name = clf.predict([test_image_enc])
        name = np_name.tolist()[0]
        if name == 'kusudaaina':
            pil_image = Image.fromarray(test_image)
            split_path = os.path.split(image_url)
            pil_image.save(work_dir_root + 'save/' + tweet_id + "_" +
                           split_path[1],
                           quality=95)
            del pil_image
            return True
    return False


# Root of the working directory holding the trained model and saved images.
work_dir_root = "/home/hogesako/poc_face_recognition/work/"

# SECURITY NOTE(review): pickle.load executes arbitrary code from the file;
# this assumes model.pickle is locally produced and trusted.
with open(work_dir_root + 'model.pickle', mode='rb') as fh:
    clf = pickle.load(fh)

sqs = SQSWrapper()

# Poll the queue forever; analyze() handles each tweet (its boolean result
# is ignored here).
while True:
    batch = sqs.fetch_tweet()
    print(f'tweet count:{len(batch)}')
    for tweet in batch:
        analyze(tweet)
示例#7
0
import datetime
import json
import logging

from sqs_wrapper import SQSWrapper

# Console logging for the status reader: INFO level, timestamps carrying
# millisecond precision (dot-separated via default_msec_format).
logger = logging.getLogger('sqs_status')
logger.setLevel(logging.INFO)

stream_handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(name)s:%(message)s')
formatter.default_msec_format = '%s.%03d'
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

# Consume status messages from the 'default' queue and record fatal
# failures to a local TSV file.
sqs = SQSWrapper('default')
logger.info('Awaiting status messages')
for status_item in sqs.status_items():
    # Each status item is a dict; keys visible here: work_item, message,
    # is_fatal, is_complete, hostname.
    work_item = status_item['work_item']
    message = status_item['message']
    is_fatal = status_item['is_fatal']
    is_complete = status_item['is_complete']  # unused here; kept for schema visibility
    hostname = status_item['hostname']        # unused here; kept for schema visibility
    logger.info(f'Got status item "{status_item}"')
    if is_fatal:
        # BUG FIX: mode 'w' truncated failures_v5.tsv on every fatal status,
        # so only the most recent failure survived; append mode ('a') keeps
        # the full failure history.
        with open('failures_v5.tsv', 'a') as f:
            f.write(f'{datetime.datetime.now()}\t{work_item}\t{message}\n')