Exemplo n.º 1
0
def main(options, args):
    """Export or check out a single configured project.

    ``options`` is a callable taking a dotted option name; ``args`` must hold
    exactly three items: the project name, the repository path and the
    output path.

    Raises cli.MissingOptionError when the argument count is wrong, and
    cli.InvalidOptionError when both checkout and export are requested or
    the project is not among the loaded repositories.
    """
    logging.DEBUG = options('general.debug')

    if len(args) != 3:
        raise cli.MissingOptionError(
            'Must specify PROJECT REPOPATH OUTPUTPATH, see --help')

    project, repoPath, outputPath = args

    if options('general.checkout') and options('general.export'):
        raise cli.InvalidOptionError(
            'You cannot specify both checkout and export')

    logging.debugPrint(lambda: 'Loading repositories information...')
    repositories = loadRepositories(options('general.config_dir'))

    logging.debugPrint(
        lambda: 'Loaded repositories: %s' % (' '.join(repositories.keys()), ))

    if project not in repositories:
        raise cli.InvalidOptionError(
            '%s is not a valid project name' % project)

    repo = repositories[project]
    # A branch given on the command line wins over the repository default.
    branch = options('general.branch') or repo.branch

    # An explicit export request always wins; otherwise a checkout is used
    # when the repository defaults to it or the caller asked for one.
    useCheckout = False
    if not options('general.export'):
        useCheckout = (repo.exportType == CHECKOUT
                       or options('general.checkout'))

    if useCheckout:
        exportFunc = repo.rType.checkout
    else:
        exportFunc = repo.rType.export

    exportFunc(options, repo, repoPath, outputPath, branch)
Exemplo n.º 2
0
def blockOnTask(host, name, taskName, notifyF=logPrint, errorF=errorPrint):
    """Poll a task until it reaches a terminal state, relaying its messages.

    The task is reloaded with ``loadTask(host, name, taskName)`` on every
    pass.  Error messages go to ``errorF``, notifications to ``notifyF`` and
    silent messages to ``debugPrint`` (only when ``logging.DEBUG`` is set).
    After the first poll only messages newer than the previously seen task
    timestamp are requested.

    Returns the final task state (task.TASK_FAILED or task.TASK_COMPLETED).
    """
    terminalStates = [task.TASK_FAILED, task.TASK_COMPLETED]
    state = None
    lastTimestamp = None
    delay = 1
    time.sleep(delay)
    while state not in terminalStates:
        tsk = loadTask(host, name, taskName)
        state = tsk.state

        # First pass: fetch everything; later passes: only what is new.
        if lastTimestamp is None:
            pending = tsk.getMessages()
        else:
            pending = tsk.getMessagesAfterTime(lastTimestamp)
        lastTimestamp = tsk.timestamp

        for msg in pending:
            kind = msg['mtype']
            if kind == task.MSG_ERROR:
                errorF(msg['text'])
            elif kind == task.MSG_NOTIFICATION:
                notifyF(msg['text'])
            elif logging.DEBUG and kind == task.MSG_SILENT:
                debugPrint(lambda: msg['text'])

        ##
        # Make this configurable
        if state not in terminalStates:
            # Double the wait while it is below 30, otherwise use 30.
            if delay < 30:
                delay = delay * 2
            else:
                delay = 30
            time.sleep(delay)

    return state
Exemplo n.º 3
0
def blockOnTask(host,
                cluster,
                taskName,
                notifyF=logging.logPrint,
                errorF=logging.errorPrint):
    """Generator that polls a task until it finishes, relaying its messages.

    Each pass yields ``tasks_client.loadTask(host, cluster, 'guest',
    taskName)`` and converts the result with ``task.taskFromDict``.  Error
    messages go to ``errorF``, notifications to ``notifyF`` and silent
    messages to ``logging.debugPrint`` (only when ``logging.DEBUG`` is set).

    The final state is handed back through ``defer.returnValue``.
    NOTE(review): presumably driven by an inlineCallbacks-style decorator
    applied outside this view -- confirm.
    """
    terminalStates = [task.TASK_FAILED, task.TASK_COMPLETED]
    state = None
    lastTimestamp = None
    delay = 1
    yield defer_utils.sleep(delay)()
    while state not in terminalStates:
        rawTask = yield tasks_client.loadTask(host, cluster, 'guest', taskName)
        tsk = task.taskFromDict(rawTask)
        state = tsk.state

        # First pass: fetch everything; later passes: only what is new.
        if lastTimestamp is None:
            pending = tsk.getMessages()
        else:
            pending = tsk.getMessagesAfterTime(lastTimestamp)
        lastTimestamp = tsk.timestamp

        for msg in pending:
            kind = msg['mtype']
            if kind == task.MSG_ERROR:
                errorF(msg['text'])
            elif kind == task.MSG_NOTIFICATION:
                notifyF(msg['text'])
            elif logging.DEBUG and kind == task.MSG_SILENT:
                logging.debugPrint(lambda: msg['text'])

        ##
        # Make this configurable
        if state not in terminalStates:
            # Double the wait while it is below 30, otherwise use 30.
            if delay < 30:
                delay = delay * 2
            else:
                delay = 30
            yield defer_utils.sleep(delay)()

    defer.returnValue(state)
Exemplo n.º 4
0
def monitorDownload(pr, downloaderChan, baseDir, url, minRate):
    """Watch a running download and terminate it when it is too slow.

    Every SAMPLE_RATE seconds the growth of the files reported by
    ``getDownloadFilenames(baseDir, url)`` is measured.  Up to
    MAX_SAMPLE_SIZE rate samples are kept; once that many are available and
    their average drops below ``minRate`` the process is sent SIGTERM.

    Returns True once ``pr.exitCode`` is set (after receiving from
    ``downloaderChan``, which may raise), and False when the download was
    terminated for being too slow.
    """
    sizeSamples = []
    while True:
        baseSize = getSizeOfFiles(getDownloadFilenames(baseDir, url))
        time.sleep(SAMPLE_RATE)
        ##
        # If the program exited and exited correctly, then we're good
        # otherwise take another sample size and see if we should terminate
        if pr.exitCode is not None:
            downloaderChan.receive()
            return True

        currentSize = getSizeOfFiles(getDownloadFilenames(baseDir, url)) - baseSize
        logging.debugPrint(lambda : 'Download rate: %8d - %s' % (currentSize/SAMPLE_RATE, getUrlFilename(url)))
        # Files may shrink or vanish between samples; never record a
        # negative rate.
        size = max(currentSize/SAMPLE_RATE, 0)
        sizeSamples.append(size)
        if len(sizeSamples) > MAX_SAMPLE_SIZE:
            sizeSamples.pop(0)

        if len(sizeSamples) >= MAX_SAMPLE_SIZE:
            averageRate = sum(sizeSamples)/len(sizeSamples)
            if averageRate < minRate:
                logging.logPrint('Average Rate: %8d - %s - KILLING' % (averageRate, getUrlFilename(url)))
                try:
                    os.kill(pr.pipe.pid, signal.SIGTERM)
                except OSError:
                    # The process exited between the exitCode check and the
                    # kill; there is nothing left to terminate.
                    pass
                ##
                # Give it a second to finish up whatever it's doing
                time.sleep(2)
                try:
                    downloaderChan.receive()
                except Exception:
                    # Best effort: the downloader is expected to report an
                    # error after being killed.  Only Exception is caught so
                    # KeyboardInterrupt/SystemExit still propagate.
                    pass
                return False
Exemplo n.º 5
0
def monitorDownload(pr, downloaderChan, baseDir, url, minRate):
    """Watch a running download and terminate it when it is too slow.

    Every SAMPLE_RATE seconds the growth of the files reported by
    ``getDownloadFilenames(baseDir, url)`` is measured.  Up to
    MAX_SAMPLE_SIZE rate samples are kept; once that many are available and
    their average drops below ``minRate`` the process is sent SIGTERM.

    Returns True once ``pr.exitCode`` is set (after receiving from
    ``downloaderChan``, which may raise), and False when the download was
    terminated for being too slow.
    """
    sizeSamples = []
    while True:
        baseSize = getSizeOfFiles(getDownloadFilenames(baseDir, url))
        time.sleep(SAMPLE_RATE)
        ##
        # If the program exited and exited correctly, then we're good
        # otherwise take another sample size and see if we should terminate
        if pr.exitCode is not None:
            downloaderChan.receive()
            return True

        currentSize = getSizeOfFiles(getDownloadFilenames(baseDir, url)) - baseSize
        logging.debugPrint(lambda: "Download rate: %8d - %s" % (currentSize / SAMPLE_RATE, getUrlFilename(url)))
        # Files may shrink or vanish between samples; never record a
        # negative rate.
        size = max(currentSize / SAMPLE_RATE, 0)
        sizeSamples.append(size)
        if len(sizeSamples) > MAX_SAMPLE_SIZE:
            sizeSamples.pop(0)

        if len(sizeSamples) >= MAX_SAMPLE_SIZE:
            averageRate = sum(sizeSamples) / len(sizeSamples)
            if averageRate < minRate:
                logging.logPrint(
                    "Average Rate: %8d - %s - KILLING" % (averageRate, getUrlFilename(url))
                )
                try:
                    os.kill(pr.pipe.pid, signal.SIGTERM)
                except OSError:
                    # The process exited between the exitCode check and the
                    # kill; there is nothing left to terminate.
                    pass
                ##
                # Give it a second to finish up whatever it's doing
                time.sleep(2)
                try:
                    downloaderChan.receive()
                except Exception:
                    # Best effort: the downloader is expected to report an
                    # error after being killed.  Only Exception is caught so
                    # KeyboardInterrupt/SystemExit still propagate.
                    pass
                return False
Exemplo n.º 6
0
def blockOnTask(host, cluster, taskName, notifyF=logging.logPrint, errorF=logging.errorPrint):
    """Generator that polls a task until it finishes, relaying its messages.

    Each pass yields ``tasks_client.loadTask(host, cluster, 'guest',
    taskName)`` and converts the result with ``task.taskFromDict``.  Error
    messages go to ``errorF``, notifications to ``notifyF`` and silent
    messages to ``logging.debugPrint`` (only when ``logging.DEBUG`` is set).

    The final state is handed back through ``defer.returnValue``.
    NOTE(review): presumably driven by an inlineCallbacks-style decorator
    applied outside this view -- confirm.
    """
    finished = [task.TASK_FAILED, task.TASK_COMPLETED]
    state = None
    seenUpTo = None
    pause = 1
    yield defer_utils.sleep(pause)()
    while state not in finished:
        loaded = yield tasks_client.loadTask(host, cluster, 'guest', taskName)
        tsk = task.taskFromDict(loaded)
        state = tsk.state

        # Everything on the first pass, only newer messages afterwards.
        if seenUpTo is None:
            messages = tsk.getMessages()
        else:
            messages = tsk.getMessagesAfterTime(seenUpTo)
        seenUpTo = tsk.timestamp

        for entry in messages:
            mtype = entry['mtype']
            if mtype == task.MSG_ERROR:
                errorF(entry['text'])
            elif mtype == task.MSG_NOTIFICATION:
                notifyF(entry['text'])
            elif logging.DEBUG and mtype == task.MSG_SILENT:
                logging.debugPrint(lambda : entry['text'])

        ##
        # Make this configurable
        if state not in finished:
            # Double the pause while it is below 30, otherwise use 30.
            if pause < 30:
                pause = pause * 2
            else:
                pause = 30
            yield defer_utils.sleep(pause)()

    defer.returnValue(state)
Exemplo n.º 7
0
def blockOnTask(host, name, taskName, notifyF=logPrint, errorF=errorPrint):
    """Poll a task until it reaches a terminal state, relaying its messages.

    The task is reloaded with ``loadTask(host, name, taskName)`` on every
    pass.  Error messages go to ``errorF``, notifications to ``notifyF`` and
    silent messages to ``debugPrint`` (only when ``logging.DEBUG`` is set).
    After the first poll only messages newer than the previously seen task
    timestamp are requested.

    Returns the final task state (task.TASK_FAILED or task.TASK_COMPLETED).
    """
    finished = [task.TASK_FAILED, task.TASK_COMPLETED]
    state = None
    seenUpTo = None
    pause = 1
    time.sleep(pause)
    while state not in finished:
        tsk = loadTask(host, name, taskName)
        state = tsk.state

        # Everything on the first pass, only newer messages afterwards.
        if seenUpTo is None:
            messages = tsk.getMessages()
        else:
            messages = tsk.getMessagesAfterTime(seenUpTo)
        seenUpTo = tsk.timestamp

        for entry in messages:
            mtype = entry['mtype']
            if mtype == task.MSG_ERROR:
                errorF(entry['text'])
            elif mtype == task.MSG_NOTIFICATION:
                notifyF(entry['text'])
            elif logging.DEBUG and mtype == task.MSG_SILENT:
                debugPrint(lambda : entry['text'])

        ##
        # Make this configurable
        if state not in finished:
            # Double the pause while it is below 30, otherwise use 30.
            if pause < 30:
                pause = pause * 2
            else:
                pause = 30
            time.sleep(pause)

    return state
Exemplo n.º 8
0
def main(options, args):
    """Export or check out a single configured project.

    ``options`` is a callable taking a dotted option name; ``args`` must hold
    exactly three items: the project name, the repository path and the
    output path.

    Raises cli.MissingOptionError when the argument count is wrong, and
    cli.InvalidOptionError when both checkout and export are requested or
    the project is not among the loaded repositories.
    """
    logging.DEBUG = options('general.debug')

    if len(args) != 3:
        raise cli.MissingOptionError('Must specify PROJECT REPOPATH OUTPUTPATH, see --help')

    project, repoPath, outputPath = args

    if options('general.checkout') and options('general.export'):
        raise cli.InvalidOptionError('You cannot specify both checkout and export')

    logging.debugPrint(lambda : 'Loading repositories information...')
    repositories = loadRepositories(options('general.config_dir'))

    logging.debugPrint(lambda : 'Loaded repositories: %s' % (' '.join(repositories.keys()),))

    if project not in repositories:
        raise cli.InvalidOptionError('%s is not a valid project name' % project)

    repo = repositories[project]
    # A branch given on the command line wins over the repository default.
    branch = options('general.branch') or repo.branch

    # An explicit export request always wins; otherwise a checkout is used
    # when the repository defaults to it or the caller asked for one.
    wantsCheckout = False
    if not options('general.export'):
        wantsCheckout = repo.exportType == CHECKOUT or options('general.checkout')

    if wantsCheckout:
        exportFunc = repo.rType.checkout
    else:
        exportFunc = repo.rType.export

    exportFunc(options, repo, repoPath, outputPath, branch)
Exemplo n.º 9
0
def runDownloader(chan):
    pr, rchan = chan.receive()
    try:
        commands.runProgramRunnerEx(pr)
        logging.debugPrint(lambda: "Successfully completed download")
        rchan.send(None)
    except Exception, err:
        logging.logPrint("Download failed for unknown reason: " + str(err))
        rchan.sendError(err)
Exemplo n.º 10
0
def runDownloader(chan):
    pr, rchan = chan.receive()
    try:
        commands.runProgramRunnerEx(pr)
        logging.debugPrint(lambda : 'Successfully completed download')
        rchan.send(None)
    except Exception, err:
        logging.logPrint('Download failed for unknown reason: ' + str(err))
        rchan.sendError(err)
Exemplo n.º 11
0
def main(options, _args):
    """Refresh the node lists of the 'local' cluster and save the result.

    Nothing is updated or saved unless the cluster type name is one of
    'EC2', 'Nimbus' or 'DIAG'.  ``_args`` is unused.
    """
    logging.DEBUG = options('general.debug')

    cluster = cluster_ctl.loadCluster('local')

    # Guard clause: other cluster types are left untouched.
    if cluster.ctype.NAME not in ['EC2', 'Nimbus', 'DIAG']:
        return

    freshDataNodes = cluster.ctype.updateInstances(cluster.credInst, cluster.dataNodes)
    freshExecNodes = cluster.ctype.updateInstances(cluster.credInst, cluster.execNodes)
    cluster = cluster.update(dataNodes=freshDataNodes, execNodes=freshExecNodes)

    logging.debugPrint(lambda : 'Dumping new cluster')
    cluster_ctl.saveCluster(cluster)
Exemplo n.º 12
0
 def STATE_IDLE(self, event):
     """Handle an event while idle.

     A 'start' event named 'start pipeline:' switches the state function to
     STATE_RUNNING, announces the new state through ``self.changed`` and
     schedules ``self._waitForPipelineXmlRunningAndLoop`` (run through
     ``self.delayedLock``) after PIPELINE_UPDATE_FREQUENCY.  Any other event
     is only debug-logged.
     """
     self._log('In idle state, got message')

     isStartMessage = event['event'] == 'start' and event['name'] == 'start pipeline:'
     if not isStartMessage:
         logging.debugPrint(lambda : repr(event))
         return

     self.stateF = self.STATE_RUNNING
     self.changed('state', self.state())

     self._log('Got start message, switching to running state, starting update loop')
     self.delayed = reactor.callLater(
         PIPELINE_UPDATE_FREQUENCY,
         self.delayedLock.run,
         self._waitForPipelineXmlRunningAndLoop)
Exemplo n.º 13
0
def validMD5(options, url, md5):
    """Check the downloaded files for ``url`` against an expected MD5.

    Returns True when ``md5`` is None (nothing to verify) or when the MD5
    calculated over the sorted download filenames equals ``md5``.  Returns
    False when the checksums differ or when no files exist for ``url``.
    """
    if md5 is None:
        return True

    files = getDownloadFilenames(options("general.base_dir"), url)
    if not files:
        # The original fell through here and returned None; callers only
        # test truthiness, but an explicit False keeps the contract boolean.
        return False

    files.sort()
    newMd5 = calculateMD5(files)
    logging.debugPrint(lambda: "Comparing %s to %s" % (md5, newMd5))
    return md5 == newMd5
Exemplo n.º 14
0
def validMD5(options, url, md5):
    """Check the downloaded files for ``url`` against an expected MD5.

    Returns True when ``md5`` is None (nothing to verify) or when the MD5
    calculated over the sorted download filenames equals ``md5``.  Returns
    False when the checksums differ or when no files exist for ``url``.
    """
    if md5 is None:
        return True

    files = getDownloadFilenames(options('general.base_dir'), url)
    if not files:
        # The original fell through here and returned None; callers only
        # test truthiness, but an explicit False keeps the contract boolean.
        return False

    files.sort()
    newMd5 = calculateMD5(files)
    logging.debugPrint(lambda : 'Comparing %s to %s' % (md5, newMd5))
    return md5 == newMd5
Exemplo n.º 15
0
    def STATE_IDLE(self, event):
        """Handle an event while idle.

        A 'start' event named 'start pipeline:' switches the state function
        to STATE_RUNNING, announces the new state through ``self.changed``
        and schedules ``self._waitForPipelineXmlRunningAndLoop`` (run through
        ``self.delayedLock``) after PIPELINE_UPDATE_FREQUENCY.  Any other
        event is only debug-logged.
        """
        self._log('In idle state, got message')

        startRequested = (event['event'] == 'start'
                          and event['name'] == 'start pipeline:')
        if not startRequested:
            logging.debugPrint(lambda: repr(event))
            return

        self.stateF = self.STATE_RUNNING
        self.changed('state', self.state())

        self._log('Got start message, switching to running state, starting update loop')
        self.delayed = reactor.callLater(PIPELINE_UPDATE_FREQUENCY,
                                         self.delayedLock.run,
                                         self._waitForPipelineXmlRunningAndLoop)
Exemplo n.º 16
0
def main(options, tasks):
    """Load, optionally wait for, filter and print tasks.

    When ``tasks`` is empty every task is loaded via ``loadAllTasks``;
    otherwise each given name is loaded with ``loadTask``.  Depending on the
    options the tasks are blocked on and completed ones are dropped before
    printing.

    Returns 1 when ``general.exit_code`` is set and at least one remaining
    task is not in the completed state, otherwise 0.
    """
    if options('general.debug'):
        logging.DEBUG = True

    if tasks:
        debugPrint(lambda: 'Task names provided, loading from database')
        loaded = []
        for taskName in tasks:
            loaded.append(
                loadTask(options('general.host'), options('general.name'),
                         taskName))
        tasks = loaded
    else:
        debugPrint(lambda: 'No task names provided, loading all from database')
        tasks = loadAllTasks(options('general.host'), options('general.name'))

    if options('general.block'):
        debugPrint(lambda: 'Blocking until tasks finish or fail')
        tasks = blockOnTasks(options, tasks)

    if options('general.no_completed'):
        debugPrint(lambda: 'Removing any completed tasks')

        def stillOpen(candidate):
            return candidate.state != task.TASK_COMPLETED

        tasks = filter(stillOpen, tasks)

    ##
    # If there are tasks, find the largest, otherwise just return 0
    if tasks:
        maxTaskNameLen = max([len(t.name) for t in tasks])
    else:
        maxTaskNameLen = 0

    # Two bare print statements separate consecutive tasks (not the first).
    isFirst = True
    for t in tasks:
        if isFirst:
            isFirst = False
        else:
            print
            print

        printTask(options, t, maxTaskNameLen)

    if options('general.exit_code'):
        debugPrint(
            lambda:
            'Exiting with non-zero state if any tasks are not in a completed state'
        )
        for t in tasks:
            if t.state != task.TASK_COMPLETED:
                return 1
        return 0

    return 0
Exemplo n.º 17
0
def main(options, tasks):
    """Load, optionally wait for, filter and print tasks.

    When ``tasks`` is empty every task is loaded via ``loadAllTasks``;
    otherwise each given name is loaded with ``loadTask``.  Depending on the
    options the tasks are blocked on and completed ones are dropped before
    printing.

    Returns 1 when ``general.exit_code`` is set and at least one remaining
    task is not in the completed state, otherwise 0.
    """
    if options('general.debug'):
        logging.DEBUG = True

    if tasks:
        debugPrint(lambda : 'Task names provided, loading from database')
        fetched = []
        for requested in tasks:
            fetched.append(loadTask(options('general.host'), options('general.name'), requested))
        tasks = fetched
    else:
        debugPrint(lambda : 'No task names provided, loading all from database')
        tasks = loadAllTasks(options('general.host'), options('general.name'))

    if options('general.block'):
        debugPrint(lambda : 'Blocking until tasks finish or fail')
        tasks = blockOnTasks(options, tasks)

    if options('general.no_completed'):
        debugPrint(lambda : 'Removing any completed tasks')

        def notCompletedYet(candidate):
            return candidate.state != task.TASK_COMPLETED

        tasks = filter(notCompletedYet, tasks)

    ##
    # If there are tasks, find the largest, otherwise just return 0
    if tasks:
        maxTaskNameLen = max([len(t.name) for t in tasks])
    else:
        maxTaskNameLen = 0

    # Two bare print statements separate consecutive tasks (not the first).
    needSeparator = False
    for t in tasks:
        if needSeparator:
            print
            print
        else:
            needSeparator = True

        printTask(options, t, maxTaskNameLen)

    if options('general.exit_code'):
        debugPrint(lambda : 'Exiting with non-zero state if any tasks are not in a completed state')
        for t in tasks:
            if t.state != task.TASK_COMPLETED:
                return 1
        return 0

    return 0
Exemplo n.º 18
0
def attemptDownload(options, url):
    """Start one wget download of ``url`` and monitor it to completion.

    The command is joined into a single string for commands.ProgramRunner
    and run on a separate thread.  Returns whatever ``monitorDownload``
    returns.
    NOTE(review): ``url`` is placed into the command string unquoted --
    confirm how ProgramRunner executes it before passing untrusted URLs.
    """
    wgetArgs = ["wget", "-nv", "-P", options("general.base_dir")]
    if options("general.continue_download"):
        wgetArgs += ["-c"]
    wgetArgs += [url]

    commandLine = " ".join(wgetArgs)
    pr = commands.ProgramRunner(commandLine, stdoutf=sys.stdout.write, stderrf=sys.stderr.write, log=True)

    worker = threads.runThreadWithChannel(runDownloader)
    downloaderChan = worker.channel.sendWithChannel(pr)
    ##
    # Wait 20 seconds or until files start appearing, if nothing appears
    # then we will continue on to monitorDownload and it will fail out anyways
    for _ in range(20):
        if getDownloadFilenames(options("general.base_dir"), url):
            break
        time.sleep(1)

    logging.debugPrint(lambda: "Downloading with a minimum acceptable rate of %d" % options("general.min_rate"))

    return monitorDownload(pr, downloaderChan, options("general.base_dir"), url, options("general.min_rate"))
Exemplo n.º 19
0
def handleWWWObserver(request):
    """
    Forward an observer event to the pipeline observer queue named by the
    request body's 'props' field.

    Input:
    { id: string
      file: string
      event: string
      retval: string
      props: string
      host: string
      time: string
      name: string
      message: string
    }

    Output:
    None
    """
    logging.debugPrint(lambda: repr(request.body))

    destination = '/queue/pipelines/observer/' + request.body['props']
    payload = json.dumps(request.body)
    request.mq.send(destination, payload)

    answered = request.update(response=None)
    return defer_pipe.ret(answered)
Exemplo n.º 20
0
def handleWWWObserver(request):
    """
    Forward an observer event to the pipeline observer queue named by the
    request body's 'props' field.

    Input:
    { id: string
      file: string
      event: string
      retval: string
      props: string
      host: string
      time: string
      name: string
      message: string
    }

    Output:
    None
    """
    logging.debugPrint(lambda : repr(request.body))

    queueName = '/queue/pipelines/observer/' + request.body['props']
    encodedBody = json.dumps(request.body)
    request.mq.send(queueName, encodedBody)

    emptyResponse = request.update(response=None)
    return defer_pipe.ret(emptyResponse)
Exemplo n.º 21
0
def attemptDownload(options, url):
    """Start one wget download of ``url`` and monitor it to completion.

    The command is joined into a single string for commands.ProgramRunner
    and run on a separate thread.  Returns whatever ``monitorDownload``
    returns.
    NOTE(review): ``url`` is placed into the command string unquoted --
    confirm how ProgramRunner executes it before passing untrusted URLs.
    """
    wgetArgs = ['wget', '-nv', '-P', options('general.base_dir')]
    if options('general.continue_download'):
        wgetArgs += ['-c']
    wgetArgs += [url]

    commandLine = ' '.join(wgetArgs)
    pr = commands.ProgramRunner(commandLine,
                                stdoutf=sys.stdout.write,
                                stderrf=sys.stderr.write,
                                log=True)

    worker = threads.runThreadWithChannel(runDownloader)
    downloaderChan = worker.channel.sendWithChannel(pr)
    ##
    # Wait 20 seconds or until files start appearing, if nothing appears
    # then we will continue on to monitorDownload and it will fail out anyways
    for _ in range(20):
        if getDownloadFilenames(options('general.base_dir'), url):
            break
        time.sleep(1)

    logging.debugPrint(lambda : 'Downloading with a minimum acceptable rate of %d' % options('general.min_rate'))

    return monitorDownload(pr, downloaderChan, options('general.base_dir'), url, options('general.min_rate'))
Exemplo n.º 22
0
 def update(self, who, aspect, value):
     """Publish a tag notification for a 'load', 'save' or 'remove' aspect.

     For 'load' and 'save' the JSON-encoded ``value.tagName`` is sent to
     LOAD_TOPIC / SAVE_TOPIC; for 'remove' the JSON-encoded ``value`` itself
     is sent to REMOVE_TOPIC.  Any other aspect is ignored.  ``who`` is
     unused.
     """
     if aspect == 'load':
         logging.debugPrint(lambda : 'TAG_NOTIFY: Sending LOAD ' + value.tagName)
         topic, payload = LOAD_TOPIC, value.tagName
     elif aspect == 'save':
         logging.debugPrint(lambda : 'TAG_NOTIFY: Sending SAVE ' + value.tagName)
         topic, payload = SAVE_TOPIC, value.tagName
     elif aspect == 'remove':
         logging.debugPrint(lambda : 'TAG_NOTIFY: Sending REMOVE ' + value)
         topic, payload = REMOVE_TOPIC, value
     else:
         return

     self.mq.send(topic, json.dumps(payload))
Exemplo n.º 23
0
def downloadUrls(chan):
    (options, queue), rchan = chan.receive()

    ##
    # Loop until queue is empty
    try:
        while True:
            url, md5 = queue.get_nowait()

            ##
            # Skip all this if it's already been downloaded
            if md5 and validMD5(options, url, md5):
                rchan.send((url, True))
                continue
            
            if not options('general.continue_download'):
                logging.debugPrint(lambda : 'Deleting any files that already exist')
                deleteDownloadedFiles(options('general.base_dir'), url)
                time.sleep(1)
                
            tries = options('general.tries')
            try:
                while (not attemptDownload(options, url) or not validMD5(options, url, md5)) and tries > 0:
                    logging.debugPrint(lambda : 'Download failed, trying again. %d' % tries)
                    if not options('general.continue_download'):
                        logging.debugPrint(lambda : 'Deleting downloaded files')
                        deleteDownloadedFiles(options('general.base_dir'), url)
                        time.sleep(1)
                    tries -= 1

                if tries <= 0:
                    rchan.send((url, False))
                else:
                    rchan.send((url, True))

            except Exception, err:
                logging.errorPrint('Download failed: ' + str(err))
                rchan.send((url, False))
                
    except Queue.Empty:
        rchan.send(None)
Exemplo n.º 24
0
def downloadUrls(chan):
    (options, queue), rchan = chan.receive()

    ##
    # Loop until queue is empty
    try:
        while True:
            url, md5 = queue.get_nowait()

            ##
            # Skip all this if it's already been downloaded
            if md5 and validMD5(options, url, md5):
                rchan.send((url, True))
                continue

            if not options("general.continue_download"):
                logging.debugPrint(lambda: "Deleting any files that already exist")
                deleteDownloadedFiles(options("general.base_dir"), url)
                time.sleep(1)

            tries = options("general.tries")
            try:
                while (not attemptDownload(options, url) or not validMD5(options, url, md5)) and tries > 0:
                    logging.debugPrint(lambda: "Download failed, trying again. %d" % tries)
                    if not options("general.continue_download"):
                        logging.debugPrint(lambda: "Deleting downloaded files")
                        deleteDownloadedFiles(options("general.base_dir"), url)
                        time.sleep(1)
                    tries -= 1

                if tries <= 0:
                    rchan.send((url, False))
                else:
                    rchan.send((url, True))

            except Exception, err:
                logging.errorPrint("Download failed: " + str(err))
                rchan.send((url, False))

    except Queue.Empty:
        rchan.send(None)
Exemplo n.º 25
0
def deleteDownloadedFiles(baseDir, url):
    """Delete every file ``getDownloadFilenames`` reports for ``url``."""
    for path in getDownloadFilenames(baseDir, url):
        logging.debugPrint(lambda : 'Deleting: ' + path)
        deleteFile(path)
Exemplo n.º 26
0
def main(options, args):
    """Download a set of URLs in parallel and optionally join the results.

    URLs come from ``args``; when ``args`` is empty, non-blank lines of the
    form ``MD5 URL`` are read from stdin.  ``general.max_threads`` worker
    threads run ``downloadUrls`` over a shared queue.

    When both ``general.join_name`` and ``general.join_md5`` are set and the
    existing joined file already has that MD5, nothing is downloaded.  If
    any URL fails, the failed URLs are logged and the process exits with
    status 1.  Otherwise, when ``general.join_name`` is set, the downloaded
    files are concatenated into it and then deleted.
    """
    logging.DEBUG = options('general.debug')

    queue = Queue.Queue()
    ##
    # Track the downloaded URL names for joining later if specified
    urls = []
    if not args:
        for line in sys.stdin:
            if not line.strip():
                continue
            md5, url = line.split(' ', 1)
            url = url.strip()
            urls.append(url)
            queue.put((url, md5))
    else:
        for url in args:
            urls.append(url)
            queue.put((url, None))

    if options('general.join_name') and options('general.join_md5'):
        md5 = calculateMD5([os.path.join(options('general.base_dir'), options('general.join_name'))])
        ##
        # If they match, then no need to download and exit cleanly
        if md5 == options('general.join_md5'):
            return

    retChans = [threads.runThreadWithChannel(downloadUrls).channel.sendWithChannel((options, queue))
                for _ in range(options('general.max_threads'))]

    # Each worker sends (url, succeeded) tuples and finally None.
    failedUrls = []
    for c in retChans:
        ret = c.receive()
        while ret is not None:
            url, succ = ret
            if not succ:
                failedUrls.append(url)
            ret = c.receive()

    if failedUrls:
        for url in failedUrls:
            logging.errorPrint(url)

        ##
        # If any URLs failed, exit with fail
        sys.exit(1)

    if options('general.join_name'):
        logging.debugPrint(lambda : 'Joining files into: ' + options('general.join_name'))
        files = []
        for url in urls:
            files.extend(sorted(getDownloadFilenames(options('general.base_dir'), url)))

        # try/finally guarantees both handles are closed even when a read
        # or write fails part-way through the join.
        fout = open(os.path.join(options('general.base_dir'), options('general.join_name')), 'wb')
        try:
            for f in files:
                logging.debugPrint(lambda : 'Reading: ' + f)
                fin = open(f, 'rb')
                try:
                    d = fin.read(1000000)
                    while d:
                        fout.write(d)
                        d = fin.read(1000000)
                finally:
                    fin.close()
        finally:
            fout.close()

        logging.debugPrint(lambda : 'Deleting downloaded files after join')
        for f in files:
            logging.debugPrint(lambda : 'Deleting: ' + f)
            deleteFile(f)
Exemplo n.º 27
0
def main(options, args):
    """Download a set of URLs in parallel and optionally join the results.

    URLs come from ``args``; when ``args`` is empty, non-blank lines of the
    form ``MD5 URL`` are read from stdin.  ``general.max_threads`` worker
    threads run ``downloadUrls`` over a shared queue.

    When both ``general.join_name`` and ``general.join_md5`` are set and the
    existing joined file already has that MD5, nothing is downloaded.  If
    any URL fails, the failed URLs are logged and the process exits with
    status 1.  Otherwise, when ``general.join_name`` is set, the downloaded
    files are concatenated into it and then deleted.
    """
    logging.DEBUG = options("general.debug")

    queue = Queue.Queue()
    ##
    # Track the downloaded URL names for joining later if specified
    urls = []
    if not args:
        for line in sys.stdin:
            if not line.strip():
                continue
            md5, url = line.split(" ", 1)
            url = url.strip()
            urls.append(url)
            queue.put((url, md5))
    else:
        for url in args:
            urls.append(url)
            queue.put((url, None))

    if options("general.join_name") and options("general.join_md5"):
        md5 = calculateMD5([os.path.join(options("general.base_dir"), options("general.join_name"))])
        ##
        # If they match, then no need to download and exit cleanly
        if md5 == options("general.join_md5"):
            return

    retChans = [
        threads.runThreadWithChannel(downloadUrls).channel.sendWithChannel((options, queue))
        for _ in range(options("general.max_threads"))
    ]

    # Each worker sends (url, succeeded) tuples and finally None.
    failedUrls = []
    for c in retChans:
        ret = c.receive()
        while ret is not None:
            url, succ = ret
            if not succ:
                failedUrls.append(url)
            ret = c.receive()

    if failedUrls:
        for url in failedUrls:
            logging.errorPrint(url)

        ##
        # If any URLs failed, exit with fail
        sys.exit(1)

    if options("general.join_name"):
        logging.debugPrint(lambda: "Joining files into: " + options("general.join_name"))
        files = []
        for url in urls:
            files.extend(sorted(getDownloadFilenames(options("general.base_dir"), url)))

        # try/finally guarantees both handles are closed even when a read
        # or write fails part-way through the join.
        fout = open(os.path.join(options("general.base_dir"), options("general.join_name")), "wb")
        try:
            for f in files:
                logging.debugPrint(lambda: "Reading: " + f)
                fin = open(f, "rb")
                try:
                    d = fin.read(1000000)
                    while d:
                        fout.write(d)
                        d = fin.read(1000000)
                finally:
                    fin.close()
        finally:
            fout.close()

        logging.debugPrint(lambda: "Deleting downloaded files after join")
        for f in files:
            logging.debugPrint(lambda: "Deleting: " + f)
            deleteFile(f)
Exemplo n.º 28
0
def deleteDownloadedFiles(baseDir, url):
    """Delete every file ``getDownloadFilenames`` reports for ``url``."""
    for downloaded in getDownloadFilenames(baseDir, url):
        logging.debugPrint(lambda: "Deleting: " + downloaded)
        deleteFile(downloaded)
Exemplo n.º 29
0
 def _perform_workflow(data):
     """Run the per-sequence analysis pipeline for one (name, sequence) pair.

     ``data`` is unpacked into ``tn`` (used as a file-name stem) and ``f``
     (written as the sequence body of "<tn>.fasta").

     The names ``combined``, ``num_refs``, ``run_r`` and ``tree`` are not
     defined here; presumably they come from an enclosing scope -- confirm.

     Returns a 5-tuple of per-thread result file names (distance, name,
     polys, length, euclidian distance) when the aligned-sequence file was
     produced.  Otherwise only the intermediate files are removed and the
     function returns None implicitly.
     """
     tn, f = data
     # Write the sequence as a single-record FASTA file named after tn.
     outfile = open("%s.fasta" % tn, "w")
     outfile.write(">%s\n%s" % (tn,f))
     outfile.close()
     logging.debugPrint(lambda : "Processing sequence: %s" % tn)
     blast_against_reference("%s.fasta" % tn, combined, _temp_name(tn, "blast_parsed.txt"))
     # Keep one line per distinct value of the second field of the parsed output.
     subprocess.check_call("sort -u -k 2,2 %s > %s" % (_temp_name(tn, "blast_parsed.txt"),
                                                       _temp_name(tn, "blast_unique.parsed.txt")),
                           shell=True)
     parsed_blast_to_seqs(_temp_name(tn, "blast_unique.parsed.txt"), _temp_name(tn, "seqs_in.fas"))
     check_and_align_seqs(_temp_name(tn, "seqs_in.fas"), num_refs, _temp_name(tn, "seqs_aligned.fas"))
     # Everything below depends on the alignment file existing on disk.
     if os.path.isfile(_temp_name(tn, "seqs_aligned.fas")):
         """What if there are NO SNPs in a given region"""
         #try:
         # mothur's exit status is not checked (subprocess.call) and its stdout is piped away.
         subprocess.call(['mothur',
                                '#filter.seqs(fasta=%s, soft=100, vertical=F)' % _temp_name(tn, "seqs_aligned.fas")], stdout=subprocess.PIPE)
         # sed chain: every character that is not "1" becomes "0", then 0 and 1
         # are swapped (via a temporary "2"), giving an inverted mask.
         subprocess.check_call('sed "s/[^1]/0/g" %s | sed "s/0/2/g" | sed "s/1/0/g" | sed "s/2/1/g" > %s' % (_temp_name(tn, "seqs_aligned.filter"),
                                                                                                             _temp_name(tn, "mask.txt")), shell=True)
         split_read(_temp_name(tn, "mask.txt"),_temp_name(tn, "padded.txt"))
         sum_qual_reads(_temp_name(tn, "padded.txt"), _temp_name(tn,"polys.txt"))
         #except:
         #    """This function was never created"""
         #    write_poly_zeros(_temp_name(tn, "padded.txt"), _temp_name(tn,"polys.txt"))
         # Optional R step, enabled when run_r is the string "T"; its outputs
         # are moved into ./R_output.
         if "T" == run_r:
             name = get_seq_name("%s.fasta" % tn)
             subprocess.check_call("cat snps.r | R --slave --args %s %s.table %s.pdf 2> /dev/null" % (_temp_name(tn, "seqs_aligned.fas"), name, name),
     					      shell=True)
             os.system("mv %s.table ./R_output/%s.table.txt" % (name, name))
             os.system("mv %s.pdf ./R_output/%s.plots.pdf" % (name, name))
         else:
             pass
         # Build a tree from the alignment with FastTree (stderr discarded).
         subprocess.check_call("FastTree -nt -noboot %s > %s 2> /dev/null" % (_temp_name(tn, "seqs_aligned.fas"),
                                                                              _temp_name(tn, "tmp.tree")),
                               shell=True)
         # NOTE(review): presumably these compare the new tree against `tree`
         # and write distance files (.RF / .EU) -- confirm against the helpers.
         run_dendropy("%s" % (_temp_name(tn, "tmp.tree")), tree, "%s" % (_temp_name(tn, "tmp.RF")))
         run_dendropy_euclidian("%s" % (_temp_name(tn, "tmp.tree")), tree, "%s" % (_temp_name(tn, "tmp.EU")))
         get_contig_length("%s.fasta" % tn, _temp_name(tn, "length.txt"))
         # Result files are named after the id() of the current thread object,
         # so calls made on the same thread use the same file names.
         thread_id = id(threading.current_thread())
         thread_distance_file = str(thread_id) + '_distance.txt'
         parse_rf_file(_temp_name(tn, "tmp.RF"), thread_distance_file)
         thread_euclidian_file = str(thread_id) + "_euc_dist.txt"
         parse_rf_file(_temp_name(tn, "tmp.EU"), thread_euclidian_file)
         thread_name_file = str(thread_id) + '_name.txt'
         write_strip_name("%s.fasta" % tn, thread_name_file)
         polys_name_file = str(thread_id) + '_polys.txt'
         parse_poly_file(_temp_name(tn, "polys.txt"), polys_name_file)
         length_name_file = str(thread_id) + '_length.txt'
         parse_poly_file(_temp_name(tn, "length.txt"), length_name_file)
         # Best-effort removal of files matching "mothur*" in the working
         # directory; any failure is ignored.
         try:
             subprocess.check_call("rm mothur*", shell=True, stderr=open(os.devnull, 'w'))
         except:
             pass
         # Remove every intermediate file for this sequence; check_call raises
         # if rm exits non-zero.
         subprocess.check_call(["rm",
                                _temp_name(tn, "blast_parsed.txt"),
                                "%s.fasta" % tn,
                                _temp_name(tn, "blast_unique.parsed.txt"),
                                _temp_name(tn, "seqs_in.fas"),
                                _temp_name(tn, "seqs_aligned.fas"),
                                _temp_name(tn, "tmp.tree"),
                                _temp_name(tn, "tmp.RF"),
                                _temp_name(tn, "tmp.EU"),
                                _temp_name(tn, "mask.txt"),
                                _temp_name(tn, "padded.txt"),
                                _temp_name(tn, "polys.txt"),
                                _temp_name(tn, "seqs_aligned.filter"),
                                _temp_name(tn, "length.txt"),
                                _temp_name(tn, "seqs_aligned.filter.fasta")])
         return (thread_distance_file, thread_name_file, polys_name_file, length_name_file,
                 thread_euclidian_file)
     else:
         # No alignment was produced: clean up the files created so far and
         # fall through, returning None.
         subprocess.check_call(["rm",
                                _temp_name(tn, "blast_parsed.txt"),
                                "%s.fasta" % tn,
                                _temp_name(tn, "blast_unique.parsed.txt"),
                                _temp_name(tn, "seqs_in.fas")])