def getAllMap(inclPostExpFIds, fIdsNotToBeIncl):
    # Append this run's failure points to the global file before reading it.
    os.system("cat " + util.allFPtsFile + " >> " + util.globAllFPtsFile)
    fIdsSeqToAllFIdsMap = {}
    allFIdsLines = util.readFromFile(util.globAllFPtsFile)
    psFIds = util.readFromFile(util.postSetupFIdsFile)
    for allFIdsLn in allFIdsLines:
        allFIdsLnParts = allFIdsLn.split(":")
        fIdsSeq = allFIdsLnParts[0]
        fIdsInExec = allFIdsLnParts[1].rstrip("\n")
        fIdsInExecParts = fIdsInExec.split(" ")
        #setOfFIdsInExec = set([])
        setOfFIdsInExec = []
        for fIdsInExecPart in fIdsInExecParts:
            isPSFId = fIdsInExecPart in psFIds
            # Keep the fId unless it is a post-setup fId and those were
            # excluded. (Simplified from the equivalent
            # `inclPostExpFIds or (not inclPostExpFIds and not isPSFId)`.)
            if inclPostExpFIds or not isPSFId:
                if fIdsInExecPart not in fIdsNotToBeIncl:
                    #setOfFIdsInExec.add(fIdsInExecPart)
                    setOfFIdsInExec.append(fIdsInExecPart)
        fIdsSeqToAllFIdsMap[fIdsSeq] = setOfFIdsInExec
    return fIdsSeqToAllFIdsMap
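# A minimal, hypothetical usage sketch for getAllMap. The coverage-line format
# ("<fId sequence>:<space-separated fIds executed>") is inferred from the
# split(":") / split(" ") parsing above; the fId values below are made up.
#
#   line in globAllFPtsFile:  "f1 f2:f1 f2 f3"
#
#   allMap = getAllMap(inclPostExpFIds=False, fIdsNotToBeIncl=set())
#   # allMap["f1 f2"] == ["f1", "f2", "f3"], minus post-setup and excluded fIds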
def test7_readFromFile(self):
    # Read from a non-existent file (mktemp returns a path without creating it).
    self.assertIsNone(util.readFromFile(tempfile.mktemp()))
    # Read a file where we (hopefully) don't have permissions.
    self.assertIsNone(util.readFromFile("/etc/shadow"))
def storeNewTicket(masterKey, ticket, bridge):
    """
    Store a new session ticket and the according master key for future use.

    This method is only called by clients.  The given data, `masterKey',
    `ticket' and `bridge', is YAMLed and stored in the global ticket
    dictionary.  If there already is a ticket for the given `bridge', it is
    overwritten.
    """

    assert len(masterKey) == const.MASTER_KEY_LENGTH
    assert len(ticket) == const.TICKET_LENGTH

    ticketFile = const.STATE_LOCATION + const.CLIENT_TICKET_FILE

    log.debug("Storing newly received ticket in `%s'." % ticketFile)

    # Add a new (key, ticket) tuple with the given bridge as hash key.
    tickets = dict()
    content = util.readFromFile(ticketFile)
    if (content is not None) and (len(content) > 0):
        tickets = yaml.safe_load(content)

    # We also store a timestamp so we later know if our ticket already expired.
    tickets[str(bridge)] = [int(time.time()), masterKey, ticket]
    util.writeToFile(yaml.dump(tickets), ticketFile)
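# A minimal, hypothetical usage sketch for storeNewTicket; the bridge address
# is made up, and the key/ticket must have the lengths the asserts require.
#
#   masterKey = os.urandom(const.MASTER_KEY_LENGTH)
#   ticket = newTicketFromServer  # hypothetical: bytes received from the bridge
#   storeNewTicket(masterKey, ticket, "1.2.3.4:443")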
def BoilerData(downloadDate):
    ret = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.DATA_DIR, downloadDate)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.BOILER_DATA_DIR, downloadDate)
    if not os.path.exists(read_directory):
        util.logger.error("Boiler data can't be run because folder isn't present = " + downloadDate)
        return ret
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    onlyfiles = [f for f in os.listdir(read_directory)
                 if os.path.isfile(os.path.join(read_directory, f))]
    count = 0
    try:
        for htmlFile in onlyfiles:
            htmlData = util.readFromFile(os.path.join(read_directory, htmlFile))
            html_filename = os.path.join(write_directory, htmlFile)
            # Only boiler files we haven't already processed.
            if os.path.isfile(html_filename) is False:
                htmlText = remove_boiler(htmlData)
                result = util.writeToFile(htmlText, html_filename)
            else:
                result = True
            if result == True:
                count = count + 1
                util.logger.info('Boilered data done for =' + html_filename + str(count))
    except Exception as e:
        util.logger.error("Exception at boiler for data : %s" % read_directory)
def filterNoise(self, posExps, fInfos):
    fIdToFInfosMap = util.getFIdToInfoMap(fInfos)
    newPosExps = []
    failedSeqs = util.readFromFile(util.failedExpsFile)
    for posExp in posExps:
        posExp = posExp.rstrip("\n")
        isRpcNoise = False
        posExpFIds = posExp.split(" ")
        for fId in posExpFIds:
            fIdFInf = fIdToFInfosMap[fId]
            if util.isRpcNoise(fIdFInf):
                isRpcNoise = True
                break
        if isRpcNoise:
            #print "yes...rpc noise...posExp is " + posExp
            continue
        # Keep the experiment only if its prefix (all fIds but the last) has
        # not already failed.
        l = len(posExpFIds)
        pfx = " ".join(posExpFIds[0:l - 1])
        if pfx not in failedSeqs:
            newPosExps.append(posExp)
    return newPosExps
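# A minimal, hypothetical sketch of the prefix filter above; the fIds are made
# up. An experiment "f1 f2 f3" is kept only if its prefix "f1 f2" does not
# appear in util.failedExpsFile, i.e. we skip extensions of already-failed
# sequences.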
def alignFastaClustalw(input, path):
    '''
    input: string containing fasta formatted sequences to be aligned.
    path: working directory where fasta will be written and clustal will write output files.
    runs alignment program clustalw
    Returns: fasta-formatted aligned sequences
    '''
    clustalFastaPath = os.path.join(path, CLUSTAL_INPUT_FILENAME)
    clustalAlignmentPath = os.path.join(path, CLUSTAL_ALIGNMENT_FILENAME)
    util.writeToFile(input, clustalFastaPath)
    try:
        # Silence both stdout and stderr. (The original redirect order,
        # `2>&1 >/dev/null`, only silenced stdout.)
        subprocess.check_call('clustalw -output=fasta -infile=%s -outfile=%s >/dev/null 2>&1'
                              % (clustalFastaPath, clustalAlignmentPath), shell=True)
    except Exception:
        logging.exception('runClustal Error: clustalFastaPath data = %s' % open(clustalFastaPath).read())
        raise
    alignedFasta = util.readFromFile(clustalAlignmentPath)
    return alignedFasta
def alignFastaClustalw(input, path):
    '''
    input: string containing fasta formatted sequences to be aligned.
    path: working directory where fasta will be written and clustal will write output files.
    runs alignment program clustalw
    Returns: fasta-formatted aligned sequences
    '''
    clustalFastaPath = os.path.join(path, CLUSTAL_INPUT_FILENAME)
    clustalAlignmentPath = os.path.join(path, CLUSTAL_ALIGNMENT_FILENAME)
    util.writeToFile(input, clustalFastaPath)
    try:
        # Safer variant of the function above: no shell, and both stdout and
        # stderr are discarded via devnull.
        cmd = ['clustalw', '-output', 'fasta', '-infile', clustalFastaPath,
               '-outfile', clustalAlignmentPath]
        with open(os.devnull, 'w') as devnull:
            subprocess.check_call(cmd, stdout=devnull, stderr=devnull)
    except Exception:
        logging.exception('runClustal Error: clustalFastaPath data = %s' % open(clustalFastaPath).read())
        raise
    alignedFasta = util.readFromFile(clustalAlignmentPath)
    return alignedFasta
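# A minimal, hypothetical usage sketch for alignFastaClustalw; the sequences
# and working directory are made up, and clustalw must be installed on PATH.
#
#   fasta = ">seq1\nMKVLAA\n>seq2\nMKILAA\n"
#   aligned = alignFastaClustalw(fasta, "/tmp/clustal_work")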
def findStoredTicket(bridge):
    """
    Retrieve a previously stored ticket from the ticket dictionary.

    The global ticket dictionary is loaded and the given `bridge' is used to
    look up the ticket and the master key.  If the ticket dictionary does not
    exist (yet) or the ticket data could not be found, `None' is returned.
    """

    assert bridge

    ticketFile = const.STATE_LOCATION + const.CLIENT_TICKET_FILE

    log.debug("Attempting to read master key and ticket from file `%s'." %
              ticketFile)

    # Load the ticket hash table from file.
    yamlBlurb = util.readFromFile(ticketFile)
    if (yamlBlurb is None) or (len(yamlBlurb) == 0):
        return None
    tickets = yaml.safe_load(yamlBlurb)

    try:
        timestamp, masterKey, ticket = tickets[str(bridge)]
    except KeyError:
        log.info("Found no ticket for bridge `%s'." % str(bridge))
        return None

    # We can remove the ticket now since we are about to redeem it.
    log.debug("Deleting ticket since it is about to be redeemed.")
    del tickets[str(bridge)]
    util.writeToFile(yaml.dump(tickets), ticketFile)

    # If our ticket is expired, we can't redeem it.
    ticketAge = int(time.time()) - timestamp
    if ticketAge > const.SESSION_TICKET_LIFETIME:
        log.warning("We did have a ticket but it already expired %s ago." %
                    str(datetime.timedelta(
                        seconds=(ticketAge - const.SESSION_TICKET_LIFETIME))))
        return None

    return (masterKey, ticket)
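# A minimal, hypothetical redeem sketch for findStoredTicket; the bridge
# address is made up. Note that the ticket is deleted from the store even if
# it then turns out to be expired.
#
#   stored = findStoredTicket("1.2.3.4:443")
#   if stored is not None:
#       masterKey, ticket = stored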
def BoilerNews(downloadDate):
    jsonData = readBoilerJson(downloadDate)
    if jsonData is None:
        return False
    result = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                  Constants.GOOGLE_NEWS_DIR, downloadDate)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                   Constants.BOILER_GOOGLE_NEWS_DIR, downloadDate)
    if not os.path.exists(read_directory):
        util.logger.error("Boiler news can't be run because folder isn't present = " + downloadDate)
        return result
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    onlyfiles = [f for f in os.listdir(read_directory)
                 if os.path.isfile(os.path.join(read_directory, f))]
    count = 0
    for htmlFile in onlyfiles:
        try:
            htmlData = util.readFromFile(os.path.join(read_directory, htmlFile))
            # Compute the output path up front so the print below can't hit a
            # NameError when htmlData is None.
            html_filename = os.path.join(write_directory, htmlFile)
            if htmlData is not None:
                if os.path.isfile(html_filename) is False:
                    htmlText = remove_boiler(htmlData)
                    result = util.writeToFile(htmlText, html_filename)
                    if result == True:
                        if htmlFile in jsonData:
                            jsonData[htmlFile]["content"] = htmlText
                else:
                    result = True
                if result == True:
                    count = count + 1
            else:
                if htmlFile in jsonData:
                    del jsonData[htmlFile]
            print 'Boilered done for =' + html_filename + str(count)
        except Exception as e:
            util.logger.error("Exception at boiler for google news : %s" % read_directory)
def BoilerData(downloadDate):
    ret = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.DATA_DIR, downloadDate)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.BOILER_DATA_DIR, downloadDate)
    if not os.path.exists(read_directory):
        util.logger.error("Boiler data can't be run because folder isn't present = " + downloadDate)
        return ret
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    onlyfiles = [f for f in os.listdir(read_directory)
                 if os.path.isfile(os.path.join(read_directory, f))]
    count = 0
    # Unlike the variant above, exceptions are caught per file so one bad file
    # doesn't abort the whole run.
    for htmlFile in onlyfiles:
        try:
            htmlData = util.readFromFile(os.path.join(read_directory, htmlFile))
            html_filename = os.path.join(write_directory, htmlFile)
            if os.path.isfile(html_filename) is False:
                htmlText = remove_boiler(htmlData)
                result = util.writeToFile(htmlText, html_filename)
            else:
                result = True
            if result == True:
                count = count + 1
                util.logger.info('Boilered data done for =' + html_filename + str(count))
        except Exception as e:
            util.logger.error("Exception at boiler for data : " + read_directory + "/" + htmlFile)
def BoilerSuggNews(downloadDate):
    jsonData = readBoilerJson(downloadDate)
    if jsonData is None:
        return False
    result = False
    read_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                  Constants.GOOGLE_NEWS_DIR, downloadDate,
                                  Constants.SUGG_GOOGLENEWS)
    write_directory = os.path.join(Constants.ROOT_FOLDER, Constants.RECOMMENDATION_DIR,
                                   Constants.BOILER_GOOGLE_NEWS_DIR, downloadDate,
                                   Constants.SUGG_GOOGLENEWS)
    if not os.path.exists(read_directory):
        util.logger.error("Boiler sugg news can't be run because folder isn't present = " + downloadDate)
        return result
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)
    suggGoogle = jsonData['suggestGoogle']
    googleLinks = suggGoogle[Constants.GOOGLE]
    finalJson = {'GoogleNews': jsonData['GoogleNews'],
                 'suggestGoogle': {Constants.GOOGLE: []}}
    count = 0
    for linkObj in googleLinks:
        download = linkObj['download']
        htmlFile = linkObj['id']
        if download == 'yes':
            try:
                htmlData = util.readFromFile(os.path.join(read_directory, htmlFile))
                if htmlData is not None:
                    html_filename = os.path.join(write_directory, htmlFile)
                    if os.path.isfile(html_filename) is False:
                        htmlText = remove_boiler(htmlData)
                        result = util.writeToFile(htmlText, html_filename)
                        if result == True:
                            linkObj['content'] = htmlText
                            # Recover the page title from the raw HTML.
                            soup = BeautifulSoup(htmlData, 'html.parser')
                            if soup.title and soup.title.contents[0]:
                                title = soup.title.contents[0]
                            else:
                                title = ""
                            linkObj['title'] = title
                    else:
                        result = True
                    if result == True:
                        count = count + 1
                        util.logger.info('Boilered done for sugg_news =' + html_filename + str(count))
            except Exception as e:
                util.logger.error("Exception at boiler for google news : %s" % read_directory)
        else:
            pass
        finalJson['suggestGoogle'][Constants.GOOGLE].append(linkObj)
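# A minimal, hypothetical run of the boilerplate-removal pipeline; the date
# string is made up and must match the download folder naming scheme.
#
#   BoilerData("2016-01-31")
#   BoilerNews("2016-01-31")
#   BoilerSuggNews("2016-01-31")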
import sys
import util
import fInfo
import runExp

stackDepth = 3
inclPostExpFIds = False

p = fInfo.Parser()
p.getFIdDescStrs("/tmp/fi/glob/failPts/fIdStrs", stackDepth)
fInfos = p.getFInfos()
fIdTofInfoMap = util.getFIdToInfoMap(fInfos)
allFIdsLines = util.readFromFile("coverageInfoAllFsns")
psFIds = util.readFromFile(util.postSetupFIdsFile)
fIdsNotToBeIncl = runExp.getFIdsNotToBeIncluded(fInfos, False, None)

countUpBlk = 0
countAbBlk = 0
countRecBlk = 0
countExps = 0
numUpBlks = 0

for allFIdsLn in allFIdsLines:
    allFIdsLnParts = allFIdsLn.split(":")
    fIdsInj = allFIdsLnParts[0]
    fstFIdInj = (fIdsInj.split(" "))[0]
    fstFIdInj = fstFIdInj.rstrip("\n")