def presence(self):
    # self.peak_hour1 = Label(self.top, text='Peak hour today:', font=("Bookman", 20),
    #                         bg='#3c8081')
    # self.peak_hour2 = Label(self.top, text=str(ApiProcess.get_peak()) + ':00', font=("Bookman", 25),
    #                         fg='#FFA505', bg='#3c8081')
    # self.peak_hour1.pack(side=LEFT, fill=tk.Y)
    # self.peak_hour2.pack(side=LEFT)
    #
    # self.count_visitors1 = Label(self.top, text=' Count of visitors today:', font=("Bookman", 20),
    #                              bg='#3c8081')
    # self.count_visitors2 = Label(self.top, text=str(ApiProcess.get_today_visitors()), font=("Bookman", 25),
    #                              fg='#FFA505', bg='#3c8081')
    # self.count_visitors1.pack(side=LEFT, fill=tk.Y)
    # self.count_visitors2.pack(side=LEFT)
    #
    # self.count_yes_visitors1 = Label(self.top, text=' Count of visitors yesterday:', font=("Bookman", 20),
    #                                  bg='#3c8081')
    # self.count_yes_visitors2 = Label(self.top, text=str(ApiProcess.get_today_visitors()), font=("Bookman", 25),
    #                                  fg='#FFA505', bg='#3c8081')
    # self.count_yes_visitors1.pack(side=LEFT, fill=tk.Y)
    # self.count_yes_visitors2.pack(side=LEFT)
    #
    # fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
    # self.canvas_dwell = FigureCanvasTkAgg(fig, self.middle)
    # self.canvas_repeat = FigureCanvasTkAgg(fig, self.middle)
    # self.make_peak_hour()
    Analytics.connected_visitors(self)
    Analytics.repeat_visitors(self)
def ReturnStoreInfo(store):
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
        }
        Store = {}
        url = 'https://www.walmart.com/store/{}'.format(store)
        res = requests.get(url, headers=headers, proxies=proxies)
        page = bs4.BeautifulSoup(res.text, "lxml")
        Store['Number'] = store
        Google = EditGoogleMaps(
            page.select('#store-side-bar > div.StoreSideBar > div.GoogleMapsIframe > iframe')[0],
            400, 400)
        Store['GoogleMaps'] = Markup(Google)
        if len(page.select('.open-24-hours')) == 0:
            Store['StoreHours'] = 'Not 24 Hours'
        else:
            Store['StoreHours'] = '24 Hours'
        Store['Phone'] = MarkupIgnore(page.select('.phone')[0])
        Store['Address2'] = MarkupIgnore(page.select('.address2')[0])
        Store['Address1'] = MarkupIgnore(page.select('.address1')[0])
        Store['Name'] = MarkupIgnore(page.select('.heading-d')[0])
        Store['ItemCount'] = len(Analytics.ConvertStoreToDict(store))
        # Address2 ends with the ZIP code; format the Status() value and drop the decimals.
        Store['Econ'] = str("{:,.2f}".format(Status(Store['Address2'][-5:])))[:-3]
        Store.update(Analytics.ReturnStoreInfo(store))
        return Store
    except Exception as exp:
        print(exp)
def sequenceDTAs(self):
    curPairedScanData = self._indexedPairData[int(self._pairedScanListbox.curselection()[0])]
    t1 = time.time()
    if curPairedScanData['heavy'] != 'N/A':
        heavySeqMap = copy.deepcopy(self._seqMap)
        heavySeqMap['Mods']['N-Term'] = self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']]['NModSymbol']
        heavySeqMap['Mods']['C-Term'] = self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']]['CModSymbol']
        sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(self._pnet, self._paramsDict, self._scanFDict[curPairedScanData['light']]['dta'], heavyPath=self._scanFDict[curPairedScanData['heavy']]['dta'], ppm=self._ppm, usePaired=True, pairConfigName=curPairedScanData['pair configuration'], verbose=False)
        precMass = sharedInfo['lightPrecMass']
    else:
        sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(self._pnet, self._paramsDict, self._scanFDict[curPairedScanData['light']]['dta'], ppm=self._ppm, verbose=False)
        precMass = sharedInfo['precMass']
    epsilon = self._ppm * precMass * 10 ** -6
    paths, subG = DNS.getSpectrumGraphPaths(G, deltas, specs, starts, ends, precMass - Constants.mods['H+'] - Constants.mods['H2O'], termModHash=termModHash, unknownPenalty=self._ambigpenalty, maxEdge=self._maxedge, minEdge=self._minedge, subGraphCut=self._subgraphcut, subAlpha=0.3, alpha=self._alpha, epsilon=epsilon, aas=self._aas, verbose=False)
    seqTime = time.time() - t1
    if paths:
        seqs = []
        for path in paths:
            seqs.extend([DNS.getSequenceFromNodes(subG, path[1], precMass - Constants.mods['H+'] - Constants.mods['H2O'], termModHash)])
        scores = [path[0] for path in paths]
        Ord = np.argsort(-1 * np.array(scores))
        ambigEdges = []
        numAmbig = 0
        for j in range(self._numseq):
            try:
                for i in range(len(seqs[Ord[j]])):
                    if type(seqs[Ord[j]][i]) == tuple:
                        ambigEdges.extend([seqs[Ord[j]][i]])
                        numAmbig += 1
                        seqs[Ord[j]][i] = '-'
                curSeq = ''.join(seqs[Ord[j]])
                curSeq = An.preprocessSequence(curSeq, self._seqMap, ambigEdges=ambigEdges)
                if j == 0 and curPairedScanData['heavy'] != 'N/A':
                    # Guard: curHeavySeq is referenced in the except handler below.
                    curHeavySeq = None
                    try:
                        curHeavySeq = An.preprocessSequence(curSeq, heavySeqMap, replaceExistingTerminalMods=True, ambigEdges=ambigEdges)
                        AAs = An.getAllAAs(curHeavySeq, ambigEdges=ambigEdges)
                        self._seqStatus.set('Paired Sequencing Successful! Heavy Sequence: %s. Time taken: %f seconds' % (curHeavySeq, seqTime))
                    except KeyError:
                        self._seqStatus.set('ERROR: Heavy Sequence %s is not a valid sequence! Time wasted: %f seconds' % (curHeavySeq, seqTime))
                elif j == 0:
                    self._seqStatus.set('Unpaired Sequencing Successful! Time taken: %f seconds' % (seqTime))
                for labelInst in self._seqScoreData[j]['seq'].children.values():
                    labelInst.destroy()
                self.displayConfColoredSequence(subG, self._seqScoreData[j]['seq'], paths[Ord[j]][1], curSeq, ambigEdges=ambigEdges)
                self._seqScoreData[j]['score'].set(str(scores[Ord[j]]))
            except IndexError:
                for labelInst in self._seqScoreData[j]['seq'].children.values():
                    labelInst.destroy()
                self._seqScoreData[j]['score'].set('')
    else:
        self._seqStatus.set('ERROR: No Sequences Found! Time wasted: %f seconds' % seqTime)
def validateHeavySequence(seq, heavySeqMap, ambigEdges):
    try:
        if seq != '-':
            heavySeq = Analytics.preprocessSequence(seq, heavySeqMap, replaceExistingTerminalMods=True, ambigEdges=ambigEdges)
            AAs = Analytics.getAllAAs(heavySeq, ambigEdges=ambigEdges)
            return True
        else:
            return False
    except KeyError:
        return False
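# A minimal usage sketch for validateHeavySequence (hypothetical names; the
# real heavySeqMap is built the same way sequenceDTAs does above, from a deep
# copy of the base seqMap with the pair configuration's terminal mod symbols):
#
#   heavySeqMap = copy.deepcopy(seqMap)
#   heavySeqMap['Mods']['N-Term'] = pairConfig['NModSymbol']
#   heavySeqMap['Mods']['C-Term'] = pairConfig['CModSymbol']
#   if validateHeavySequence(candidateSeq, heavySeqMap, ambigEdges=[]):
#       report(candidateSeq)  # safe: the heavy-labeled form parsed cleanly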
def split(train_data, train_labels, extra_data, extra_labels):
    random.seed()
    n_labels = 10
    valid_index = []
    valid_index2 = []
    train_index = []
    train_index2 = []
    for i in np.arange(n_labels):
        # Add the first 400 indices in the training labels that start with i
        valid_index.extend(np.where(train_labels[:, 1] == i)[0][:400].tolist())
        # Add the rest of the indices to this list
        train_index.extend(np.where(train_labels[:, 1] == i)[0][400:].tolist())
        # The first 200 from the extra set
        valid_index2.extend(np.where(extra_labels[:, 1] == i)[0][:200].tolist())
        # The rest of the extra set
        train_index2.extend(np.where(extra_labels[:, 1] == i)[0][200:].tolist())
    # Randomize the lists
    random.shuffle(valid_index)
    random.shuffle(train_index)
    random.shuffle(valid_index2)
    random.shuffle(train_index2)
    # Combine the extra_data at the valid_index2 indices with the train_data at
    # the valid_index indices. Labels are the first column; the pixels are the
    # 2nd and 3rd dimensions and the colors (RGB) are the last one.
    valid_data = np.concatenate(
        (extra_data[valid_index2, :, :, :], train_data[valid_index, :, :, :]),
        axis=0)
    # Do the same thing with the validation set labels
    valid_labels = np.concatenate(
        (extra_labels[valid_index2, :], train_labels[valid_index, :]), axis=0)
    # Do the same thing with the training data
    train_data = np.concatenate(
        (extra_data[train_index2, :, :, :], train_data[train_index, :, :, :]),
        axis=0)
    # Do the same thing with the training labels
    train_labels = np.concatenate(
        (extra_labels[train_index2, :], train_labels[train_index, :]), axis=0)
    print("Training set created with shape")
    print(train_data.shape, train_labels.shape)
    print("Validation set created with shape")
    print(valid_data.shape, valid_labels.shape)
    Analytics.load()
    Analytics.data_set_size['train'] = train_data.shape[0]
    Analytics.data_set_size['valid'] = valid_data.shape[0]
    Analytics.save()
    return train_data, train_labels, valid_data, valid_labels
def customer(enviroment, ressource, kundenNummer):
    """Models one customer in the pet shop."""
    inSystem = enviroment.now
    event = ressource.getIn()
    Analytics.addWaitsAtPoint(enviroment.now, waitingCustomers())
    if waitingCustomers() > L:
        enviroment.process(counterOpener(enviroment))
    Analytics.addAverageQueueLengthAtPoint(
        enviroment.now, waitingCustomers() / len(ressource.waitsFor))
    yield event
    gainedCounter = ressource.getCounter(event)
    with gainedCounter.request() as req:
        yield req
        Analytics.addWaittimePerCustomer(kundenNummer, enviroment.now - inSystem)
        tiere = 1
        for i in range(4):
            if lcg.nextBool(0.5):
                tiere += 1  # Binomially distributed increment: between +0 and +4.
        bezahlzeit = tiere * Tr + lcg.nextTransformed(inverseCDFPareto)
        print("Customer %i will take %f minutes to pay" % (kundenNummer, bezahlzeit))
        yield enviroment.timeout(bezahlzeit)
        Analytics.addTotaltimePerCustomer(kundenNummer, enviroment.now - inSystem)
        ressource.nextActionForCounter(gainedCounter)
    return
def __init__(self, rs, vehics):
    self.rs = rs
    self.vehics = vehics
    self.rects = []
    self.lc = LightController(self)
    self.analytics = Analytics()
    self.learning = Learning(self)
def parseScans(fDict, prog, seqMap, dbDict, delimiter=',', srchID=None, seqDelimLen=2):
    processedInfo = {}
    for csvfile in fDict.keys():
        MASCOTData = DataFile.getScanInfo(csvfile, dbDict[prog]['fields'] + (['SrchID'] if srchID is not None else []), delimiter=delimiter)
        processedInfo[fDict[csvfile]] = An.preprocessDatabaseScanInfo(MASCOTData, seqMap[fDict[csvfile]], dbDict[prog]['fieldmap'], srchID=srchID, seqDelimLen=seqDelimLen)
    return processedInfo
def parseDBScans(fDict, prog, seqMap, dbDict):
    processedInfo = {}
    for csvfile in fDict.keys():
        MASCOTData = DataFile.getScanInfo(csvfile, dbDict[prog]['fields'], delimiter=',')
        processedInfo[fDict[csvfile]] = An.preprocessDatabaseScanInfo(MASCOTData, seqMap[fDict[csvfile]], dbDict[prog]['fieldmap'])
    return processedInfo
def __init__(self):
    self.health = playerhealth
    self.money = 0
    self.gems = 0
    self.upgPathSelectLvl = 5
    self.abilities = list()
    self.wavenum = 0
    self.gameover = False
    self.towerSelected = None
    self.tbbox = None
    self.layout = None
    self.wavestart = 999
    self.next_wave = False
    self.pausetime = 0
    self.state = "Start"
    self.restart = False
    self.score = 0
    self.newMoveList = False
    self.wavetime = None
    self.wavetimeInt = None
    self.myDispatcher = EventDispatcher.EventDisp()
    self.analytics = Analytics.Analytics()
    self.store = DictStore('settings.txt')
    if self.store.exists('audio'):
        self.soundOn = self.store.get('audio')['soundOn']
        self.musicOn = self.store.get('audio')['musicOn']
    else:
        self.soundOn = True
        self.musicOn = True
class Overwatch:
    def __init__(self, rs, vehics):
        self.rs = rs
        self.vehics = vehics
        self.rects = []
        self.lc = LightController(self)
        self.analytics = Analytics()
        self.learning = Learning(self)

    # Removes the given vehic from the vehicles list so it will be garbage collected.
    def removeVehic(self, vehic):
        self.vehics.remove(vehic)
        self.analytics.vehicPassed(vehic)

    # Automatically spawns vehicles at the rate defined in the road system data file.
    def autoVehicSpawn(self, frameCount, screen):
        defaultSpawnInfo = self.rs.entranceExitDefaults
        for key in self.rs.spawns.mods:
            if frameCount % self.rs.spawns.mods[key] == 0:
                rand = random.randint(1, 100)
                if rand % 2 == 0:
                    self.spawnVehic(key, 1, defaultSpawnInfo[key][0], screen)
                elif rand % 2 == 1:
                    self.spawnVehic(key, 2, defaultSpawnInfo[key][0], screen)

    # Returns the last vehicle spawned on the given road in the given lane.
    def getPrevVehic(self, road, lane):
        if len(self.vehics) == 0:
            return None
        limit = -1 * len(self.vehics)
        i = -1
        while i >= limit:
            if self.vehics[i].road == road and self.vehics[i].lane.id == lane:
                return self.vehics[i]
            i -= 1
        return None

    # Spawns a vehicle at the given entrance, in the given lane, headed for the
    # given exit, and adds it to the vehicle list.
    def spawnVehic(self, entrance, lane, exit, screen):
        entrance = self.rs.features[entrance]
        road = entrance.road
        exit = self.rs.features[exit]
        frontVehic = self.getPrevVehic(road, lane)
        v = Vehicle(screen, self.rs, entrance, road, road.lanes[lane - 1], frontVehic, exit, self)
        road.lanes[lane - 1].vehicles.append(v)
        self.vehics.append(v)
def preprocess_image(digitStruct, dataset=""): filename = os.path.join(dataset, digitStruct['filename']) im = mpimg.imread(filename) # Analytics code Analytics.load() if im.shape[0] > Analytics.max_height[dataset]: Analytics.max_height[dataset] = im.shape[0] if im.shape[1] > Analytics.max_width[dataset]: Analytics.max_width[dataset] = im.shape[1] Analytics.save() t, l, w, h = find_bounding_box(digitStruct) t, l, w, h = scale(t, l, w, h) cropped = crop(im, t, l, w, h) resized = resize(cropped) gray = grayscale(resized) return gray
def listDTAs(self):
    self._dtaList = glob.glob(self._selectedDir.get() + '/*.dta')
    if not self._dtaList:
        self._selectedDir.set('No DTAs in selected directory!')
    else:
        self._scanFDict = An.getScanFDict(self._dtaList)
        self._indexedScanFList = np.zeros(len(self._scanFDict))
        for i, scanF in enumerate(sorted(self._scanFDict.keys())):
            self._scanFListbox.insert(END, str(scanF))
            self._indexedScanFList[i] = scanF
def add_numbers():
    # 'a' is the SKU or search query; 'b' is the store number.
    a = request.args.get('a', 0, type=str)
    store = request.args.get('b', 0, type=str)
    price_info = Analytics.LocalPrice(store, str(a))
    # Check availability before formatting (the original checked the already
    # formatted string, so the unavailable branch was unreachable).
    if price_info is not None:
        result = '{} - {} In Stock'.format('${:,.2f}'.format(price_info[0]), price_info[1])
        return jsonify(result=result)
    else:
        return jsonify(result='Item Not Available')
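# Hypothetical round trip for the endpoint above (route and values are
# illustrative, not taken from the app): a request such as
#   GET /_add_numbers?a=1234567&b=100
# would respond with JSON like {"result": "$19.99 - 3 In Stock"} when
# Analytics.LocalPrice finds the item at that store, and
# {"result": "Item Not Available"} when it does not.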
def GrabPrice(smalllist):
    for s in smalllist:
        try:
            a = Analytics.OnlinePricingInfo(s)
            b = [s, a['ListPrice'], a['Price']]
            print(b)
            Info.append(b)
        except Exception as exp:
            print(exp)
def getLabels(digitStruct, folder):
    label = np.ones([6], dtype=int) * 10
    boxes = digitStruct['boxes']
    num_digits = len(boxes)
    label[0] = num_digits
    # Analytics code: tally sequence lengths; anything longer than 5 digits
    # goes into the overflow slot 6.
    Analytics.load()
    slot = num_digits
    if num_digits > 5:
        slot = 6
    Analytics.sequence_lengths[folder][slot] += 1
    Analytics.save()
    for i in np.arange(num_digits):
        if i < 5:
            label[i + 1] = boxes[i]['label']
            if boxes[i]['label'] == 10:
                label[i + 1] = 0
    return label
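# Worked example of the encoding above: for an SVHN image containing "210",
# boxes carries labels [2, 1, 10] (SVHN uses label 10 for the digit zero), so
# getLabels returns [3, 2, 1, 0, 10, 10] -- the length in slot 0, the three
# digits (with 10 remapped to 0), and 10 padding the unused slots.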
def displayConfColoredSequence(self, G, masterFrame, path, seq, ambigEdges=None):
    nodeGen = Constants.nodeInfoGen(seq, addTerminalNodes=False, considerTerminalMods=True, ambigEdges=ambigEdges)
    prevNode = None
    for i, node in enumerate(nodeGen):
        print node, path[i+1]
        node['prm'] = path[i+1]
        confScore = An.getAAConfidence(G, prevNode=prevNode, nextNode=node)
        hexColor = self.getHexString(np.array([1 - confScore, confScore, 0]))
        # First residue: include any N-terminal modification symbol in the label.
        # (prevNode must still be None here, so it is advanced at the end of the loop.)
        if prevNode == None and seq[len(node['formAA'])] in Constants.NTermMods:
            Label(masterFrame, text=node['formAA'] + seq[len(node['formAA'])], fg='white', bg=hexColor).pack(side=LEFT)
        else:
            Label(masterFrame, text=node['formAA'], fg='white', bg=hexColor).pack(side=LEFT)
        prevNode = node
    confScore = An.getAAConfidence(G, prevNode=prevNode, nextNode=None)
    hexColor = self.getHexString(np.array([1 - confScore, confScore, 0]))
    if seq[-1] in Constants.CTermMods:
        Label(masterFrame, text=node['lattAA'] + seq[-1], fg='white', bg=hexColor).pack(side=LEFT)
    else:
        Label(masterFrame, text=node['lattAA'], fg='white', bg=hexColor).pack(side=LEFT)
def getPairs(self):
    pairs = {}
    for scanF in self._scanFDict:
        self._scanFDict[scanF]['paired scans'] = {}
    for pairConfigName in self._paramsDict['Pair Configurations']:
        pairConfig = self._paramsDict['Pair Configurations'][pairConfigName]
        pairs[pairConfigName] = An.findDeltaPairs(self._dtaList, pairConfig['NMod'] + pairConfig['CMod'], ppm=self._ppm)
        for pair in pairs[pairConfigName]:
            pairData = {'light': pair[0],
                        'heavy': pair[1],
                        'pair configuration': pairConfigName,
                        'pair score': None,
                        'light precmass': self._scanFDict[pair[0]]['precMass'],
                        'heavy precmass': self._scanFDict[pair[1]]['precMass']}
            self._scanFDict[pair[0]]['paired scans'][pair[1]] = pairData
            self._scanFDict[pair[1]]['paired scans'][pair[0]] = pairData
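# The bookkeeping above is deliberately symmetric: if An.findDeltaPairs
# reports a (light, heavy) pair such as (1200, 1207), the same pairData dict
# is stored under both scanFDict[1200]['paired scans'][1207] and
# scanFDict[1207]['paired scans'][1200], so the lazy pair-score fill-in done
# later (see updatePairInfo further down) is visible from either scan.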
def preprocess_image(digitStruct, dataset="", single=False): processed_images = [] filename = os.path.join(dataset, digitStruct['filename']) im = mpimg.imread(filename) # Analytics code Analytics.load() if im.shape[0] > Analytics.max_height[dataset]: Analytics.max_height[dataset] = im.shape[0] if im.shape[1] > Analytics.max_width[dataset]: Analytics.max_width[dataset] = im.shape[1] Analytics.save() t, l, w, h = find_bounding_box(digitStruct) t30, l30, w30, h30 = scale(t, l, w, h) w_prime = w + ((w30 - w) / 2) h_prime = h + ((h30 - h) / 2) cropped = crop(im, t30, l30, w30, h30) processed_images.append(cropped) if single == False: cropped = crop(im, t30, l30, w_prime, h_prime) processed_images.append(cropped) cropped = crop(im, t30, l, w_prime, h_prime) processed_images.append(cropped) cropped = crop(im, t, l30, w_prime, h_prime) processed_images.append(cropped) cropped = crop(im, t, l, w_prime, h_prime) processed_images.append(cropped) for i in range(len(processed_images)): processed_images[i] = resize(processed_images[i]) processed_images[i] = grayscale(processed_images[i]) #Visualize.display_processed_example(processed_images[i]) return processed_images
def compute_initial_figure(self):
    UsersPerCountry, UsersPerPlatform = Analytics.UsersPerCountryOrPlatform()
    labels = []
    sizes = []
    print(UsersPerPlatform)
    for p, c in sorted(UsersPerPlatform.items()):
        labels.append(p)
        sizes.append(c)
    colors = ['turquoise', 'yellowgreen', 'firebrick', 'lightsteelblue', 'royalblue']
    pylab.pie(sizes, colors=colors, labels=labels, autopct='%1.1f%%', shadow=True)
    pylab.title('Users Per Platform')
    pylab.gca().set_aspect('1')
    pylab.show()
def generate_dataset(data, folder, single=False):
    target_size = 64
    if folder == 'test' and not example:
        Analytics.load()
        Analytics.data_set_size[folder] = len(data)
        Analytics.save()
    data_point_per_image = 5
    if single:
        data_point_per_image = 1
    dataset = np.ndarray(
        [len(data) * data_point_per_image, target_size, target_size, 1],
        dtype='float32')
    labels = np.ones([len(data) * data_point_per_image, 6], dtype=int) * 10
    offset = 0
    for i in np.arange(len(data)):
        processed_images = preprocess_image(data[i], folder, single)
        dataset[offset:offset + len(processed_images), :, :, :] = processed_images
        labels[offset:offset + len(processed_images), :] = getLabels(data[i], folder)
        offset += len(processed_images)
    print(folder)
    print(np.mean(dataset))
    print(np.std(dataset))
    # Analytics
    #Analytics.load()
    #Analytics.means[folder] = np.mean(dataset)
    #Analytics.stds[folder] = np.std(dataset)
    #Analytics.save()
    #dataset = normalize(dataset)
    print(folder, "dataset created.")
    print(dataset.shape)
    print(labels.shape)
    return dataset, labels
def updatePairInfo(self, event):
    curPairedScanData = self._indexedPairData[int(self._pairedScanListbox.curselection()[0])]
    if curPairedScanData['pair score'] is None:
        curPairedScanData['pair score'] = An.getSharedPeaksRatio(self._scanFDict[curPairedScanData['light']]['dta'], self._scanFDict[curPairedScanData['heavy']]['dta'], self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']], epsilon=self._ppm * 10**-6 * curPairedScanData['light precmass'])
    for labelDatum in self._pairInfoLabelVars:
        self._pairInfoLabelVars[labelDatum].set(str(curPairedScanData[labelDatum]))
    if curPairedScanData['pair score'] != 'N/A':
        if curPairedScanData['pair score'] > self._paircutoff:
            self._pairScoreLabel.config(fg='dark green')
        else:
            self._pairScoreLabel.config(fg='red')
    else:
        self._pairScoreLabel.config(fg='black')
def getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoSeq, deNovoUnmodSeq, refSeq, alignedIndsMap, deNovoAmbigEdges=[]):
    prevIntervalStart = newRefEndInds['start']
    #print 'End Inds', newRefEndInds
    tempSeq = ''
    tempAmbigEdges = []
    for interval in sorted(modList):
        if 'Isobaric' not in modList[interval][0][0] and not ('Insertion' in modList[interval][0][0] and (alignedIndsMap['De Novo'][interval[0]] < 2 or (len(deNovoUnmodSeq) - alignedIndsMap['De Novo'][interval[1]]) < 2)):
            tempSeq += refSeq[prevIntervalStart:alignedIndsMap['Ref'][interval[0]]] + 'X'
            #print 'Mod list interval', modList[interval]
            tempAmbigEdges += [(0, modList[interval][0][3])]
            #print 'temp ambig edges', tempAmbigEdges
            prevIntervalStart = alignedIndsMap['Ref'][interval[1]]
    #print 'TempSeq', tempSeq, tempAmbigEdges
    tempSeq += refSeq[prevIntervalStart:(len(refSeq) + newRefEndInds['end'])]
    #print deNovoSeq, refSeq, tempSeq, deNovoAmbigEdges, tempAmbigEdges
    comp = An.comparePeptideResults(deNovoSeq, tempSeq, ambigEdges1=deNovoAmbigEdges, ambigEdges2=tempAmbigEdges, ppm=10)
    return comp[0], comp[1]
def resetGame():
    '''Resets game variables so player can restart the game quickly.'''
    Player.player.state = 'Restart'
    stopAllAnimation()
    Player.player.gameover = False
    Map.mapvar.getStartPoints()
    Map.mapvar.flylistgenerated = False
    Map.mapvar.flymovelists = []
    Map.mapvar.pointmovelists = []
    Localdefs.towerGroupDict = {'Life': [], 'Fire': [], 'Ice': [], 'Gravity': [], 'Wind': []}
    AllLists = [Localdefs.towerlist, Localdefs.bulletlist, Localdefs.menulist,
                Localdefs.explosions, Localdefs.senderlist, Localdefs.timerlist,
                Localdefs.shotlist, Localdefs.alertQueue]
    for lst in AllLists:
        del lst[:]  # empty each list in place
    for tower in Map.mapvar.towercontainer.children:
        if tower.type != 'Base':
            tower.remove()
    Map.mapvar.baseimg = None
    Map.mapvar.towercontainer.clear_widgets()
    Map.mapvar.enemycontainer.clear_widgets()
    for road in Map.mapvar.roadcontainer.children:
        road.iceNeighbor = False
    Map.mapvar.roadcontainer.clear_widgets()
    Map.mapvar.shotcontainer.clear_widgets()
    Map.mapvar.wallcontainer.clear_widgets()
    Map.mapvar.towerdragimagecontainer.clear_widgets()
    Player.player.wavenum = 0
    Player.player.wavetime = int(Map.mapvar.waveseconds)
    Player.player.myDispatcher.Timer = str(Player.player.wavetime)
    Player.player.health = Player.playerhealth
    Player.player.myDispatcher.Health = str(Player.player.health)
    Player.player.score = 0
    Player.player.myDispatcher.Score = str(Player.player.score)
    Player.player.analytics = Analytics.Analytics()
    __main__.ids.wavestreamer.removeWaveStreamer()
    __main__.ids.wavescroller.scroll_x = 0
    __main__.ids.play.text = 'Start'
    if Messenger.messenger.bgrect:
        Map.mapvar.background.canvas.after.remove(Messenger.messenger.bgrect)
        Messenger.messenger.bgrect = None
def generate_dataset(data, folder):
    if folder == 'test':
        Analytics.load()
        Analytics.data_set_size[folder] = len(data)
        Analytics.save()
    dataset = np.ndarray([len(data), 50, 50, 1], dtype='float32')
    labels = np.ones([len(data), 6], dtype=int) * 10
    for i in np.arange(len(data)):
        dataset[i, :, :, :] = preprocess_image(data[i], folder)
        labels[i, :] = getLabels(data[i], folder)
    # Analytics
    Analytics.load()
    Analytics.means[folder] = np.mean(dataset)
    Analytics.stds[folder] = np.std(dataset)
    Analytics.save()
    dataset = normalize(dataset)
    print(folder, "dataset created.")
    print(dataset.shape)
    print(labels.shape)
    return dataset, labels
def getUniquePeptDict(scanDict, scoreKey, peptideKey, scanKey='ScanF', nullVal='None', noStrip=['#'], datasets=None):
    if datasets == None:
        datasets = scanDict.keys()
    # Resolve datasets before it is captured by the defaultdict factory below.
    scanFDict = defaultdict(lambda: dict([(dataset, []) for dataset in datasets]))
    uniquePeptDict = {}
    for dataset in datasets:
        for item in scanDict[dataset]:
            if item[peptideKey] == nullVal:
                continue
            strippedPept = An.stripModifications(item[peptideKey], noRemove=noStrip)
            if strippedPept in uniquePeptDict and float(item[scoreKey]) > float(uniquePeptDict[strippedPept][scoreKey]):
                uniquePeptDict[strippedPept] = item
            elif strippedPept not in uniquePeptDict:
                uniquePeptDict[strippedPept] = item
            scanFDict[strippedPept][dataset] += [item[scanKey]]
    return uniquePeptDict, scanFDict
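# Sketch of the dedup rule above (hypothetical data): if dataset 'run1' holds
# peptide 'PEPT*IDE' with score 40 and dataset 'run2' holds 'PEPTIDE' with
# score 65, both strip to 'PEPTIDE' (noStrip=['#'] keeps '#' but drops '*'),
# uniquePeptDict keeps the score-65 item, and scanFDict['PEPTIDE'] still
# records the scan numbers from both runs.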
kundenNummer = [0]
lcg = LCG()
timestamp = int(time.time())
openerOperation = [False]

for lamda in numpy.arange(0.5, 10.5, 0.5):
    for r in range(10):
        env = simpy.Environment(8 * 60)  # Start the simulation at 8 a.m., i.e. 8*60 minutes after midnight.
        counters = MR(env, K)
        kundenNummer[0] = 0
        openerOperation[0] = False
        env.process(generate(env))
        env.process(counterOpener(env))
        env.run(until=16 * 60)  # Run the simulation until 4 p.m., i.e. 16*60 minutes after midnight.
        Analytics.storeRun(lamda, r + 1, kundenNummer[0])

# Show mean values.
print("mean number of waiting customers: %f" % Analytics.meanWaits())
print("mean waiting time: %f" % Analytics.meanWaittimePerCustomer())
print("mean time in system: %f" % Analytics.meanTotaltimePerCustomer())
print("mean queue length: %f" % Analytics.meanAverageQueueLength())

# Create, show, and save graphs.
#Analytics.createWaitAtPointGraph()
#Analytics.createWaittimePerCustomerGraph()
#Analytics.createTotaltimePerCustomerGraph()

# Export data to CSV format.
#Analytics.exportWaitsAtPoint("wartende_Kunden%i.csv" % timestamp)
#Analytics.exportTotaltimePerCustomer("Verweilzeiten%i.csv" % timestamp)
#Analytics.exportWaittimePerCustomer("Wartezeiten%i.csv" % timestamp)
unimodDict = pickle.load(fin)
hashedUnimodDict = hashUnimodDict(unimodDict)

outFile = open(options.output, 'w')
cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
if 'Ambig Edges' in infoDict:
    cols.insert(2, 'Ambig Edges')
outFile.write('\t'.join([col for col in cols]) + '\n')

for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
    scanData = {}
    print "New scan", entry
    scanData['ScanF'] = entry[infoDict['ScanF']]
    scanData['Peptide'] = entry[infoDict['Peptide']]
    scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'])
    scanData['Score'] = entry[infoDict['Score']]
    scanData['Alignment Score'] = None
    if 'Ambig Edges' in infoDict:
        ambigEdges = eval(entry[infoDict['Ambig Edges']])
        scanData['Ambig Edges'] = ambigEdges
    else:
        ambigEdges = []
    massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta'])
    spec = PN.Spectrum(PNet, precMass, epsilon=2 * epSTD, spectrum=massIntPairs)
    try:
        # Ignore de novo peptides with noncanonical amino acids for now
        epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1]
    except KeyError:
dtaList = glob.glob(options.dtadir + '/*.dta')
scanFDict = getScanFDict(dtaList)

aas = Constants.addPepsToAADict(300)
hashedAAs = Constants.hashAAsEpsilonRange(aas, epStep, maxEp)

ambigOpenPenalty = 0
ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(options.minedge, ambigOpenPenalty, options.ambigpenalty)
ppmPenaltyFun = DNS.getPPMPenaltyFun(options.ppmstd, hashedAAs, options.minedge, options.ppmpenalty, options.ppmsyserror, epStep)

print 'Getting Clusters'
parent = os.path.abspath(os.pardir)
clusterSVMModel = svmutil.svm_load_model(parent + paramsDict['Cluster Configuration']['model'])
clusterSVMRanges = svmutil.load_ranges(parent + os.path.splitext((paramsDict['Cluster Configuration']['model']))[0] + '.range')

precMassClusters = Analytics.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
# print 'precMassClusters', precMassClusters
samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=float(paramsDict['Cluster Configuration']['cutoff']))
# samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
# samePeptideClusters = An.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
# To test without any clustering:
# samePeptideClusters = [[scanF] for scanF in scanFDict]

for pairConfigName in paramsDict['Pair Configurations']:
    print 'Getting heavy-light pairs for %s' % (pairConfigName,)
    t1 = time.time()
    pairConfig = paramsDict['Pair Configurations'][pairConfigName]
    pairs = Analytics.findDeltaPairsClusters(samePeptideClusters, scanFDict, pairConfig['NMod'] + pairConfig['CMod'], ppm=options.ppmstd)
def do_training(self):
    self.train_accs = []
    self.valid_accs = []
    print("Training")
    with self.sess.as_default():
        start = time.time()
        for epoch in range(self.epochs):
            offset = 0
            #epoch_loss = 0
            while offset < self.train_dataset.shape[0]:
                X_batch, y_batch = load_minibatch(
                    self.train_dataset,
                    self.train_labels,
                    offset,
                    self.batch_size,
                )
                _, l, summary = self.sess.run(
                    [self.train_op, self.loss, self.summary_op],
                    feed_dict={
                        self.x: X_batch,
                        self.y: y_batch,
                        self.keep_prob: 0.5,
                        self.phase_train: True
                    })
                #epoch_loss += l * X_batch.get_shape().as_list()[0]
                offset = min(offset + self.batch_size, self.train_dataset.shape[0])
            train_pred = self.prediction.eval(
                feed_dict={
                    self.x: X_batch,
                    self.y: y_batch,
                    self.keep_prob: 1.0,
                    self.phase_train: False
                })
            train_acc = accuracy(train_pred, y_batch[:, 1:6])
            print("Training Accuracy", train_acc, "at Epoch:", epoch)
            self.train_accs.append([epoch, train_acc])
            offset = 0
            valid_acc = 0
            count = 0
            while offset < self.valid_dataset.shape[0]:
                X_batch, y_batch = load_minibatch(self.valid_dataset, self.valid_labels, offset, self.batch_size)
                valid_pred = self.prediction.eval(
                    feed_dict={
                        self.x: X_batch,
                        self.y: y_batch,
                        self.keep_prob: 1.0,
                        self.phase_train: False
                    })
                temp_valid_acc = accuracy(valid_pred, y_batch[:, 1:6])
                old_offset = offset
                offset = min(offset + self.batch_size, self.valid_dataset.shape[0])
                count += 1
                #valid_acc = ((valid_acc * old_offset) +
                #             (temp_valid_acc * self.batch_size) / offset)
                valid_acc += temp_valid_acc
            valid_acc = valid_acc / count
            self.valid_accs.append([epoch, valid_acc])
            print("Validation Accuracy", valid_acc, "at Epoch:", epoch)
            save_path = self.saver.save(self.sess, self.savepath)
            self.writer.add_summary(summary, epoch)
            # Graph of accuracies
            self.graph.update([x[0] for x in self.train_accs],
                              [y[1] for y in self.train_accs], "b", "Training")
            self.graph.update([x[0] for x in self.valid_accs],
                              [y[1] for y in self.valid_accs], "r", "Validation")
            #if epoch == 0:
            #    self.graph.addLegend()
        print("Evaluating Test Dataset")
        offset = 0
        test_acc = 0
        count = 0
        while offset < self.test_dataset.shape[0]:
            X_batch, y_batch = load_minibatch(self.test_dataset, self.test_labels, offset, self.batch_size)
            test_pred = self.prediction.eval(
                feed_dict={
                    self.x: X_batch,
                    self.y: y_batch,
                    self.keep_prob: 1.0,
                    self.phase_train: False
                })
            temp_test_acc = accuracy(test_pred, y_batch[:, 1:6])
            old_offset = offset
            offset = min(offset + self.batch_size, self.test_dataset.shape[0])
            count += 1
            #test_acc = ((test_acc * old_offset) +
            #            (temp_test_acc * self.batch_size) / offset)
            test_acc += temp_test_acc
        test_acc = test_acc / count
        print("Test accuracy: %.1f%%" % (test_acc))
        Analytics.train_time = time.time() - start
        Analytics.train_accuracy = train_acc
        Analytics.valid_accuracy = valid_acc
        Analytics.test_accuracy = test_acc
        Analytics.display()
        Analytics.save()
        plt.show()
def compareSequences(deNovoPep, deNovoUnmodPep, refPep, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges=[], epsilon=0.02):
    if 'X' in refPep:
        refPep = refPep.translate(None, 'X')  # KLUDGE: REMOVE WHEN REWRITE
    #deNovoPep = An.stripModifications(deNovoPep, noRemove=['#', '*'])
    alignment = getAlignment(deNovoUnmodPep, refPep, AAMap, scoreMatrix)
    alignedIndsMap = getAlignedIndsMap(alignment)
    disagreeArr = [1 if alignment[0][i] == alignment[1][i] else 0 for i in range(len(alignment[0]))]
    intervals = getConnectedDisagreementRegions(disagreeArr)
    try:
        refPRMLadder = An.getPRMLadder(refPep)
    except KeyError:
        return None
    deNovoPRMLadder = An.getPRMLadder(deNovoPep, ambigEdges=deNovoAmbigEdges)
    allResolved = True
    modList = {}
    newRefEndInds = {'start': 0, 'end': 0}
    # Rough check of whether or not intervals can be easily explained
    for interval in intervals:
        deNovoSubSeq = deNovoUnmodPep[alignedIndsMap['De Novo'][interval[0]]:alignedIndsMap['De Novo'][interval[1]]]
        refSubSeq = refPep[alignedIndsMap['Ref'][interval[0]]:alignedIndsMap['Ref'][interval[1]]]
        if alignedIndsMap['De Novo'][interval[0]] == 0:
            term = 'N-term'
        elif alignedIndsMap['De Novo'][interval[1]] == len(deNovoUnmodPep):
            term = 'C-term'
        else:
            term = None
        if deNovoSubSeq != '' and refSubSeq != '':
            deNovoMass = deNovoPRMLadder[alignedIndsMap['De Novo'][interval[1]]] - deNovoPRMLadder[alignedIndsMap['De Novo'][interval[0]]]
            if term == None:
                refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                modList[interval] = resolveInterval(refMass, deNovoMass, refSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon), deNovoSubSeq, refSubSeq
            else:
                minSizedMod = ((None, None, 10000000,),)
                for i in range(len(refSubSeq)):
                    if term == 'N-term':
                        refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]] + i]
                        subRefSubSeq = refSubSeq[i:]
                    else:
                        refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]] - i] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                        subRefSubSeq = refSubSeq[:-i]
                    mod = resolveInterval(refMass, deNovoMass, subRefSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon)
                    if 'TX' in deNovoUnmodPep:
                        print deNovoSubSeq, refSubSeq, subRefSubSeq, mod
                    if (abs(minSizedMod[0][2]) > abs(mod[2]) and (minSizedMod[0][0] == None or 'Isobaric' not in minSizedMod[0][0])) or 'Isobaric' in mod[0]:
                        if mod[1] != None or (mod[1] == None and minSizedMod[0][1] == None) or ('Isobaric' in mod[0] and 'Isobaric' not in minSizedMod[0][0]):
                            minSizedMod = mod, deNovoSubSeq, subRefSubSeq
                            if term == 'N-term':
                                newRefEndInds['start'] = i
                            else:
                                newRefEndInds['end'] = -i
                modList[interval] = minSizedMod
        else:
            # Make sure that lack of sequence is due to overhang of reference peptide
            if alignedIndsMap['De Novo'][interval[1]] == 0:
                newRefEndInds['start'] = len(refSubSeq)
            elif alignedIndsMap['De Novo'][interval[0]] == len(deNovoUnmodPep):
                newRefEndInds['end'] = -len(refSubSeq)
            # elif term != None:
            #     raise ValueError('Not enough reference sequence provided for resolution of terminal discrepancies. De Novo: %s, Reference %s' % (deNovoPep, refPep))
            elif term == None:
                if deNovoSubSeq == '':
                    refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                    modList[interval] = ('Deletion', refMass, 0, -refMass), deNovoSubSeq, refSubSeq
                else:
                    deNovoMass = deNovoPRMLadder[alignedIndsMap['De Novo'][interval[1]]] - deNovoPRMLadder[alignedIndsMap['De Novo'][interval[0]]]
                    modList[interval] = ('Insertion', deNovoMass, 0, deNovoMass), deNovoSubSeq, refSubSeq
    #print 'Mod List: ', modList
    acc, prec = getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoPep, deNovoUnmodPep, refPep, alignedIndsMap, deNovoAmbigEdges)
    return modList, newRefEndInds, alignment, acc, prec
def getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet):
    featureList = []

    lightScans = seqEntry[0]
    heavyScans = seqEntry[1]

    lightSpecs = [DataFile.getMassIntPairs(scanFDict[int(lightScanF)]['dta']) for lightScanF in lightScans]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[int(heavyScanF)]['dta']) for heavyScanF in heavyScans]
    avgLightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in lightScans]))

    epSTD = options.ppmstd * 10**-6 * avgLightPrecMass

    specs = []
    for i, massIntPairs in enumerate(lightSpecs):
        specs += [PN.Spectrum(PNet, scanFDict[lightScans[i]]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=massIntPairs)]
    for i, massIntPairs in enumerate(heavySpecs):
        specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
    for spec in specs:
        spec.initializeNoiseModel()

    clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
    GLFD.addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)

    scoreStats = {}
    truePMs = {}
    prmLadders = {}
    for PSM in LADSSeqInfo[seqEntry]:
        lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
        scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2])

        prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True)
        truePMs[PSM[:2]] = prmLadderWithEnds[-1]
        prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1]

    PSMList = scoreStats.keys()
    spectrumOrderedScoreStats, clusterScoreStats = GLFD.compileScoreStats(scoreStats, specs, PSMList)

    spectrumAndPSMSpecificFeatureDict = {}
    PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)])
    for i, PSM in enumerate(LADSSeqInfo[seqEntry]):
        PSMSpecificFeatureList = copy.copy(featureList)

        peptLength = len(prmLadders[PSM[:2]]) + 1

        # Add LADS PScore (and normalized variants) and delta rank, delta score (LADS PScore) to feature list
        PSMSpecificFeatureList += [PSM[0], PSM[0]/peptLength, PSM[0]/len(specs), -i, PSM[0]-LADSSeqInfo[seqEntry][0][0]]

        # Add Total Path Score (and normalized variants) and delta rank, delta score (total path score) and total minimum node score to feature list
        totalPathScore = scoreStats[PSM[:2]]['Total Path Score']
        PSMSpecificFeatureList += [totalPathScore, totalPathScore/peptLength, totalPathScore/len(specs), -clusterScoreStats['PSM Rankings'][PSMIndexDict[PSM[:2]]], totalPathScore-clusterScoreStats['Max Cluster Path Score'], scoreStats[PSM[:2]]['Total Minimum Node Score']]

        # Add minimum path score, maximum path score (and normalized variants), and minimum score/maximum score for cluster to feature list
        PSMSpecificFeatureList += [scoreStats[PSM[:2]]['Minimum Path Score'], scoreStats[PSM[:2]]['Minimum Path Score']/peptLength, scoreStats[PSM[:2]]['Maximum Path Score'], scoreStats[PSM[:2]]['Maximum Path Score']/peptLength, scoreStats[PSM[:2]]['Minimum Path Score']/scoreStats[PSM[:2]]['Maximum Path Score']]

        # Add difference between minimum and maximum ranking for PSM across cluster to feature list
        rankingsForPSM = [spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]] for i in spectrumOrderedScoreStats]
        PSMSpecificFeatureList += [min(rankingsForPSM) - max(rankingsForPSM)]

        # Add number of forbidden node pairs (and normalized variants) to feature list
        numForbiddenPairs = Discriminator.getNumForbiddenPairs(prmLadders[PSM[:2]], avgLightPrecMass)
        PSMSpecificFeatureList += [numForbiddenPairs, 2.0*numForbiddenPairs/(peptLength-1)]

        # Add number of ambiguous edges to feature list
        PSMSpecificFeatureList += [len(PSM[2])]

        # Add stats for PRM evidence over cluster (and normalized variants) to feature list
        PSMSpecificFeatureList += [scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['All Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['All Evidence']/float(peptLength-1), scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['Majority Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['Majority Evidence']/float(peptLength-1), scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['None Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['None Evidence']/float(peptLength-1)]

        # Add stats for paired PRMs and their corresponding ion types to feature list
        pairedPRMStats = Discriminator.getPairedPRMStats(prmLadders[PSM[:2]], clusterPairingStats['Light Merged Spec'], clusterPairingStats['Heavy Merged Spec'], lightSpecs, heavySpecs, clusterPairingStats['Cluster Paired PRM Information'], epSTD=epSTD)
        GLFD.addPairedPRMStatsToFeatureList(pairedPRMStats, PSMSpecificFeatureList, len(prmLadders[PSM[:2]]))

        pairedPRMLadder = pairedPRMStats['Paired PRM Ladder']

        for i, scan in enumerate(lightScans):
            spectrumSpecificFeatureList = copy.copy(PSMSpecificFeatureList)

            # Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list
            pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]]
            numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]]
            spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]]

            # Add mass deviation from true peptide mass to feature list
            precMass = scanFDict[scan]['precMass']
            spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]

            peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet)
            GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength)
            GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList)

            spectrumSpecificFeatureList += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
            spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])] = spectrumSpecificFeatureList

        for j, scan in enumerate(heavyScans):
            i = j + len(lightScans)
            spectrumSpecificFeatureList = copy.copy(PSMSpecificFeatureList)

            # Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list
            pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]]
            numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]]
            spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]]

            # Add mass deviation from true peptide mass to feature list
            precMass = scanFDict[scan]['precMass']
            spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + pairConfig['NMod'] + pairConfig['CMod'] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]

            peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet)
            GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength)
            GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList)

            spectrumSpecificFeatureList += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
            spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])] = spectrumSpecificFeatureList

    return spectrumAndPSMSpecificFeatureDict
specs = []
for i, massIntPairs in enumerate(lightSpecs):
    specs += [PN.Spectrum(PNet, scanFDict[lightScans[i]]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=massIntPairs)]
for i, massIntPairs in enumerate(heavySpecs):
    specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
for spec in specs:
    spec.initializeNoiseModel()

clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)

scoreStats = {}
truePMs = {}
prmLadders = {}
for PSM in LADSSeqInfo[seqEntry]:
    lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
    scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2])

    prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True)
    truePMs[PSM[:2]] = prmLadderWithEnds[-1]
    prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1]

PSMList = scoreStats.keys()
spectrumOrderedScoreStats, clusterScoreStats = compileScoreStats(scoreStats, specs, PSMList)

PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)])
for i, PSM in enumerate(LADSSeqInfo[seqEntry]):
    PSMSpecificFeatureList = copy.copy(featureList)
    lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
    heavySeq = An.preprocessSequence(PSM[1], heavySeqMaps['silac_light_heavy'], replaceExistingTerminalMods=True, ambigEdges=PSM[2])
def getAlignment(deNovoPept, dbPept, AAMap, scoreMatrix, gapOpenPen=-5, gapExtendPen=0):
    alignment = An.alignSequences(deNovoPept, dbPept, AAMap, scoreMatrix, gapOpenPen, gapExtendPen)[1][0]
    return alignment
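# Note on the indexing above: An.alignSequences is taken here to return its
# alignments as the second element of its result, so [1][0] picks the
# top-scoring gapped alignment of deNovoPept against dbPept -- the pair of
# strings that getAlignedIndsMap consumes in compareSequences above.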
def do_training(self):
    self.train_accs = []
    self.valid_accs = []
    print("Training")
    with self.sess.as_default():
        start = time.time()
        for step in range(self.num_steps):
            offset = ((step * BATCH_SIZE) % (self.train_labels.shape[0] - BATCH_SIZE))
            batch_data = self.train_dataset[offset:(offset + BATCH_SIZE), :, :, :]
            batch_labels = self.train_labels[offset:(offset + BATCH_SIZE), :]
            feed_dict = {
                self.x: batch_data,
                self.y: batch_labels,
                self.keep_prob: .9375
            }
            _, l, summary = self.sess.run(
                [self.train_op, self.loss, self.summary_op],
                feed_dict=feed_dict)
            if (step % 500 == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                train_pred = self.prediction.eval(
                    feed_dict={
                        self.x: batch_data,
                        self.y: batch_labels,
                        self.keep_prob: 1.0
                    })
                train_acc = accuracy(train_pred, batch_labels[:, 1:6])
                self.train_accs.append([step, train_acc])
                print("Minibatch accuracy: %.1f%%" % train_acc)
                valid_pred = self.prediction.eval(
                    feed_dict={
                        self.x: self.valid_dataset,
                        self.y: self.valid_labels,
                        self.keep_prob: 1.0
                    })
                valid_acc = accuracy(valid_pred, self.valid_labels[:, 1:6])
                self.valid_accs.append([step, valid_acc])
                print("Validation accuracy: %.1f%%" % valid_acc)
                save_path = self.saver.save(self.sess, self.savepath)
                self.writer.add_summary(summary, step)
        # Graph of accuracies
        plt.plot([x[0] for x in self.train_accs],
                 [y[1] for y in self.train_accs], 'b',
                 [a[0] for a in self.valid_accs],
                 [b[1] for b in self.valid_accs], 'r')
        plt.title("Training and Validation Accuracy")
        test_pred = self.prediction.eval(feed_dict={
            self.x: self.test_dataset,
            self.keep_prob: 1.0
        })
        test_acc = accuracy(test_pred, self.test_labels[:, 1:6])
        print("Test accuracy: %.1f%%" % test_acc)
        Analytics.train_time = time.time() - start
        Analytics.train_accuracy = train_acc
        Analytics.valid_accuracy = valid_acc
        Analytics.test_accuracy = test_acc
        Analytics.display()
        Analytics.save()
        plt.show()
log ("Request type: {0}.".format(request['type'])) if request['type'] == 'IntentRequest': log ("Intent name: {0}.".format(request['intent']['name'])) ####################################################### # Sensor Reading Request # ####################################################### if request['intent']['name'] == 'SensorStatusIntent': log ("Sensor value: {0}.".format(request['intent']['slots']['Sensor_Selection']['value'])) sensor_data = request['intent']['slots']['Sensor_Selection']['value'] log (sensor_data) if sensor_data in ['temperature', 'pressure']: if sensor_data == 'temperature': tag_name = "raw.temp3.avg" else: tag_name = "raw.pressure.avg" val = Analytics.last_n_values(tag_name,1) if val: response_txt = "The most recent " + sensor_data + " is " + str(int(val[0][tag_name])) log (response_txt) else: response_txt = "I could not find any recent " + sensor_data + " values" else: log ("Error: Not temperature or pressure.") ####################################################### # LED Control Request # ####################################################### elif request['intent']['name'] == 'ledControlIntent' or request['intent']['name'] == 'ledBlinkIntent': log ("Intent value: {0}.".format(request['intent']['slots']['led_Selection']['value'])) led = request['intent']['slots']['led_Selection']['value'] log (led) #
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, spec, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges=None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges=deNovoAmbigEdges, addEnds=True)
    #print deNovoPRMLadder
    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)
    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None
    #print sorted(tagGraph.nodes(data=True))
    #print sorted(tagGraph.edges(data=True))
    for tag in nx.topological_sort(tagGraph):
        if tagGraph.node[tag]['position'] == 'internal':
            nodeScore = getScoreFromPRMs(spec, deNovoPRMLadder[tag[0][0]:tag[0][1]+1], deNovoTerm=getDeNovoTerm(tag, len(deNovoPept)))
        else:
            nodeScore = 0
        #print 'Tag', tag
        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]
            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)
            modPenalty = 0
            modScore = 0
            if len(mods) > 0:
                if 'Isobaric Substitution' == mods[0][0]:
                    modPenalty = isobaricPenalty
                    modScore = getTagScore(spec, refSubSeq, startMass=deNovoPRMLadder[prevTag[0][1]], deNovoTerm=None, addTerminalNodes=False, verbose=True)
                    print modScore, refSubSeq
                elif 'Insertion' == mods[0][0]:
                    modPenalty = inDelPenalty
                    modScore = getScoreFromPRMs(spec, deNovoPRMLadder[prevTag[0][1]:tag[0][0]+1], deNovoTerm=None, addTerminalNodes=False)
                elif 'Deletion' == mods[0][0]:
                    modPenalty = inDelPenalty * len(deNovoSubSeq)
                elif 'Undefined Mass Shift' == mods[0][0]:
                    modPenalty = undefModPenalty
                    modPepts = getModPeptides(mods[0], refSubSeq, term, unimodDict)
                    modScores = []
                    for pept in modPepts:
                        modScores += [(getTagScore(spec, pept[0], startMass=deNovoPRMLadder[prevTag[0][1]], ambigEdges=pept[1], deNovoTerm=None, addTerminalNodes=False), pept)]
                    modScore, modPept = max(modScores)
                    mods = (mods[0][:-1] + (modPept[0],),)
                else:
                    modPenalty = defModPenalty
                    modScores = []
                    for modData in mods:
                        modPepts = getModPeptides(modData, refSubSeq, term, unimodDict)
                        for pept in modPepts:
                            modScores += [(getTagScore(spec, pept[0], startMass=deNovoPRMLadder[prevTag[0][1]], ambigEdges=pept[1], deNovoTerm=None, addTerminalNodes=False), (modData, pept))]
                    modScore, modPept = max(modScores)
                    mods = (modPept[0][:-1] + (modPept[1][0],),)
            tagGraph.edge[prevTag][tag]['edgeScore'] = nodeScore + modScore + modPenalty
            tagGraph.edge[prevTag][tag]['mods'] = mods
            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore
            try:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'], tagGraph.node[prevTag]['score'] + tagGraph.edge[prevTag][tag]['edgeScore'])
            except KeyError:
                tagGraph.node[tag]['score'] = tagGraph.node[prevTag]['score'] + tagGraph.edge[prevTag][tag]['edgeScore']
        if tagGraph.node[tag]['position'] == 'end' and tagGraph.node[tag]['score'] > maxScore:
            maxScore = tagGraph.node[tag]['score']
            maxScoringTag = tag

    if maxScoringTag != None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
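# The tag-graph loop above is a longest-path dynamic program over a DAG:
# tags are visited in topological order, each edge (prevTag -> tag) gets an
# edgeScore of nodeScore + modScore + modPenalty, and every node keeps the
# best accumulated predecessor score, so the highest-scoring 'end' tag
# (maxScoringTag) determines the alignment returned by getBestAlignment.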
def process_and_visualize(train_folder="data/train",
                          test_folder="data/test",
                          extra_folder="data/extra",
                          display="",
                          single=False):
    if not (os.path.exists("data/train") or os.path.exists("data/test") or os.path.exists("data/extra")):
        if not (os.path.exists("data/train.tar.gz") or os.path.exists("data/test.tar.gz") or os.path.exists("data/extra.tar.gz")):
            # No tar.gz files found, data must be downloaded
            tr, t, e = download_data()
        # No folders found, need to extract the tar.gz
        train_folder, test_folder, extra_folder = extract_data(tr, t, e)

    ##
    # Reset sequence lengths so multiple runs do not add to old run totals.
    Analytics.load()
    Analytics.sequence_lengths = {
        'data/train': {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0},
        'data/extra': {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0},
        'data/test': {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}
    }
    Analytics.save()

    ##
    # Set the number of datapoints to create based on one image.
    data_points = 5
    if single:
        data_points = 1

    ##
    # Get the DigitStructs for Training Data.
    fin = os.path.join(train_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the training data from the digitStruct.mat file")
    train_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed training data")

    ##
    # Display training examples.
    if display != "":
        print("Displaying Examples with bounding boxes")
        example_indices = np.random.randint(0, len(train_data), size=5)
        examples = train_data[example_indices]
        Analytics.load()
        Analytics.train_samples = examples
        Analytics.save()
        for e in examples:
            display_example(e, train_folder)

    ##
    # Preprocess Training data and fetch labels.
    print("Generating data set and processing data.")
    train_dataset, train_labels = generate_dataset(train_data, train_folder, single)

    ##
    # Display the processed data.
    if display != "":
        print("Displaying examples of preprocessed images")
        examples = train_dataset[example_indices]
        labels = train_labels[example_indices]
        for e, l in zip(examples, labels):
            print("The Label for this is:", l)
            display_processed_example(e)

    ##
    # Delete things to free up space.
    if display != "":
        del example_indices
        del examples
        del labels
    del train_data

    ##
    # Split the data into training and validation data.
    (train_dataset, train_labels,
     valid_dataset, valid_labels) = split(train_dataset, train_labels, TRAIN_SPLIT * data_points)

    ##
    # Save the split data to disk.
    np.save("temp_train_dataset1", train_dataset)
    np.save("temp_train_labels1", train_labels)
    np.save("temp_valid_dataset1", valid_dataset)
    np.save("temp_valid_labels1", valid_labels)

    ##
    # Delete data to free up space.
    del train_dataset
    del train_labels
    del valid_dataset
    del valid_labels

    ##
    # Repeat the process for Extra Data.
    fin = os.path.join(extra_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the extra data from the digitStruct.mat file")
    extra_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed extra data")

    ##
    # Preprocess extra data and fetch labels.
    print("Generating data set and processing data.")
    extra_dataset, extra_labels = generate_dataset(extra_data, extra_folder, single)

    ##
    # Delete data to free space.
    del extra_data

    ##
    # Split the data into training and validation sets.
    (train_dataset, train_labels,
     valid_dataset, valid_labels) = split(extra_dataset, extra_labels, EXTRA_SPLIT * data_points)

    ##
    # Save the split data onto disk.
    np.save("temp_train_dataset2", train_dataset)
    np.save("temp_train_labels2", train_labels)
    np.save("temp_valid_dataset2", valid_dataset)
    np.save("temp_valid_labels2", valid_labels)

    ##
    # Delete datasets to free up space.
    del train_dataset
    del train_labels
    del valid_dataset
    del valid_labels
    del extra_dataset
    del extra_labels

    ##
    # Create the Training and Validation sets.
    print("Creating the Training and Validation sets")
    td1 = np.load("temp_train_dataset1.npy")
    td2 = np.load("temp_train_dataset2.npy")
    train_dataset = np.concatenate((td1, td2), axis=0)
    del td1
    del td2
    print(train_dataset.shape)

    ##
    # Find the mean and standard deviation of the training set.
    mean = np.mean(train_dataset)
    std = np.std(train_dataset)

    ##
    # Save the stats about the training set.
    Analytics.load()
    Analytics.mean = mean
    Analytics.std = std
    Analytics.data_set_size['train'] = train_dataset.shape[0]
    Analytics.save()

    ##
    # Normalize the training data.
    train_dataset = normalize(train_dataset, mean, std)
    np.save("data/train_dataset", train_dataset)
    del train_dataset

    ##
    # Create the training labels.
    tl1 = np.load("temp_train_labels1.npy")
    tl2 = np.load("temp_train_labels2.npy")
    train_labels = np.concatenate((tl1, tl2), axis=0)
    del tl1
    del tl2
    np.save("data/train_labels", train_labels)
    del train_labels

    ##
    # Create the validation set.
    vd1 = np.load("temp_valid_dataset1.npy")
    vd2 = np.load("temp_valid_dataset2.npy")
    valid_dataset = np.concatenate((vd1, vd2), axis=0)
    del vd1
    del vd2

    ##
    # Normalize the validation dataset with the mean and standard deviation
    # of the training set.
    valid_dataset = normalize(valid_dataset, mean, std)
    np.save("data/valid_dataset", valid_dataset)
    Analytics.load()
    Analytics.data_set_size['valid'] = valid_dataset.shape[0]
    Analytics.save()
    print(valid_dataset.shape)
    del valid_dataset

    ##
    # Create the Validation labels.
    vl1 = np.load("temp_valid_labels1.npy")
    vl2 = np.load("temp_valid_labels2.npy")
    valid_labels = np.concatenate((vl1, vl2), axis=0)
    del vl1
    del vl2
    np.save("data/valid_labels", valid_labels)
    del valid_labels
    print("Finished creating the sets")

    #os.remove("temp_train_dataset1.npy")
    #os.remove("temp_train_dataset2.npy")
    #os.remove("temp_train_labels1.npy")
    #os.remove("temp_train_labels2.npy")
    #os.remove("temp_valid_dataset1.npy")
    #os.remove("temp_valid_dataset2.npy")
    #os.remove("temp_valid_labels1.npy")
    #os.remove("temp_valid_labels2.npy")

    ##
    # Create the Test data set.
    fin = os.path.join(test_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the test data from the digitStruct.mat file")
    test_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed test data")

    if display != "":
        example_indices = np.random.randint(0, len(test_data), size=5)
        examples = test_data[example_indices]
        Analytics.load()
        Analytics.test_samples = examples
        Analytics.save()

    ##
    # Preprocess test data and fetch labels.
    print("Generating data set and processing data.")
    test_dataset, test_labels = generate_dataset(test_data, test_folder, single)

    ##
    # Normalize the test dataset with the mean and standard deviation
    # from the training set.
    test_dataset = normalize(test_dataset, mean, std)
    np.save("data/test_dataset", test_dataset)
    np.save("data/test_labels", test_labels)

    ##
    # Delete to free space.
    del test_data
    del fin
    del dsf
    del test_dataset
    del test_labels

    ##
    # Display and save dataset statistics.
    Analytics.display()
    Analytics.save()
if not options.dtaDir or not options.model or not options.config:
    print 'ERROR: missing model, config, or dtaDir'
    exit(-1)

Constants.aminoacids['C'] = (Constants.aminoacids['C'][0], Constants.aminoacids['C'][1],
                             Constants.aminoacids['C'][2] + Constants.mods['Carbamidomethyl'],
                             Constants.aminoacids['C'][3])
Constants.aminoacids['O'] = (Constants.aminoacids['M'][0], Constants.aminoacids['M'][1] + 'O',
                             Constants.aminoacids['M'][2] + Constants.mods['#'],
                             Constants.aminoacids['M'][3])
if options.Cmod == Constants.mods['*']:
    Constants.aminoacids['X'] = (Constants.aminoacids['K'][0], Constants.aminoacids['K'][1],
                                 Constants.aminoacids['K'][2] + Constants.mods['*'],
                                 Constants.aminoacids['K'][3])

PNet = PN.ProbNetwork(options.config, options.model)

if options.verbose:
    t1 = time.time()
    print 'Getting heavy-light pairs'

dtaList = glob.glob(options.dtaDir + '/*.dta')
(paired, unpaired) = Analytics.getPairedAndUnpairedSpectra(options.dtaDir, dtaList,
                                                           delta=(options.Nmod + options.Cmod),
                                                           ppm=options.ppm,
                                                           cutOff=options.pairCutoff)

if options.verbose:
    t2 = time.time()
    print 'Finished getting paired spectra. Time taken: ', t2 - t1
    print 'Starting Sequencing'

aas = Constants.addPepsToAADict(options.minEdge)
for pair in paired:
    (lightSpec, heavySpec) = pair[1:]
    if options.verbose:
        print 'Now sequencing %s %s with shared peaks ratio %f' % (lightSpec, heavySpec, pair[0])
    s1 = time.time()
    heavyPath = heavySpec
    lightPath = lightSpec
    sharedInfo = DNS.getPairedSpectraInfoForSequencing(lightPath, heavyPath, options.verbose)
with open(options.unimoddict) as fin:
    unimodDict = pickle.load(fin)
hashedUnimodDict = hashUnimodDict(unimodDict)

outFile = open(options.output, 'w')
cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications',
        'DB Peptide', 'Alignment Score']
if 'Ambig Edges' in infoDict:
    cols.insert(2, 'Ambig Edges')
outFile.write('\t'.join([col for col in cols]) + '\n')

for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
    scanData = {}
    scanData['ScanF'] = entry[infoDict['ScanF']]
    scanData['Peptide'] = entry[infoDict['Peptide']]
    scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[])
    scanData['Score'] = entry[infoDict['Score']]
    scanData['Alignment Score'] = None
    if 'Ambig Edges' in infoDict:
        ambigEdges = eval(entry[infoDict['Ambig Edges']])
        scanData['Ambig Edges'] = ambigEdges
    else:
        ambigEdges = []
    deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)
    refList = eval(entry[infoDict['References']])
    subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1] - 1:refList[0][2]]
    if scanData['Unmod Peptide'] == subjSequence:
        scanData['Modifications'] = []
class GameBot:
    browser = None
    executable_path = "source/webdriver/chromedriver"
    logger = Logger.Logger("Logger")
    raid_analytics = Analytics.Analytics()

    def __init__(self, url, username, password, lang="en"):
        self.url = url
        self.username = username
        self.password = password
        self.raid_text = self.get_raid_text(lang)

    def start(self):
        self.browser = webdriver.Chrome(executable_path=self.executable_path)
        self.browser.get(self.url)
        sleep(1)

    def login(self):
        self.random_sleep(0, 300)
        self.browser.find_element_by_name("name").send_keys(self.username)
        self.browser.find_element_by_name("password").send_keys(self.password)
        self.browser.find_element_by_name("s1").click()  # Submit form
        self.logger.add_line("Logging into: " + self.username)
        sleep(1)

    def enter_top_players(self):
        self.browser.get(self.url + "statistiken.php?id=0&idSub=3")
        self.logger.add_line("entering top players")
        sleep(1)

    def grab_raider_table(self):
        # Raiders table
        table = self.browser.find_element_by_id("top10_raiders")
        # Take all table rows
        rows = table.find_elements_by_tag_name("tr")
        # Get the cells of the last row (where the current user's info is)
        col = rows[-1].find_elements_by_tag_name("td")
        # Submit it to the database
        self.raid_analytics.add_info(col)
        '''
        for row in rows:
            col = row.find_elements_by_tag_name("td")
            self.raid_analytics.add_info(col)
        '''

    def record_raider_rank(self):
        self.enter_top_players()
        self.grab_raider_table()

    def enter_village(self):
        self.browser.get(self.url + "dorf2.php")
        self.logger.add_line("entering village")
        sleep(1)

    def enter_rally_point(self):
        self.browser.find_element_by_class_name("g16").click()
        self.logger.add_line("entering rally point")
        sleep(1)

    def enter_farm_list(self):
        self.browser.find_element_by_class_name("favorKey99").click()
        self.logger.add_line("entering farm list")
        sleep(1)

    def send_farm_list(self, index):
        self.random_sleep(1, 60)
        checkboxes = self.browser.find_elements_by_xpath("//input[contains(@class, 'markAll') "
                                                         "and contains(@class, 'check')]")
        if len(checkboxes) <= index:
            self.logger.add_line("Index is out of checkboxes bounds: " + str(len(checkboxes)))
            return
        checkboxes[index].send_keys(Keys.SPACE)
        buttons = self.browser.find_elements_by_xpath("//button[contains(text(),'" + self.raid_text + "')]")
        if len(buttons) <= index:
            self.logger.add_line("Index is out of button bounds: " + str(len(buttons)))
            return
        buttons[index].click()
        self.logger.add_line("Sent attack on index: " + str(index))
        sleep(2)

    def send_attacks(self, array):
        self.enter_village()
        self.random_sleep(0, 600)
        self.enter_rally_point()
        self.enter_farm_list()
        for i in array:
            self.send_farm_list(int(i))

    def submit(self):
        self.logger.submit()

    def submit_error(self):
        self.logger.add_line("There was an exception during runtime")
        self.logger.submit()

    def random_sleep(self, min_num, max_num):
        if not release:
            return
        sleep_time = random.randint(min_num, max_num)
        self.logger.add_line("sleeping for extra " + str(sleep_time) + " seconds")
        sleep(sleep_time)

    @staticmethod
    def get_raid_text(lang):
        if lang == 'en':
            return "Start raid"
        elif lang == "he":
            return "שלח בזיזה"  # "Send raid" in Hebrew
        # Defensive fallback (added): default to English rather than return None
        # for an unsupported language code.
        return "Start raid"
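# A minimal usage sketch for the class above. The URL and credentials are
# placeholders, and the surrounding project is assumed to provide the
# release/Logger/Analytics globals the class references.
bot = GameBot("https://example-server.example/", "my_username", "my_password", lang="en")
try:
    bot.start()
    bot.login()
    bot.record_raider_rank()   # log the current raider-table row
    bot.send_attacks([0, 1])   # send farm lists 0 and 1
    bot.submit()
except Exception:
    bot.submit_error()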
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, hashedUnimodDict, unimodDict,
                            paramsDict, deNovoAmbigEdges=None, tagLength=2, isobaricPenalty=-0.5,
                            defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges=deNovoAmbigEdges, addEnds=True)
    #print deNovoPRMLadder
    print 'De Novo', deNovoPept
    print 'DB', dbPept
    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)
    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None
    #print sorted(tagGraph.nodes(data=True))
    #print sorted(tagGraph.edges(data=True))
    for tag in nx.topological_sort(tagGraph):
        nodeScore = tag[0][1] - tag[0][0]
        #print 'Tag', tag
        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]
            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict,
                                   unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            modPenalty = defModPenalty
            for mod in mods:
                if 'Isobaric Substitution' == mod[0]:
                    modPenalty = isobaricPenalty
                elif 'Insertion' == mod[0] or 'Deletion' == mod[0]:
                    modPenalty = inDelPenalty
                elif 'Undefined Mass Shift' == mod[0]:
                    modPenalty = undefModPenalty
            if not mods:
                modPenalty = 0

            tagGraph.edge[prevTag][tag]['edgeScore'] = nodeScore + modPenalty
            tagGraph.edge[prevTag][tag]['mods'] = mods
            print prevTag, tag, deNovoSubSeq, refSubSeq, mods

            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore
            try:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'],
                                                  tagGraph.node[prevTag]['score'] + nodeScore + modPenalty)
            except KeyError:
                tagGraph.node[tag]['score'] = tagGraph.node[prevTag]['score'] + nodeScore + modPenalty

        if tagGraph.node[tag]['position'] == 'end' and tagGraph.node[tag]['score'] > maxScore:
            maxScore = tagGraph.node[tag]['score']
            maxScoringTag = tag

    if maxScoringTag != None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
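# The scoring loop above is longest-path dynamic programming over a DAG of
# sequence tags, processed in topological order. A self-contained toy sketch
# of the same pattern (toy graph and scores, not LADS data; networkx 2.x API):
import networkx as nx

G = nx.DiGraph()
G.add_edge('start', 'tagA', edgeScore=2)
G.add_edge('start', 'tagB', edgeScore=1)
G.add_edge('tagA', 'end', edgeScore=3)
G.add_edge('tagB', 'end', edgeScore=5)

best = {'start': 0}
for node in nx.topological_sort(G):
    for prev in G.predecessors(node):
        # Relax each incoming edge: keep the best score seen so far.
        cand = best[prev] + G[prev][node]['edgeScore']
        if node not in best or cand > best[node]:
            best[node] = cand

print(best['end'])  # 6: start -> tagB -> end is the highest-scoring path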
def get_line():
    global AGENT
    return Analytics.get_fitted_line(AGENT)
with open(options.symbolmap, 'r') as fin:
    symbolMap = pickle.load(fin)
seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)

if hasattr(options, 'number'):
    minNumScans = int(options.number)
else:
    minNumScans = 1

processedInfo = {}
if options.lads:
    LADSdict = eval(options.lads)
    for tdvfile in LADSdict.keys():
        LADSScanInfo = DataFile.getScanInfo(tdvfile, dbDict['LADS']['fields'], delimiter='\t')
        processedInfo[LADSdict[tdvfile]] = An.preprocessLADSScanInfo(
            LADSScanInfo, seqMap[LADSdict[tdvfile]],
            paramsDict['LADS Parameters']['pair configurations'],
            dbDict['LADS']['fieldmap'])
if options.mascot:
    MASCOTdict = eval(options.mascot)
    processedInfo.update(parseDBScans(MASCOTdict, 'MASCOT', seqMap, dbDict))
if options.sequest:
    SEQUESTdict = eval(options.sequest)
    processedInfo.update(parseDBScans(SEQUESTdict, 'SEQUEST', seqMap, dbDict))

cols = ['ScanF']
progNames = processedInfo.keys()
cols.extend([val for val in dbDict[progDict[progNames[0]]]['cols']])

outFile = open(options.output, 'w')
outFile.write(','.join([col for col in cols]) + '\n')
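# Note on the eval() calls above: they execute arbitrary text from the command
# line. If the option values are plain dict literals, ast.literal_eval is a
# safer drop-in. A hedged hardening sketch, not part of the original script:
import ast

LADSdict = ast.literal_eval(options.lads)  # parses literals only, runs no code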
def get_stats():
    global AGENT
    return Analytics.get_stats(AGENT)
outFile.write('\t'.join([col for col in cols]) + '\n')

for seqEntry in LADSSeqInfo:
    lightScans = seqEntry[0]
    heavyScans = seqEntry[1]

    scanScoreDict = getScanScoreDictSVM(LADSSeqInfo, seqEntry, scanFDict, svmModel, svmRange,
                                        pairConfigurations[pairConfigName], PNet,
                                        desired_feats=desired_feats)
    # scanScoreDict = getScanScoreDictRankBoost(LADSSeqInfo, seqEntry, scanFDict, rankModel,
    #                                           pairConfigurations['lightdimethyl_heavydimethyl'], PNet)
    # scanScoreDict = getScanScoreDictClusterNormScore(LADSSeqInfo, seqEntry)

    for i, scan in enumerate(lightScans):
        scanData = {'ScanF': scan}
        lightSeq = An.preprocessSequence(scanScoreDict[scan]['Seq'][0], seqMap,
                                         ambigEdges=scanScoreDict[scan]['Seq'][1])
        scanData['LADS Sequence'] = lightSeq
        scanData['LADS Ambig Edges'] = scanScoreDict[scan]['Seq'][1]
        scanData['LADS Raw Score'] = scanScoreDict[scan]['Raw Score']
        scanData['LADS Post Score'] = scanScoreDict[scan]['Post Score']
        scanData['M+H'] = scanFDict[scan]['precMass']
        try:
            comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'],
                                            ambigEdges1=scanScoreDict[scan]['Seq'][1],
                                            ambigEdges2=[], ppm=20)
            scanData['SEQUEST XCorr'] = SEQUESTMASCOTResults[scan]['SEQUEST XCorr']
            scanData['MASCOT Ion Score'] = SEQUESTMASCOTResults[scan]['MASCOT Ion Score']
            scanData['SEQUEST MASCOT Sequence'] = SEQUESTMASCOTResults[scan]['Peptide']
            scanData['Accuracy'] = comp[0]
            scanData['Precision'] = comp[1]
        except KeyError:
            scanData['SEQUEST XCorr'] = None
def main():
    Analytics.load()
    #Analytics.save()
    if not (os.path.exists("train_dataset.npy") or os.path.exists("train_labels.npy")
            or os.path.exists("valid_dataset.npy") or os.path.exists("valid_labels.npy")
            or os.path.exists("test_dataset.npy") or os.path.exists("test_labels.npy")):
        print("Data does not exist, downloading now.")
        tr, t, e = download_data()
        train_folder, test_folder, extra_folder = extract_data(tr, t, e)

    # Create my network object and the TensorFlow graph for it.
    net = Network.Network()
    quit = False
    # Loop the options because TensorFlow takes so long to import.
    while not quit:
        print("1. Process The Datasets")
        print("2. Train the model")
        print("3. Display Analytics")
        print("4. Example Use")
        print("5. Use the model")
        print("6. Quit")
        user_input = get_user_input("Select a number: ", "Please enter a number")
        if user_input == 1:
            print("Press enter to just process the data")
            user_input = raw_input("Press y to visualize it also: ")
            process_and_visualize(display=user_input)
        elif user_input == 2:
            user_input = get_user_input("How many training steps? ", "Please enter a number")
            net.set_num_steps(int(user_input))
            # Load in the data from the .npy files
            net.load_data()
            net.do_training()
        elif user_input == 3:
            Analytics.display()
        elif user_input == 4:
            try:
                net.load()
            except ValueError:
                print("Failed to load model from ", net.savepath)
                print("Did you train your model yet?")
                exit()
            example_plot(net)
        elif user_input == 5:
            # Restore the graph from the savepath file
            try:
                net.load()
            except ValueError:
                print("Failed to load model from ", net.savepath)
                print("Did you train your model yet?")
                exit()
            print("1. Use a Camera.")
            print("2. Use an image file.")
            user_input = get_user_input("Select a number: ", "Please enter a number")
            if int(user_input) == 1:
                use_camera(net)
            else:
                predict_image(net)
        elif user_input == 6:
            quit = True
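# The menu above relies on a get_user_input helper that is not shown in this
# fragment. A plausible minimal version, hypothetical and written in Python 2
# to match the raw_input call above: it re-prompts until it gets an integer.
def get_user_input(prompt, error_message):
    while True:
        try:
            return int(raw_input(prompt))
        except ValueError:
            print(error_message)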
        args.extend(['--lads', '\"' + str(ladsDict) + '\"'])
    else:
        args.extend(['--lads', '\"' + str(ladsDict) + '\"'])
    # NOTE: both branches above currently add the same --lads argument.
    executeProcess(interpreter, 'CompareSearches.py', args, outBase)

infoMap = DataFile.getDBInfo(options.database, key='infoMap')
if progDict[options.mainprogname] == 'LADS':
    getPairStats = True
    mainProgFields = ['PScore', 'Num Ambig Edges']
else:
    getPairStats = False
    mainProgFields = [infoMap[progDict[options.mainprogname]]['Score']]

if options.denovoscript:
    stats = Analytics.getCompStats(getOutputName(outBase, 'CompareSearches.py', '.tdv'),
                                   unitTestName, progDict, infoMap, paramsDict)
elif options.mainprogname:
    unitTestName = options.mainprogname
    if not options.comp:
        stats = Analytics.getCompStats(getOutputName(outBase, 'CompareSearches.py', '.tdv'),
                                       options.mainprogname, progDict, infoMap, paramsDict,
                                       getPairStats=getPairStats, mainProgFields=mainProgFields)
    else:
        stats = Analytics.getCompStats(options.comp, options.mainprogname, progDict, infoMap,
                                       paramsDict, getPairStats=getPairStats,
                                       mainProgFields=mainProgFields)

outFile = open(options.output, 'w')
outFile.write('\nOverall Comparison Statistics\n')
writeCategoryInfo(stats, outFile, ['composite'], name='Composite')
outFile.write('\nTrue Pairs\n')
if getPairStats:
    for pairConfigName in paramsDict['Pair Configurations']:
        writeCategoryInfo(stats, outFile, ['truepairs', pairConfigName],
                          name='%s True Pairs' % (pairConfigName,))
def process_and_visualize(train_folder="train", test_folder="test", extra_folder="extra", display=""): if not (os.path.exists("train") or os.path.exists("test") or os.path.exists("extra")): if not (os.path.exists("train.tar.gz") or os.path.exists("test.tar.gz") or os.path.exists("extra.tar.gz")): # No tar.gz files found, data must be downloaded tr, t, e = download_data() # no folders found, need to extract the tar.gz train_folder, test_folder, extra_folder = extract_data(tr, t, e) # Set sequence lengths to 0 so multiple runs do not add to old run totals Analytics.load() Analytics.sequence_lengths = { 'train': { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 }, 'extra': { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 }, 'test': { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 } } Analytics.save() # Get the DigitStructs for Training Data fin = os.path.join(train_folder, 'digitStruct.mat') dsf = DigitStructFile(fin) print("Parsing the training data from the digitStruct.mat file") train_data = dsf.get_all_digit_structure_by_digit() print("Parsed training data") if display != "": print("Displaying Examples with bounding boxes") example_indeces = np.random.randint(0, len(train_data), size=5) examples = train_data[example_indeces] Analytics.load() Analytics.train_samples = examples Analytics.save() for e in examples: display_example(e, train_folder) # Preprocess Training data and fetch labels print("Generating data set and processing data.") train_dataset, train_labels = generate_dataset(train_data, train_folder) if display != "": print("Displaying examples of preprocessed images") examples = train_dataset[example_indeces] labels = train_labels[example_indeces] for e, l in zip(examples, labels): print("The Label for this is:", l) display_processed_example(e) #Delete things to free up space if display != "": del example_indeces del examples del labels del train_data #Repeat for Extra Data fin = os.path.join(extra_folder, 'digitStruct.mat') dsf = DigitStructFile(fin) print("Parsing the extra data from the digitStruct.mat file") extra_data = dsf.get_all_digit_structure_by_digit() print("Parsed extra data") # Preprocess extra data and fetch labels print("Generating data set and processing data.") extra_dataset, extra_labels = generate_dataset(extra_data, extra_folder) # Delete to free space del extra_data # Create the Training and Validation sets print("Creating the Training and Validation sets") train_dataset, train_labels, valid_dataset, valid_labels = split( train_dataset, train_labels, extra_dataset, extra_labels) print("Finished creating the sets") # Delete to free space del extra_dataset del extra_labels # Create the Test data set fin = os.path.join(test_folder, 'digitStruct.mat') dsf = DigitStructFile(fin) print("Parsing the test data from the digitStruct.mat file") test_data = dsf.get_all_digit_structure_by_digit() print("Parsed test data") if display != "": example_indeces = np.random.randint(0, len(test_data), size=5) examples = test_data[example_indeces] Analytics.load() Analytics.test_samples = examples Analytics.save() # Preprocess test data and fetch labels print("Generating data set and processing data.") test_dataset, test_labels = generate_dataset(test_data, test_folder) # Delete to free space del test_data del fin del dsf #Save Data to files save(train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels) del train_dataset del train_labels del valid_dataset del valid_labels del test_dataset del test_labels Analytics.display() Analytics.save()
def get_reward_data():
    global AGENT
    return Analytics.get_reward_data(AGENT)
    sys.exit(-1)
progName = processedInfo.keys()[0]

outFile.write("Scan information fetched. Total number of scans: %i. "
              "Number of scans considered for validation: %i"
              % (len(scanFDict), len(processedInfo[progName])))

progPairs = {}
for pairConfigName in paramsDict["Pair Configurations"]:
    progPairs[pairConfigName] = An.findPairsInSearchResults(
        processedInfo[progName], dbDict["infoMap"], progDict,
        paramsDict["Pair Configurations"][pairConfigName],
        progName=progName, isComp=False, ppm=options.ppmstd)

pairs = {}
times = {}
t1 = time.time()
print "Getting Clusters"
clusterSVMModel = svmutil.svm_load_model(paramsDict["Cluster Configuration"]["model"])
clusterSVMRanges = svmutil.load_ranges(
    os.path.splitext(paramsDict["Cluster Configuration"]["model"])[0] + ".range")
O.calcula_energia_poblacion()   # compute the energy of the population
O.fitness()
if resp == 's' or resp == 'S':
    V.array_elite = O.elite()   # elitism enabled: keep the best chromosomes
O.crossover()
O.mutacion()
V.array_poblacion = O.ruleta()  # roulette-wheel selection
if resp == 's' or resp == 'S':
    # Re-insert the elite individuals after selection
    for eli in range(len(V.array_elite)):
        V.array_poblacion.append(V.array_elite[eli])
O.calcula_energia_poblacion()
#A.mayor_menor_promedio(cor)
A.asigna_mvp(cor)               # record the best (MVP) chromosome
#V.array_poblacion = np.random.permutation(V.array_poblacion).tolist()
"""for f in range(P.filas):
    print(V.cromosoma_mvp[0][f])
for f in range(P.filas):
    print(V.cromosoma_mvp[2][f])
print(round(V.cromosoma_mvp[1], 2))
print(V.cromosoma_mvp[3])"""
#A.mostrar_grafica(grafica)
A.mostrar_grilla()              # show the grid
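# The elitism pattern used above, in a self-contained toy form: the best k
# individuals bypass selection and are re-inserted afterwards. The population,
# fitness (sum of genes), and names here are illustrative, not from the original.
import random

population = [[random.random() for _ in range(4)] for _ in range(10)]
k = 2
elite = sorted(population, key=sum, reverse=True)[:k]   # best k by fitness
weights = [sum(ind) for ind in population]              # roulette weights
selected = random.choices(population, weights=weights, k=len(population) - k)
population = selected + elite                           # elites survive unchanged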
    SEQUESTdict = eval(options.sequest)
    processedInfo.update(CS.parseScans(SEQUESTdict, 'SEQUEST', seqMap, dbDict))
if options.combined:
    combinedDict = eval(options.combined)
    processedInfo.update(CS.parseScans(combinedDict, 'Combined', seqMap, dbDict,
                                       delimiter='\t', seqDelimLen=0))

if len(processedInfo.keys()) > 1:
    print 'ERROR: Can only compare results to one database search output. Exiting...'
    sys.exit(-1)
progName = processedInfo.keys()[0]

dtaList = glob.glob(options.dtadir + '/*.dta')
scanFDict = getScanFDict(dtaList)

precMassClusters = An.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
clusterOut = open(options.output + '_cluster.txt', 'w')
for cluster in precMassClusters:
    if len(cluster) == 1:
        continue
    specs = []
    for scanF in cluster:
        specs += [DataFile.getMassIntPairs(scanFDict[scanF]['dta'])]
    for i in range(len(cluster)):
        for j in range(i + 1, len(cluster)):
            if cluster[i] in processedInfo[progName] and cluster[j] in processedInfo[progName]: