def writeTarquin(self, outpath):
    """Write the processed k-space data into the DICOM spectrum element
    and save the dataset as a Tarquin-ready file.

    The file is written to ``<outpath>/Tarquin_files/<name>proc_Tarquin``,
    where ``<name>`` is the basename of ``self.filename`` with every '.'
    removed.

    :param outpath: destination directory (string or Path); the
        ``Tarquin_files`` sub-directory is created if missing.
    """
    outpath = Path(outpath)
    Tarquindir = outpath / 'Tarquin_files'
    # Create the output directory directly instead of the original
    # os.chdir + os.mkdir pair, which mutated the process-wide CWD.
    Tarquindir.mkdir(parents=True, exist_ok=True)
    # Basename of the source file with '.' characters stripped out.
    name = self.filename[(self.filename.rfind('\\') + 1):].translate(
        str.maketrans('', '', r'.'))
    file_path = Path(Tarquindir, name + 'proc_Tarquin')
    print(file_path)
    # Spec_temp aliases self.SpecData and is filled in place.
    Spec_temp = self.SpecData
    counter = 0
    # Need complex conj for proper display, hence -imag.
    # self.Frames must be an even int here (the quotient feeds range()),
    # so // is equivalent to the old past.utils.old_div call.
    for b in range(0, self.Frames // 2):
        for a in range(0, self.Datapoints):
            Spec_temp[counter] = self.Kspacewrite[b][a].real
            counter = counter + 1
            Spec_temp[counter] = -self.Kspacewrite[b][a].imag
            counter = counter + 1
    # (0x5600, 0x0020) is the DICOM SpectroscopyData element.
    self.ds[0x5600, 0x0020].value = Spec_temp
    self.ds.save_as(str(file_path.resolve()))
def performOpen(self, options={}): """Perform the operation of opening the instrument connection""" # calling the generic VISA open to make sure we have a connection VISA_Driver.performOpen(self, options=options) #Detect options: (vector) magnet and swicth heater detectedOptions = [] table = str.maketrans(dict.fromkeys(string.ascii_letters + '/')) rate = self.askAndLog('READ:SYS:VRM:RFMX').strip().rsplit( ':', 1)[1][1:-1].translate(table).split() if float(rate[0]) > 0: detectedOptions.append("x magnet") if float(rate[1]) > 0: detectedOptions.append("y magnet") if float(rate[2]) > 0: detectedOptions.append("z magnet") heater = self.askAndLog('READ:SYS:VRM:SWHT').strip().rsplit( ':', 1)[1][1:-1].split() if heater[0] != "NOSW" or heater[1] != "NOSW" or heater[2] != "NOSW": detectedOptions.append("switch heater") self.instrCfg.setInstalledOptions(detectedOptions) # Make sure that the coordinate system matches the device coordFunc = self.instrCfg.getQuantity('CoordSys') v = self.performGetValue(coordFunc) coordFunc.setValue(v)
def translator(*args):
    """Build a reusable character-translation function.

    *args* are forwarded verbatim to :func:`str.maketrans`, so any of its
    call forms (mapping, from/to strings, optional delete string) works.
    The returned callable applies the resulting table to its argument.
    """
    table = str.maketrans(*args)

    def apply_table(text):
        return text.translate(table)

    return apply_table
def translate_nt_to_RY(seq):
    """Translates nucleotides to RY (A,G -> R; C,U,T -> Y).

    Only the uppercase characters A/G/C/U/T are mapped; any other
    character (including lowercase nucleotides) passes through unchanged.

    >>> translate_nt_to_RY("ACGUTACGUT")
    'RYRYYRYRYY'
    """
    # BUG FIX: the doctest expected output previously lacked quotes, so
    # running it under doctest failed (the repr of a str is quoted).
    trans_table = str.maketrans("AGCUT", "RRYYY")
    trans_seq = seq.translate(trans_table)
    logging.debug(seq + " -> " + trans_seq)
    return trans_seq
def factory(name, value, paramTypes=None, **kwargs):
    """
    Generates a new Parameter type derived from one of the predefined
    base classes chosen by the supplied value: Providing a string value
    results in a type derived from ParameterBase, providing an integer
    value produces a ParameterNumerical type and a float value results
    in a ParameterFloat type. Alternatively, a class type cls can be
    provided which is used as base class for the resulting Parameter
    class type. Make sure in this case, all attributes mandatory for
    this base type are provided too.

    - *name*: short name of the new parameter without spaces
    - *value*: default value from which the type is derived
      if cls is not given

    Optional arguments:

    - *paramTypes*: tuple of available parameter types instead of the default
    - *cls*: forces a certain Parameter type.
    - *description*: Updates the __doc__ attribute. May be displayed in the
      UI somewhere.
    """
    # Fold name/value into kwargs so they are forwarded to setAttributes().
    kwargs.update(name=name, value=value)
    name = kwargs.get("name", None)
    assertName(name, ParameterNameError)
    value = kwargs.get("value", None)
    cls = kwargs.pop("cls", None)  # remove 'cls' keyword before forwarding
    if paramTypes is None:
        paramTypes = (ParameterBoolean, ParameterFloat, ParameterNumerical,
                      ParameterBase)
    # Unless a valid cls was forced (either a class or a super() proxy of a
    # ParameterBase subclass), pick the first type whose isDataType()
    # accepts the value; fall back to the last entry (ParameterBase).
    if not (cls is not None and
            ((isinstance(cls, super) and
              issubclass(cls.__thisclass__, ParameterBase)) or
             issubclass(cls, ParameterBase))):
        for cls in paramTypes[:-1]:
            if cls.isDataType(value):
                break
        else:
            cls = paramTypes[-1]  # ParameterBase usually
    # embed description as class documentation
    clsdict = dict()
    description = kwargs.get("description", None)
    if isString(description) and len(description) > 0:
        clsdict['__doc__'] = description
    # create a new class/type with given name and base class
    # (strip all whitespace so the title-cased name is a valid identifier)
    # translate works different for unicode strings:
    typeName = (str(name.title()).translate(str.maketrans("", "", ' \t\n\r'))
                + "Parameter")
    NewType = None
    try:
        NewType = type(typeName, (cls, ), clsdict)
    except TypeError:
        # Python 2: type() argument 1 must be string, not unicode
        NewType = type(typeName.encode('ascii', 'ignore'), (cls, ), clsdict)
    # set up the new class before return
    return NewType.setAttributes(**kwargs)
def _parse_words(self, text, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', lower=True, split=' '): if lower: text = text.lower() translate_map = str.maketrans(filters, split * len(filters)) text = text.translate(translate_map) seq = text.split(split) return [i for i in seq if i]
def _save_url(self, url_data, content, url_text, url_pos): """Saves url. Converts url to 1-line text and url position as offset from the file beginning to (line, column). :param url_data: object for url storing :param content: file content :param url_text: url text :param url_pos: url position from the beginning """ line = content.count('\n', 0, url_pos) + 1 column = url_pos - content.rfind('\n', 0, url_pos) url_data.add_url(url_text.translate(str_text.maketrans("", "", '\n ')), line=line, column=column)
def translateFastaAlphabet(source, mapping): out = [] # create translation table to be used by str.translate() s_from = ''.join(list(mapping.keys())) s_to = ''.join(list(mapping.values())) assert (len(s_from) == len(s_to)) trans = str.maketrans(s_from, s_to) for orig in source: translatedSeq = str(orig.seq).translate(trans) out.append( SeqRecord(Seq(translatedSeq), id=orig.id, name=orig.name, description=orig.description)) #out.append( SeqRecord( Seq(translatedSeq), id=orig.id, description="REMOVE" ) ) outname = "/tmp/test.fna" SeqIO.write(out, outname, "fasta") return outname
class keep_chars(object):
    """Returns a filter object o(s): call to return a filtered string.

    Specifically, strips out everything in s that is not in keep.
    This filter is case sensitive by default.

    BUG FIX: the previous implementation relied on the Python-2
    ``str.translate(table, deletechars)`` signature.  Under Python 3,
    ``str.maketrans('', '')`` is an empty dict, so ``delchars`` was always
    empty and ``s.translate(a, d)`` raised TypeError.  This version keeps
    the same interface but works on Python 3.
    """

    def __init__(self, keep, case_sens=True):
        """Returns a new keep_chars object, based on string keep"""
        if not case_sens:
            # Keep both cases of every character in `keep`.
            keep = keep.lower() + keep.upper()
        # Membership set for O(1) per-character lookup.
        self.keep = frozenset(keep)

    def __call__(self, s, a=None, d=None):
        """f(s) -> s with every character not in `keep` removed.

        The *a*/*d* parameters existed for the old Python-2
        translate-table implementation and are now ignored; they are
        retained so existing call sites keep working.
        """
        return ''.join(c for c in s if c in self.keep)
def setTargetField(self, axis, value, sCmd):
    """Set one component of the magnet target field, preserving the other
    two components, and send the combined vector set-command *sCmd*
    (with '<*>' replaced by 'a b c')."""
    # Vector results depend on the coordinate system
    coordFunc = self.instrCfg.getQuantity('CoordSys')
    if self.Bchanged == False:
        # First change since last ramp: put the magnet on HOLD and push
        # the current coordinate system to the instrument before reading.
        self.askAndLog('SET:SYS:VRM:ACTN:HOLD')
        self.waitForIdle()
        self.performSetValue(coordFunc, coordFunc.getValue())
        self.performGetValue(coordFunc)
    vectValue = self.askAndLog("READ:SYS:VRM:VSET").strip()
    # Strip letters (units) from the '[a b c]' reply and split components.
    table = str.maketrans(dict.fromkeys(string.ascii_letters))
    a, b, c = vectValue.rsplit(':', 1)[1][1:-1].translate(table).split()
    if coordFunc.getValue() == 'Cartesian':
        # Keep the two untouched components as read from the instrument.
        if axis == 'Bx':
            a = value
        elif axis == 'By':
            b = value
        elif axis == 'Bz':
            c = value
    elif coordFunc.getValue() == 'Cylindrical':
        # Use the driver-side quantities for the unchanged components.
        a = self.instrCfg.getQuantity('Brho').getValue()
        b = self.instrCfg.getQuantity('Btheta').getValue()
        c = self.instrCfg.getQuantity('Bz').getValue()
        if axis == 'Brho':
            a = value
        elif axis == 'Btheta':
            b = value
        elif axis == 'Bz':
            c = value
    elif coordFunc.getValue() == 'Spherical':
        a = self.instrCfg.getQuantity('Br').getValue()
        b = self.instrCfg.getQuantity('Btheta').getValue()
        c = self.instrCfg.getQuantity('Bphi').getValue()
        if axis == 'Br':
            a = value
        elif axis == 'Btheta':
            b = value
        elif axis == 'Bphi':
            c = value
    sMsg = sCmd.replace('<*>', str(a) + " " + str(b) + " " + str(c))
    self.askAndLog(sMsg)
def writelogfile(self, outpath, version):
    """Write the Tarquin pre-processing log to
    ``<outpath>/Log_files/<name>log_file.txt``.

    :param outpath: destination directory (string or Path).
    :param version: version string recorded in the log header.
    """
    outpath = Path(outpath)
    Logdir = outpath / 'Log_files'
    # Create the log directory directly instead of the original
    # os.chdir + os.mkdir pair, which mutated the process-wide CWD.
    Logdir.mkdir(parents=True, exist_ok=True)
    if self.Frames == 1:
        frames = 1
    else:
        # self.Frames / 2 because NWS data also stored in Dicom file.
        # Frames is used as a count, hence integer //; equivalent to the
        # old past.utils.old_div for int operands.
        frames = self.Frames // 2
    # Basename of the source file with '.' characters stripped out.
    name = self.filename[(self.filename.rfind('\\') + 1):].translate(
        str.maketrans('', '', r'.'))
    file_path = Path(Logdir, name + 'log_file.txt')
    # Keep the handle on self for backward compatibility with callers
    # that may inspect self.text_file.
    self.text_file = open(str(file_path.resolve()), 'w')
    # Write Log File
    self.text_file.write('Tarquin Pre-processing Log file\n\n')
    print('Filename: %s\n' % (file_path), file=self.text_file)
    print('Version: %s\n' % (version), file=self.text_file)
    for cnt in range(0, frames):
        print('Frame: %i' % (cnt), file=self.text_file)
        print('Include: %i' % (self.IncludeFrame[cnt]), file=self.text_file)
        print('Phasing: %i' % (self.optphasearr[cnt]), file=self.text_file)
        intostr = 'Peak positions: ' + str(self.peakposarr[cnt])
        self.text_file.write(intostr + '\n\n')
    self.text_file.close()
    print('Log file written')
def text_to_word_sequence(txt,
                          filters=string.punctuation + '\n\t',
                          lower=True,
                          rmSingleChar=True,
                          split=" ",
                          maxLength=None):
    """Converts a text to a sequence of words (or tokens).

    Args:
        txt (str): Input text (string).
        filters (str,optional): Sequence of characters to filter out.
        lower (bool,optional): Whether to convert the input to lowercase.
        rmSingleChar (bool,optional): Whether to remove words with a single letter.
        split (bool,optional): Sentence split marker (string).
        maxLength (int,optional): max length of a text. Drops the rest.

    Yields:
        Words (or tokens); this is a generator function.
    """
    maxLen = float("inf") if maxLength is None else maxLength
    if lower:
        txt = txt.lower()
    # BUG FIX: the Python-2 branch referenced the undefined name `text`
    # (the parameter is `txt`), raising NameError whenever it ran.
    if sys.version_info < (3, ) and isinstance(txt, unicode):
        translate_map = dict((ord(c), unicode(split)) for c in filters)
    else:
        translate_map = str.maketrans(filters, split * len(filters))
    txt = txt.translate(translate_map)
    for i, el in enumerate(txt.split(split)):
        if rmSingleChar and len(el) == 1:
            continue
        # i indexes the raw split (including empty tokens), matching the
        # original truncation semantics.
        if i >= maxLen:
            break
        if el:
            yield el
def escape(str):
    """Double every backslash in *str*; lists are escaped element-wise.

    NOTE: the parameter shadows the builtin ``str`` (kept for interface
    compatibility); ``str.maketrans`` below resolves via the instance.
    """
    if isinstance(str, list):
        return [escape(item) for item in str]
    table = str.maketrans({"\\": r"\\"})
    return str.translate(table)
def performGetValue(self, quant, options={}):
    """Perform the Get Value instrument operation"""
    # on first call clear B-result buffer
    if self.isFirstCall(options):
        self.Bresult = []
    # check type of quantity
    if quant.name in ('T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9',
                      'T10', 'T11', 'T12', 'T13'):
        # temperatures, get value strings
        sAns = self.askAndLog(quant.get_cmd).strip()
        # convert string to float by taking everything after last colon,
        # ignoring final 'K'
        value = float(sAns.rsplit(':', 1)[1][:-1])
    elif quant.name in ('ControlLoop'):
        # NOTE(review): ('ControlLoop') is a plain string, so `in` is a
        # substring test, not tuple membership -- same pattern in the
        # TSet/HeaterRange/PoC/CoordSys branches below.  Also, if every
        # channel replies NOT_FOUND, `value` is left unbound here.
        # Scan heater channels 1..13 until one reports a value.
        for i in range(1, 14):
            pass  # stray no-op kept from the original
            sAns = self.askAndLog(quant.get_cmd.replace('<c>',
                                                        str(i))).strip()
            sAns = sAns.rsplit(':', 1)[1]
            if sAns != "NOT_FOUND":
                value = (sAns == "ON")
                break
    elif quant.name in ('TSet'):
        for i in range(1, 14):
            sAns = self.askAndLog(quant.get_cmd.replace('<c>',
                                                        str(i))).strip()
            sAns = sAns.rsplit(':', 1)[1]
            if sAns != "NOT_FOUND":
                # Strip trailing unit character before conversion.
                value = float(sAns[:-1])
                break
    elif quant.name in ('HeaterRange'):
        for i in range(1, 14):
            sAns = self.askAndLog(quant.get_cmd.replace('<c>',
                                                        str(i))).strip()
            sAns = sAns.rsplit(':', 1)[1]
            if sAns != "NOT_FOUND":
                # Strip trailing two-char unit before mapping to a value.
                value = quant.getValueFromCmdString(sAns[:-2])
                break
    elif quant.name in ('PoC'):
        sAns = self.askAndLog(quant.get_cmd).strip()
        value = (sAns.rsplit(':', 1)[1] == "ON")
    elif quant.name in ('CoordSys'):
        sAns = self.askAndLog(quant.get_cmd).strip()
        value = quant.getValueFromCmdString(sAns.rsplit(':', 1)[1])
    elif quant.name in ('Bx', 'By', 'Bz', 'Br', 'Brho', 'Bphi', 'Btheta'):
        coordFunc = self.instrCfg.getQuantity('CoordSys')
        # Read the full vector once per call group and cache it; the
        # buffer is cleared on the first call of each group above.
        if not self.Bresult:
            vectValue = self.askAndLog(quant.get_cmd).strip()
            # Delete all letters (units) from the '[a b c]' reply.
            table = str.maketrans(dict.fromkeys(string.ascii_letters))
            self.Bresult = vectValue.rsplit(
                ':', 1)[1][1:-1].translate(table).split()
        # Vector results depend on the coordinate system
        value = float('nan')
        if coordFunc.getValue() == 'Cartesian':
            if quant.name == 'Bx':
                return float(self.Bresult[0])
            elif quant.name == 'By':
                return float(self.Bresult[1])
            elif quant.name == 'Bz':
                return float(self.Bresult[2])
        elif coordFunc.getValue() == 'Cylindrical':
            if quant.name == 'Brho':
                return float(self.Bresult[0])
            elif quant.name == 'Btheta':
                return float(self.Bresult[1])
            elif quant.name == 'Bz':
                return float(self.Bresult[2])
        elif coordFunc.getValue() == 'Spherical':
            if quant.name == 'Br':
                return float(self.Bresult[0])
            elif quant.name == 'Btheta':
                return float(self.Bresult[1])
            elif quant.name == 'Bphi':
                return float(self.Bresult[2])
    else:
        # for all other cases, call VISA driver
        cmd = quant.get_cmd
        if (cmd is not None) and (cmd != ''):
            value = self.askAndLog(cmd).strip().rsplit(':', 1)[1]
        else:
            value = quant.getValue()
    return value
def setName(cls, name):
    """Changing the name is allowed for the class/type only, not for
    instances."""
    assertName(name, ParameterNameError)
    # Remove every kind of whitespace so the stored name is "safe".
    whitespace = ' \t\n\r'
    cls._name = str(name).translate(str.maketrans("", "", whitespace))
import dedupe.levenshtein as levenshtein

# Pre-bound regex helpers: each name is the bound findall/match method of
# a compiled pattern, usable as a plain function.
words = re.compile(r"[\w']+").findall
integers = re.compile(r"\d+").findall
start_word = re.compile(r"^([\w']+)").match
start_integer = re.compile(r"^(\d+)").match
alpha_numeric = re.compile(r"(?=.*\d)[a-zA-Z\d]+").findall

if sys.version < '3':
    PUNCTUATION = string.punctuation

    def strip_punc(s):
        # Python 2: byte-level translate with a deletion string.
        s = s.encode('utf-8').translate(None, PUNCTUATION)
        return s.decode('utf-8')
else:
    # Python 3: one reusable table that deletes all ASCII punctuation.
    PUNCTABLE = str.maketrans("", "", string.punctuation)

    def strip_punc(s):
        return s.translate(PUNCTABLE)


class Predicate(object):
    # NOTE(review): this class definition continues beyond this chunk;
    # only the leading methods are visible here.

    def __iter__(self):
        yield self

    def __repr__(self):
        return "%s: %s" % (self.type, self.__name__)

    def __hash__(self):
        try:
            return self._cached_hash
# from __future__ import division from __future__ import print_function from builtins import str from builtins import range from builtins import object from past.utils import old_div import sys import string import random import os import re import pysam tt = str.maketrans("ACTGactg","TGACtgac") class fastaWriter(object): def __init__(self,filename,linelen=60): # print(filename, file=sys.stderr) self.f=open(filename,"w") # print(self.f, file=sys.stderr) self.linelen=linelen self.buff="" self.x=0 self.name="" def write(self,s): self.buff+=s self.x+=len(s) # print(len(self.buff),self.linelen, file=sys.stderr)
else:
    # Profiling disabled: substitute no-op timers.
    timerForPreFolding = DummyTimer()
    timerForFolding = DummyTimer()
    timerForPostFolding = DummyTimer()


def parseOption(possibleValues, name):
    # Factory for an argparse `type=` validator restricted to a fixed set.
    def checkOption(value):
        if value in possibleValues:
            return value
        else:
            raise argparse.ArgumentTypeError(
                "Unknown %s '%s', allowed values: %s" %
                (name, value, ",".join(possibleValues)))

    return checkOption


# Translation tables that map the nucleotides of interest to a single
# marker character '&', so content can be measured with str.count('&').
translateAllDeterminedNucleotides = str.maketrans("acgtACGT", "&&&&&&&&")
translateGCNucleotides = str.maketrans("gcGC", "&&&&")
translatePurineNucleotides = str.maketrans("agAG", "&&&&")


def calcWindowGCContent(seq: str) -> float:
    # Fraction of G/C among determined (ACGT) positions; nan if none.
    allCount = seq.translate(translateAllDeterminedNucleotides).count('&')
    if allCount == 0:
        return nan
    gcCount = seq.translate(translateGCNucleotides).count('&')
    return gcCount / allCount


def calcWindowPurineContent(seq: str) -> float:
    # NOTE(review): this definition continues beyond this chunk.
    allCount = seq.translate(translateAllDeterminedNucleotides).count('&')
    if allCount == 0:
        return nan
import uuid
from builtins import (ascii, bytes, chr, dict, filter, hex, input, int, map,
                      next, oct, open, pow, range, round, str, super, zip)
from itertools import chain, combinations, repeat
from operator import itemgetter
from warnings import warn

import numpy as np
import pandas as pd
from IPython.core.display import display
from past.utils import old_div

import editdistance

# print(string.punctuation, type(unicode(string.punctuation,'utf-8')))
# Table mapping every ASCII punctuation character to a space.
TRAN_TBL = str.maketrans(str(string.punctuation),
                         u' ' * len(string.punctuation))


def make_good_label(x_value):
    """Return something that is a better label.

    Punctuation becomes spaces, runs of whitespace collapse to single
    underscores, and the result is lower-cased.

    Arguments:
        x_value {string} -- or something that can be converted to a string
    """
    # if isinstance(x_value, str):
    #     x_value = x_value.encode('ascii', 'ignore')
    return '_'.join(str(x_value).translate(TRAN_TBL).split()).lower()


def mash(dframe, flds=None, keep_zeros=False):
    """Returns df of non-null and non-zero on flds
# # from __future__ import division from __future__ import print_function from builtins import str from builtins import range from builtins import object from past.utils import old_div import sys import string import random import os import re import pysam tt = str.maketrans("ACTGactg", "TGACtgac") class fastaWriter(object): def __init__(self, filename, linelen=60): # print(filename, file=sys.stderr) self.f = open(filename, "w") # print(self.f, file=sys.stderr) self.linelen = linelen self.buff = "" self.x = 0 self.name = "" def write(self, s): self.buff += s self.x += len(s)
'''
Updated 19DEC2016

@author: Matt Brewer
@organization: University of Bristol
@contact: [email protected]
@summary: Module to handle reading of FASTA files
'''
from __future__ import print_function
from builtins import str, range
import re
from collections import deque

# Complement tables for str.translate(): DNA (A<->T, C<->G) and
# RNA (A<->U, C<->G).
TRANS_TABLE = str.maketrans('ATCG', 'TAGC')
RNA_TRANS_TABLE = str.maketrans('AUCG', 'UAGC')


class Codons(object):
    '''codon list'''

    # TODO: implement different DNA translation tables
    # NOTE(review): this class definition continues beyond this chunk
    # (the codon dictionary is truncated here).
    def __init__(self):
        self.codons = {
            'TTT': 'F',
            'TTC': 'F',
            'TTA': 'L',
            'TTG': 'L',
            'TCT': 'S',
            'TCC': 'S',
            'TCA': 'S',
            'TCG': 'S',
def fitTarquin(self, outpath):
    """Run Tarquin on the previously written spectrum file, derive
    metabolite ratios (and CRLB-based errors) from the fit CSV, and build
    a merged PDF report under ``<outpath>/Tarquin_files/Tarquin_fit``.
    """
    # Let the user confirm/edit the patient name used in the report.
    nameinit = self.PatName
    dialog = PatNameDialog(nameinit)
    if dialog.exec_():
        name = dialog.name.text()
        try:
            self.PatName = name
        except:
            self.PatName = nameinit
    outpath = Path(outpath)
    #Tarquindir = outpath + '\\' + 'Tarquin_files'
    Tarquindir = outpath / 'Tarquin_files'
    # File-name stem: basename of the source file with '.' removed.
    name = self.filename[(self.filename.rfind('\\') + 1):].translate(
        str.maketrans('', '', r'.'))
    filename = name + 'proc_Tarquin'
    #file_path = Tarquindir + '\\' + filename
    file_path = str(Path(Tarquindir, filename).resolve())
    #Tarquinfitdir = Tarquindir + '\\' + 'Tarquin_fit'
    Tarquinfitdir = Tarquindir / 'Tarquin_fit'
    # if os.path.isdir(Tarquinfitdir) == False:
    #     os.chdir(Tarquindir)
    #     os.mkdir('Tarquin_fit')
    Tarquinfitdir.mkdir(parents=True, exist_ok=True)
    # Old hard-coded Windows paths kept for reference:
    # reportout = Tarquinfitdir + '\\' + self.PatName + '_Report.pdf'
    # tempout = Tarquinfitdir + '\\' + filename + '_temp.pdf'
    # pdfout = Tarquinfitdir + '\\' + filename + '_plot.pdf'
    # dataout = Tarquinfitdir + '\\' + filename + '_data.csv'
    # moddataout = Tarquinfitdir + '\\' + filename + '_data_with_ratios.csv'
    # resout = Tarquinfitdir + '\\' + filename + '_results.csv'
    # self.fitout = Tarquinfitdir + '\\' + filename + '_fit.txt'
    # basis = 'S:\\Neonate_data\\Tarquin\\3_0T_basis_threonine_no_MM'
    # tarquin = 'S:\\Neonate_data\\Tarquin\\TARQUIN_Windows_4.3.7\\tarquin\\tarquin'
    reportout = str(
        Path(Tarquinfitdir, str(self.PatName) + '_Report.pdf').resolve())
    #reportout = Path(Tarquinfitdir , self.PatName + '_Report.pdf')
    tempout = str(Path(Tarquinfitdir, filename + '_temp.pdf').resolve())
    pdfout = str(Path(Tarquinfitdir, filename + '_plot.pdf').resolve())
    dataout = str(Path(Tarquinfitdir, filename + '_data.csv').resolve())
    moddataout = str(
        Path(Tarquinfitdir, filename + '_data_with_ratios.csv').resolve())
    resout = str(Path(Tarquinfitdir, filename + '_results.csv').resolve())
    self.fitout = str(Path(Tarquinfitdir, filename + '_fit.txt').resolve())
    basis = str(Path(BASE_DIR, '3_0T_basis_threonine_no_MM').resolve())
    print(f'basis: {basis}')
    # Locate the tarquin executable: bundled macOS app first, then PATH.
    tarquin_path = Path(BASE_DIR, 'tarquingui.app/Contents/MacOS/tarquin')
    if tarquin_path.exists():
        tarquin = str(tarquin_path.resolve())
    elif shutil.which("tarquin"):
        tarquin = shutil.which("tarquin")
    else:
        error = f'\nTarquin not found. \nTo solve it please:\n a) copy the Tarquin app inside {BASE_DIR} folder, or\n b) add Tarquin to the Path. e.g. >> export PATH=$PATH:/Applications/tarquingui.app/Contents/MacOS\n'
        print(error)
        sys.exit(error)
    command = (tarquin + ' --input ' + file_path + ' --output_pdf ' + pdfout +
               ' --output_csv ' + dataout + ' --output_fit ' + self.fitout +
               ' --basis_csv ' + basis)
    # run the command
    print('this the the command for tarquin: ', command)
    os.system(command)
    #Add in sode code to automatically calculate the Lac/Naa ratio
    #Note that this will assume that the correct basis set is used
    #csvfile = open(dataout, 'rb')
    with open(dataout) as csvfile:
        linereader = csv.reader(csvfile, delimiter=',')
        #linereader = pd.read_csv(dataout,delimiter = ',')
        CSVstore = []
        counter = 0
        # Tarquin CSV layout assumed here (1-based rows): row 2 =
        # amplitude headers, row 3 = amplitudes, row 5 = CRLB headers,
        # row 6 = CRLBs, row 9 = diagnostics (FWHM, SNR) -- TODO confirm
        # against the Tarquin version in use.
        for row in linereader:
            counter += 1
            print(row)
            if counter == 2:
                row.append('Lac+T/tNaa')
                row.append('tNaa/tCho')
                row.append('tNaa/Cr')
                row.append('tCho/Cr')
                row.append('Lac+T/tCho')
                row.append('Lac+T/Cr')
            if counter == 5:
                row.append('Lac+T/tNaa')
                row.append('tNaa/tCho')
                row.append('tNaa/Cr')
                row.append('tCho/Cr')
                row.append('Lac+T/tCho')
                row.append('Lac+T/Cr')
            #Calc ratio
            if counter == 3:
                #dummy = str(row)
                #dummy = dummy.translate(None, ''.join(["[", "'", "]"]))
                #print('dummy is: ',dummy)
                #fields = dummy.split(', ')
                fields = row
                print('type of fields[14] is: ', type(fields[14]))
                print('fields[14] is: ', fields[14])
                # NOTE(review): np.float was removed in NumPy 1.24 --
                # these calls need plain float(...) on current NumPy.
                Lac = np.float(fields[14])
                Naa = np.float(fields[15])
                NaaG = np.float(fields[16])
                Thre = np.float(fields[21])
                Cr = np.float(fields[6])
                tCho = np.float(fields[23])
                L_N = old_div((Lac + Thre), (Naa + NaaG))
                N_Ch = old_div((Naa + NaaG), tCho)
                N_Cr = old_div((Naa + NaaG), Cr)
                Ch_Cr = old_div(tCho, Cr)
                L_Ch = old_div((Lac + Thre), tCho)
                L_Cr = old_div((Lac + Thre), Cr)
                row.append(str(L_N))
                row.append(str(N_Ch))
                row.append(str(N_Cr))
                row.append(str(Ch_Cr))
                row.append(str(L_Ch))
                row.append(str(L_Cr))
            #calc error
            if counter == 6:
                dummy = str(row)
                #dummy = dummy.translate(None, ''.join(["[", "'", "]"]))
                #dummy = dummy.translate(''.join(["[", "'", "]"]))
                fields = row
                Lace = np.float(fields[14])
                Naae = np.float(fields[15])
                NaaGe = np.float(fields[16])
                Three = np.float(fields[21])
                Cre = np.float(fields[6])
                tChoe = np.float(fields[23])
                # Combine CRLBs of summed peaks in quadrature, then
                # propagate relative errors into each ratio.
                Lerr = np.sqrt(np.power(Lace, 2) + np.power(Three, 2))
                Nerr = np.sqrt(np.power(Naae, 2) + np.power(NaaGe, 2))
                L_Ne = np.sqrt(
                    np.power(old_div(Lerr, (Lac + Thre)), 2) +
                    np.power(old_div(Nerr, (Naa + NaaG)), 2)) * L_N
                N_Che = np.sqrt(
                    np.power(old_div(Nerr, (Naa + NaaG)), 2) +
                    np.power(old_div(tChoe, (tCho)), 2)) * N_Ch
                N_Cre = np.sqrt(
                    np.power(old_div(Nerr, (Naa + NaaG)), 2) +
                    np.power(old_div(Cre, (Cr)), 2)) * N_Cr
                Ch_Cre = np.sqrt(
                    np.power(old_div(tChoe, (tCho)), 2) +
                    np.power(old_div(Cre, (Cr)), 2)) * Ch_Cr
                L_Che = np.sqrt(
                    np.power(old_div(Lerr, (Lac + Thre)), 2) +
                    np.power(old_div(tChoe, (tCho)), 2)) * L_Ch
                L_Cre = np.sqrt(
                    np.power(old_div(Lerr, (Lac + Thre)), 2) +
                    np.power(old_div(Cre, (Cr)), 2)) * L_Cr
                row.append(str(L_Ne))
                row.append(str(N_Che))
                row.append(str(N_Cre))
                row.append(str(Ch_Cre))
                row.append(str(L_Che))
                row.append(str(L_Cre))
            #get FWHM and SNR
            if counter == 9:
                #dummy = str(row)
                #dummy = dummy.translate(''.join(["[", "'", "]"]))
                #fields = dummy.split(", ")
                fields = row
                FWHM = np.float(fields[7])
                SNR = np.float(fields[9])
            CSVstore.append(row)
            #linewriter.writerow(row)
    #csvfile.close()
    # Write the compact results summary CSV.
    resultsout = open(resout, 'w')
    line1 = 'Ratio, Value, Error, Proc FWHM, Proc SNR'
    print(line1)
    line2 = 'L+T/tNaa,' + str(L_N) + ',' + str(L_Ne) + ',' + str(
        FWHM) + ',' + str(SNR)
    line3 = 'tNaa/tCho,' + str(N_Ch) + ',' + str(N_Che)
    line4 = 'tNaa/Cr,' + str(N_Cr) + ',' + str(N_Cre)
    line5 = 'tCho/Cr,' + str(Ch_Cr) + ',' + str(Ch_Cre)
    line6 = 'L+T/tCho,' + str(L_Ch) + ',' + str(L_Che)
    line7 = 'L+T/Cr,' + str(L_Cr) + ',' + str(L_Cre)
    resultsout.write(line1)
    resultsout.write('\n')
    resultsout.write(line2)
    resultsout.write('\n')
    resultsout.write(line3)
    resultsout.write('\n')
    resultsout.write(line4)
    resultsout.write('\n')
    resultsout.write(line5)
    resultsout.write('\n')
    resultsout.write(line6)
    resultsout.write('\n')
    resultsout.write(line7)
    resultsout.close()
    # Write the augmented data CSV (rows with appended ratio columns).
    csvout = open(moddataout, 'w')
    for line in CSVstore:
        c = str(line)
        #line2 = c.translate(None, ''.join(["[", "'", "]"]))
        # NOTE(review): str.translate with a plain string argument is
        # effectively a no-op for ordinary characters in Python 3 -- the
        # brackets/quotes are NOT actually stripped here.
        line2 = c.translate(''.join(["[", "'", "]"]))
        #print line2
        csvout.write(line2)
        csvout.write('\n')
    csvout.close()
    # Build the one-page summary report with FPDF.
    pdf = PDF()
    pdf.alias_nb_pages()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    titleout = 'Spectroscopy Report for ' + str(self.PatName)
    pdf.cell(0, 0, titleout, 0, 0, 'C')
    pdf.ln(15)
    pdf.set_font('Arial', 'B', 14)
    pdf.cell(10)
    pdf.cell(0, 0, 'Metabolite Ratios', 0, 0, 'L')
    pdf.ln(5)
    pdf.cell(10)
    pdf.set_font('Arial', 'B', 12)
    pdf.cell(20, 10, 'Ratio', 1, 0, 'C')
    pdf.cell(25, 10, 'tNaa/tCho', 1, 0, 'C')
    pdf.cell(25, 10, 'tNaa/Cr', 1, 0, 'C')
    pdf.cell(25, 10, 'tCho/Cr', 1, 0, 'C')
    pdf.cell(25, 10, 'L+T/tCho', 1, 0, 'C')
    pdf.cell(25, 10, 'L+T/Cr', 1, 0, 'C')
    pdf.cell(25, 10, 'L+T/tNaa', 1, 1, 'C')
    pdf.cell(10)
    pdf.cell(20, 10, 'Value', 1, 0, 'C')
    pdf.set_font('Arial', '', 10)
    textout = str(round(N_Ch, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(N_Cr, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(Ch_Cr, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(L_Ch, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(L_Cr, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    pdf.set_font('Arial', 'B', 12)
    textout = str(round(L_N, 2))
    pdf.cell(25, 10, textout, 1, 1, 'C')
    pdf.cell(10)
    pdf.cell(20, 10, 'Error', 1, 0, 'C')
    pdf.set_font('Arial', '', 10)
    textout = str(round(N_Che, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(N_Cre, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(Ch_Cre, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(L_Che, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    textout = str(round(L_Cre, 2))
    pdf.cell(25, 10, textout, 1, 0, 'C')
    pdf.set_font('Arial', 'B', 12)
    textout = str(round(L_Ne, 2))
    pdf.cell(25, 10, textout, 1, 1, 'C')
    pdf.ln(3)
    pdf.cell(10)
    pdf.set_font('Arial', 'B', 10)
    pdf.cell(0, 5, 'Notes:', 0, 1, 'L')
    pdf.set_font('Arial', '', 10)
    pdf.cell(10)
    pdf.cell(
        0, 5,
        'L+T = Lactate + Threonine. Including Threonine yields a better fit at ~ 1.3 ppm',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(0, 5, 'tNaa = Total Naa', 0, 1, 'L')
    pdf.cell(10)
    pdf.cell(0, 5, 'tCho = Total Choline', 0, 1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        'Errors on ratios calculated using Cramer-Rao low bounds on Tarquin fit',
        0, 1, 'L')
    pdf.ln(5)
    pdf.cell(10)
    pdf.set_font('Arial', 'B', 10)
    pdf.cell(
        0, 5,
        'Some care must be taken when comparing Tarquin ratios to jMRUI ratios:',
        0, 1, 'L')
    pdf.cell(10)
    pdf.set_font('Arial', '', 10)
    pdf.cell(
        0, 5,
        '1) Tarquin fits a complete basis spectrum for each metabolite whereas jMRUI fits individual peaks',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        '2) Tarquin effectively produces T2-weighted metabolite concentration ratios',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(0, 5,
             '3) jMRUI effectively produces T2-weighted peak-area ratios', 0,
             1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        '4) The Choline peak has 9 equivalent protons whereas the other prominent peaks have only 3',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        '5) This means Tarquin ratios involving Choline are approximately a factor 3 different to jMRUI ratios',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        '6) When comparing Cho ratios to previous or published data please be careful of the methodologies used',
        0, 1, 'L')
    pdf.cell(10)
    pdf.cell(0, 5, '7) LCModel data will be similar to Tarquin', 0, 1, 'L')
    pdf.cell(10)
    pdf.cell(
        0, 5,
        '8) If in doubt, please contact Medical Physics for help or clarification',
        0, 1, 'L')
    pdf.ln(15)
    pdf.set_font('Arial', 'B', 14)
    pdf.cell(10)
    pdf.cell(0, 0, 'Spectrum Quality Control', 0, 0, 'L')
    pdf.ln(5)
    pdf.cell(10)
    pdf.set_font('Arial', 'B', 12)
    pdf.cell(40, 10, 'Proc FWHM', 1, 0, 'C')
    pdf.cell(40, 10, 'Proc SNR', 1, 0, 'C')
    pdf.cell(40, 10, 'Echo Time', 1, 1, 'C')
    pdf.set_font('Arial', '', 10)
    pdf.cell(10)
    textout = str(round(FWHM, 2))
    pdf.cell(40, 10, textout, 1, 0, 'C')
    textout = str(round(SNR, 2))
    pdf.cell(40, 10, textout, 1, 0, 'C')
    pdf.cell(40, 10, self.displayTE, 1, 1, 'C')
    pdf.ln(3)
    pdf.cell(10)
    pdf.set_font('Arial', 'B', 10)
    pdf.cell(0, 5, 'Notes:', 0, 1, 'L')
    pdf.set_font('Arial', '', 10)
    pdf.cell(10)
    pdf.cell(
        0, 5,
        'FWHM = Full Width Half Maximum: Measure of linewidth in ppm', 0, 1,
        'L')
    pdf.cell(10)
    pdf.cell(0, 5, 'SNR = Signal to Noise Ratio', 0, 1, 'L')
    pdf.output(tempout, 'F')
    # Merge the summary page with Tarquin's plot page (rotated upright).
    pdfFileObj1 = open(tempout, 'rb')
    pdfFileObj2 = open(pdfout, 'rb')
    pdfReader1 = PyPDF2.PdfFileReader(pdfFileObj1)
    pdfReader2 = PyPDF2.PdfFileReader(pdfFileObj2)
    pageObj1 = pdfReader1.getPage(0)
    pageObj2 = pdfReader2.getPage(0)
    pageObj2.rotateClockwise(270)
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_writer.addPage(pageObj1)
    pdf_writer.addPage(pageObj2)
    pdf_out = open(reportout, 'wb')
    pdf_writer.write(pdf_out)
    pdf_out.close()
    pdfFileObj1.close()
    pdfFileObj2.close()
    print(f'\n\nMRS Report saved in {reportout}')
    self.report_completed(reportout)
def asCplexName(name):
    """Sanitise *name* for CPLEX by replacing characters that are illegal
    in CPLEX identifiers ('-', '+', '[', ']', space, '>', '/') with '_'."""
    illegal = "-+[] ->/"
    table = str.maketrans(illegal, "_" * len(illegal))
    return str(name).translate(table)
def timestampFormat():
    """Format for current local time, suitable for file names.

    >>> timestampFormat()
    '%Y-%m-%d_%H-%M-%S'
    """
    # Spaces and colons are awkward in file names; map them to '_'/'-'.
    sanitize = str.maketrans(" :", "_-")
    return str(FORMATTER.datefmt).translate(sanitize)
def _y64_2_b64(s): return s.translate(str.maketrans('-._', '=+/'))
# Redis key templates, parameterised by taxon-id and protein-id.
flanking3utrLengthKey = "CDS:taxid:%d:protid:%s:3utr-flank-length-nt"
nextCdsOnOppositeStrandKey = "CDS:taxid:%d:protid:%s:next-cds-opp-strand"
#genomicCoordStartKey = "CDS:taxid:%d:protid:%s:genomic-start"
#genomicCoordEndKey = "CDS:taxid:%d:protid:%s:genomic-end"
#partialCDSKey = "CDS:taxid:%d:protid:%s:partial"
speciesCDSList = "species:taxid:%d:CDS"
regexLocusId = re.compile("([^.]+[.][^.]+)[.].*")

# metadata server (redis)
r = redis.StrictRedis(host=config.host, port=config.port, db=config.db,
                      password=config.password)
# sequences server (mysql)
session = db.Session()

# Map IUPAC ambiguity codes (both cases) to lowercase 'n'.
translateAmbiguousNucleotides = str.maketrans("RrYyKkMmSsWwBbDdHhVv",
                                              "nnnnnnnnnnnnnnnnnnnn")


def storeSeqInDB(nucSeq,
                 taxId: int,
                 proteinId: str,
                 seqSourceTag: int,
                 stopCodonPos: int = -1,
                 genomeCoords: tuple = (),
                 nextCDSonOppositeStrand: bool = None,
                 cdsLengthNt: int = None,
                 flankingRegionLengthNt: int = None) -> None:
    # NOTE(review): this definition continues beyond this chunk.
    # Compress the CDS sequence
    encodedCds = nucleic_compress.encode(
        str(nucSeq).translate(translateAmbiguousNucleotides)
from __future__ import print_function, division
import sys
import re
import json
from builtins import bytes, str
from .schema import SimpleSchema, SimpleAttribute, make_export_schema
from . import tree

# Secondary-edge labels accepted by the Negra export format.
allowable_secedge = {'refint', 'refvc', 'refmod', 'refcontr', 'EN', 'HD',
                     'SB', 'OA', 'DA', 'CP', 'MO', 'EP', 'SVP', 'PPROJ'}

hash_token_re = re.compile('^#+\\s')

# reads lines in an export file and creates a nodes structure
# reads up to and including the #EOS

# Tokens may not contain spaces; map them to underscores.
kill_spaces_tr = str.maketrans(' ', '_')


def read_sentence(f, format=3):
    '''
    reads a sentence in export format from the file descriptor f
    :param format: the Negra-Export version
    :param encoding: if a value is supplied here, the file will be assumed
        to have this encoding
    :param tree_encoding: passing None here means that the tree will
        contain unicode strings in the word, lemma, and comment fields,
        otherwise they will follow this encoding
    '''
    # NOTE(review): this definition continues beyond this chunk.
    t = tree.Tree()
    secedges = []
    pos = 0
    l = f.readline().strip()
def asCplexName(name):
    """Return *name* coerced to a string with every CPLEX-illegal
    character ('-', '+', '[', ']', space, '>', '/') mapped to '_'."""
    return str(name).translate(str.maketrans("-+[] ->/", "________"))
def standalone():
    """Command-line entry point: import CDS (or shuffled-CDS) FASTA sequences
    for one species into the sequence store, or rebuild the per-species CDS
    key set (``fixCDSkey`` mode).

    Relies on module-level resources defined elsewhere in this file:
    ``r`` (redis client), ``db``, ``data_helpers``, ``storeSeqInDB``,
    ``speciesCDSList``, ``parseOption``.
    """
    # --- command-line arguments -------------------------------------------
    argsParser = argparse.ArgumentParser()
    argsParser.add_argument("--taxid", type=int)
    argsParser.add_argument("--input")
    argsParser.add_argument("--variant", type=parseOption(
        set(("yeastgenome", "NCBI", "Ensembl", "JGI")), "variant"))
    argsParser.add_argument("--type", type=parseOption(
        set(("cds", "shuffle", "fixCDSkey")), "sequence type"))
    argsParser.add_argument("--dry-run", action="store_true", default=False)
    argsParser.add_argument("--output-fasta")
    argsParser.add_argument("--gene-ids-file")
    argsParser.add_argument("--alt-protein-ids",
                            type=parseOption(set(("locus_tag", )), "alt-protein-id"))
    argsParser.add_argument("--headers-from-another-fasta")
    argsParser.add_argument("--ignore-id-check", action="store_true", default=False)
    args = argsParser.parse_args()

    # Refuse to clobber the input file with the filtered-records output.
    if (args.output_fasta):
        if (args.output_fasta == args.input):
            raise Exception("Fasta output file cannot match input file!")

    #if( len(sys.argv) < 5 ):
    #    print("Usage: %s <taxid> <fasta-file> <fasta-variant> <cds|shuffle>" % (sys.argv[0],))
    #    sys.exit(-1)

    # command-line arguments
    taxId = args.taxid

    # Open the input FASTA, transparently handling gzip; bz2 is unimplemented.
    f = None
    if (args.input[-3:] == ".gz"):
        f = gzip.open(args.input, "r")
    elif (args.input[-4:] == ".bz2"):
        # TODO: impl this...
        assert (False)
    else:
        f = open(args.input, 'r')

    #sequenceFormat = args.variant
    # Map the sequence type to the DB source tag; fixCDSkey stores nothing.
    sequenceType = args.type
    if (sequenceType == "cds"):
        seqSourceTag = db.Sources.External
    elif (sequenceType == "shuffle"):
        seqSourceTag = db.Sources.ShuffleCDSv2_matlab
    elif (sequenceType == "fixCDSkey"):
        seqSourceTag = None
    else:
        raise Exception("Unknown sequence type '%s'" % sequenceType)

    # establish connections
    # metadata server (redis)
    #r = redis.StrictRedis(host=config.host, port=config.port, db=config.db, password=config.password)
    # sequences server (mysql)
    #session = db.Session()

    visitedProteinIds = set()

    # The species must already be registered in redis.
    assert (r.exists("species:taxid:%d:name" % taxId))

    if (seqSourceTag == db.Sources.External):
        # Clear any previously imported CDSs...
        #r.delete(speciesCDSList % (taxId,))
        # Abort a real (non-dry-run) import if sequences already exist.
        # NOTE(review): "specied" typo in the message below — left as-is
        # (runtime string; fixing it is out of scope for a doc pass).
        count = data_helpers.countSpeciesCDS(taxId)
        if (count > 0 and (not args.dry_run)):
            print("%d sequences already exist for specied %d. Aborting..." %
                  (count, taxId))
            sys.exit(-1)
    elif (sequenceType == "fixCDSkey"):
        r.delete(speciesCDSList % (taxId, ))  # Delete and reconstruct the CDS key
    else:
        assert (data_helpers.countSpeciesCDS(taxId) > 0)

    # Systematic nuclear yeast gene names, e.g. YAL001C, YGR192C-A.
    reNuclearYeastGene = re.compile("Y[A-P][RL]\d+[CW](-[A-Z])?")

    # Optional inclusion (white) list of gene ids, one per line.
    geneIdsToInclude = set()
    if (args.gene_ids_file):
        with open(args.gene_ids_file, "r") as genesFile:
            for geneId in genesFile:
                geneIdsToInclude.add(geneId.rstrip())

    # NCBI header attributes look like: [protein_id=NP_064721.1]
    reNCBIattributes = re.compile("\[(\S+)=([^\]]+)\]")
    # Bare NCBI CDS ids like: lcl|NC_002516.2_cds_NP_064721.1_1
    # NOTE(review): the '.' inside the capture group is unescaped, so it
    # matches any character — presumably intended as a literal dot; confirm.
    reNCBIbareheader = re.compile("\w+\|\w+\.\d+_cds_(\w+.\d+)_\d+")

    outRecords = []  # records accepted for the optional --output-fasta

    # Optionally take record descriptions (headers) from a second FASTA,
    # keyed by record id (ids must be unique there).
    headersFromAnotherFasta = {}
    if args.headers_from_another_fasta:
        with open(args.headers_from_another_fasta, "r") as f2:
            for record in SeqIO.parse(f2, "fasta", alphabet=generic_dna):
                assert (not record.id in headersFromAnotherFasta)
                headersFromAnotherFasta[record.id] = record.description

    cdsCount = 0
    notFoundCount = 0  # NOTE(review): never incremented anywhere below
    skippedCount = 0
    # Table mapping every unambiguous nucleotide char to '%', so that
    # counting '%' after translate() counts the valid characters.
    validNucleotideChars = str.maketrans("ACGTacgt", "%%%%%%%%")

    #print("Opening fasta file: {}".format(f))
    for record in SeqIO.parse(f, "fasta", alphabet=generic_dna):
        #proteinId = regexLocusId.match(record.id).group(1) # Work-around for multiple-transcript identifiers in JGI's Chlamy genome

        if args.headers_from_another_fasta:
            record.description = headersFromAnotherFasta[record.id]

        # Reject records containing any non-ACGT (e.g. ambiguity) symbols.
        numNonNucleotideChars = len(record.seq) - str(
            record.seq).translate(validNucleotideChars).count("%")
        if numNonNucleotideChars:
            print(
                "Skipping record %s, containing non-nucleotide or ambiguous symbols '%s'"
                % (record.id, numNonNucleotideChars))
            skippedCount += 1
            continue

        # yeastgenome.org - skip suspected pseudo-genes
        if (args.variant == "yeastgenome"
                and record.description.find("Dubious ORF") != -1):
            skippedCount += 1
            continue

        # yeastgenome.org - skip mitochondrial genes
        if (args.variant == "yeastgenome"):
            geneType = record.id[0]
            if geneType == "Q" or geneType == "R":
                skippedCount += 1
                continue

        # yeastgenome.org - verify gene-id conforms to: http://www.yeastgenome.org/help/community/nomenclature-conventions
        if (args.variant == "yeastgenome"):
            geneId = record.id
            assert (reNuclearYeastGene.match(geneId))

        # Obtain attributes mapping
        attributes = []
        if (args.variant == "NCBI"):
            attributes = dict(re.findall(reNCBIattributes, record.description))

        if (args.variant == "NCBI"):
            if ('pseudo' in attributes and attributes['pseudo'] == 'true'):
                print("Skipping pseudo-gene entry %s" % (record.id, ))
                skippedCount += 1
                continue

        # Determine gene id
        proteinId = None
        additionalProteinIds = set()
        altProteinId = None
        if (args.variant == "yeastgenome"):
            proteinId = record.id
        elif (args.variant == "NCBI"):
            if (sequenceType == "shuffle" and not attributes):
                #Workaround for shuffle-seq files missing the header...
                #Extract the protein-id from sequence-id like this:
                #>lcl|NC_002516.2_cds_NP_064721.1_1
                if not args.alt_protein_ids:
                    proteinId = reNCBIbareheader.match(record.id).group(1)
                elif args.alt_protein_ids == "locus_tag":
                    # NOTE(review): attributes is empty in this branch (the
                    # enclosing condition requires "not attributes"), so this
                    # always skips — presumably dead/defensive code; confirm.
                    if ('locus_tag' not in attributes):
                        print("Skipping entry %s missing locus_tag - %s" %
                              (record.id, attributes))
                        skippedCount += 1
                        continue
                    proteinId = attributes['locus_tag']
                    print(proteinId)
                else:
                    assert False
            else:
                # Note - not currently used
                #if 'db_xref' in attributes:
                #    _db_xrefs = attributes['db_xref'].split(",")
                #    db_xrefs = dict(map( lambda x: tuple(x.split(":")), _db_xrefs))
                if not args.alt_protein_ids:
                    if ('protein_id' not in attributes):
                        print("Skipping entry %s missing protein_id - %s" %
                              (record.id, attributes))
                        skippedCount += 1
                        continue
                    proteinId = attributes['protein_id']
                elif args.alt_protein_ids == "locus_tag":
                    if ('locus_tag' not in attributes):
                        print("Skipping entry %s missing locus_tag - %s" %
                              (record.id, attributes))
                        skippedCount += 1
                        continue
                    proteinId = attributes['locus_tag']
                    if ('protein_id' in attributes):
                        altProteinId = attributes['protein_id']
                else:
                    assert (False)
        elif (args.variant == "Ensembl"):
            # Sample id: ABD29211.1
            # Strip a trailing version suffix; keep the full id as an alias.
            dotPos = record.id.rfind('.')
            if (dotPos > 3):
                proteinId = record.id[:dotPos]
                additionalProteinIds.add(
                    record.id
                )  # also allow matching the full format (including the transcript-id) - some CDS files include it...
            else:
                proteinId = record.id
        elif (args.variant == "JGI"):
            # Variant 1 (Phytozome, Mpus)
            # (gff3): 60050
            # (fasta): 60050
            # Variant 2 (Phytozome, Dsal)
            # (gff3): Dusal.1637s00001.1
            # (fasta): Dusal.1637s00001.1
            # Variant 3:
            # (gff3): jgi.p|Ostta1115_2|10314
            # (fasta): jgi|Ostta1115_2|10314|CE10313_131
            proteinId = record.id
            if record.id.startswith("jgi|"):
                parts = record.id.split('|')
                parts[0] = 'jgi.p'  # add the '.p'
                additionalProteinIds.add('|'.join(
                    parts[:3]))  # drop the suffix (parts[4])
        else:
            assert (False)

        # Sanity check: real protein ids are longer than 2 characters.
        if not args.ignore_id_check:
            assert (len(proteinId) > 2)

        # Skip sequences that have non-standard translations
        if (args.variant == "NCBI"):
            if "transl_except" in attributes:
                print("Skipping %s (because of transl_except)" %
                      (proteinId, ))
                skippedCount += 1
                continue

        # If an inclusion list (white list) is defined, skip sequences missing from it
        if args.gene_ids_file:
            if (proteinId not in geneIdsToInclude):
                # Also try the additional ids
                if (not geneIdsToInclude.intersection(additionalProteinIds)):
                    print("Skipping %s (sequence %s, alternate ids=%s)" %
                          (proteinId, record.id, list(additionalProteinIds)))
                    skippedCount += 1
                    continue

        print("Inserting %s (sequence %s)..." % (proteinId, record.id))

        # Verify there are no duplicates entries
        # NOTE(review): print() below is passed two args, so '%s' is printed
        # literally rather than formatted — looks like a %-formatting bug.
        if (proteinId in visitedProteinIds):
            print("MULTIPLE Entry: %s", proteinId)
            skippedCount += 1
            continue
        #assert(proteinId not in visitedProteinIds)
        visitedProteinIds.add(proteinId)

        # Write the filtered sequences into an output file (if needed)
        # Note - this also works in dry-run...
        if (args.output_fasta):
            outRecords.append(record)

        # Dry-run stops before any writes to redis/DB.
        if (args.dry_run):
            continue

        if (sequenceType == "fixCDSkey"):
            # Rebuild the CDS membership key only; no sequence storage.
            cds = data_helpers.CDSHelper(taxId, proteinId)
            seqId = cds.seqId()
            if (not seqId is None):
                r.sadd(speciesCDSList % (taxId, ), proteinId)
            else:
                print("Couldn't find entry for proteinId=%s" % proteinId)
            continue  # Skip the rest of the processing...

        storeSeqInDB(nucSeq=record.seq,
                     taxId=taxId,
                     proteinId=proteinId,
                     seqSourceTag=seqSourceTag)
        cdsCount += 1

    if (notFoundCount + skippedCount > 0):
        print("Warning: %d entries skipped and %d entries not found" %
              (skippedCount, notFoundCount))

    print("Processed %d CDS entries" % (cdsCount, ))
    print("(out of %d CDS entries for this species)" %
          (r.scard("species:taxid:%d:CDS" % (taxId, ))))

    if (args.output_fasta):
        with open(args.output_fasta, "w") as outfile:
            out = SeqIO.write(outRecords, outfile, "fasta")