def change_all(
    cls, level=(), buffer=(), terminator=(), colored_format=(), format=()
):
# #
    '''
        Changes the given properties on all created logger instances and
        saves the given properties as default properties for logger
        instances created in the future.

        Note that every argument except buffer set to "None" will not
        edit this logger component. If you don't want to change buffer
        leave it by its default value.

        **level**          - sets levels for all logger

        **buffer**         - sets buffers for all logger

        **terminator**     - sets an ending char for each log message in
                             each logger

        **colored_format** - sets templates for colored logging messages
                             in returned logger

        **format**         - sets templates for logging messages in each
                             logger

        Returns the class itself.

        Examples:

        >>> Logger.change_all() # doctest: +ELLIPSIS
        <class ...Logger...>
    '''
    cls._set_properties(level, buffer, terminator, colored_format, format)
    for logger in cls.instances:
# # python3.5         new_handlers = logger.handlers.copy()
        new_handlers = copy(logger.handlers)
        if buffer:
            # New buffers were given: build one fresh handler per buffer.
            new_handlers = [
                LoggingStreamHandler(stream=new_buffer)
                for new_buffer in cls.buffer]
        handler_settings = builtins.zip(
            new_handlers, cls.level, cls.terminator, cls.colored_format,
            cls.format)
        # Distinct loop names keep the method's parameters from being
        # shadowed.
        for (
            handler, handler_level, handler_terminator,
            handler_colored_format, handler_format
        ) in handler_settings:
            # TODO check new branches.
            if handler_colored_format is None:
                handler.setFormatter(LoggingFormatter(handler_format))
            else:
                handler.setFormatter(ColoredLoggingFormatter(
                    handler_colored_format))
            handler.terminator = handler_terminator
            handler.setLevel(handler_level.upper())
        # Iterate over a snapshot: "removeHandler()" mutates
        # "logger.handlers", so looping over the live list would skip every
        # second handler and leave stale ones attached.
        for handler in logger.handlers[:]:
            logger.removeHandler(handler)
        for handler in new_handlers:
            logger.addHandler(handler)
        # The logger itself uses the first configured level.
        logger.setLevel(builtins.getattr(logging, cls.level[0].upper()))
    return cls
def _eventListToDict(eventValueList):
    """
    Turn an ioHub event given as an ordered list of values into a dict.

    The value at index 3 of the list selects the event class from
    EventConstants.EVENT_CLASSES; that class's attributeNames are paired
    positionally with the values to form the returned dictionary.
    """
    eventClass = EventConstants.EVENT_CLASSES[eventValueList[3]]
    return dict(zip(eventClass.attributeNames, eventValueList))
def unzip(seq):
    """
    Inverse of zip: turn a list of tuples into a tuple of lists.

    >>> unzip([(1,2), (3,4), (5,6)])
    ([1, 3, 5], [2, 4, 6])
    """
    # Transpose the rows, then make every resulting column a list.
    columns = __builtin__.zip(*seq)
    return tuple([list(column) for column in columns])
def _eventListToDict(eventValueList):
    """
    Build an attribute-name -> value dictionary for an ioHub event that is
    currently represented as an ordered list of values.

    The event class is looked up in EventConstants.EVENT_CLASSES using the
    list entry at index 3, and its attributeNames supply the keys.
    """
    attributeNames = EventConstants.EVENT_CLASSES[
        eventValueList[3]].attributeNames
    namedValues = zip(attributeNames, eventValueList)
    return dict(namedValues)
def _eventListToObject(eventValueList):
    """
    Turn an ioHub event given as an ordered list of values into an
    instance of the matching event class.

    The value at index 3 of the list selects the class from
    EventConstants.EVENT_CLASSES; the class is then called with its
    attributeNames paired positionally with the values as keyword
    arguments.
    """
    eventClass = EventConstants.EVENT_CLASSES[eventValueList[3]]
    return eventClass(**dict(zip(eventClass.attributeNames, eventValueList)))
def _eventListToObject(eventValueList):
    """
    Instantiate the event class that corresponds to an ioHub event which
    is currently represented as an ordered list of values.

    The class comes from EventConstants.EVENT_CLASSES, keyed by the list
    entry at index 3; its attributeNames are zipped with the values and
    passed to the class as keyword arguments.
    """
    eventClass = EventConstants.EVENT_CLASSES[eventValueList[3]]
    namedValues = zip(eventClass.attributeNames, eventValueList)
    attributes = dict(namedValues)
    return eventClass(**attributes)
def _generate_logger(
    cls, name, level, buffer, terminator, colored_format, format
):
# #
    '''
        Creates a new logger instance by initializing all its components
        with given arguments or default properties saved as class
        properties.

        Every falsy argument is replaced by the class attribute with the
        same name. One stream handler is created per
        (level, buffer, terminator, colored_format, format) tuple; handlers
        already attached to the logger called "name" are removed first.
        The logger's own level is set to the first given level.

        Returns the configured "logging.Logger" instance.

        Examples:

        >>> Logger._generate_logger(
        ...     'test', ('info',), (Buffer(),), ('',), (''), ('',)
        ... ) # doctest: +ELLIPSIS
        <logging.Logger object at ...>
    '''
    # TODO check branches.
    given_properties = (
        ('level', level), ('buffer', buffer), ('terminator', terminator),
        ('colored_format', colored_format), ('format', format))
    # Fall back to the class wide default for every property not given.
    properties = [
        value if value else builtins.getattr(cls, property_name)
        for property_name, value in given_properties]
    logger = getLogger(name)
    # Iterate over a snapshot: "removeHandler()" mutates "logger.handlers",
    # so looping over the live list would skip every second handler.
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
    logger.propagate = False
    for _level, _buffer, _terminator, _colored_format, _format in \
    builtins.zip(*properties):
        handler = LoggingStreamHandler(stream=_buffer)
        handler.terminator = _terminator
        handler.setLevel(_level.upper())
        # TODO check new branches
        if _colored_format is None:
            handler.setFormatter(LoggingFormatter(_format))
        else:
            handler.setFormatter(ColoredLoggingFormatter(_colored_format))
        logger.addHandler(handler)
    # Set meta logger level to first given level (level is first property).
    logger.setLevel(builtins.getattr(logging, properties[0][0].upper()))
    return logger
def predict(self, X):
    """
    Predict labels for X by majority vote over the per-partition models.

    Every classifier in self.classifiers predicts on X. Column `i` of model
    `m`'s prediction is counted as a vote for label self.partition[m][i].
    A label is assigned to a row when np.round(votes / voters) equals 1,
    where voters is the number of models whose partition contains that
    label.

    Returns a CSR matrix of shape (rows of the first prediction,
    self.label_count) with dtype 'int'.
    """
    model_outputs = [clf.predict(X).tocsc() for clf in self.classifiers]
    result_shape = (model_outputs[0].shape[0], self.label_count)

    voters = [0.0] * self.label_count
    votes = sparse.csc_matrix(result_shape, dtype='int')
    for model_index in range(self.model_count):
        model_labels = self.partition[model_index]
        for position, label in enumerate(model_labels):
            voters[label] += 1
            votes[:, label] = (
                votes[:, label] + model_outputs[model_index][:, position])

    predictions = sparse.csc_matrix(result_shape, dtype='int')
    voted_rows, voted_columns = votes.nonzero()
    for row, column in zip(voted_rows, voted_columns):
        vote_share = votes[row, column] / float(voters[column])
        if np.round(vote_share) == 1:
            predictions[row, column] = 1
    return predictions.tocsr()
def zip(*args):
    """
    Combines corresponding tuples of elements from several sequences into a
    sequence of tuples.

    >>> zip([1, 2, 3], [4, 5, 6])
    [(1, 4), (2, 5), (3, 6)]

    Zipping empty sequences will produce the empty sequence.
    >>> zip([], [])
    []

    The given sequences must be of the same length.
    >>> zip([1, 2], [3])
    Traceback (most recent call last):
      ...
    AssertionError
    """
    # Enforce the documented contract: the built-in zip would silently
    # truncate to the shortest sequence instead of failing.
    assert all(len(seq) == len(args[0]) for seq in args)
    return __builtin__.zip(*args)
def zip3(seq1, seq2, seq3):
    """
    Combines corresponding elements of the three given sequences into a
    sequence of 3-tuples.

    >>> zip3([1, 2], [3, 4], [5, 6])
    [(1, 3, 5), (2, 4, 6)]

    Zipping empty sequences will produce the empty sequence.
    >>> zip3([], [], [])
    []

    The given sequences must be of the same length.
    >>> zip3([1, 2], [3], [4])
    Traceback (most recent call last):
      ...
    AssertionError
    """
    expected_length = len(seq1)
    assert expected_length == len(seq2)
    assert expected_length == len(seq3)
    return __builtin__.zip(seq1, seq2, seq3)
def zip3(seq1, seq2, seq3):
    """
    Zips three equally long sequences into a sequence of 3-tuples.

    >>> zip3([1, 2], [3, 4], [5, 6])
    [(1, 3, 5), (2, 4, 6)]

    Zipping empty sequences will produce the empty sequence.
    >>> zip3([], [], [])
    []

    The given sequences must be of the same length.
    >>> zip3([1, 2], [3], [4])
    Traceback (most recent call last):
      ...
    AssertionError
    """
    # Compare the second and third sequence against the first, in order.
    for other in (seq2, seq3):
        assert len(seq1) == len(other)
    triples = __builtin__.zip(seq1, seq2, seq3)
    return triples
def zip(*args):
    """
    Combines corresponding elements from the given sequences into a
    sequence of tuples.

    >>> zip([1, 2, 3], [4, 5, 6])
    [(1, 4), (2, 5), (3, 6)]

    Zipping empty sequences will produce the empty sequence.
    >>> zip([], [])
    []

    The given sequences must be of the same length.
    >>> zip([1, 2], [3])
    Traceback (most recent call last):
      ...
    AssertionError
    """
    # The length check was lost when the signature became variadic; restore
    # it for any number of sequences so the documented AssertionError is
    # raised instead of silently truncating to the shortest sequence.
    assert all(len(seq) == len(args[0]) for seq in args)
    return __builtin__.zip(*args)
def _zip(*args, **kwargs):
    """
    Forward all arguments to the built-in zip and return its result.

    The wrapper carries its own name so that the built-in zip is not
    overwritten in globals.
    """
    builtin_zip = __builtin__.zip
    return builtin_zip(*args, **kwargs)
# id,name = line.split('\t') # drugs[id].append(name) with codecs.open(args.drugList, "r", "utf-8") as f2: drugs = {} for line in f2: split = line.split('\t') drugs[split[0]] = split[1].strip().split('|') #drugLookup = { x:i for i,x in enumerate(drugs) } #geneLookup = { x:i for i,x in enumerate(genes) } #mutationLookup = { x:i for i,x in enumerate(mutationKeywords) } print "Generating lookup table..." duplicates = set() lookup = defaultdict(list) for termType, mainDict in zip(['cancer', 'gene', 'mutation', 'drug'], [cancers, genes, mutationKeywords, drugs]): #for type,mainList in enumerate([drugs,genes,mutationKeywords]): for id, lst in mainDict.iteritems(): lst = list(set(lst)) keys = set([parseWordlistTerm(x.lower()) for x in lst]) #for x in lst: for key in keys: #key = tuple(x.lower().split(' ')) #key = parseWordlistTerm(x.lower()) matching = None if key in lookup: prevItems = lookup[key] matching = None for i, (prevType, prevIds) in enumerate(prevItems):
def selectSentences(outFile, textInput, textSourceInfo): pipeline = getPipeline() #textInput = [ u'She noticed that a deletion of PTEN correlates with sensitivity to Erlotinib.' ] #textInput = [ u'The V600E mutation in BRAF is known to cause resistance to Gefitinib.' ] pmid = str(textSourceInfo['pmid']) pmcid = str(textSourceInfo['pmcid']) print "pmid:%s pmcid:%s" % (pmid, pmcid) driven1 = re.compile(re.escape('-driven'), re.IGNORECASE) driven2 = re.compile(re.escape('- driven'), re.IGNORECASE) #print textInput assert isinstance(textInput, list) for text in textInput: text = text.strip().replace('\n', ' ').replace('\r', ' ').replace('\t', ' ') text = text.replace(u'\u2028', ' ').replace(u'\u2029', ' ').replace( u'\u202F', ' ').replace(u'\u2012', ' ').replace(u'\u2010', ' ') text = "".join(ch for ch in text if unicodedata.category(ch)[0] != "C") text = text.strip() #text = text.replace('-driven',' driven') #text = text.replace('- driven',' driven') #text = text.replace('-Driven',' Driven') #text = text.replace('- Driven',' Driven') text = driven1.sub(' driven', text) text = driven2.sub(' driven', text) if len(text) == 0: continue assert isinstance(text, str) or isinstance(text, unicode) document = pipeline.process(text) for sentence in document.get(SentencesAnnotation): sentenceStart = None words = [] positions = [] for i, token in enumerate(sentence.get(TokensAnnotation)): if sentenceStart is None: sentenceStart = token.beginPosition() word = token.word() startPos = token.beginPosition() - sentenceStart endPos = token.endPosition() - sentenceStart words.append(word) positions.append((startPos, endPos)) #print "-"*30 #print words snvRegex = r'^[A-Z][0-9]+[A-Z]$' snvMatches = [not (re.match(snvRegex, w) is None) for w in words] termtypesAndids, terms, locs = getTermIDsAndLocations( words, lookup) fusionTermtypesAndids, fusionTerms, fusionLocs = detectFusionTerms( words, lookup) termtypesAndids += fusionTermtypesAndids terms += fusionTerms locs += fusionLocs for i, 
(w, snvMatch) in enumerate(zip(words, snvMatches)): if snvMatch: termtypesAndids.append([('mutation', ['snv'])]) terms.append((w, )) locs.append((i, i + 1)) for i, w in enumerate(words): if w.lower().startswith("mir-") or w.lower().startswith( "hsa-mir-"): termtypesAndids.append([('gene', ['mrna'])]) terms.append((w, )) locs.append((i, i + 1)) #print "-"*30 #print sentence #print termtypesAndids #print snvMatches locsToRemove = set() acronyms = detectAcronyms(words) for (wordsStart, wordsEnd, acronymLoc) in acronyms: wordIsTerm = (wordsStart, wordsEnd) in locs acronymIsTerm = (acronymLoc, acronymLoc + 1) in locs if wordIsTerm and acronymIsTerm: # Remove the acronym locsToRemove.add((acronymLoc, acronymLoc + 1)) elif acronymIsTerm: # Remove any terms that contain part of the spelt out word newLocsToRemove = [(i, j) for i in range(wordsStart, wordsEnd) for j in range(i, wordsEnd + 1)] locsToRemove.update(newLocsToRemove) zipped = zip(locs, terms, termtypesAndids) filtered = [(locs, terms, termtypesAndids) for locs, terms, termtypesAndids in zipped if not locs in locsToRemove] #print "len(list(zipped))", len(list(zipped)) #print "len(filtered)", len(filtered) cancerLocs, geneLocs = set(), set() for loc, term, x in filtered: for t, _ in x: if t == 'cancer': cancerLocs.add(loc) elif t == 'gene': geneLocs.add(loc) overlap = [t for t in cancerLocs if t in geneLocs] uniqCancerLocs = [t for t in cancerLocs if not t in overlap] uniqGeneLocs = [t for t in geneLocs if not t in overlap] hasCancerAndGeneTerms = (len(cancerLocs) >= 1 and len(geneLocs) >= 1) and not (len(cancerLocs) == 1 and len(geneLocs) == 1 and len(overlap) == 1) #types = set ( [ t for x in termtypesAndids for t,_ in x ] ) #if len(types) == 3: # All three types: drugs,genes,mutationTypes #if "cancer" in types and "gene" in types: if hasCancerAndGeneTerms: #print words print "-" * 30 #print textSourceInfo #print sentence out = [pmid, pmcid, unicode(sentence)] #for thesetypesAndIDs,term,(startT,endT) in 
zip(termtypesAndids,terms,locs): for (startT, endT), term, thesetypesAndIDs in filtered: for type, termid in thesetypesAndIDs: startPos = positions[startT][0] endPos = positions[endT - 1][1] #termTxt = " ".join(term) termTxt = sentence.toString()[startPos:endPos] data = [ type, ",".join(map(str, termid)), startPos, endPos, termTxt ] txt = u"|".join(map(unicode, data)) out.append(txt) outLine = "\t".join(out) outFile.write(outLine + "\n")
def _zip(*args, **kwargs):
    """
    Expose the built-in zip under another name for use in jinja2 templates.

    All positional and keyword arguments are passed straight through to
    the built-in zip. The separate name keeps the built-in zip from being
    overwritten in globals.
    """
    zipped = __builtin__.zip(*args, **kwargs)
    return zipped
def Keyboard(code, flags=0): return Input(KeybdInput(code, flags)) def Hardware(message, parameter=0): return Input(HardwareInput(message, parameter)) ################################################################################ UPPER = frozenset('~!@#$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:"ZXCVBNM<>?') LOWER = frozenset("`1234567890-=qwertyuiop[]\\asdfghjkl;'zxcvbnm,./") ORDER = string.ascii_letters + string.digits + " \b\r\t" ALTER = dict(__builtin__.zip("!@#$%^&*()", "1234567890")) OTHER = { "`": VK_OEM_3, "~": VK_OEM_3, "-": VK_OEM_MINUS, "_": VK_OEM_MINUS, "=": VK_OEM_PLUS, "+": VK_OEM_PLUS, "[": VK_OEM_4, "{": VK_OEM_4, "]": VK_OEM_6, "}": VK_OEM_6, "\\": VK_OEM_5, "|": VK_OEM_5, ";": VK_OEM_1, ":": VK_OEM_1,