def parse(self, response):
    if response.status == 200:
        # Replace the nested <a> tags
        new_body = sub(REPLACE, lambda x: x.group(1), response.body)
        response = response.replace(body=new_body)
        url_params = search(EXPRESSION, response.url)
        chan = intern(url_params.group(1))
        ar_time_match = search(ARCHIVE_TIME, response.request.url)
        archive_time = datetime.datetime.strptime(ar_time_match.group(1), "%Y%m%d%H%M%S")
        for row in response.xpath('//div[@class="container"]//tr'):
            elements = row.xpath("td")
            time_string = elements[0].xpath("a/@href").extract()[0].replace("#l", "")
            pytime = datetime.datetime.fromtimestamp(float(time_string), datetime.timezone.utc)
            try:
                username = intern(elements[1].xpath("text()").extract()[0])
            except IndexError:
                username = ""
            extracted_text = elements[2].xpath(".//text()").extract()
            text = "".join(extracted_text)
            item = WaybackArchive()
            item["time"] = pytime
            item["chan"] = chan
            item["username"] = username
            item["text"] = text
            item["archive_time"] = archive_time
            yield item
def __set_values_from_fields(self):
    """
    Private method that sets the correct values from the fields
    derived from the input line.
    :return:
    """
    self.chrom, self.start, self.end, \
        self.name, self.score, self.strand, \
        self.thick_start, self.thick_end, self.rgb, \
        self.block_count, block_sizes, block_starts = self._fields

    # Reduce memory usage by keeping the interned copy of the chromosome name
    self.chrom = intern(self.chrom)
    self.start = int(self.start) + 1
    self.end = int(self.end)
    self.score = float(self.score)
    self.thick_start = int(self.thick_start) + 1
    self.thick_end = int(self.thick_end)
    self.block_count = int(self.block_count)
    self.block_sizes = [int(x) for x in block_sizes.split(",")]
    self.block_starts = [int(x) for x in block_starts.split(",")]
    self.has_start_codon = None
    self.has_stop_codon = None
    self.start_codon = None
    self.stop_codon = None
    self.fasta_length = len(self)
    return
def _mapf_signal(member):
    mname = member.name
    iname = member.interface._name
    mtype = member.signal_type
    mtype_read = mtype.unpack
    myKeyError = KeyError
    Bus_signal = Bus.signal

    def cb(callback, slot, msg):
        callback(*mtype_read(msg))
    cb_get = cb.__get__

    def cbm(callback, slot, msg):
        callback(msg, *mtype_read(msg))
    cbm_get = cbm.__get__

    def onsignal(_pybus_bound, callback):
        bus, destination, path, data = _pybus_bound
        return Bus_signal(bus, destination, path, iname, mname, cb_get(callback))

    def onsignalm(_pybus_bound, callback):
        bus, destination, path, data = _pybus_bound
        return Bus_signal(bus, destination, path, iname, mname, cbm_get(callback))

    onsignal.__name__ = _sys.intern(mname + '__onsignal')
    onsignal.__qualname__ = iname + '.' + mname + '__onsignal'
    onsignalm.__name__ = _sys.intern(mname + '__onsignalm')
    onsignalm.__qualname__ = iname + '.' + mname + '__onsignalm'
    return (onsignal, onsignalm)
def parse(self, response):
    if response.status == 200:
        # Replace the nested <a> tags
        new_body = sub(REPLACE, lambda x: x.group(1), response.body)
        response = response.replace(body=new_body)
        url_params = search(EXPRESSION, response.url)
        chan = intern(url_params.group(1))
        for row in response.xpath("//tr"):
            elements = row.xpath("td")
            time_string = elements[0].xpath("a/@href").extract()[0].replace("#l", "")
            pytime = datetime.datetime.fromtimestamp(float(time_string), datetime.timezone.utc)
            try:
                username = intern(elements[1].xpath("text()").extract()[0])
            except IndexError:
                username = ""
            extracted_text = elements[2].xpath(".//text()").extract()
            text = "".join(extracted_text)
            item = BitcoinIrcItem()
            item["time"] = pytime
            item["chan"] = chan
            item["username"] = username
            item["text"] = text
            yield item
def __set_values_from_gff(self, fasta_length):
    """
    Private method that sets the correct values from the fields
    derived from an input GFF line.
    :return:
    """
    (self.chrom, self.thick_start, self.thick_end,
     self.strand, self.name) = (self._line.chrom,
                                self._line.start, self._line.end,
                                self._line.strand, self._line.id)
    # Keep the interned copy of the chromosome name to reduce memory usage
    self.chrom = intern(self.chrom)
    assert self.name is not None
    self.start = 1
    self.end = fasta_length
    self.score = self._line.score
    self.rgb = None
    self.block_count = 1
    self.block_sizes = [self.thick_end - self.thick_start + 1]
    self.block_starts = [self.thick_start]
    self.has_start_codon = None
    self.has_stop_codon = None
    self.start_codon = None
    self.stop_codon = None
    self.fasta_length = fasta_length
    return
def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
    """Parses a GFF attribute string and returns it as a dictionary.

    If 'extra_return_first_value' is set, a pair is returned: the dictionary
    and the value of the first attribute. This might be useful if this is the
    ID.
    """
    if attrStr.endswith("\n"):
        attrStr = attrStr[:-1]
    d = {}
    first_val = "_unnamed_"
    for (i, attr) in zip(
            itertools.count(),
            _HTSeq.quotesafe_split(attrStr.encode())):
        attr = attr.decode()
        if _re_attr_empty.match(attr):
            continue
        if attr.count('"') not in (0, 2):
            raise ValueError(
                "The attribute string seems to contain mismatched quotes.")
        mo = _re_attr_main.match(attr)
        if not mo:
            raise ValueError("Failure parsing GFF attribute line")
        val = mo.group(2)
        if val.startswith('"') and val.endswith('"'):
            val = val[1:-1]
        d[sys.intern(mo.group(1))] = sys.intern(val)
        if extra_return_first_value and i == 0:
            first_val = val
    if extra_return_first_value:
        return (d, first_val)
    else:
        return d
def load_dict(self, state, exclude_utr=False, protein_coding=False):
    for key in ["chrom", "source", "start", "end", "strand", "id"]:
        setattr(self, key, state[key])
    for tid, tvalues in state["transcripts"].items():
        transcript = Transcript(logger=self.logger)
        transcript.load_dict(tvalues)
        transcript.finalize()
        if protein_coding is True and transcript.is_coding is False:
            self.logger.debug("{0} is non coding ({1}, {2})".format(
                transcript.id,
                transcript.combined_cds,
                transcript.segments))
            continue
        if exclude_utr is True:
            has_utrs = (transcript.utr_length > 0)
            transcript.remove_utrs()
            if has_utrs is True and (transcript.utr_length > 0):
                raise AssertionError("Failed to remove the UTRs!")
        self.transcripts[tid] = transcript
    self.chrom = intern(self.chrom)
    self.source = intern(self.source)
    self.id = intern(self.id)
    return
def parse_parts(self, parts):
    parsed = []
    sep = self.sep
    altsep = self.altsep
    drv = root = ''
    it = reversed(parts)
    for part in it:
        if not part:
            continue
        if altsep:
            part = part.replace(altsep, sep)
        drv, root, rel = self.splitroot(part)
        if sep in rel:
            for x in reversed(rel.split(sep)):
                if x and x != '.':
                    parsed.append(sys.intern(x))
        else:
            if rel and rel != '.':
                parsed.append(sys.intern(rel))
        if drv or root:
            if not drv:
                # If no drive is present, try to find one in the previous
                # parts. This makes the result of parsing e.g.
                # ("C:", "/", "a") reasonably intuitive.
                for part in it:
                    drv = self.splitroot(part)[0]
                    if drv:
                        break
            break
    if drv or root:
        parsed.append(drv + root)
    parsed.reverse()
    return drv, root, parsed
def read_sources_file(filename, sources=None, intern=sys.intern):
    """Parse a single Sources file into a hash

    Parse a single Sources file into a dict mapping a source package name
    to a SourcePackage object.  If there are multiple source packages with
    the same version, then the highest versioned source package (that is not
    marked as "Extra-Source-Only") is the version kept in the dict.

    :param filename: Path to the Sources file.  Can be compressed by any
      algorithm supported by apt_pkg.TagFile
    :param sources: Optional dict to add the packages to.  If given, this
      is also the value returned.
    :param intern: Internal optimisation / implementation detail to avoid
      python's "LOAD_GLOBAL" instruction in a loop
    :return: a dict mapping a name to a source package
    """
    if sources is None:
        sources = {}

    tag_file = apt_pkg.TagFile(filename)
    get_field = tag_file.section.get
    step = tag_file.step

    while step():
        if get_field('Extra-Source-Only', 'no') == 'yes':
            # Ignore sources only referenced by Built-Using
            continue
        pkg = get_field('Package')
        ver = get_field('Version')
        # There may be multiple versions of the source package
        # (in unstable) if some architectures have out-of-date
        # binaries.  We only ever consider the source with the
        # largest version for migration.
        if pkg in sources and apt_pkg.version_compare(sources[pkg][0], ver) > 0:
            continue
        maint = get_field('Maintainer')
        if maint:
            maint = intern(maint.strip())
        section = get_field('Section')
        if section:
            section = intern(section.strip())
        build_deps_arch = ", ".join(x for x in (get_field('Build-Depends'),
                                                get_field('Build-Depends-Arch'))
                                    if x is not None)
        if build_deps_arch != '':
            build_deps_arch = sys.intern(build_deps_arch)
        else:
            build_deps_arch = None
        build_deps_indep = get_field('Build-Depends-Indep')
        if build_deps_indep is not None:
            build_deps_indep = sys.intern(build_deps_indep)
        sources[intern(pkg)] = SourcePackage(intern(ver),
                                             section,
                                             set(),
                                             maint,
                                             False,
                                             build_deps_arch,
                                             build_deps_indep,
                                             get_field('Testsuite', '').split(),
                                             get_field('Testsuite-Triggers', '').replace(',', '').split(),
                                             )
    return sources
def _read_episode(self, data_generator):
    """Reads one episode at a time from the provided iterator over entries."""
    episode = []
    last_cands = None
    for entry, new in data_generator:
        if new and len(episode) > 0:
            yield tuple(episode)
            episode = []
            last_cands = None

        # intern all strings so we don't store them more than once
        new_entry = []
        if len(entry) > 0:
            # process text if available
            if entry[0] is not None:
                new_entry.append(sys.intern(entry[0]))
            else:
                new_entry.append(None)
            if len(entry) > 1:
                # process labels if available
                if entry[1] is None:
                    new_entry.append(None)
                elif hasattr(entry[1], '__iter__') and type(entry[1]) is not str:
                    # make sure iterable over labels, not single string
                    new_entry.append(tuple(sys.intern(e) for e in entry[1]))
                else:
                    raise TypeError('Must provide iterable over labels, not a single string.')
                if len(entry) > 2:
                    # process reward if available
                    if entry[2] is not None:
                        new_entry.append(entry[2])
                    else:
                        new_entry.append(None)
                    if len(entry) > 3:
                        # process label candidates if available
                        if entry[3] is None:
                            new_entry.append(None)
                        elif last_cands and entry[3] is last_cands:
                            # if cands are shared, say "same" so we
                            # don't store them again
                            new_entry.append(sys.intern('same as last time'))
                        elif hasattr(entry[3], '__iter__') and type(entry[3]) is not str:
                            # make sure iterable over candidates, not single string
                            last_cands = entry[3]
                            new_entry.append(tuple(sys.intern(e) for e in entry[3]))
                        else:
                            raise TypeError('Must provide iterable over label candidates, not a single string.')
                        if len(entry) > 4 and entry[4] is not None:
                            new_entry.append(sys.intern(entry[4]))

        episode.append(tuple(new_entry))

    if len(episode) > 0:
        yield tuple(episode)
def set_identifier(self, identifier):
    # Keep the interned copy so repeated identifiers share one string object.
    self._identifier = sys.intern(str(identifier))
    # identifier_first_part represents the part of the name in front of the
    # first dot (if any), e.g. for myfamily.myvar it would represent myfamily
    if '.' in identifier:
        self.identifier_first_part = identifier[:identifier.index('.')]
        self.identifier_last_part = identifier[identifier.index('.'):]
    else:
        self.identifier_first_part = identifier
        self.identifier_last_part = ''
def test_sys_intern(self):
    """
    Py2's builtin intern() has been moved to the sys module. Tests
    whether sys.intern is available.
    """
    from sys import intern
    if utils.PY3:
        self.assertEqual(intern('hello'), 'hello')
    else:
        # intern() requires byte-strings on Py2:
        self.assertEqual(intern(b'hello'), b'hello')
def _reduce_memory_dict(old_dict):
    new_dict = dict()
    for (k, v) in iteritems(old_dict):
        if type(k) is str:
            k = intern(k)
        if type(v) is str:
            v = intern(v)
        elif type(v) is dict:
            # This handles [{'Caller': ..., 'DebugLoc': { 'File': ... }}]
            v = _reduce_memory_dict(v)
        new_dict[k] = v
    return tuple(new_dict.items())
def loadAlignedParts(self, db):
    "load lists of existing aligned partitions for the update, if not already done"
    if self.alignDb != db:
        self.alignDb = db
        self.alignParts = []
        self.alignMap = {}
        alnDir = "data/aligned/" + self.rel + "/" + db + "/" + self
        for alIdx in globSort(alnDir + "/mrna.*.alidx"):
            names = os.path.basename(alIdx).split(".")
            self._addAlignedPart(MRNA, sys.intern(names[1]))
        for alIdx in globSort(alnDir + "/est.*.*.alidx"):
            names = os.path.basename(alIdx).split(".")
            self._addAlignedPart(EST, sys.intern(names[2]), sys.intern(names[1]))
def create_from_describe(vardict, index):
    """Create P4File from p4 describe

    Describe does not report the client path, but that will be reported
    later by p4 sync and set on the P4File at that time.
    """
    f = P4File()
    f.depot_path = sys.intern(vardict["depotFile"][index])
    f.type = sys.intern(vardict["type"][index])
    f.action = sys.intern(vardict["action"][index])
    f._revision = int(vardict["rev"][index])
    return f
def format_stack_trace(frame, thread_category):
    """Formats the frame obj into a list of stack trace tuples."""
    stack_trace = deque()

    while frame:
        # The value frame.f_code.co_firstlineno is the first line of
        # code in the file for the specified function. The value
        # frame.f_lineno is the actual line which is being executed
        # at the time the stack frame was being viewed.
        code = frame.f_code

        filename = intern(code.co_filename)
        func_name = intern(code.co_name)
        first_line = code.co_firstlineno
        real_line = frame.f_lineno

        # Set ourselves up to process next frame back up the stack.
        frame = frame.f_back

        # So as to make it more obvious to the user as to what their
        # code is doing, we drop out stack frames related to the
        # agent instrumentation. Don't do this for the agent threads
        # though, as we still need to see them in that case so we can
        # debug what the agent itself is doing.
        if (thread_category != 'AGENT' and
                filename.startswith(AGENT_PACKAGE_DIRECTORY)):
            continue

        if not stack_trace:
            # Add the fake leaf node with line number of where the
            # code was executing at the point of the sample. This
            # could be actual Python code within the function, or
            # more likely showing the point where a call is being
            # made into a C function wrapped as Python object. The
            # latter can occur because we will not see stack frames
            # when calling into C functions.
            stack_trace.appendleft((filename, func_name, real_line, real_line))

        # Add the actual node for the function being called at this
        # level in the stack frames.
        stack_trace.appendleft((filename, func_name, first_line, real_line))

    return stack_trace
def __init__(self, next_attr_name=None, prev_attr_name=None):
    """Initializes this list.

    next_attr_name: The name of the attribute that holds a reference
                    to the next item in the list.

    prev_attr_name: the name of the attribute that holds a reference
                    to the previous item in the list.
    """
    # Keep an interned version of the attribute names. This should
    # speed up the process of looking up the attributes.
    self.next_name = intern(next_attr_name)
    self.prev_name = intern(prev_attr_name)
def __init__(self, row):
    self.taxId = int(row[0])
    self.parentTaxId = int(row[1])
    self.rank = sys.intern(row[2])
    self.emblCode = sys.intern(row[3])
    self.divisionId = int(row[4])
    self.inheritedDivFlag = bool(row[5])
    self.geneticCodeId = int(row[6])
    self.inheritedGCflag = bool(row[7])
    self.mitochondrialGeneticCodeId = int(row[8])
    self.inheritedMGCflag = bool(row[9])
    self.genBankHiddenFlag = bool(row[10])
    self.hiddenSubtreeRootFlag = bool(row[11])
    self.comments = row[12]
def dig_node(node, parent_summary_builder, child_literals_holder):
    if node is None:
        return
    elif isinstance(node, list):
        n0 = node[0]
        assert n0 in (ct.ORDERED_AND, ct.ORDERED_OR)
        for subn in node[1:]:
            dig_node(subn, parent_summary_builder, child_literals_holder)
        return
    elif isinstance(node, ct.CallNode):
        invoked = node.invoked
        lits = invoked.literals
        lits and parent_summary_builder.extend_literal(lits)
        lbl = callnode_label(node)
        if lbl not in parent_summary_builder.already_appended_callnodes:
            stack.append(lbl)
            nodesum = summary_table.get(lbl)
            if nodesum is None:
                sb = SummaryBuilder()
                clh = []
                subnode = node.body
                if subnode is None:
                    pass
                elif isinstance(subnode, (list, ct.CallNode)):
                    dig_node(subnode, sb, clh)
                elif isinstance(subnode, ct.Invoked):
                    sb.append_callee(intern(subnode.callee))
                    lits = subnode.literals
                    if lits:
                        sb.extend_literal(lits)
                        clh.append(lits)
                else:
                    assert False
                nodesum = sb.to_summary()
                nodesum.literals = intern_literals(nodesum.literals, clh)
                summary_table[lbl] = nodesum
            parent_summary_builder.append_summary(nodesum, lbl)
            parent_summary_builder.append_callee(invoked.callee)
            child_literals_holder.append(nodesum.literals)
            stack.pop()
        return
    elif isinstance(node, ct.Invoked):
        parent_summary_builder.append_callee(intern(node.callee))
        if node.literals:
            parent_summary_builder.extend_literal(node.literals)
            child_literals_holder.append(node.literals)
    else:
        assert False
def parse_full(cls, line_string):
    match = cls._line_regex.match(line_string.decode('utf8', errors='replace'))
    if match is None:
        # raise ValueError ("not a valid log line (%r)" % (line_string,))
        groups = [0, 0, 0, 0, "", "", 0, "", "", 0]
        return cls(groups)

    line = cls(match.groups())
    # Timestamp.
    line[0] = parse_time(line[0])
    # PID.
    line[1] = int(line[1])
    # Thread.
    line[2] = int(line[2], 16)
    # Level (this is handled in LineCache).
    line[3] = 0
    # Line.
    line[6] = int(line[6])
    # Message start offset.
    line[9] = match.start(9 + 1)

    for col_id in (4,   # COL_CATEGORY
                   5,   # COL_FILENAME
                   7,   # COL_FUNCTION,
                   8):  # COL_OBJECT
        line[col_id] = sys.intern(line[col_id] or "")

    return line
def string_from_print(d):
    """create a string from p4 print dict

    This is a noop for unicode servers, because p4python returns strings.
    But for non-unicode servers, when running 'p4 print' we use "raw"
    encoding with p4python to avoid mangling file content, so we get back
    bytes from p4python, which need to be decoded according to the locale
    encoding"""
    if type(d) == str:
        return sys.intern(d)
    try:
        return sys.intern(d.decode(locale.nl_langinfo(locale.CODESET)))
    except UnicodeDecodeError:
        replaced = d.decode(locale.nl_langinfo(locale.CODESET), 'replace').replace('\ufffd', '?')
        msg = _('Error decoding file path: {}').format(replaced)
        raise RuntimeError(msg)
def read_phones(path, dialect, sr=None):
    output = []
    with open(path, 'r') as file_handle:
        if dialect == 'timit':
            if sr is None:
                sr = 16000
            for line in file_handle:
                l = line.strip().split(' ')
                start = float(l[0])
                end = float(l[1])
                label = l[2]
                if sr is not None:
                    start /= sr
                    end /= sr
                output.append(BaseAnnotation(label, start, end))
        elif dialect == 'buckeye':
            header_pattern = re.compile("#\r{0,1}\n")
            line_pattern = re.compile("\s+\d{3}\s+")
            label_pattern = re.compile(" {0,1};| {0,1}\+")
            f = header_pattern.split(file_handle.read())[1]
            flist = f.splitlines()
            begin = 0.0
            for l in flist:
                line = line_pattern.split(l.strip())
                end = float(line[0])
                label = sys.intern(label_pattern.split(line[1])[0])
                output.append(BaseAnnotation(label, begin, end))
                begin = end
        else:
            raise NotImplementedError
    return output
def read_phones(path):
    """
    From a buckeye file, reads the phone lines, appends label, begin, and end to output

    Parameters
    ----------
    path : str
        path to file

    Returns
    -------
    output : list of tuples
        each tuple is label, begin, end for a phone
    """
    output = []
    with open(path, 'r') as file_handle:
        header_pattern = re.compile("#\r{0,1}\n")
        line_pattern = re.compile("\s+\d{3}\s+")
        label_pattern = re.compile(" {0,1};| {0,1}\+")
        f = header_pattern.split(file_handle.read())[1]
        flist = f.splitlines()
        begin = 0.0
        for l in flist:
            line = line_pattern.split(l.strip())
            try:
                end = float(line[0])
            except ValueError:
                # Missing phone label
                print('Warning: no label found in line: \'{}\''.format(l))
                continue
            label = sys.intern(label_pattern.split(line[1])[0])
            output.append((label, begin, end))
            begin = end
    return output
def __init__(self, name):
    if isinstance(name, Property):
        self._name = name._name
        self._hash = name._hash
    else:
        self._name = intern(name)
        self._hash = id(self._name)
def read_words(path, dialect, sr=None):
    output = list()
    with open(path, 'r') as file_handle:
        if dialect == 'buckeye':
            f = re.split(r"#\r{0,1}\n", file_handle.read())[1]
            line_pattern = re.compile("; | \d{3} ")
            begin = 0.0
            flist = f.splitlines()
            for l in flist:
                line = line_pattern.split(l.strip())
                end = float(line[0])
                word = sys.intern(line[1])
                if word[0] != "<" and word[0] != "{":
                    try:
                        citation = line[2].split(' ')
                        phonetic = line[3].split(' ')
                        category = line[4]
                    except:
                        citation = None
                        phonetic = None
                        category = None
                else:
                    citation = None
                    phonetic = None
                    category = None
                if word in FILLERS:
                    category = 'UH'
                line = {'spelling': word, 'begin': begin, 'end': end,
                        'transcription': citation,
                        'surface_transcription': phonetic,
                        'category': category}
                output.append(line)
                begin = end
        else:
            raise NotImplementedError
    return output
def __init__(self, name, positive_properties=None, negative_properties=None):
    if not isinstance(name, str):
        raise TypeError(name, str)
    self._name = intern(name)
    self._positive_properties = (
        frozenset([Property(prop) for prop in positive_properties])
        if positive_properties else frozenset()
    )
    self._negative_properties = (
        frozenset([Property(prop) for prop in negative_properties])
        if negative_properties else frozenset()
    )
    if self._positive_properties & self._negative_properties:
        raise ValueError("Property is both positive and negative.")
    # This works because we intern the name & properties beforehand:
    self._hash = (
        id(self._name)
        ^ reduce(lambda a, b: a ^ hash(b), self._positive_properties, 0)
        ^ reduce(lambda a, b: a ^ -hash(b), self._negative_properties, 0)
    )
def _read_section_data(self, leaf, data):
    records = []
    while data:
        contig, start, end = unpack("=III", data[:12])
        data = data[12:]
        _null = data.find(b'\0')
        fields = data[:_null].split(b'\t')
        name = fields[0] if fields else sys.intern('.')
        score = float(fields[1]) if len(fields) > 1 else numpy.nan
        strand = fields[2] if len(fields) > 2 else sys.intern('.')
        rest = tuple(fields[3:]) if len(fields) > 3 else None
        record = BED(self._contig_by_id[contig].name,
                     start, end, name, score, strand, rest)
        records.append(record)
        data = data[(_null + 1):]
    return records
def _extract_path(s, encoding):
    pos = len(s)
    count = 3
    while count > 0 and pos > 0 and s[pos - 1] == 0:
        pos -= 1
        count -= 1
    return intern(s[0:pos].decode(encoding))
def unique_names(self):
    names = set()
    for t in self.techniques:
        while t.name in names:
            t.name += '~'
        t.name = sys.intern(t.name)
        names.add(t.name)
def tokenize(self, doc, stop_rule=lambda token: False):
    """
    Tokenizes a document.

    This is a very naive tokenizer; i.e. it has no stop words, since we
    need those words to generate convincing speech. It also strips
    punctuation from the beginning and end of tokens, except for '@' at
    the beginning of a token.

    Optionally provide a `stop_rule` function, which should return True
    if a token should be stopped on.
    """
    tokens = []
    punctuation = string.punctuation.replace('@', '') + '“”‘’–"'
    for token in doc.split(' '):
        # This saves memory by having
        # duplicate strings just point to the same memory.
        token = sys.intern(token.strip(punctuation))

        # Ignore punctuation and stopwords
        if not token or stop_rule(token):
            continue

        tokens.append(token.lower())
    return tokens
# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = sys.intern('add')
TOKEN_ASSIGN = sys.intern('assign')
TOKEN_COLON = sys.intern('colon')
TOKEN_COMMA = sys.intern('comma')
TOKEN_DIV = sys.intern('div')
TOKEN_DOT = sys.intern('dot')
TOKEN_EQ = sys.intern('eq')
TOKEN_FLOORDIV = sys.intern('floordiv')
TOKEN_GT = sys.intern('gt')
TOKEN_GTEQ = sys.intern('gteq')
TOKEN_LBRACE = sys.intern('lbrace')
TOKEN_LBRACKET = sys.intern('lbracket')
TOKEN_LPAREN = sys.intern('lparen')
TOKEN_LT = sys.intern('lt')
TOKEN_LTEQ = sys.intern('lteq')
TOKEN_MOD = sys.intern('mod')
def read_data( input_file, method, omit_bias=False, omit_lexemes=False, max_lines=None, bins=None, seed=-1, training_fraction=1.0, ): # read learning trace data in specified format, see README for details sys.stderr.write("reading data...") if method == "hlr-pw": num_quantiles = len(bins) - 1 quantile_intervals = list(zip(bins[:-1], bins[1:])) else: num_quantiles, quantile_intervals = None, [] instances = list() if input_file.endswith("gz"): f = gzip.open(input_file, "rb") else: f = open(input_file, "r") reader = csv.DictReader(f) for i, row in enumerate(reader): if max_lines is not None and i >= max_lines: break p = pclip(float(row["p_recall"])) t = float(row["delta"]) / (60 * 60 * 24) # convert time delta to days h = hclip(-t / (math.log(p, 2))) lang = "%s->%s" % (row["ui_language"], row["learning_language"]) # lexeme_id = row['lexeme_id'] lexeme_string = row["lexeme_string"] timestamp = int(row["timestamp"]) user_id = row["user_id"] seen = int(row["history_seen"]) right = int(row["history_correct"]) wrong = seen - right right_this = int(row["session_correct"]) wrong_this = int(row["session_seen"]) - right_this # feature vector is a list of (feature, value) tuples fv = [] # core features based on method if method == "leitner": fv.append((intern("diff"), right - wrong)) elif method == "pimsleur": fv.append((intern("total"), right + wrong)) elif method == "hlr" or method == "power": fv.append((intern("right"), right)) fv.append((intern("wrong"), wrong)) # fv.append((intern('right'), math.sqrt(1+right))) # fv.append((intern('wrong'), math.sqrt(1+wrong))) elif method == "hlr-pw": # Now need to fill in the right_{quantile} for each row. for q in range(num_quantiles): in_this_quantile = (quantile_intervals[q][0] <= t < quantile_intervals[q][1]) fv.append(("right_%d" % q, right if in_this_quantile else 0)) fv.append(("wrong_%d" % q, wrong if in_this_quantile else 0)) else: raise Exception("Unknown method {}".format(method)) # optional flag features if method == "lr": fv.append((intern("time"), t)) if not omit_bias: fv.append((intern("bias"), 1.0)) if not omit_lexemes: # fv.append((intern('%s:%s' % (row['learning_language'], lexeme_string)), 1.)) # Remove the 'de:' prefix. fv.append((intern(lexeme_string), 1.0)) instances.append( Instance( p, t, fv, h, (right + 2.0) / (seen + 4.0), lang, right_this, wrong_this, timestamp, user_id, lexeme_string, )) if i % 1000000 == 0: sys.stderr.write("%d..." % i) sys.stderr.write("done!\n") splitpoint = int(0.9 * len(instances)) if seed > 0: sys.stderr.write("Shuffling with seed %d.\n" % seed) random.seed(seed) random.shuffle(instances) training = instances[:int(splitpoint * training_fraction)] testing = instances[splitpoint:] return training, testing
def apply_transform(bug): is_couple = isinstance(bug, tuple) if not is_couple: bug_id = bug["id"] if self.rollback and bug_id not in already_rollbacked: bug = bug_snapshot.rollback(bug, self.rollback_when) already_rollbacked.add(bug_id) else: bug1_id = bug[0]["id"] bug2_id = bug[1]["id"] if self.rollback: if bug1_id not in already_rollbacked: bug[0] = bug_snapshot.rollback(bug[0], self.rollback_when) already_rollbacked.add(bug1_id) if bug2_id not in already_rollbacked: bug[1] = bug_snapshot.rollback(bug[1], self.rollback_when) already_rollbacked.add(bug2_id) data = {} for feature_extractor in self.feature_extractors: res = None if isinstance(feature_extractor, single_bug_feature) and not is_couple: res = feature_extractor( bug, reporter_experience=reporter_experience_map[ bug["creator"]], author_ids=author_ids, ) elif isinstance(feature_extractor, couple_bug_feature) and is_couple: res = feature_extractor(bug) if hasattr(feature_extractor, "name"): feature_extractor_name = feature_extractor.name else: feature_extractor_name = feature_extractor.__class__.__name__ if res is None: continue if isinstance(res, (list, set)): for item in res: data[sys.intern( f"{item} in {feature_extractor_name}")] = "True" continue if isinstance(res, bool): res = str(res) data[feature_extractor_name] = res if is_couple: reporter_experience_map[bug[0]["creator"]] += 1 reporter_experience_map[bug[1]["creator"]] += 1 return {"data": data} else: reporter_experience_map[bug["creator"]] += 1 # TODO: Try simply using all possible fields instead of extracting features manually. summary = bug["summary"] comments = [c["text"] for c in bug["comments"]] for cleanup_function in self.cleanup_functions: summary = cleanup_function(summary) comments = [ cleanup_function(comment) for comment in comments ] return { "data": data, "title": summary, "first_comment": "" if len(comments) == 0 else comments[0], "comments": " ".join(comments), }
class_namespace = {
    '__doc__': f'{typename}({arg_list})',
    '__slots__': (),
    '_fields': field_names,
    '_field_defaults': field_defaults,
    # alternate spelling for backward compatibility
    '_fields_defaults': field_defaults,
    '__new__': __new__,
    '_make': _make,
    '_replace': _replace,
    '__repr__': __repr__,
    '_asdict': _asdict,
    '__getnewargs__': __getnewargs__,
}
for index, name in enumerate(field_names):
    doc = _sys.intern(f'Alias for field number {index}')
    class_namespace[name] = _tuplegetter(index, doc)

result = type(typename, (tuple,), class_namespace)

# For pickling to work, the __module__ variable needs to be set to the frame
# where the named tuple is created.  Bypass this step in environments where
# sys._getframe is not defined (Jython for example) or sys._getframe is not
# defined for arguments greater than 0 (IronPython), or where the user has
# specified a particular module.
if module is None:
    try:
        module = _sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        pass
if module is not None:
    result.__module__ = module

return result
def _unpack(self, execute=EX_CONSTRUCT): typ, n, obj = self._read_header(execute) if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: raise ValueError("Expected array") return n if execute == EX_READ_MAP_HEADER: if typ != TYPE_MAP: raise ValueError("Expected map") return n # TODO should we eliminate the recursion? if typ == TYPE_ARRAY: if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) for i in xrange(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in xrange(n) ) else: ret = {} for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) if self._strict_map_key and type(key) not in (unicode, bytes): raise ValueError( "%s is not allowed for map key" % str(type(key)) ) if not PY2 and type(key) is str: key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) return ret if execute == EX_SKIP: return if typ == TYPE_RAW: if self._raw: obj = bytes(obj) else: obj = obj.decode("utf_8", self._unicode_errors) return obj if typ == TYPE_BIN: return bytes(obj) if typ == TYPE_EXT: if n == -1: # timestamp ts = Timestamp.from_bytes(bytes(obj)) if self._timestamp == 1: return ts.to_unix() elif self._timestamp == 2: return ts.to_unix_nano() elif self._timestamp == 3: return ts.to_datetime() else: return ts else: return self._ext_hook(n, bytes(obj)) assert typ == TYPE_IMMEDIATE return obj
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
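# A hedged aside, not part of the lexer module above: once the token-type
# constants are interned, a parser can compare an incoming token's type against
# them by object identity, which is a pointer check rather than a character
# comparison. The names below reuse TOKEN_ADD and intern from the block above;
# `incoming` is a hypothetical value a tokenizer might produce.
incoming = intern("add")              # e.g. the type string emitted by a tokenizer
is_add_token = incoming is TOKEN_ADD  # identity check works because both are interned
assert is_add_token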
"""String interning machinery.""" import sys last_letter = "d" a = sys.intern("Hello World") b = sys.intern("Hello Worl" + last_letter) print("The ID of a: {}".format(id(a))) print("The ID of b: {}".format(id(b))) print("a is b? {}".format(a is b))
def position(i): return sys.intern('p%d' % (i, ))
def value(i): return sys.intern('n%d' % (i, ))
while True:
    N, = read_vals()
    if N == 0:
        break
    start, target = input().split()
    G = defaultdict(list)
    for _ in range(N):
        toks = input().split()
        toks = [sys.intern(t) for t in toks]
        node = toks[0]
        toks = toks[2:-1]
        cost = len(list(filter(lambda x: x[0] == '"', toks)))
        ch = list(filter(lambda x: x[0] != '"', toks))
        goal = target in toks
        G[node].append((cost, ch, goal))
    #print("G", G)

    mincosts = {n: -2 for n in G}
    gcosts = {n: -2 for n in G}
    visited = set()  # start]

    def visit(n, visited_, mincosts_):
        ##print("visit", n, "visited=", visited)
def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None): """Returns a new subclass of tuple with named fields. >>> Point = namedtuple('Point', ['x', 'y']) >>> Point.__doc__ # docstring for the new class 'Point(x, y)' >>> p = Point(11, y=22) # instantiate with positional args or keywords >>> p[0] + p[1] # indexable like a plain tuple 33 >>> x, y = p # unpack like a regular tuple >>> x, y (11, 22) >>> p.x + p.y # fields also accessible by name 33 >>> d = p._asdict() # convert to a dictionary >>> d['x'] 11 >>> Point(**d) # convert from a dictionary Point(x=11, y=22) >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields Point(x=100, y=22) """ # Validate the field names. At the user's option, either generate an error # message or automatically replace the field name with a valid name. if isinstance(field_names, str): field_names = field_names.replace(',', ' ').split() field_names = list(map(str, field_names)) typename = _sys.intern(str(typename)) if rename: seen = set() for index, name in enumerate(field_names): if (not name.isidentifier() or _iskeyword(name) or name.startswith('_') or name in seen): field_names[index] = f'_{index}' seen.add(name) for name in [typename] + field_names: if type(name) is not str: raise TypeError('Type names and field names must be strings') if not name.isidentifier(): raise ValueError('Type names and field names must be valid ' f'identifiers: {name!r}') if _iskeyword(name): raise ValueError('Type names and field names cannot be a ' f'keyword: {name!r}') seen = set() for name in field_names: if name.startswith('_') and not rename: raise ValueError('Field names cannot start with an underscore: ' f'{name!r}') if name in seen: raise ValueError(f'Encountered duplicate field name: {name!r}') seen.add(name) field_defaults = {} if defaults is not None: defaults = tuple(defaults) if len(defaults) > len(field_names): raise TypeError('Got more default values than field names') field_defaults = dict( reversed(list(zip(reversed(field_names), reversed(defaults))))) # Variables used in the methods and docstrings field_names = tuple(map(_sys.intern, field_names)) num_fields = len(field_names) arg_list = repr(field_names).replace("'", "")[1:-1] repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')' tuple_new = tuple.__new__ _dict, _tuple, _len, _map, _zip = dict, tuple, len, map, zip # Create all the named tuple methods to be added to the class namespace s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))' namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'} # Note: exec() has the side-effect of interning the field names exec(s, namespace) __new__ = namespace['__new__'] __new__.__doc__ = f'Create new instance of {typename}({arg_list})' if defaults is not None: __new__.__defaults__ = defaults @classmethod def _make(cls, iterable): result = tuple_new(cls, iterable) if _len(result) != num_fields: raise TypeError( f'Expected {num_fields} arguments, got {len(result)}') return result _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence ' 'or iterable') def _replace(self, /, **kwds): result = self._make(_map(kwds.pop, field_names, self)) if kwds: raise ValueError(f'Got unexpected field names: {list(kwds)!r}') return result
def _read_episode(self, data_loader): """ Read one episode at a time from the provided iterable over entries. :param data_loader: an iterable which returns tuples in the format described in the class docstring. """ episode = [] last_cands = None for entry, new in data_loader: if new and len(episode) > 0: yield tuple(episode) episode = [] last_cands = None # intern all strings so we don't store them more than once # TODO: clean up the if .. sys.intern else None by refactoring new_entry = [] if len(entry) > 0: # process text if available if entry[0] is not None: new_entry.append(sys.intern(entry[0])) else: new_entry.append(None) # TODO: unindent all of these one level. if len(entry) > 1: # process labels if available if entry[1] is None: new_entry.append(None) elif hasattr(entry[1], '__iter__') and type( entry[1]) is not str: # TODO: this could use the abc collections # make sure iterable over labels, not single string new_entry.append(tuple( sys.intern(e) for e in entry[1])) else: raise TypeError( 'Must provide iterable over labels, not a single string.' ) if len(entry) > 2: # process reward if available if entry[2] is not None: new_entry.append(entry[2]) else: new_entry.append(None) if len(entry) > 3: # process label candidates if available if entry[3] is None: new_entry.append(None) elif last_cands and entry[3] is last_cands: # if cands are shared, say "same" so we # don't store them again # TODO: This is bad, and it's not actually used anywhere # DEPRECATIONDAY: make this more rational new_entry.append(sys.intern('same as last time')) elif hasattr(entry[3], '__iter__') and type( entry[3]) is not str: # make sure iterable over candidates, not single string last_cands = entry[3] new_entry.append(tuple( sys.intern(e) for e in entry[3])) else: raise TypeError( 'Must provide iterable over label candidates, ' 'not a single string.') if len(entry) > 4 and entry[4] is not None: new_entry.append(sys.intern(entry[4])) episode.append(tuple(new_entry)) if len(episode) > 0: yield tuple(episode)
import logging
import sys

from vcweb.core import signals, simplecache
from vcweb.core.models import (DefaultValue, ExperimentMetadata, Parameter,
                               ParticipantRoundDataValue, GroupRelationship,
                               RoundConfiguration, get_participant_ready_parameter)
from vcweb.experiment.forestry.models import (
    get_harvest_decision_parameter, get_harvest_decision,
    get_group_harvest_parameter, get_reset_resource_level_parameter,
    get_regrowth_parameter, get_initial_resource_level_parameter,
    set_resource_level as forestry_set_resource_level,
    set_harvest_decision as forestry_set_harvest_decision,
    get_resource_level_parameter,
    get_resource_level_dv as get_unshared_resource_level_dv,
    get_group_harvest_dv, get_regrowth_dv,)

logger = logging.getLogger(__name__)

# FIXME: hacky, figure out a better way to bind module with its dependent
# ExperimentMetadata instance
EXPERIMENT_METADATA_NAME = sys.intern('bound')
# constants that should live in configuration as well
MAX_RESOURCE_LEVEL = 240
MAX_SHARED_RESOURCE_LEVEL = 480

INITIAL_RESOURCES_PER_PARTICIPANT_PER_ROUND = 3

'''
Experiment parameters and metadata accessors
'''

set_harvest_decision = forestry_set_harvest_decision
set_resource_level = forestry_set_resource_level


@simplecache
"""Data structures and algorithms for generic expansion and refactorisation.""" from collections import Counter, OrderedDict, defaultdict, namedtuple from itertools import product from sys import intern from gem.node import Memoizer, traversal from gem.gem import Node, Zero, Product, Sum, Indexed, ListTensor, one from gem.optimise import (remove_componenttensors, sum_factorise, traverse_product, traverse_sum, unroll_indexsum, expand_conditional, make_rename_map, make_renamer) # Refactorisation labels ATOMIC = intern('atomic') """Label: the expression need not be broken up into smaller parts""" COMPOUND = intern('compound') """Label: the expression must be broken up into smaller parts""" OTHER = intern('other') """Label: the expression is irrelevant with regards to refactorisation""" Monomial = namedtuple('Monomial', ['sum_indices', 'atomics', 'rest']) """Monomial type, representation of a tensor product with some distinguished factors (called atomics). - sum_indices: indices to sum over - atomics: tuple of expressions classified as ATOMIC - rest: a single expression classified as OTHER
def intern_str(string):
    if six.PY3:
        return sys.intern(str(string))
    return intern(str(string))
'''
Created on Oct 5, 2010

@author: Mark V Systems Limited
(c) Copyright 2010 Mark V Systems Limited, All rights reserved.
'''
# initialize object from loaded linkbases
from collections import defaultdict
from arelle import ModelDtsObject, XbrlConst, XmlUtil, ModelValue
from arelle.ModelObject import ModelObject
from arelle.ModelDtsObject import ModelResource
from arelle.PrototypeDtsObject import LocPrototype, PrototypeObject
from arelle.XbrlConst import consecutiveArcrole
import os, sys

USING_EQUIVALENCE_KEY = sys.intern(_STR_8BIT("using_equivalence_key"))  # indicates hash entry replaced with keyed entry

NoneType = type(None)


def create(modelXbrl, arcrole, linkrole=None, linkqname=None, arcqname=None, includeProhibits=False):
    return ModelRelationshipSet(modelXbrl, arcrole, linkrole, linkqname, arcqname, includeProhibits)


def ineffectiveArcs(baseSetModelLinks, arcrole, arcqname=None):
    hashEquivalentRels = defaultdict(list)
def plambda_intern(string): return sys.intern(string) if sys.version_info[0] > 2 else intern(string)
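# A small usage sketch, not from either project above, showing the kind of loop
# these Python 2/3 compatibility wrappers exist for: a low-cardinality field
# that repeats across many records is interned once and shared afterwards. The
# record data and the intern_compat helper are hypothetical; on Python 3 the
# helper simply defers to sys.intern.
import sys

def intern_compat(s):
    return sys.intern(str(s))

records = [("alice", "GET"), ("bob", "GET"), ("carol", "POST"), ("dave", "GET")]
methods = [intern_compat(method) for _user, method in records]

# Every "GET" entry now references the same canonical string object.
assert methods[0] is methods[1] is methods[3]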
def load_interned(self):
    n = self.r_long()
    ret = intern(self._read(n))
    self._stringtable.append(ret)
    return ret
def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None): """Returns a new subclass of tuple with named fields. >>> Point = namedtuple('Point', ['x', 'y']) >>> Point.__doc__ # docstring for the new class 'Point(x, y)' >>> p = Point(11, y=22) # instantiate with positional args or keywords >>> p[0] + p[1] # indexable like a plain tuple 33 >>> x, y = p # unpack like a regular tuple >>> x, y (11, 22) >>> p.x + p.y # fields also accessible by name 33 >>> d = p._asdict() # convert to a dictionary >>> d['x'] 11 >>> Point(**d) # convert from a dictionary Point(x=11, y=22) >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields Point(x=100, y=22) """ # Validate the field names. At the user's option, either generate an error # message or automatically replace the field name with a valid name. if isinstance(field_names, str): field_names = field_names.replace(',', ' ').split() field_names = list(map(str, field_names)) typename = _sys.intern(str(typename)) if rename: seen = set() for index, name in enumerate(field_names): if (not name.isidentifier() or _iskeyword(name) or name.startswith('_') or name in seen): field_names[index] = f'_{index}' seen.add(name) for name in [typename] + field_names: if type(name) is not str: raise TypeError('Type names and field names must be strings') if not name.isidentifier(): raise ValueError('Type names and field names must be valid ' f'identifiers: {name!r}') if _iskeyword(name): raise ValueError('Type names and field names cannot be a ' f'keyword: {name!r}') seen = set() for name in field_names: if name.startswith('_') and not rename: raise ValueError('Field names cannot start with an underscore: ' f'{name!r}') if name in seen: raise ValueError(f'Encountered duplicate field name: {name!r}') seen.add(name) field_defaults = {} if defaults is not None: defaults = tuple(defaults) if len(defaults) > len(field_names): raise TypeError('Got more default values than field names') field_defaults = dict( reversed(list(zip(reversed(field_names), reversed(defaults))))) # Variables used in the methods and docstrings field_names = tuple(map(_sys.intern, field_names)) num_fields = len(field_names) arg_list = repr(field_names).replace("'", "")[1:-1] repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')' tuple_new = tuple.__new__ _len = len # Create all the named tuple methods to be added to the class namespace s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))' namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'} # Note: exec() has the side-effect of interning the field names exec(s, namespace) __new__ = namespace['__new__'] __new__.__doc__ = f'Create new instance of {typename}({arg_list})' if defaults is not None: __new__.__defaults__ = defaults @classmethod def _make(cls, iterable): result = tuple_new(cls, iterable) if _len(result) != num_fields: raise TypeError( f'Expected {num_fields} arguments, got {len(result)}') return result _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence ' 'or iterable') def _replace(_self, **kwds): result = _self._make(map(kwds.pop, field_names, _self)) if kwds: raise ValueError(f'Got unexpected field names: {list(kwds)!r}') return result _replace.__doc__ = (f'Return a new {typename} object replacing specified ' 'fields with new values') def __repr__(self): 'Return a nicely formatted representation string' return self.__class__.__name__ + repr_fmt % self def _asdict(self): 'Return a new OrderedDict which maps field 
names to their values.' return OrderedDict(zip(self._fields, self)) def __getnewargs__(self): 'Return self as a plain tuple. Used by copy and pickle.' return tuple(self) # Modify function metadata to help with introspection and debugging for method in (__new__, _make.__func__, _replace, __repr__, _asdict, __getnewargs__): method.__qualname__ = f'{typename}.{method.__name__}' # Build-up the class namespace dictionary # and use type() to build the result class class_namespace = { '__doc__': f'{typename}({arg_list})', '__slots__': (), '_fields': field_names, '_fields_defaults': field_defaults, '__new__': __new__, '_make': _make, '_replace': _replace, '__repr__': __repr__, '_asdict': _asdict, '__getnewargs__': __getnewargs__, } cache = _nt_itemgetters for index, name in enumerate(field_names): try: itemgetter_object, doc = cache[index] except KeyError: itemgetter_object = _itemgetter(index) doc = f'Alias for field number {index}' cache[index] = itemgetter_object, doc class_namespace[name] = property(itemgetter_object, doc=doc) result = type(typename, (tuple, ), class_namespace) # For pickling to work, the __module__ variable needs to be set to the frame # where the named tuple is created. Bypass this step in environments where # sys._getframe is not defined (Jython for example) or sys._getframe is not # defined for arguments greater than 0 (IronPython), or where the user has # specified a particular module. if module is None: try: module = _sys._getframe(1).f_globals.get('__name__', '__main__') except (AttributeError, ValueError): pass if module is not None: result.__module__ = module return result
def intern_string(s): return intern(s)
def bin_fast5_file(f5_path, tax_annot_res_dir, sens, min_qual, min_qlen, min_pident, min_coverage, no_trash): # Function bins FAST5 file with untwisting. # # :param f5_path: path to FAST5 file meant to be processed; # :type f5_path: str; # :param tax_annot_res_dir: path to directory containing taxonomic annotation; # :type tax_annot_res_dir: str; # :param sens: binning sensitivity; # :type sens: str; # :param min_qual: threshold for quality filter; # :type min_qual: float; # :param min_qlen: threshold for length filter; # :type min_qlen: int (or None, if this filter is disabled); # :param min_pident: threshold for alignment identity filter; # :type min_pident: float (or None, if this filter is disabled); # :param min_coverage: threshold for alignment coverage filter; # :type min_coverage: float (or None, if this filter is disabled); # :param no_trash: loical value. True if user does NOT want to output trash files; # :type no_trash: bool; outdir_path = os.path.dirname( logging.getLoggerClass().root.handlers[0].baseFilename) seqs_pass = 0 # counter for sequences, which pass filters QL_seqs_fail = 0 # counter for too short or too low-quality sequences align_seqs_fail = 0 # counter for sequences, which align to their best hit with too low identity or coverage srt_file_dict = dict() index_dirpath = os.path.join( tax_annot_res_dir, index_name) # name of directory that will contain indicies # Configure path to "classification not found" file classif_not_found_fpath = get_classif_not_found_fpath(f5_path, outdir_path) not_fount_key = 'CLASSIF_NOT_FOUND' # Make filter for quality and length QL_filter = get_QL_filter(f5_path, min_qual, min_qlen) # Configure path to trash file if not no_trash: QL_trash_fpath = get_QL_trash_fpath( f5_path, outdir_path, min_qual, min_qlen, ) else: QL_trash_fpath = None # end if # Make filter for identity and coverage align_filter = get_align_filter(min_pident, min_coverage) # Configure path to this trash file if not no_trash: align_trash_fpath = get_align_trash_fpath(f5_path, outdir_path, min_pident, min_coverage) else: align_trash_fpath = None # end if # File validation: # RuntimeError will be raised if FAST5 file is broken. try: # File existance checking is performed while parsing CL arguments. # Therefore, this if-statement will trigger only if f5_path's file is not a valid HDF5 file. if not h5py.is_hdf5(f5_path): raise RuntimeError("file is not of HDF5 (i.e. 
not FAST5) format") # end if from_f5 = h5py.File(f5_path, 'r') for _ in from_f5: break # end for except RuntimeError as runterr: printlog_error_time("FAST5 file is broken") printlog_error("Reading the file `{}` crashed.".format( os.path.basename(f5_path))) printlog_error("Reason: {}".format(str(runterr))) printlog_error("Omitting this file...") print() # Return zeroes -- inc_val won't be incremented and this file will be omitted return (0, 0, 0) # end try # singleFAST5 and multiFAST5 files should be processed in different ways # "Raw" group always in singleFAST5 root and never in multiFAST5 root if "Raw" in from_f5.keys(): f5_cpy_func = copy_single_f5 else: f5_cpy_func = copy_read_f5_2_f5 # end if readids_to_seek = list(from_f5.keys()) # list of not-binned-yet read IDs # Fill the list 'readids_to_seek' for read_name in fast5_readids(from_f5): # Get rid of "read_" readids_to_seek.append(sys.intern(read_name)) # end for # Walk through the index index_f5_2_tsv = open_shelve(os.path.join(index_dirpath, index_name), 'r') if not f5_path in index_f5_2_tsv.keys(): printlog_error_time( "Source FAST5 file `{}` not found in index".format(f5_path)) printlog_error("Try to rebuild index") platf_depend_exit(1) # end if for tsv_path in index_f5_2_tsv[f5_path].keys(): read_names = index_f5_2_tsv[f5_path][tsv_path] if tsv_path == not_fount_key: for read_name in read_names: # Place this sequence into the "classification not found" file if classif_not_found_fpath not in srt_file_dict.keys(): srt_file_dict = update_file_dict(srt_file_dict, classif_not_found_fpath) # end if f5_cpy_func(from_f5, read_name, srt_file_dict[classif_not_found_fpath]) # end for continue # end if taxonomy_path = os.path.join(tax_annot_res_dir, "taxonomy", "taxonomy.tsv") resfile_lines = configure_resfile_lines(tsv_path, sens, taxonomy_path) for read_name in read_names: try: hit_names, *vals_to_filter = resfile_lines[sys.intern( fmt_read_id(read_name)[1:])] except KeyError: # Place this sequence into the "classification not found" file if classif_not_found_fpath not in srt_file_dict.keys(): srt_file_dict = update_file_dict(srt_file_dict, classif_not_found_fpath) # end if f5_cpy_func(from_f5, read_name, srt_file_dict[classif_not_found_fpath]) continue # end try if not QL_filter(vals_to_filter): # Get name of result FASTQ file to write this read in if QL_trash_fpath not in srt_file_dict.keys(): srt_file_dict = update_file_dict(srt_file_dict, QL_trash_fpath) # end if f5_cpy_func(from_f5, read_name, srt_file_dict[QL_trash_fpath]) QL_seqs_fail += 1 elif not align_filter(vals_to_filter): # Get name of result FASTQ file to write this read in if align_trash_fpath not in srt_file_dict.keys(): srt_file_dict = update_file_dict(srt_file_dict, align_trash_fpath) # end if f5_cpy_func(from_f5, read_name, srt_file_dict[align_trash_fpath]) align_seqs_fail += 1 else: for hit_name in hit_names.split( "&&" ): # there can be multiple hits for single query sequence # Get name of result FASTQ file to write this read in binned_file_path = os.path.join( outdir_path, "{}.fast5".format(hit_name)) if binned_file_path not in srt_file_dict.keys(): srt_file_dict = update_file_dict( srt_file_dict, binned_file_path) # end if f5_cpy_func(from_f5, read_name, srt_file_dict[binned_file_path]) # end for seqs_pass += 1 # end if # end for from_f5.close() index_f5_2_tsv.close() # Close all binned files for file_obj in filter(lambda x: not x is None, srt_file_dict.values()): file_obj.close() # end for return (seqs_pass, QL_seqs_fail, align_seqs_fail)
def __init__(self, type="none", actions=None): self.type = intern(type) self.actions = [] if actions is None else actions
def __init__(self, time, username, text):
    self.time = time.replace(microsecond=0, second=0, minute=0)
    self.username = intern(username)
    self.text = text.strip()
def _reference_intern(args): return sys.intern(args[0])
def __init__(self, type, value):
    if type not in self.VALID_TYPES:
        raise ValueError("invalid type %r" % type)
    self.type = intern(type)
    self.value = value
def testNoIntern(self):
    s = marshal.loads(marshal.dumps(self.strobj, 2))
    self.assertEqual(s, self.strobj)
    self.assertNotEqual(id(s), id(self.strobj))
    s2 = sys.intern(s)
    self.assertNotEqual(id(s2), id(s))
if typing.TYPE_CHECKING:
    import datetime

    import aiohttp.http_websocket
    import aiohttp.typedefs

    from hikari import channels
    from hikari import config
    from hikari import guilds
    from hikari import users as users_
    from hikari.api import event_factory as event_factory_
    from hikari.api import event_manager as event_manager_

# Important attributes
_D: typing.Final[str] = sys.intern("d")
_T: typing.Final[str] = sys.intern("t")
_S: typing.Final[str] = sys.intern("s")
_OP: typing.Final[str] = sys.intern("op")

# Opcodes.
_DISPATCH: typing.Final[int] = 0
_HEARTBEAT: typing.Final[int] = 1
_IDENTIFY: typing.Final[int] = 2
_PRESENCE_UPDATE: typing.Final[int] = 3
_VOICE_STATE_UPDATE: typing.Final[int] = 4
_RESUME: typing.Final[int] = 6
_RECONNECT: typing.Final[int] = 7
_REQUEST_GUILD_MEMBERS: typing.Final[int] = 8
_INVALID_SESSION: typing.Final[int] = 9
_HELLO: typing.Final[int] = 10
def __new__(cls, lineno, type, value): return tuple.__new__(cls, (lineno, sys.intern(str(type)), value))
def comb(point, value):
    'Format a fact (a value assigned to a given point)'
    return intern(f'{point} {value}')
def _read_episode(self, data_loader): """Reads one episode at a time from the provided iterable over entries. :param data_loader: an iterable which returns tuples in the format described in the class docstring. """ episode = [] last_cands = None for entry, new in data_loader: if new and len(episode) > 0: yield tuple(episode) episode = [] last_cands = None # intern all strings so we don't store them more than once new_entry = [] if len(entry) > 0: # process text if available if entry[0] is not None: new_entry.append(sys.intern(entry[0])) else: new_entry.append(None) if len(entry) > 1: # process labels if available if entry[1] is None: new_entry.append(None) elif hasattr(entry[1], '__iter__') and type(entry[1]) is not str: # make sure iterable over labels, not single string new_entry.append(tuple(sys.intern(e) for e in entry[1])) else: raise TypeError( 'Must provide iterable over labels, not a single string.' ) if len(entry) > 2: # process reward if available if entry[2] is not None: new_entry.append(entry[2]) else: new_entry.append(None) if len(entry) > 3: # process label candidates if available if entry[3] is None: new_entry.append(None) elif last_cands and entry[3] is last_cands: # if cands are shared, say "same" so we # don't store them again new_entry.append( sys.intern('same as last time')) elif (hasattr(entry[3], '__iter__') and type(entry[3]) is not str): # make sure iterable over candidates, not single string last_cands = entry[3] new_entry.append(tuple( sys.intern(e) for e in entry[3])) else: raise TypeError( 'Must provide iterable over label candidates, ' 'not a single string.' ) if len(entry) > 4 and entry[4] is not None: new_entry.append(sys.intern(entry[4])) episode.append(tuple(new_entry)) if len(episode) > 0: yield tuple(episode)