Example #1
 def parse(self, response):
     if response.status == 200:
         # Replace the nested <a> tags
         new_body = sub(REPLACE, lambda x: x.group(1), response.body)
         response = response.replace(body = new_body)        
     url_params = search(EXPRESSION, response.url)
     chan = intern(url_params.group(1))
     ar_time_match = search(ARCHIVE_TIME, response.request.url)
     archive_time = datetime.datetime.strptime(ar_time_match.group(1),
                                               "%Y%m%d%H%M%S")
     for row in response.xpath('//div[@class="container"]//tr'):
         elements = row.xpath("td")
         time_string = elements[0].xpath("a/@href").extract()[0].replace("#l", "")
         pytime = datetime.datetime.fromtimestamp(float(time_string),
                                                  datetime.timezone.utc)
         try:
             username = intern(elements[1].xpath("text()").extract()[0])
         except IndexError:
             username = ""
         extracted_text = elements[2].xpath(".//text()").extract()
         text = "".join(extracted_text)
             
         item = WaybackArchive()
         item["time"] = pytime
         item["chan"] = chan
         item["username"] = username
         item["text"] = text
         item["archive_time"] = archive_time
         yield item
Example #2
    def __set_values_from_fields(self):

        """
        Private method that sets the correct values from the fields derived from the input line.
        :return:
        """
        self.chrom, self.start, self.end, \
            self.name, self.score, self.strand, \
            self.thick_start, self.thick_end, self.rgb, \
            self.block_count, block_sizes, block_starts = self._fields

        # Reduce memory usage
        intern(self.chrom)
        self.start = int(self.start) + 1
        self.end = int(self.end)
        self.score = float(self.score)
        self.thick_start = int(self.thick_start) + 1
        self.thick_end = int(self.thick_end)
        self.block_count = int(self.block_count)
        self.block_sizes = [int(x) for x in block_sizes.split(",")]
        self.block_starts = [int(x) for x in block_starts.split(",")]
        self.has_start_codon = None
        self.has_stop_codon = None
        self.start_codon = None
        self.stop_codon = None
        self.fasta_length = len(self)
        return
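A caveat about the snippet above: intern(self.chrom) discards the interned string it returns, so the attribute itself may keep pointing at its own private copy. The usual memory-saving idiom assigns the result back; a minimal illustrative sketch (not part of the original class):

from sys import intern

chrom_a = "".join(["chr", "1"])   # built at runtime, so not automatically interned
chrom_b = "".join(["chr", "1"])
assert chrom_a is not chrom_b     # two separate, equal strings in memory
chrom_a = intern(chrom_a)         # rebind to the canonical copy
chrom_b = intern(chrom_b)
assert chrom_a is chrom_b         # now a single shared object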
Example #3
def _mapf_signal(member):
	mname=member.name
	iname=member.interface._name
	mtype=member.signal_type
	mtype_read=mtype.unpack
	myKeyError=KeyError
	Bus_signal=Bus.signal

	def cb(callback, slot, msg):
		callback(*mtype_read(msg))
	cb_get=cb.__get__
	def cbm(callback, slot, msg):
		callback(msg, *mtype_read(msg))
	cbm_get=cbm.__get__

	def onsignal(_pybus_bound, callback):
		bus,destination,path,data=_pybus_bound
		return Bus_signal(bus, destination, path, iname, mname, cb_get(callback))

	def onsignalm(_pybus_bound, callback):
		bus,destination,path,data=_pybus_bound
		return Bus_signal(bus, destination, path, iname, mname, cbm_get(callback))

	onsignal.__name__=_sys.intern(mname+'__onsignal')
	onsignal.__qualname__=iname+'.'+mname+'__onsignal'
	onsignalm.__name__=_sys.intern(mname+'__onsignalm')
	onsignalm.__qualname__=iname+'.'+mname+'__onsignalm'

	return (onsignal,onsignalm)
Example #4
    def parse(self, response):
        if response.status == 200:
            # Replace the nested <a> tags
            new_body = sub(REPLACE, lambda x: x.group(1), response.body)
            response = response.replace(body = new_body)
        url_params = search(EXPRESSION, response.url)
        chan = intern(url_params.group(1))
        for row in response.xpath("//tr"):
            elements = row.xpath("td")
            time_string = elements[0].xpath("a/@href").extract()[0].replace("#l", "")
            pytime = datetime.datetime.fromtimestamp(float(time_string),
                                                     datetime.timezone.utc)
            try:
                username = intern(elements[1].xpath("text()").extract()[0])
            except IndexError:
                username = ""
            extracted_text = elements[2].xpath(".//text()").extract()
            text = "".join(extracted_text)

            item = BitcoinIrcItem()
            item["time"] = pytime
            item["chan"] = chan
            item["username"] = username
            item["text"] = text
            yield item
Example #5
    def __set_values_from_gff(self, fasta_length):
        """
        Private method that sets the correct values from the fields derived from an input GFF line.
        :return:
        """

        (self.chrom, self.thick_start,
         self.thick_end, self.strand, self.name) = (self._line.chrom,
                                                    self._line.start,
                                                    self._line.end, self._line.strand, self._line.id)
        intern(self.chrom)
        assert self.name is not None
        self.start = 1
        self.end = fasta_length
        self.score = self._line.score
        self.rgb = None
        self.block_count = 1
        self.block_sizes = [self.thick_end - self.thick_start + 1]
        self.block_starts = [self.thick_start]
        self.has_start_codon = None
        self.has_stop_codon = None
        self.start_codon = None
        self.stop_codon = None
        self.fasta_length = fasta_length
        return
Example #6
def parse_GFF_attribute_string(attrStr, extra_return_first_value=False):
    """Parses a GFF attribute string and returns it as a dictionary.

    If 'extra_return_first_value' is set, a pair is returned: the dictionary
    and the value of the first attribute. This might be useful if this is the
    ID.
    """
    if attrStr.endswith("\n"):
        attrStr = attrStr[:-1]
    d = {}
    first_val = "_unnamed_"
    for (i, attr) in zip(
            itertools.count(),
            _HTSeq.quotesafe_split(attrStr.encode())):
        attr = attr.decode()
        if _re_attr_empty.match(attr):
            continue
        if attr.count('"') not in (0, 2):
            raise ValueError(
                "The attribute string seems to contain mismatched quotes.")
        mo = _re_attr_main.match(attr)
        if not mo:
            raise ValueError("Failure parsing GFF attribute line")
        val = mo.group(2)
        if val.startswith('"') and val.endswith('"'):
            val = val[1:-1]
        d[sys.intern(mo.group(1))] = sys.intern(val)
        if extra_return_first_value and i == 0:
            first_val = val
    if extra_return_first_value:
        return (d, first_val)
    else:
        return d
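As the docstring notes, every key and value goes through sys.intern, so attribute names repeated across millions of GFF lines are stored only once. A rough, hypothetical call (assuming the HTSeq internals referenced above, _HTSeq.quotesafe_split, _re_attr_main and _re_attr_empty, are importable):

attrs, first = parse_GFF_attribute_string('gene_id "ENSG01"; gene_name "TP53"', True)
# attrs is roughly {'gene_id': 'ENSG01', 'gene_name': 'TP53'} and first is 'ENSG01';
# the surrounding quotes are stripped and each key/value is the interned copy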
Example #7
    def load_dict(self, state, exclude_utr=False, protein_coding=False):

        for key in ["chrom", "source", "start", "end", "strand", "id"]:
            setattr(self, key, state[key])

        for tid, tvalues in state["transcripts"].items():
            transcript = Transcript(logger=self.logger)
            transcript.load_dict(tvalues)
            transcript.finalize()
            if protein_coding is True and transcript.is_coding is False:
                self.logger.debug("{0} is non coding ({1}, {2})".format(
                    transcript.id,
                    transcript.combined_cds,
                    transcript.segments))
                continue
            if exclude_utr is True:
                has_utrs = (transcript.utr_length > 0)
                transcript.remove_utrs()
                if has_utrs is True and (transcript.utr_length > 0):
                    raise AssertionError("Failed to remove the UTRs!")
            self.transcripts[tid] = transcript

        self.chrom = intern(self.chrom)
        self.source = intern(self.source)
        self.id = intern(self.id)

        return
Example #8
 def parse_parts(self, parts):
     parsed = []
     sep = self.sep
     altsep = self.altsep
     drv = root = ''
     it = reversed(parts)
     for part in it:
         if not part:
             continue
         if altsep:
             part = part.replace(altsep, sep)
         drv, root, rel = self.splitroot(part)
         if sep in rel:
             for x in reversed(rel.split(sep)):
                 if x and x != '.':
                     parsed.append(sys.intern(x))
         else:
             if rel and rel != '.':
                 parsed.append(sys.intern(rel))
         if drv or root:
             if not drv:
                 # If no drive is present, try to find one in the previous
                 # parts. This makes the result of parsing e.g.
                 # ("C:", "/", "a") reasonably intuitive.
                 for part in it:
                     drv = self.splitroot(part)[0]
                     if drv:
                         break
             break
     if drv or root:
         parsed.append(drv + root)
     parsed.reverse()
     return drv, root, parsed
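The inner "for part in it" loop is what makes drive handling intuitive: when a later part carries no drive, earlier parts are scanned for one. Through the public pathlib API this gives, for example:

from pathlib import PureWindowsPath

p = PureWindowsPath("C:", "/", "a")
# p.drive == 'C:', p.root == '\\', p.parts == ('C:\\', 'a')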
Example #9
def read_sources_file(filename, sources=None, intern=sys.intern):
    """Parse a single Sources file into a hash

    Parse a single Sources file into a dict mapping a source package
    name to a SourcePackage object.  If there are multiple source
    packages with the same version, then highest versioned source
    package (that is not marked as "Extra-Source-Only") is the
    version kept in the dict.

    :param filename: Path to the Sources file.  Can be compressed by any algorithm supported by apt_pkg.TagFile
    :param sources: Optional dict to add the packages to.  If given, this is also the value returned.
    :param intern: Internal optimisation / implementation detail to avoid python's "LOAD_GLOBAL" instruction in a loop
    :return a dict mapping a name to a source package
    """
    if sources is None:
        sources = {}

    tag_file = apt_pkg.TagFile(filename)
    get_field = tag_file.section.get
    step = tag_file.step

    while step():
        if get_field('Extra-Source-Only', 'no') == 'yes':
            # Ignore sources only referenced by Built-Using
            continue
        pkg = get_field('Package')
        ver = get_field('Version')
        # There may be multiple versions of the source package
        # (in unstable) if some architectures have out-of-date
        # binaries.  We only ever consider the source with the
        # largest version for migration.
        if pkg in sources and apt_pkg.version_compare(sources[pkg][0], ver) > 0:
            continue
        maint = get_field('Maintainer')
        if maint:
            maint = intern(maint.strip())
        section = get_field('Section')
        if section:
            section = intern(section.strip())
        build_deps_arch = ", ".join(x for x in (get_field('Build-Depends'), get_field('Build-Depends-Arch'))
                                    if x is not None)
        if build_deps_arch != '':
            build_deps_arch = sys.intern(build_deps_arch)
        else:
            build_deps_arch = None
        build_deps_indep = get_field('Build-Depends-Indep')
        if build_deps_indep is not None:
            build_deps_indep = sys.intern(build_deps_indep)
        sources[intern(pkg)] = SourcePackage(intern(ver),
                                             section,
                                             set(),
                                             maint,
                                             False,
                                             build_deps_arch,
                                             build_deps_indep,
                                             get_field('Testsuite', '').split(),
                                             get_field('Testsuite-Triggers', '').replace(',', '').split(),
                                             )
    return sources
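The intern=sys.intern default argument called out in the docstring is a small CPython optimisation: binding the function as a default parameter turns the per-iteration name lookup into a fast local access instead of a global one. A minimal sketch of the same pattern:

import sys

def dedupe_names(names, intern=sys.intern):
    # intern is a local variable here, bound once when the function is defined
    return [intern(n) for n in names]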
Example #10
    def _read_episode(self, data_generator):
        """Reads one episode at a time from the provided iterator over entries.
        """
        episode = []
        last_cands = None
        for entry, new in data_generator:
            if new and len(episode) > 0:
                yield tuple(episode)
                episode = []
                last_cands = None

            # intern all strings so we don't store them more than once
            new_entry = []
            if len(entry) > 0:
                # process text if available
                if entry[0] is not None:
                    new_entry.append(sys.intern(entry[0]))
                else:
                    new_entry.append(None)
                if len(entry) > 1:
                    # process labels if available
                    if entry[1] is None:
                        new_entry.append(None)
                    elif hasattr(entry[1], '__iter__') and type(entry[1]) is not str:
                        # make sure iterable over labels, not single string
                        new_entry.append(tuple(sys.intern(e) for e in entry[1]))
                    else:
                        raise TypeError('Must provide iterable over labels, not a single string.')
                    if len(entry) > 2:
                        # process reward if available
                        if entry[2] is not None:
                            new_entry.append(entry[2])
                        else:
                            new_entry.append(None)
                        if len(entry) > 3:
                            # process label candidates if available
                            if entry[3] is None:
                                new_entry.append(None)
                            elif last_cands and entry[3] is last_cands:
                                # if cands are shared, say "same" so we
                                # don't store them again
                                new_entry.append(
                                    sys.intern('same as last time'))
                            elif hasattr(entry[3], '__iter__') and type(entry[3]) is not str:
                                # make sure iterable over candidates, not single string
                                last_cands = entry[3]
                                new_entry.append(tuple(
                                    sys.intern(e) for e in entry[3]))
                            else:
                                raise TypeError('Must provide iterable over label candidates, not a single string.')
                            if len(entry) > 4 and entry[4] is not None:
                                new_entry.append(sys.intern(entry[4]))

            episode.append(tuple(new_entry))

        if len(episode) > 0:
            yield tuple(episode)
Example #11
 def set_identifier(self, identifier):
     self._identifier = str(identifier)
     sys.intern(self._identifier)
     # identifier_first_part represents the part of the name in front of the first dot (if any), eg. for myfamily.myvar it would represent myfamily
     if '.' in identifier:
         self.identifier_first_part = identifier[:identifier.index('.')]
         self.identifier_last_part  = identifier[identifier.index('.'):]
     else:
         self.identifier_first_part = identifier
         self.identifier_last_part = ''
Example #12
 def test_sys_intern(self):
     """
     Py2's builtin intern() has been moved to the sys module. Tests
     whether sys.intern is available.
     """
     from sys import intern
     if utils.PY3:
         self.assertEqual(intern('hello'), 'hello')
     else:
         # intern() requires byte-strings on Py2:
         self.assertEqual(intern(b'hello'), b'hello')
Example #13
        def _reduce_memory_dict(old_dict):
            new_dict = dict()
            for (k, v) in iteritems(old_dict):
                if type(k) is str:
                    k = intern(k)

                if type(v) is str:
                    v = intern(v)
                elif type(v) is dict:
                    # This handles [{'Caller': ..., 'DebugLoc': { 'File': ... }}]
                    v = _reduce_memory_dict(v)
                new_dict[k] = v
            return tuple(new_dict.items())
Example #14
 def loadAlignedParts(self, db):
     "load lists of existing aligned partitions for the updated, if not already done"
     if self.alignDb != db:
         self.alignDb = db
         self.alignParts = []
         self.alignMap = {}
         alnDir = "data/aligned/" + self.rel + "/" + db + "/" + self
         for alIdx in globSort(alnDir + "/mrna.*.alidx"):
             names = os.path.basename(alIdx).split(".")
             self._addAlignedPart(MRNA, sys.intern(names[1]))
         for alIdx in globSort(alnDir + "/est.*.*.alidx"):
             names = os.path.basename(alIdx).split(".")
             self._addAlignedPart(EST, sys.intern(names[2]), sys.intern(names[1]))
Example #15
    def create_from_describe(vardict, index):
        """Create P4File from p4 describe

        Describe does not report the client path, but that will be
        reported later by p4 sync and set on the P4File at that time.
        """

        f = P4File()
        f.depot_path = sys.intern(vardict["depotFile"][index])
        f.type = sys.intern(vardict["type"][index])
        f.action = sys.intern(vardict["action"][index])
        f._revision = int(vardict["rev"][index])
        return f
Example #16
def format_stack_trace(frame, thread_category):
    """Formats the frame obj into a list of stack trace tuples.
    """

    stack_trace = deque()

    while frame:
        # The value frame.f_code.co_firstlineno is the first line of
        # code in the file for the specified function. The value
        # frame.f_lineno is the actual line which is being executed
        # at the time the stack frame was being viewed.

        code = frame.f_code

        filename = intern(code.co_filename)
        func_name = intern(code.co_name)
        first_line = code.co_firstlineno

        real_line = frame.f_lineno

        # Set ourselves up to process next frame back up the stack.

        frame = frame.f_back

        # So as to make it more obvious to the user as to what their
        # code is doing, we drop out stack frames related to the
        # agent instrumentation. Don't do this for the agent threads
        # though as we still need to see them in that case so we can
        # debug what the agent itself is doing.

        if (thread_category != 'AGENT' and
                filename.startswith(AGENT_PACKAGE_DIRECTORY)):
            continue

        if not stack_trace:
            # Add the fake leaf node with line number of where the
            # code was executing at the point of the sample. This
            # could be actual Python code within the function, or
            # more likely showing the point where a call is being
            # made into a C function wrapped as Python object. The
            # latter can occur because we will not see stack frames
            # when calling into C functions.

            stack_trace.appendleft((filename, func_name, real_line, real_line))

        # Add the actual node for the function being called at this
        # level in the stack frames.

        stack_trace.appendleft((filename, func_name, first_line, real_line))

    return stack_trace
Example #17
    def __init__(self, next_attr_name=None, prev_attr_name=None):
        """Initializes this list.

        next_attr_name: The name of the attribute that holds a reference
                        to the next item in the list.

        prev_attr_name: the name of the attribute that holds a reference
                        to the previous item in the list.
        """

        # Keep an interned version of the attribute names. This should
        # speed up the process of looking up the attributes.
        self.next_name = intern(next_attr_name)
        self.prev_name = intern(prev_attr_name)
Example #18
 def __init__(self, row):
     self.taxId = int(row[0])
     self.parentTaxId = int(row[1])
     self.rank = sys.intern(row[2])
     self.emblCode = sys.intern(row[3])
     self.divisionId = int(row[4])
     self.inheritedDivFlag = bool(row[5])
     self.geneticCodeId = int(row[6])
     self.inheritedGCflag = bool(row[7])
     self.mitochondrialGeneticCodeId = int(row[8])
     self.inheritedMGCflag = bool(row[9])
     self.genBankHiddenFlag = bool(row[10])
     self.hiddenSubtreeRootFlag = bool(row[11])
     self.comments = row[12]
Example #19
 def dig_node(node, parent_summary_builder, child_literals_holder):
     if node is None:
         return
     elif isinstance(node, list):
         n0 = node[0]
         assert n0 in (ct.ORDERED_AND, ct.ORDERED_OR)
         for subn in node[1:]:
             dig_node(subn, parent_summary_builder, child_literals_holder)
         return
     elif isinstance(node, ct.CallNode):
         invoked = node.invoked
         lits = invoked.literals
         lits and parent_summary_builder.extend_literal(lits)
         lbl = callnode_label(node)
         if lbl not in parent_summary_builder.already_appended_callnodes:
             stack.append(lbl)
             nodesum = summary_table.get(lbl)
             if nodesum is None:
                 sb = SummaryBuilder()
                 clh = []
                 subnode = node.body
                 if subnode is None:
                     pass
                 elif isinstance(subnode, (list, ct.CallNode)):
                     dig_node(subnode, sb, clh)
                 elif isinstance(subnode, ct.Invoked):
                     sb.append_callee(intern(subnode.callee))
                     lits = subnode.literals
                     if lits:
                         sb.extend_literal(lits)
                         clh.append(lits)
                 else:
                     assert False
                 nodesum = sb.to_summary()
                 nodesum.literals = intern_literals(nodesum.literals, clh)
                 summary_table[lbl] = nodesum
             parent_summary_builder.append_summary(nodesum, lbl)
             parent_summary_builder.append_callee(invoked.callee)
             child_literals_holder.append(nodesum.literals)
             stack.pop()
         return
     elif isinstance(node, ct.Invoked):
         parent_summary_builder.append_callee(intern(node.callee))
         if node.literals:
             parent_summary_builder.extend_literal(node.literals)
             child_literals_holder.append(node.literals)
     else:
         assert False
Example #20
    def parse_full(cls, line_string):
        match = cls._line_regex.match(line_string.decode('utf8', errors='replace'))
        if match is None:
            # raise ValueError ("not a valid log line (%r)" % (line_string,))
            groups = [0, 0, 0, 0, "", "", 0, "", "", 0]
            return cls(groups)

        line = cls(match.groups())
        # Timestamp.
        line[0] = parse_time(line[0])
        # PID.
        line[1] = int(line[1])
        # Thread.
        line[2] = int(line[2], 16)
        # Level (this is handled in LineCache).
        line[3] = 0
        # Line.
        line[6] = int(line[6])
        # Message start offset.
        line[9] = match.start(9 + 1)

        for col_id in (4,   # COL_CATEGORY
                       5,   # COL_FILENAME
                       7,   # COL_FUNCTION,
                       8,):  # COL_OBJECT
            line[col_id] = sys.intern(line[col_id] or "")

        return line
Example #21
def string_from_print(d):
    """create a string from p4 print dict

    This is a noop for unicode servers, because p4python returns strings.

    But for non-unicode servers, when running 'p4 print' we use "raw" encoding
    with p4python to avoid mangling file content, so we get back bytes from
    p4python, which need to be decoded according to the locale encoding"""
    if type(d) == str:
        return sys.intern(d)
    try:
        return sys.intern(d.decode(locale.nl_langinfo(locale.CODESET)))
    except UnicodeDecodeError:
        replaced = d.decode(locale.nl_langinfo(locale.CODESET), 'replace').replace('\ufffd', '?')
        msg = _('Error decoding file path: {}').format(replaced)
        raise RuntimeError(msg)
Example #22
def read_phones(path, dialect, sr = None):
    output = []
    with open(path,'r') as file_handle:
        if dialect == 'timit':
            if sr is None:
                sr = 16000
            for line in file_handle:

                l = line.strip().split(' ')
                start = float(l[0])
                end = float(l[1])
                label = l[2]
                if sr is not None:
                    start /= sr
                    end /= sr
                output.append(BaseAnnotation(label, start, end))
        elif dialect == 'buckeye':
            header_pattern = re.compile(r"#\r{0,1}\n")
            line_pattern = re.compile(r"\s+\d{3}\s+")
            label_pattern = re.compile(r" {0,1};| {0,1}\+")
            f = header_pattern.split(file_handle.read())[1]
            flist = f.splitlines()
            begin = 0.0
            for l in flist:
                line = line_pattern.split(l.strip())
                end = float(line[0])
                label = sys.intern(label_pattern.split(line[1])[0])
                output.append(BaseAnnotation(label, begin, end))
                begin = end

        else:
            raise(NotImplementedError)
    return output
Example #23
def read_phones(path):
    """
    From a buckeye file, reads the phone lines, appends label, begin, and end to output
    
    Parameters
    ----------
    path : str
        path to file
    
    Returns
    -------
    output : list of tuples
        each tuple is label, begin, end for a phone

    """
    output = []
    with open(path,'r') as file_handle:
        header_pattern = re.compile(r"#\r{0,1}\n")
        line_pattern = re.compile(r"\s+\d{3}\s+")
        label_pattern = re.compile(r" {0,1};| {0,1}\+")
        f = header_pattern.split(file_handle.read())[1]
        flist = f.splitlines()
        begin = 0.0
        for l in flist:
            line = line_pattern.split(l.strip())
            try:
                end = float(line[0])
            except ValueError: # Missing phone label
                print('Warning: no label found in line: \'{}\''.format(l))
                continue
            label = sys.intern(label_pattern.split(line[1])[0])
            output.append((label, begin, end))
            begin = end
    return output
Example #24
 def __init__(self, name):
     if isinstance(name, Property):
         self._name = name._name
         self._hash = name._hash
     else:
         self._name = intern(name)
         self._hash = id(self._name)
Example #25
def read_words(path, dialect, sr = None):
    output = list()
    with open(path,'r') as file_handle:
        if dialect == 'buckeye':
            f = re.split(r"#\r{0,1}\n",file_handle.read())[1]
            line_pattern = re.compile(r"; | \d{3} ")
            begin = 0.0
            flist = f.splitlines()
            for l in flist:
                line = line_pattern.split(l.strip())
                end = float(line[0])
                word = sys.intern(line[1])
                if word[0] != "<" and word[0] != "{":
                    try:
                        citation = line[2].split(' ')
                        phonetic = line[3].split(' ')
                        category = line[4]
                    except:
                        citation = None
                        phonetic = None
                        category = None
                else:
                    citation = None
                    phonetic = None
                    category = None
                if word in FILLERS:
                    category = 'UH'
                line = {'spelling':word,'begin':begin,'end':end,
                        'transcription':citation,'surface_transcription':phonetic,
                        'category':category}
                output.append(line)
                begin = end
        else:
            raise(NotImplementedError)
    return output
Example #26
 def __init__(self, name, positive_properties=None,
              negative_properties=None):
     if not isinstance(name, str):
         raise TypeError(name, str)
     self._name = intern(name)
     self._positive_properties = (
         frozenset([Property(prop) for prop in positive_properties])
         if positive_properties
         else frozenset()
     )
     self._negative_properties = (
         frozenset([Property(prop) for prop in negative_properties])
         if negative_properties
         else frozenset()
     )
     if self._positive_properties & self._negative_properties:
         raise ValueError("Property is both positive and negative.")
     # This works because we intern the name & properties beforehand:
     self._hash = (
         id(self._name) ^
         reduce(
             lambda a, b: a ^ hash(b),
             self._positive_properties,
             0) ^
         reduce(
             lambda a, b: a ^ -hash(b),
             self._negative_properties,
             0
         )
     )
Example #27
 def _read_section_data(self, leaf, data):
     records = []
     while data:
         contig, start, end = unpack("=III", data[:12])
         data = data[12:]
         _null = data.find(b'\0')
         fields = data[:_null].split(b'\t')
         name = fields[0] if fields else sys.intern('.')
         score = float(fields[1]) if len(fields) > 1 else numpy.nan
         strand = fields[2] if len(fields) > 2 else sys.intern('.')
         rest = tuple(fields[3:]) if len(fields) > 3 else None
         record = BED(self._contig_by_id[contig].name, 
                      start, end, name, score, strand, rest)
         records.append(record)
         data = data[(_null+1):]
     return records
Example #28
 def _extract_path(s, encoding):
     pos = len(s)
     count = 3
     while count > 0 and pos > 0 and s[pos - 1] == 0:
         pos -= 1
         count -= 1
     return intern(s[0:pos].decode(encoding))
Example #29
 def unique_names(self):
   names = set()
   for t in self.techniques:
     while t.name in names:
       t.name += '~'
     t.name = sys.intern(t.name)
     names.add(t.name)
Example #30
    def tokenize(self, doc, stop_rule=lambda token: False):
        """
        Tokenizes a document.
        This is a very naive tokenizer;
        i.e. it has no stop words,
        since we need those words to generate convincing speech.
        It also strips punctuation from the beginning and end of tokens,
        except for '@' at the beginning of a token.

        Optionally provide a `stop_rule` function,
        which should return True if a token should be stopped on.
        """
        tokens = []
        punctuation = string.punctuation.replace('@', '') + '“”‘’–"'

        for token in doc.split(' '):
            # This saves memory by having
            # duplicate strings just point to the same memory.
            token = sys.intern(token.strip(punctuation))

            # Ignore punctuation and stopwords
            if not token or stop_rule(token):
                continue

            tokens.append(token.lower())
        return tokens
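Interning every token means the many repeated words of a corpus collapse into single shared string objects before they are stored. A hypothetical call, assuming tok is an instance of the class this method belongs to:

tokens = tok.tokenize("Hello world! Hello @world...")
# -> ['hello', 'world', 'hello', '@world']  (trailing punctuation stripped, leading '@' kept)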
Example #31
# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' %
                         (_stringdefs.xid_start, _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = sys.intern('add')
TOKEN_ASSIGN = sys.intern('assign')
TOKEN_COLON = sys.intern('colon')
TOKEN_COMMA = sys.intern('comma')
TOKEN_DIV = sys.intern('div')
TOKEN_DOT = sys.intern('dot')
TOKEN_EQ = sys.intern('eq')
TOKEN_FLOORDIV = sys.intern('floordiv')
TOKEN_GT = sys.intern('gt')
TOKEN_GTEQ = sys.intern('gteq')
TOKEN_LBRACE = sys.intern('lbrace')
TOKEN_LBRACKET = sys.intern('lbracket')
TOKEN_LPAREN = sys.intern('lparen')
TOKEN_LT = sys.intern('lt')
TOKEN_LTEQ = sys.intern('lteq')
TOKEN_MOD = sys.intern('mod')
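Interning the token-type names guarantees that every occurrence of, say, 'add' resolves to the very same string object, so comparisons against these constants are effectively pointer comparisons. An illustrative check (not taken from Jinja2 itself):

assert sys.intern('add') is TOKEN_ADD   # equal strings interned later map onto the same object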
Example #32
def read_data(
    input_file,
    method,
    omit_bias=False,
    omit_lexemes=False,
    max_lines=None,
    bins=None,
    seed=-1,
    training_fraction=1.0,
):
    # read learning trace data in specified format, see README for details
    sys.stderr.write("reading data...")

    if method == "hlr-pw":
        num_quantiles = len(bins) - 1
        quantile_intervals = list(zip(bins[:-1], bins[1:]))
    else:
        num_quantiles, quantile_intervals = None, []

    instances = list()
    if input_file.endswith("gz"):
        f = gzip.open(input_file, "rb")
    else:
        f = open(input_file, "r")
    reader = csv.DictReader(f)
    for i, row in enumerate(reader):
        if max_lines is not None and i >= max_lines:
            break
        p = pclip(float(row["p_recall"]))
        t = float(row["delta"]) / (60 * 60 * 24)  # convert time delta to days
        h = hclip(-t / (math.log(p, 2)))
        lang = "%s->%s" % (row["ui_language"], row["learning_language"])
        # lexeme_id = row['lexeme_id']
        lexeme_string = row["lexeme_string"]
        timestamp = int(row["timestamp"])
        user_id = row["user_id"]
        seen = int(row["history_seen"])
        right = int(row["history_correct"])
        wrong = seen - right
        right_this = int(row["session_correct"])
        wrong_this = int(row["session_seen"]) - right_this
        # feature vector is a list of (feature, value) tuples
        fv = []
        # core features based on method
        if method == "leitner":
            fv.append((intern("diff"), right - wrong))
        elif method == "pimsleur":
            fv.append((intern("total"), right + wrong))
        elif method == "hlr" or method == "power":
            fv.append((intern("right"), right))
            fv.append((intern("wrong"), wrong))
            # fv.append((intern('right'), math.sqrt(1+right)))
            # fv.append((intern('wrong'), math.sqrt(1+wrong)))
        elif method == "hlr-pw":
            # Now need to fill in the right_{quantile} for each row.
            for q in range(num_quantiles):
                in_this_quantile = (quantile_intervals[q][0] <= t <
                                    quantile_intervals[q][1])
                fv.append(("right_%d" % q, right if in_this_quantile else 0))
                fv.append(("wrong_%d" % q, wrong if in_this_quantile else 0))
        else:
            raise Exception("Unknown method {}".format(method))

        # optional flag features
        if method == "lr":
            fv.append((intern("time"), t))
        if not omit_bias:
            fv.append((intern("bias"), 1.0))
        if not omit_lexemes:
            # fv.append((intern('%s:%s' % (row['learning_language'], lexeme_string)), 1.))
            # Remove the 'de:' prefix.
            fv.append((intern(lexeme_string), 1.0))
        instances.append(
            Instance(
                p,
                t,
                fv,
                h,
                (right + 2.0) / (seen + 4.0),
                lang,
                right_this,
                wrong_this,
                timestamp,
                user_id,
                lexeme_string,
            ))
        if i % 1000000 == 0:
            sys.stderr.write("%d..." % i)
    sys.stderr.write("done!\n")
    splitpoint = int(0.9 * len(instances))

    if seed > 0:
        sys.stderr.write("Shuffling with seed %d.\n" % seed)
        random.seed(seed)
        random.shuffle(instances)

    training = instances[:int(splitpoint * training_fraction)]
    testing = instances[splitpoint:]

    return training, testing
Example #33
        def apply_transform(bug):

            is_couple = isinstance(bug, tuple)

            if not is_couple:
                bug_id = bug["id"]

                if self.rollback and bug_id not in already_rollbacked:
                    bug = bug_snapshot.rollback(bug, self.rollback_when)
                    already_rollbacked.add(bug_id)

            else:
                bug1_id = bug[0]["id"]
                bug2_id = bug[1]["id"]

                if self.rollback:
                    if bug1_id not in already_rollbacked:
                        bug[0] = bug_snapshot.rollback(bug[0],
                                                       self.rollback_when)
                        already_rollbacked.add(bug1_id)
                    if bug2_id not in already_rollbacked:
                        bug[1] = bug_snapshot.rollback(bug[1],
                                                       self.rollback_when)
                        already_rollbacked.add(bug2_id)

            data = {}

            for feature_extractor in self.feature_extractors:
                res = None
                if isinstance(feature_extractor,
                              single_bug_feature) and not is_couple:
                    res = feature_extractor(
                        bug,
                        reporter_experience=reporter_experience_map[
                            bug["creator"]],
                        author_ids=author_ids,
                    )

                elif isinstance(feature_extractor,
                                couple_bug_feature) and is_couple:
                    res = feature_extractor(bug)

                if hasattr(feature_extractor, "name"):
                    feature_extractor_name = feature_extractor.name
                else:
                    feature_extractor_name = feature_extractor.__class__.__name__

                if res is None:
                    continue

                if isinstance(res, (list, set)):
                    for item in res:
                        data[sys.intern(
                            f"{item} in {feature_extractor_name}")] = "True"
                    continue

                if isinstance(res, bool):
                    res = str(res)

                data[feature_extractor_name] = res

            if is_couple:
                reporter_experience_map[bug[0]["creator"]] += 1
                reporter_experience_map[bug[1]["creator"]] += 1

                return {"data": data}

            else:
                reporter_experience_map[bug["creator"]] += 1

                # TODO: Try simply using all possible fields instead of extracting features manually.

                summary = bug["summary"]
                comments = [c["text"] for c in bug["comments"]]
                for cleanup_function in self.cleanup_functions:
                    summary = cleanup_function(summary)
                    comments = [
                        cleanup_function(comment) for comment in comments
                    ]

                return {
                    "data": data,
                    "title": summary,
                    "first_comment": "" if len(comments) == 0 else comments[0],
                    "comments": " ".join(comments),
                }
Example #34
    class_namespace = {
        '__doc__': f'{typename}({arg_list})',
        '__slots__': (),
        '_fields': field_names,
        '_field_defaults': field_defaults,
        # alternate spelling for backward compatibility
        '_fields_defaults': field_defaults,
        '__new__': __new__,
        '_make': _make,
        '_replace': _replace,
        '__repr__': __repr__,
        '_asdict': _asdict,
        '__getnewargs__': __getnewargs__,
    }
    for index, name in enumerate(field_names):
        doc = _sys.intern(f'Alias for field number {index}')
        class_namespace[name] = _tuplegetter(index, doc)

    result = type(typename, (tuple, ), class_namespace)

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython), or where the user has
    # specified a particular module.
    if module is None:
        try:
            module = _sys._getframe(1).f_globals.get('__name__', '__main__')
        except (AttributeError, ValueError):
            pass
    if module is not None:
Example #35
    def _unpack(self, execute=EX_CONSTRUCT):
        typ, n, obj = self._read_header(execute)

        if execute == EX_READ_ARRAY_HEADER:
            if typ != TYPE_ARRAY:
                raise ValueError("Expected array")
            return n
        if execute == EX_READ_MAP_HEADER:
            if typ != TYPE_MAP:
                raise ValueError("Expected map")
            return n
        # TODO should we eliminate the recursion?
        if typ == TYPE_ARRAY:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call `list_hook`
                    self._unpack(EX_SKIP)
                return
            ret = newlist_hint(n)
            for i in xrange(n):
                ret.append(self._unpack(EX_CONSTRUCT))
            if self._list_hook is not None:
                ret = self._list_hook(ret)
            # TODO is the interaction between `list_hook` and `use_list` ok?
            return ret if self._use_list else tuple(ret)
        if typ == TYPE_MAP:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call hooks
                    self._unpack(EX_SKIP)
                    self._unpack(EX_SKIP)
                return
            if self._object_pairs_hook is not None:
                ret = self._object_pairs_hook(
                    (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT))
                    for _ in xrange(n)
                )
            else:
                ret = {}
                for _ in xrange(n):
                    key = self._unpack(EX_CONSTRUCT)
                    if self._strict_map_key and type(key) not in (unicode, bytes):
                        raise ValueError(
                            "%s is not allowed for map key" % str(type(key))
                        )
                    if not PY2 and type(key) is str:
                        key = sys.intern(key)
                    ret[key] = self._unpack(EX_CONSTRUCT)
                if self._object_hook is not None:
                    ret = self._object_hook(ret)
            return ret
        if execute == EX_SKIP:
            return
        if typ == TYPE_RAW:
            if self._raw:
                obj = bytes(obj)
            else:
                obj = obj.decode("utf_8", self._unicode_errors)
            return obj
        if typ == TYPE_BIN:
            return bytes(obj)
        if typ == TYPE_EXT:
            if n == -1:  # timestamp
                ts = Timestamp.from_bytes(bytes(obj))
                if self._timestamp == 1:
                    return ts.to_unix()
                elif self._timestamp == 2:
                    return ts.to_unix_nano()
                elif self._timestamp == 3:
                    return ts.to_datetime()
                else:
                    return ts
            else:
                return self._ext_hook(n, bytes(obj))
        assert typ == TYPE_IMMEDIATE
        return obj
Example #36
float_re = re.compile(
    r"""
    (?<!\.)  # doesn't start with a .
    (\d+_)*\d+  # digits, possibly _ separated
    (
        (\.(\d+_)*\d+)?  # optional fractional part
        e[+\-]?(\d+_)*\d+  # exponent part
    |
        \.(\d+_)*\d+  # required fractional part
    )
    """,
    re.IGNORECASE | re.VERBOSE,
)

# intern the tokens and keep references to them
TOKEN_ADD = intern("add")
TOKEN_ASSIGN = intern("assign")
TOKEN_COLON = intern("colon")
TOKEN_COMMA = intern("comma")
TOKEN_DIV = intern("div")
TOKEN_DOT = intern("dot")
TOKEN_EQ = intern("eq")
TOKEN_FLOORDIV = intern("floordiv")
TOKEN_GT = intern("gt")
TOKEN_GTEQ = intern("gteq")
TOKEN_LBRACE = intern("lbrace")
TOKEN_LBRACKET = intern("lbracket")
TOKEN_LPAREN = intern("lparen")
TOKEN_LT = intern("lt")
TOKEN_LTEQ = intern("lteq")
TOKEN_MOD = intern("mod")
Example #37
"""String interning machinery."""

import sys

last_letter = "d"

a = sys.intern("Hello World")
b = sys.intern("Hello Worl" + last_letter)

print("The ID of a: {}".format(id(a)))
print("The ID of b: {}".format(id(b)))
print("a is b? {}".format(a is b))
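Because both strings are passed through sys.intern, the two printed IDs are identical and the last line prints "a is b? True": interning maps equal strings onto a single shared object, even when one of them is built at runtime by concatenation.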
Example #38
 def position(i):
     return sys.intern('p%d' % (i, ))
Example #39
 def value(i):
     return sys.intern('n%d' % (i, ))
Example #40
while True:

    N,=read_vals()
    if N == 0:
        break
    
    start, target  = input().split()


    
    G = defaultdict(list)

    for _ in range(N):
        toks  = input().split()
        toks = [sys.intern(t) for t in toks]
        node = toks[0]
        toks = toks[2:-1]
        cost  = len(list(filter(lambda x: x[0] == '"', toks)))
        ch = list(filter(lambda x: x[0] != '"', toks))
        goal = target in toks
        G[node].append((cost,ch,goal))

    #print("G", G)
    mincosts = {n:-2 for n in G}
    gcosts = {n:-2 for n in G}

    visited = set()#start]

    def visit(n,visited_,mincosts_):
        ##print("visit", n,  "visited=", visited)
Example #41
def namedtuple(typename,
               field_names,
               *,
               rename=False,
               defaults=None,
               module=None):
    """Returns a new subclass of tuple with named fields.

    >>> Point = namedtuple('Point', ['x', 'y'])
    >>> Point.__doc__                   # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
    >>> p[0] + p[1]                     # indexable like a plain tuple
    33
    >>> x, y = p                        # unpack like a regular tuple
    >>> x, y
    (11, 22)
    >>> p.x + p.y                       # fields also accessible by name
    33
    >>> d = p._asdict()                 # convert to a dictionary
    >>> d['x']
    11
    >>> Point(**d)                      # convert from a dictionary
    Point(x=11, y=22)
    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
    Point(x=100, y=22)

    """

    # Validate the field names.  At the user's option, either generate an error
    # message or automatically replace the field name with a valid name.
    if isinstance(field_names, str):
        field_names = field_names.replace(',', ' ').split()
    field_names = list(map(str, field_names))
    typename = _sys.intern(str(typename))

    if rename:
        seen = set()
        for index, name in enumerate(field_names):
            if (not name.isidentifier() or _iskeyword(name)
                    or name.startswith('_') or name in seen):
                field_names[index] = f'_{index}'
            seen.add(name)

    for name in [typename] + field_names:
        if type(name) is not str:
            raise TypeError('Type names and field names must be strings')
        if not name.isidentifier():
            raise ValueError('Type names and field names must be valid '
                             f'identifiers: {name!r}')
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a '
                             f'keyword: {name!r}')

    seen = set()
    for name in field_names:
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: '
                             f'{name!r}')
        if name in seen:
            raise ValueError(f'Encountered duplicate field name: {name!r}')
        seen.add(name)

    field_defaults = {}
    if defaults is not None:
        defaults = tuple(defaults)
        if len(defaults) > len(field_names):
            raise TypeError('Got more default values than field names')
        field_defaults = dict(
            reversed(list(zip(reversed(field_names), reversed(defaults)))))

    # Variables used in the methods and docstrings
    field_names = tuple(map(_sys.intern, field_names))
    num_fields = len(field_names)
    arg_list = repr(field_names).replace("'", "")[1:-1]
    repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')'
    tuple_new = tuple.__new__
    _dict, _tuple, _len, _map, _zip = dict, tuple, len, map, zip

    # Create all the named tuple methods to be added to the class namespace

    s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))'
    namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'}
    # Note: exec() has the side-effect of interning the field names
    exec(s, namespace)
    __new__ = namespace['__new__']
    __new__.__doc__ = f'Create new instance of {typename}({arg_list})'
    if defaults is not None:
        __new__.__defaults__ = defaults

    @classmethod
    def _make(cls, iterable):
        result = tuple_new(cls, iterable)
        if _len(result) != num_fields:
            raise TypeError(
                f'Expected {num_fields} arguments, got {len(result)}')
        return result

    _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence '
                              'or iterable')

    def _replace(self, /, **kwds):
        result = self._make(_map(kwds.pop, field_names, self))
        if kwds:
            raise ValueError(f'Got unexpected field names: {list(kwds)!r}')
        return result
Example #42
    def _read_episode(self, data_loader):
        """
        Read one episode at a time from the provided iterable over entries.

        :param data_loader:
            an iterable which returns tuples in the format described in the
            class docstring.
        """
        episode = []
        last_cands = None
        for entry, new in data_loader:
            if new and len(episode) > 0:
                yield tuple(episode)
                episode = []
                last_cands = None

            # intern all strings so we don't store them more than once
            # TODO: clean up the if .. sys.intern else None by refactoring
            new_entry = []
            if len(entry) > 0:
                # process text if available
                if entry[0] is not None:
                    new_entry.append(sys.intern(entry[0]))
                else:
                    new_entry.append(None)
                # TODO: unindent all of these one level.
                if len(entry) > 1:
                    # process labels if available
                    if entry[1] is None:
                        new_entry.append(None)
                    elif hasattr(entry[1], '__iter__') and type(
                            entry[1]) is not str:
                        # TODO: this could use the abc collections
                        # make sure iterable over labels, not single string
                        new_entry.append(tuple(
                            sys.intern(e) for e in entry[1]))
                    else:
                        raise TypeError(
                            'Must provide iterable over labels, not a single string.'
                        )
                if len(entry) > 2:
                    # process reward if available
                    if entry[2] is not None:
                        new_entry.append(entry[2])
                    else:
                        new_entry.append(None)
                if len(entry) > 3:
                    # process label candidates if available
                    if entry[3] is None:
                        new_entry.append(None)
                    elif last_cands and entry[3] is last_cands:
                        # if cands are shared, say "same" so we
                        # don't store them again
                        # TODO: This is bad, and it's not actually used anywhere
                        # DEPRECATIONDAY: make this more rational
                        new_entry.append(sys.intern('same as last time'))
                    elif hasattr(entry[3], '__iter__') and type(
                            entry[3]) is not str:
                        # make sure iterable over candidates, not single string
                        last_cands = entry[3]
                        new_entry.append(tuple(
                            sys.intern(e) for e in entry[3]))
                    else:
                        raise TypeError(
                            'Must provide iterable over label candidates, '
                            'not a single string.')
                if len(entry) > 4 and entry[4] is not None:
                    new_entry.append(sys.intern(entry[4]))

            episode.append(tuple(new_entry))

        if len(episode) > 0:
            yield tuple(episode)
Example #43
from vcweb.core import signals, simplecache
from vcweb.core.models import (DefaultValue, ExperimentMetadata, Parameter, ParticipantRoundDataValue,
                               GroupRelationship, RoundConfiguration, get_participant_ready_parameter)
from vcweb.experiment.forestry.models import (
    get_harvest_decision_parameter, get_harvest_decision, get_group_harvest_parameter,
    get_reset_resource_level_parameter, get_regrowth_parameter, get_initial_resource_level_parameter,
    set_resource_level as forestry_set_resource_level,
    set_harvest_decision as forestry_set_harvest_decision,
    get_resource_level_parameter, get_resource_level_dv as get_unshared_resource_level_dv,
    get_group_harvest_dv, get_regrowth_dv,)

logger = logging.getLogger(__name__)

# FIXME: hacky, figure out a better way to bind module with its dependent
# ExperimentMetadata instance
EXPERIMENT_METADATA_NAME = sys.intern('bound')
# constants that should live in configuration as well
MAX_RESOURCE_LEVEL = 240
MAX_SHARED_RESOURCE_LEVEL = 480

INITIAL_RESOURCES_PER_PARTICIPANT_PER_ROUND = 3

'''
Experiment parameters and metadata accessors
'''

set_harvest_decision = forestry_set_harvest_decision
set_resource_level = forestry_set_resource_level


@simplecache
Example #44
"""Data structures and algorithms for generic expansion and
refactorisation."""

from collections import Counter, OrderedDict, defaultdict, namedtuple
from itertools import product
from sys import intern

from gem.node import Memoizer, traversal
from gem.gem import Node, Zero, Product, Sum, Indexed, ListTensor, one
from gem.optimise import (remove_componenttensors, sum_factorise,
                          traverse_product, traverse_sum, unroll_indexsum,
                          expand_conditional, make_rename_map, make_renamer)

# Refactorisation labels

ATOMIC = intern('atomic')
"""Label: the expression need not be broken up into smaller parts"""

COMPOUND = intern('compound')
"""Label: the expression must be broken up into smaller parts"""

OTHER = intern('other')
"""Label: the expression is irrelevant with regards to refactorisation"""

Monomial = namedtuple('Monomial', ['sum_indices', 'atomics', 'rest'])
"""Monomial type, representation of a tensor product with some
distinguished factors (called atomics).

- sum_indices: indices to sum over
- atomics: tuple of expressions classified as ATOMIC
- rest: a single expression classified as OTHER
Example #45
def intern_str(string):
    if six.PY3:
        return sys.intern(str(string))

    return intern(str(string))
Example #46
'''
Created on Oct 5, 2010

@author: Mark V Systems Limited
(c) Copyright 2010 Mark V Systems Limited, All rights reserved.
'''

# initialize object from loaded linkbases
from collections import defaultdict
from arelle import ModelDtsObject, XbrlConst, XmlUtil, ModelValue
from arelle.ModelObject import ModelObject
from arelle.ModelDtsObject import ModelResource
from arelle.PrototypeDtsObject import LocPrototype, PrototypeObject
from arelle.XbrlConst import consecutiveArcrole
import os, sys

USING_EQUIVALENCE_KEY = sys.intern(_STR_8BIT(
    "using_equivalence_key"))  # indicates hash entry replaced with keyed entry
NoneType = type(None)


def create(modelXbrl,
           arcrole,
           linkrole=None,
           linkqname=None,
           arcqname=None,
           includeProhibits=False):
    return ModelRelationshipSet(modelXbrl, arcrole, linkrole, linkqname,
                                arcqname, includeProhibits)


def ineffectiveArcs(baseSetModelLinks, arcrole, arcqname=None):
    hashEquivalentRels = defaultdict(list)
Example #47
def plambda_intern(string):
    return sys.intern(string) if sys.version_info[0] > 2 else intern(string)
Example #48
 def load_interned(self):
     n = self.r_long()
     ret = intern(self._read(n))
     self._stringtable.append(ret)
     return ret
Example #49
def namedtuple(typename,
               field_names,
               *,
               rename=False,
               defaults=None,
               module=None):
    """Returns a new subclass of tuple with named fields.

    >>> Point = namedtuple('Point', ['x', 'y'])
    >>> Point.__doc__                   # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
    >>> p[0] + p[1]                     # indexable like a plain tuple
    33
    >>> x, y = p                        # unpack like a regular tuple
    >>> x, y
    (11, 22)
    >>> p.x + p.y                       # fields also accessible by name
    33
    >>> d = p._asdict()                 # convert to a dictionary
    >>> d['x']
    11
    >>> Point(**d)                      # convert from a dictionary
    Point(x=11, y=22)
    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
    Point(x=100, y=22)

    """

    # Validate the field names.  At the user's option, either generate an error
    # message or automatically replace the field name with a valid name.
    if isinstance(field_names, str):
        field_names = field_names.replace(',', ' ').split()
    field_names = list(map(str, field_names))
    typename = _sys.intern(str(typename))

    if rename:
        seen = set()
        for index, name in enumerate(field_names):
            if (not name.isidentifier() or _iskeyword(name)
                    or name.startswith('_') or name in seen):
                field_names[index] = f'_{index}'
            seen.add(name)

    for name in [typename] + field_names:
        if type(name) is not str:
            raise TypeError('Type names and field names must be strings')
        if not name.isidentifier():
            raise ValueError('Type names and field names must be valid '
                             f'identifiers: {name!r}')
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a '
                             f'keyword: {name!r}')

    seen = set()
    for name in field_names:
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: '
                             f'{name!r}')
        if name in seen:
            raise ValueError(f'Encountered duplicate field name: {name!r}')
        seen.add(name)

    field_defaults = {}
    if defaults is not None:
        defaults = tuple(defaults)
        if len(defaults) > len(field_names):
            raise TypeError('Got more default values than field names')
        field_defaults = dict(
            reversed(list(zip(reversed(field_names), reversed(defaults)))))

    # Variables used in the methods and docstrings
    field_names = tuple(map(_sys.intern, field_names))
    num_fields = len(field_names)
    arg_list = repr(field_names).replace("'", "")[1:-1]
    repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')'
    tuple_new = tuple.__new__
    _len = len

    # Create all the named tuple methods to be added to the class namespace

    s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))'
    namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'}
    # Note: exec() has the side-effect of interning the field names
    exec(s, namespace)
    __new__ = namespace['__new__']
    __new__.__doc__ = f'Create new instance of {typename}({arg_list})'
    if defaults is not None:
        __new__.__defaults__ = defaults

    @classmethod
    def _make(cls, iterable):
        result = tuple_new(cls, iterable)
        if _len(result) != num_fields:
            raise TypeError(
                f'Expected {num_fields} arguments, got {len(result)}')
        return result

    _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence '
                              'or iterable')

    def _replace(_self, **kwds):
        result = _self._make(map(kwds.pop, field_names, _self))
        if kwds:
            raise ValueError(f'Got unexpected field names: {list(kwds)!r}')
        return result

    _replace.__doc__ = (f'Return a new {typename} object replacing specified '
                        'fields with new values')

    def __repr__(self):
        'Return a nicely formatted representation string'
        return self.__class__.__name__ + repr_fmt % self

    def _asdict(self):
        'Return a new OrderedDict which maps field names to their values.'
        return OrderedDict(zip(self._fields, self))

    def __getnewargs__(self):
        'Return self as a plain tuple.  Used by copy and pickle.'
        return tuple(self)

    # Modify function metadata to help with introspection and debugging

    for method in (__new__, _make.__func__, _replace, __repr__, _asdict,
                   __getnewargs__):
        method.__qualname__ = f'{typename}.{method.__name__}'

    # Build-up the class namespace dictionary
    # and use type() to build the result class
    class_namespace = {
        '__doc__': f'{typename}({arg_list})',
        '__slots__': (),
        '_fields': field_names,
        '_fields_defaults': field_defaults,
        '__new__': __new__,
        '_make': _make,
        '_replace': _replace,
        '__repr__': __repr__,
        '_asdict': _asdict,
        '__getnewargs__': __getnewargs__,
    }
    cache = _nt_itemgetters
    for index, name in enumerate(field_names):
        try:
            itemgetter_object, doc = cache[index]
        except KeyError:
            itemgetter_object = _itemgetter(index)
            doc = f'Alias for field number {index}'
            cache[index] = itemgetter_object, doc
        class_namespace[name] = property(itemgetter_object, doc=doc)

    result = type(typename, (tuple, ), class_namespace)

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython), or where the user has
    # specified a particular module.
    if module is None:
        try:
            module = _sys._getframe(1).f_globals.get('__name__', '__main__')
        except (AttributeError, ValueError):
            pass
    if module is not None:
        result.__module__ = module

    return result
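# Illustrative usage sketch (not part of the CPython source above): because the
# typename and every field name are passed through sys.intern, equal field-name
# strings built elsewhere resolve to the same objects once interned.
import sys
from collections import namedtuple

Point = namedtuple('Point', ['x', 'y'])
p = Point(11, 22)
assert p._fields[0] is sys.intern('x')   # field names were interned by namedtuple()
assert p._asdict()['y'] == 22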
Exemplo n.º 50
0
def intern_string(s):
    return intern(s)
Exemplo n.º 51
0
def bin_fast5_file(f5_path, tax_annot_res_dir, sens, min_qual, min_qlen,
                   min_pident, min_coverage, no_trash):
    # Function bins FAST5 file with untwisting.
    #
    # :param f5_path: path to FAST5 file meant to be processed;
    # :type f5_path: str;
    # :param tax_annot_res_dir: path to directory containing taxonomic annotation;
    # :type tax_annot_res_dir: str;
    # :param sens: binning sensitivity;
    # :type sens: str;
    # :param min_qual: threshold for quality filter;
    # :type min_qual: float;
    # :param min_qlen: threshold for length filter;
    # :type min_qlen: int (or None, if this filter is disabled);
    # :param min_pident: threshold for alignment identity filter;
    # :type min_pident: float (or None, if this filter is disabled);
    # :param min_coverage: threshold for alignment coverage filter;
    # :type min_coverage: float (or None, if this filter is disabled);
    # :param no_trash: logical value; True if the user does NOT want trash files to be written;
    # :type no_trash: bool;

    outdir_path = os.path.dirname(
        logging.getLoggerClass().root.handlers[0].baseFilename)

    seqs_pass = 0  # counter for sequences that pass all filters
    QL_seqs_fail = 0  # counter for sequences that are too short or of too low quality
    align_seqs_fail = 0  # counter for sequences whose best-hit alignment has too low identity or coverage

    srt_file_dict = dict()

    index_dirpath = os.path.join(
        tax_annot_res_dir,
        index_name)  # path to the directory that contains the indices

    # Configure path to "classification not found" file
    classif_not_found_fpath = get_classif_not_found_fpath(f5_path, outdir_path)
    not_found_key = 'CLASSIF_NOT_FOUND'

    # Make filter for quality and length
    QL_filter = get_QL_filter(f5_path, min_qual, min_qlen)
    # Configure path to trash file
    if not no_trash:
        QL_trash_fpath = get_QL_trash_fpath(
            f5_path,
            outdir_path,
            min_qual,
            min_qlen,
        )
    else:
        QL_trash_fpath = None
    # end if

    # Make filter for identity and coverage
    align_filter = get_align_filter(min_pident, min_coverage)
    # Configure path to this trash file
    if not no_trash:
        align_trash_fpath = get_align_trash_fpath(f5_path, outdir_path,
                                                  min_pident, min_coverage)
    else:
        align_trash_fpath = None
    # end if

    # File validation:
    #   RuntimeError will be raised if FAST5 file is broken.
    try:
        # File existence checking is performed while parsing CL arguments.
        # Therefore, this check triggers only if the file at f5_path is not a valid HDF5 file.
        if not h5py.is_hdf5(f5_path):
            raise RuntimeError("file is not of HDF5 (i.e. not FAST5) format")
        # end if

        from_f5 = h5py.File(f5_path, 'r')

        for _ in from_f5:
            break
        # end for
    except RuntimeError as runterr:
        printlog_error_time("FAST5 file is broken")
        printlog_error("Reading the file `{}` crashed.".format(
            os.path.basename(f5_path)))
        printlog_error("Reason: {}".format(str(runterr)))
        printlog_error("Omitting this file...")
        print()
        # Return zeroes -- inc_val won't be incremented and this file will be omitted
        return (0, 0, 0)
    # end try

    # singleFAST5 and multiFAST5 files should be processed in different ways.
    # The "Raw" group is always in the singleFAST5 root and never in the multiFAST5 root.
    if "Raw" in from_f5.keys():
        f5_cpy_func = copy_single_f5
    else:
        f5_cpy_func = copy_read_f5_2_f5
    # end if

    readids_to_seek = []  # list of not-yet-binned read IDs

    # Fill the list 'readids_to_seek'
    for read_name in fast5_readids(from_f5):
        # Intern read IDs so that repeated names share a single string object
        readids_to_seek.append(sys.intern(read_name))
    # end for

    # Walk through the index
    index_f5_2_tsv = open_shelve(os.path.join(index_dirpath, index_name), 'r')

    if f5_path not in index_f5_2_tsv.keys():
        printlog_error_time(
            "Source FAST5 file `{}` not found in index".format(f5_path))
        printlog_error("Try to rebuild index")
        platf_depend_exit(1)
    # end if

    for tsv_path in index_f5_2_tsv[f5_path].keys():

        read_names = index_f5_2_tsv[f5_path][tsv_path]

        if tsv_path == not_found_key:
            for read_name in read_names:
                # Place this sequence into the "classification not found" file
                if classif_not_found_fpath not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict,
                                                     classif_not_found_fpath)
                # end if
                f5_cpy_func(from_f5, read_name,
                            srt_file_dict[classif_not_found_fpath])
            # end for
            continue
        # end if

        taxonomy_path = os.path.join(tax_annot_res_dir, "taxonomy",
                                     "taxonomy.tsv")
        resfile_lines = configure_resfile_lines(tsv_path, sens, taxonomy_path)

        for read_name in read_names:
            try:
                hit_names, *vals_to_filter = resfile_lines[sys.intern(
                    fmt_read_id(read_name)[1:])]
            except KeyError:
                # Place this sequence into the "classification not found" file
                if classif_not_found_fpath not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict,
                                                     classif_not_found_fpath)
                # end if
                f5_cpy_func(from_f5, read_name,
                            srt_file_dict[classif_not_found_fpath])
                continue
            # end try

            if not QL_filter(vals_to_filter):
                # Get the name of the result FAST5 file to write this read to
                if QL_trash_fpath not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict,
                                                     QL_trash_fpath)
                # end if
                f5_cpy_func(from_f5, read_name, srt_file_dict[QL_trash_fpath])
                QL_seqs_fail += 1
            elif not align_filter(vals_to_filter):
                # Get the name of the result FAST5 file to write this read to
                if align_trash_fpath not in srt_file_dict.keys():
                    srt_file_dict = update_file_dict(srt_file_dict,
                                                     align_trash_fpath)
                # end if
                f5_cpy_func(from_f5, read_name,
                            srt_file_dict[align_trash_fpath])
                align_seqs_fail += 1
            else:
                # there can be multiple hits for a single query sequence
                for hit_name in hit_names.split("&&"):
                    # Get the name of the result FAST5 file to write this read to
                    binned_file_path = os.path.join(
                        outdir_path, "{}.fast5".format(hit_name))
                    if binned_file_path not in srt_file_dict.keys():
                        srt_file_dict = update_file_dict(
                            srt_file_dict, binned_file_path)
                    # end if
                    f5_cpy_func(from_f5, read_name,
                                srt_file_dict[binned_file_path])
                # end for
                seqs_pass += 1
            # end if
        # end for

    from_f5.close()
    index_f5_2_tsv.close()

    # Close all binned files
    for file_obj in filter(lambda x: x is not None, srt_file_dict.values()):
        file_obj.close()
    # end for

    return (seqs_pass, QL_seqs_fail, align_seqs_fail)
Exemplo n.º 52
0
 def __init__(self, type="none", actions=None):
     self.type = intern(type)
     self.actions = [] if actions is None else actions
Exemplo n.º 53
0
 def __init__(self, time, username, text):
     self.time = time.replace(microsecond=0, second=0, minute=0)
     self.username = intern(username)
     self.text = text.strip()
Exemplo n.º 54
0
def _reference_intern(args):
    return sys.intern(args[0])
Exemplo n.º 55
0
 def __init__(self, type, value):
     if type not in self.VALID_TYPES:
         raise ValueError("invalid type %r" % type)
     self.type = intern(type)
     self.value = value
Exemplo n.º 56
0
 def testNoIntern(self):
     s = marshal.loads(marshal.dumps(self.strobj, 2))
     self.assertEqual(s, self.strobj)
     self.assertNotEqual(id(s), id(self.strobj))
     s2 = sys.intern(s)
     self.assertNotEqual(id(s2), id(s))
Exemplo n.º 57
0
if typing.TYPE_CHECKING:
    import datetime

    import aiohttp.http_websocket
    import aiohttp.typedefs

    from hikari import channels
    from hikari import config
    from hikari import guilds
    from hikari import users as users_
    from hikari.api import event_factory as event_factory_
    from hikari.api import event_manager as event_manager_

# Important attributes
_D: typing.Final[str] = sys.intern("d")
_T: typing.Final[str] = sys.intern("t")
_S: typing.Final[str] = sys.intern("s")
_OP: typing.Final[str] = sys.intern("op")

# Opcodes.
_DISPATCH: typing.Final[int] = 0
_HEARTBEAT: typing.Final[int] = 1
_IDENTIFY: typing.Final[int] = 2
_PRESENCE_UPDATE: typing.Final[int] = 3
_VOICE_STATE_UPDATE: typing.Final[int] = 4
_RESUME: typing.Final[int] = 6
_RECONNECT: typing.Final[int] = 7
_REQUEST_GUILD_MEMBERS: typing.Final[int] = 8
_INVALID_SESSION: typing.Final[int] = 9
_HELLO: typing.Final[int] = 10
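# Hypothetical dispatch sketch (not from hikari itself): with the payload keys
# interned and the opcodes plain ints, routing a decoded gateway payload is a
# couple of cheap dict lookups and integer comparisons.
def _route(payload: dict) -> str:
    op = payload[_OP]
    if op == _DISPATCH:
        return f"dispatch {payload[_T]} (seq {payload[_S]})"
    if op == _HEARTBEAT:
        return "heartbeat requested"
    return f"unhandled opcode {op}"

# e.g. _route({"op": 0, "t": "MESSAGE_CREATE", "s": 42, "d": {}}) -> "dispatch MESSAGE_CREATE (seq 42)"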
Exemplo n.º 58
0
 def __new__(cls, lineno, type, value):
     return tuple.__new__(cls, (lineno, sys.intern(str(type)), value))
Exemplo n.º 59
0
def comb(point, value):
    'Format a fact (a value assigned to a given point)'
    return intern(f'{point} {value}')
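# Illustrative usage (not part of the solver above): because comb() interns each
# formatted fact, the same fact built twice is literally the same object, so
# facts can be deduplicated and compared by identity.
a = comb('A1', 7)
b = comb('A1', 7)
assert a == 'A1 7'
assert a is b          # one shared interned string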
Exemplo n.º 60
0
    def _read_episode(self, data_loader):
        """Reads one episode at a time from the provided iterable over entries.

        :param data_loader: an iterable which returns tuples in the format
                            described in the class docstring.
        """
        episode = []
        last_cands = None
        for entry, new in data_loader:
            if new and len(episode) > 0:
                yield tuple(episode)
                episode = []
                last_cands = None

            # intern all strings so we don't store them more than once
            new_entry = []
            if len(entry) > 0:
                # process text if available
                if entry[0] is not None:
                    new_entry.append(sys.intern(entry[0]))
                else:
                    new_entry.append(None)
                if len(entry) > 1:
                    # process labels if available
                    if entry[1] is None:
                        new_entry.append(None)
                    elif hasattr(entry[1], '__iter__') and type(entry[1]) is not str:
                        # make sure iterable over labels, not single string
                        new_entry.append(tuple(sys.intern(e) for e in entry[1]))
                    else:
                        raise TypeError(
                            'Must provide iterable over labels, not a single string.'
                        )
                    if len(entry) > 2:
                        # process reward if available
                        if entry[2] is not None:
                            new_entry.append(entry[2])
                        else:
                            new_entry.append(None)
                        if len(entry) > 3:
                            # process label candidates if available
                            if entry[3] is None:
                                new_entry.append(None)
                            elif last_cands and entry[3] is last_cands:
                                # if cands are shared, say "same" so we
                                # don't store them again
                                new_entry.append(
                                    sys.intern('same as last time'))
                            elif (hasattr(entry[3], '__iter__') and
                                    type(entry[3]) is not str):
                                # make sure iterable over candidates, not single string
                                last_cands = entry[3]
                                new_entry.append(tuple(
                                    sys.intern(e) for e in entry[3]))
                            else:
                                raise TypeError(
                                    'Must provide iterable over label candidates, '
                                    'not a single string.'
                                )
                            if len(entry) > 4 and entry[4] is not None:
                                new_entry.append(sys.intern(entry[4]))

            episode.append(tuple(new_entry))

        if len(episode) > 0:
            yield tuple(episode)
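# Illustrative sketch (not part of the teacher class above): interning is what
# makes the deduplication work: identical strings produced separately still
# collapse to one shared object, so each distinct utterance is stored only once
# however many episodes reference it.
import sys

word = 'there'
copies = [sys.intern('hello ' + word) for _ in range(3)]  # three separate concatenations
assert all(s is copies[0] for s in copies)                # interning collapses them to one object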