예제 #1
0
파일: ktable.py 프로젝트: cooiky/simDHT
    def get_neighbors(self, target):
        """Return up to K nodes closest to ``target`` by XOR distance.

        Starts from the bucket selected by ``self.bucket_index(target)`` and
        widens the search one bucket at a time in both directions until at
        least K candidates are collected or every bucket has been visited.
        Candidates are then ordered by ``intify(target) ^ intify(node.nid)``.

        Args:
            target: identifier to search around; must have length 20.

        Returns:
            A list of at most K nodes, nearest first. An empty list is
            returned when there are no buckets or ``target`` has the wrong
            length. If an IndexError occurs along the way, the candidates
            gathered so far are returned as they are.
        """
        nodes = []
        if len(self.buckets) == 0:
            return nodes
        if len(target) != 20:
            return nodes

        index = self.bucket_index(target)
        try:
            # Work on a copy: extending or sorting the bucket's own list would
            # leak neighbouring buckets' nodes into it and reorder it.
            nodes = list(self.buckets[index].nodes)
            lower = index - 1
            upper = index + 1

            while len(nodes) < K and (lower >= 0 or upper < len(self.buckets)):
                if lower >= 0:
                    nodes.extend(self.buckets[lower].nodes)

                if upper < len(self.buckets):
                    nodes.extend(self.buckets[upper].nodes)

                lower -= 1
                upper += 1

            num = intify(target)
            # A key function gives the same stable ordering as the old cmp
            # callback and also works on Python 3.
            nodes.sort(key=lambda node: num ^ intify(node.nid))
            return nodes[:K]
        except IndexError:
            return nodes
예제 #2
0
    def findCloseNodes(self, target, n=K):
        """Find the n nodes closest to the target node ID or infohash.

        Starts from the bucket selected by ``self.bucketIndex(target)`` and
        widens the search one bucket at a time in both directions until at
        least ``n`` candidates are collected or every bucket has been visited.
        Candidates are then ordered by ``intify(target) ^ intify(node.nid)``.

        Args:
            target: node ID or infohash to search around.
            n: maximum number of nodes to return (defaults to K).

        Returns:
            A list of at most ``n`` nodes, nearest first. An empty list is
            returned when there are no buckets. If an IndexError occurs along
            the way, the candidates gathered so far are returned as they are.
        """
        nodes = []
        if len(self.buckets) == 0:
            return nodes

        index = self.bucketIndex(target)
        try:
            # Work on a copy: extending or sorting the bucket's own list would
            # leak neighbouring buckets' nodes into it and reorder it.
            nodes = list(self.buckets[index].nodes)
            lower = index - 1
            upper = index + 1

            while len(nodes) < n and (lower >= 0 or upper < len(self.buckets)):
                # There is still a bucket before the current window.
                if lower >= 0:
                    nodes.extend(self.buckets[lower].nodes)

                # There is still a bucket after the current window.
                if upper < len(self.buckets):
                    nodes.extend(self.buckets[upper].nodes)

                lower -= 1
                upper += 1

            # Sort by XOR value, smallest first. A key function gives the same
            # stable ordering as the old cmp callback and works on Python 3.
            num = intify(target)
            nodes.sort(key=lambda node: num ^ intify(node.nid))
            return nodes[:n]
        except IndexError:
            return nodes
예제 #3
0
  def _solve(self):
    """Encode the fence puzzle, run the solver and return its result.

    One boolean variable is created per horizontal and per vertical fence
    segment, the fences are required to form a single closed loop, and every
    cell holding a clue must be surrounded by exactly that many fences.

    Returns:
      (num_solutions, solution) where ``solution`` maps 'horizontalFences'
      and 'verticalFences' to nested lists of ``utils.intify`` values.
    """
    # Use 8 bits for efficiency
    set_bits(8)

    hFences = utils.makeGrid(self.cols, self.rows+1, lambda: BoolVar())
    vFences = utils.makeGrid(self.rows, self.cols+1, lambda: BoolVar())
    utils.require_single_closed_loop_v2(hFences, vFences)

    # Each clue fixes how many of the four fences around its cell are set.
    for row in range(self.rows):
      for col in range(self.cols):
        clue = self.board.getCell(col, row)
        if clue is None:
          continue
        around = [
          hFences[row][col],
          hFences[row+1][col],
          vFences[col][row],
          vFences[col+1][row],
        ]
        require(sum_bools(clue, around))

    count = solve(quiet=True)
    horizontal = [[utils.intify(fence) for fence in line] for line in hFences]
    vertical = [[utils.intify(fence) for fence in line] for line in vFences]
    return (count, {'horizontalFences': horizontal, 'verticalFences': vertical})
예제 #4
0
파일: ktable.py 프로젝트: Gitsyshk/simDHT
    def findCloseNodes(self, target, n=K):
        """Find the n nodes closest to the target node ID or infohash.

        The bucket at ``self.bucketIndex(target)`` is consulted first; buckets
        on either side are then added, one step at a time, until at least
        ``n`` candidates exist or no buckets remain. The candidates are sorted
        by ``intify(target) ^ intify(node.nid)`` in ascending order.

        Args:
            target: node ID or infohash to search around.
            n: maximum number of nodes to return (defaults to K).

        Returns:
            A list of at most ``n`` nodes, nearest first; an empty list when
            there are no buckets. If an IndexError occurs along the way, the
            candidates gathered so far are returned as they are.
        """
        nodes = []
        if len(self.buckets) == 0:
            return nodes

        index = self.bucketIndex(target)
        try:
            # Copy before collecting: the bucket's own list must not be
            # extended with other buckets' nodes or re-sorted in place.
            nodes = list(self.buckets[index].nodes)
            before = index - 1
            after = index + 1

            while len(nodes) < n and (before >= 0 or after < len(self.buckets)):
                # Still room to step towards the start of the table.
                if before >= 0:
                    nodes.extend(self.buckets[before].nodes)

                # Still room to step towards the end of the table.
                if after < len(self.buckets):
                    nodes.extend(self.buckets[after].nodes)

                before -= 1
                after += 1

            # Sort by XOR value, smallest first. A key function gives the same
            # stable ordering as the old cmp callback and works on Python 3.
            num = intify(target)
            nodes.sort(key=lambda node: num ^ intify(node.nid))
            return nodes[:n]
        except IndexError:
            return nodes
예제 #5
0
def get_seqfile_info(fname, is_data, germline_seqs=None, cyst_positions=None, tryp_positions=None, n_max_queries=-1, queries=None, reco_ids=None):
    """Return sequence info read from a csv, tsv, fasta or fastq file.

    The format is picked by looking for '.csv', '.tsv', '.fasta'/'.fa' or
    '.fastq'/'.fq' anywhere in ``fname``.

    Args:
        fname: path of the sequence file.
        is_data: when false, the three germline/position arguments are
            required and per-line match info is added via
            ``utils.add_match_info``.
        germline_seqs, cyst_positions, tryp_positions: passed through to
            ``utils.add_match_info``; must not be None unless ``is_data``.
        n_max_queries: stop after this many accepted records when positive.
        queries: if given, only records whose name is in it are kept.
        reco_ids: if given, only records whose 'reco_id' is in it are kept.

    Returns:
        ``(input_info, reco_info)``: two OrderedDicts. ``input_info`` maps
        each record name to ``{'unique_id': ..., 'seq': ...}``; ``reco_info``
        maps ``line['unique_id']`` to the full line and is only filled when
        ``is_data`` is false.

    Raises:
        AssertionError: on an unrecognized file name, when nothing was read,
            or when required arguments are missing for non-data input.
    """
    if not is_data:
        assert germline_seqs is not None
        assert cyst_positions is not None
        assert tryp_positions is not None

    seqfile = None  # only the csv/tsv branches keep a file handle open
    if '.csv' in fname:
        name_column, seq_column = 'unique_id', 'seq'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter=',')
    elif '.tsv' in fname:
        name_column, seq_column = 'name', 'nucleotide'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter='\t')
    elif '.fasta' in fname or '.fa' in fname or '.fastq' in fname or '.fq' in fname:
        name_column, seq_column = 'unique_id', 'seq'
        # Check for fastq first: '.fa' is a substring of '.fastq', so testing
        # the fasta markers first would parse every .fastq file as fasta.
        ftype = 'fastq' if ('.fastq' in fname or '.fq' in fname) else 'fasta'
        reader = []
        for seq_record in SeqIO.parse(fname, ftype):
            reader.append({name_column: seq_record.name,
                           seq_column: str(seq_record.seq).upper()})
            if n_max_queries > 0 and len(reader) >= n_max_queries:
                break
    else:
        print('ERROR unrecognized file format %s' % fname)
        assert False

    input_info, reco_info = OrderedDict(), OrderedDict()
    n_queries = 0
    try:
        for line in reader:
            # presumably converts numeric fields in place -- confirm in utils
            utils.intify(line)
            # if command line specified query or reco ids, skip other ones
            if queries is not None and line[name_column] not in queries:
                continue
            if reco_ids is not None and line['reco_id'] not in reco_ids:
                continue

            input_info[line[name_column]] = {'unique_id':line[name_column], 'seq':line[seq_column]}
            if not is_data:
                reco_info[line['unique_id']] = line
                utils.add_match_info(germline_seqs, line, cyst_positions, tryp_positions)
            n_queries += 1
            if n_max_queries > 0 and n_queries >= n_max_queries:
                break
    finally:
        # The csv reader pulls rows lazily, so the handle can only be
        # released once iteration has finished (or failed).
        if seqfile is not None:
            seqfile.close()

    if len(input_info) == 0:
        print('ERROR didn\'t end up pulling any input info out of %s' % fname)
        assert False
    return (input_info, reco_info)
예제 #6
0
    def _solve(self):
        """Encode the circle constraints on a single loop and solve.

        Boolean variables are created for the horizontal and vertical loop
        segments, which must form one closed loop. Cells whose value is 1
        (white circles) and 2 (black circles) each add one requirement built
        from the segments around them.

        Returns:
            (num_solutions, solution) where ``solution`` maps
            'horizontalFences' and 'verticalFences' to nested lists of
            ``utils.intify`` values.
        """
        hSegs = utils.makeGrid(self.cols - 1, self.rows, lambda: BoolVar())
        vSegs = utils.makeGrid(self.rows - 1, self.cols, lambda: BoolVar())
        utils.require_single_closed_loop_v2(hSegs, vSegs)

        def hori(x, y):
            # Horizontal segment at (x, y); False when outside the grid.
            if 0 <= y < self.rows and 0 <= x < self.cols - 1:
                return hSegs[y][x]
            return False

        def vert(x, y):
            # Vertical segment at (x, y); False when outside the grid.
            if 0 <= y < self.rows - 1 and 0 <= x < self.cols:
                return vSegs[x][y]
            return False

        for y in range(self.rows):
            for x in range(self.cols):
                kind = self.board.getCell(x, y)
                if kind == 1:
                    # Requirements on white circles
                    viaLeft = (hori(x - 1, y) & hori(x, y)
                               & (vert(x - 1, y - 1) | vert(x - 1, y)))
                    viaRight = (hori(x - 1, y) & hori(x, y)
                                & (vert(x + 1, y - 1) | vert(x + 1, y)))
                    viaTop = (vert(x, y - 1) & vert(x, y)
                              & (hori(x - 1, y - 1) | hori(x, y - 1)))
                    viaBottom = (vert(x, y - 1) & vert(x, y)
                                 & (hori(x - 1, y + 1) | hori(x, y + 1)))
                    require(viaLeft | viaRight | viaTop | viaBottom)
                elif kind == 2:
                    # Requirements on black circles
                    upLeft = (hori(x - 2, y) & hori(x - 1, y)
                              & vert(x, y - 1) & vert(x, y - 2))
                    upRight = (hori(x, y) & hori(x + 1, y)
                               & vert(x, y - 1) & vert(x, y - 2))
                    downLeft = (hori(x - 2, y) & hori(x - 1, y)
                                & vert(x, y) & vert(x, y + 1))
                    downRight = (hori(x, y) & hori(x + 1, y)
                                 & vert(x, y) & vert(x, y + 1))
                    require(upLeft | upRight | downLeft | downRight)

        count = solve(quiet=True)
        horizontal = [[utils.intify(seg) for seg in line] for line in hSegs]
        vertical = [[utils.intify(seg) for seg in line] for line in vSegs]
        return (count, {
            'horizontalFences': horizontal,
            'verticalFences': vertical,
        })
예제 #7
0
 def _mean(soup):
     """Return the mean figure of this row and whether it was monthly.

     The text is taken from the first span with class 'minor' inside the
     first td with class 'mean'. A value counts as monthly when its text
     contains "mo"; monthly values are multiplied by 12.

     Returns a ``(mean, monthly)`` tuple.
     """
     cell = soup.findAll('td', {'class': 'mean'})[0]
     text = cell.findAll('span', {'class': 'minor'})[0].text
     monthly = "mo" in text
     value = intify(text)
     if monthly:
         value = value * 12
     return value, monthly
예제 #8
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _mean(soup):
     """Extract the row's mean value together with a monthly-period flag.

     Looks up the first td with class 'mean', then the first span with class
     'minor' inside it. If that span's text contains "mo" the figure is
     treated as monthly and scaled by 12; otherwise it is returned as is.

     Returns a ``(mean, monthly)`` tuple.
     """
     mean_cell = soup.findAll('td', {'class': 'mean'})[0]
     mean_text = mean_cell.findAll('span', {'class': 'minor'})[0].text
     is_monthly = "mo" in mean_text
     amount = intify(mean_text)
     if is_monthly:
         amount = amount * 12
     return amount, is_monthly
예제 #9
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _connections(soup):
     """Return the number found in the 'OverviewInsideConnections' div.

     Uses the first tt with class 'notranslate' inside the first div whose
     id is 'OverviewInsideConnections' and passes its text to ``intify``.
     """
     container = soup.findAll('div', {'id': 'OverviewInsideConnections'})[0]
     label = container.findAll('tt', {'class': 'notranslate'})[0]
     return intify(label.text)
예제 #10
0
 def _approval(soup):
     """Return the approval figure as converted by ``intify``.

     Reads the first tt with class 'notranslate' inside the first span with
     class 'approvalPercent'.
     """
     percent_span = soup.findAll('span', {'class': 'approvalPercent'})[0]
     figure = percent_span.findAll('tt', {'class': 'notranslate'})[0]
     return intify(figure.text)
예제 #11
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _reviews(soup):
     """Return the count shown in the 'numCEORatings minor' span.

     Reads the first tt with class 'notranslate' inside the first span whose
     class is 'numCEORatings minor' and passes its text to ``intify``.
     """
     ratings_span = soup.findAll('span', {'class': 'numCEORatings minor'})[0]
     figure = ratings_span.findAll('tt', {'class': 'notranslate'})[0]
     return intify(figure.text)
예제 #12
0
 def _connections(soup):
     """Extract the inside-connections figure from the page.

     Takes the first div with id 'OverviewInsideConnections', then the first
     tt with class 'notranslate' within it, and returns ``intify`` of its
     text.
     """
     overview = soup.findAll('div', {'id': 'OverviewInsideConnections'})[0]
     count_tag = overview.findAll('tt', {'class': 'notranslate'})[0]
     return intify(count_tag.text)
예제 #13
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _approval(soup):
     """Extract the approval figure from the page.

     Takes the first span with class 'approvalPercent', then the first tt
     with class 'notranslate' within it, and returns ``intify`` of its text.
     """
     approval_box = soup.findAll('span', {'class': 'approvalPercent'})[0]
     value_tag = approval_box.findAll('tt', {'class': 'notranslate'})[0]
     return intify(value_tag.text)
예제 #14
0
 def _reviews(soup):
     """Extract the figure inside the 'numCEORatings minor' span.

     Takes the first span with class 'numCEORatings minor', then the first
     tt with class 'notranslate' within it, and returns ``intify`` of its
     text.
     """
     outer = soup.findAll('span', {'class': 'numCEORatings minor'})[0]
     value_tag = outer.findAll('tt', {'class': 'notranslate'})[0]
     return intify(value_tag.text)
예제 #15
0
 def bucketIndex(self, target):
     """Locate the index of the bucket for the given node ID or infohash.

     The position is found with ``bisect_left`` over ``self.buckets`` using
     ``intify(target)`` as the search key.

     Any HashError raised along the way propagates unchanged. The previous
     catch-and-re-raise replaced it with a fresh, argument-less HashError and
     so discarded the original message and traceback.
     """
     return bisect_left(self.buckets, intify(target))
예제 #16
0
파일: ktable.py 프로젝트: ryancmu/simDHT
 def bucketIndex(self, target):
     """Return the index of the bucket that covers a node ID or infohash.

     ``intify(target)`` is used as the key for a ``bisect_left`` search over
     ``self.buckets``.

     A HashError raised during the lookup is allowed to propagate as is;
     catching it only to raise a new bare HashError threw away the original
     exception's message and traceback.
     """
     return bisect_left(self.buckets, intify(target))
예제 #17
0
    def _solve(self):
        """Encode the star-placement rules, solve, and return the result.

        Constraints added, in order: ``self.starCount`` stars in every row,
        in every column, no two stars touching (diagonals included), and
        ``self.starCount`` stars in every region. Regions are discovered by
        flood fill over ``self.board.border``: as used here,
        ``border[1][y][x]`` blocks the step between (x, y) and (x + 1, y) and
        ``border[0][y][x]`` blocks the step between (x, y) and (x, y + 1).

        Returns:
            (num_solutions, solution) where ``solution`` is a nested list of
            ``utils.intify`` values, one per cell.
        """
        stars = utils.makeGrid(self.cols, self.rows, lambda: BoolVar())

        # starCount stars per row
        for y in range(self.rows):
            require(sum_bools(self.starCount, stars[y]))

        # starCount stars per column
        for x in range(self.cols):
            column = [stars[y][x] for y in range(self.rows)]
            require(sum_bools(self.starCount, column))

        # stars can't be adjacent (including diagonal): a star must be the
        # only one in the 3x3 block centred on it
        offsets = list(itertools.product([-1, 0, 1], [-1, 0, 1]))
        for y in range(self.rows):
            for x in range(self.cols):
                block = []
                for (dx, dy) in offsets:
                    ny, nx = y + dy, x + dx
                    if 0 <= ny < self.rows and 0 <= nx < self.cols:
                        block.append(stars[ny][nx])
                    else:
                        block.append(False)
                require(cond(stars[y][x], sum_bools(1, block), True))

        # starCount stars per region.
        seen = utils.makeGrid(self.cols, self.rows, lambda: False)
        for y in range(self.rows):
            for x in range(self.cols):
                if seen[y][x]:
                    continue
                frontier = [(x, y)]
                region = []
                while frontier:
                    # Claim the whole frontier first so that its cells are
                    # not rediscovered from each other below.
                    for (cx, cy) in frontier:
                        seen[cy][cx] = True
                        region.append(stars[cy][cx])
                    found = []
                    for (cx, cy) in frontier:
                        if (cx - 1 >= 0 and not seen[cy][cx - 1]
                                and not self.board.border[1][cy][cx - 1]):
                            found.append((cx - 1, cy))
                        if (cx + 1 < self.cols and not seen[cy][cx + 1]
                                and not self.board.border[1][cy][cx]):
                            found.append((cx + 1, cy))
                        if (cy - 1 >= 0 and not seen[cy - 1][cx]
                                and not self.board.border[0][cy - 1][cx]):
                            found.append((cx, cy - 1))
                        if (cy + 1 < self.rows and not seen[cy + 1][cx]
                                and not self.board.border[0][cy][cx]):
                            found.append((cx, cy + 1))
                    # The same cell may be reached from several neighbours.
                    frontier = list(set(found))
                require(sum_bools(self.starCount, region))

        count = solve(quiet=True)
        grid = [[utils.intify(cell) for cell in line] for line in stars]
        return (count, grid)
예제 #18
0
    def _solve(self):
        """Build the light-placement constraints, solve, and return the result.

        ``ans[y][x]`` is the boolean "a light is placed on cell (x, y)".
        ``lit[y][x]`` is an atom that is required to hold for every cell whose
        board value is None; within each uninterrupted vertical or horizontal
        run of such cells, every ``lit`` atom gets ``prove_if`` for every
        placement variable of that run, and at most one light may be placed.

        Returns:
            (num_solutions, solution) where ``solution`` is a flat, row-major
            list of ``utils.intify`` values, one per cell.
        """
        ans = utils.makeGrid(self.cols, self.rows, lambda: BoolVar())
        lit = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        for y in range(self.rows):
            for x in range(self.cols):
                cell = self.board.getCell(x, y)
                # Lights can't be on black squares
                if cell is not None:
                    require(~ans[y][x])
                # Require the right number of neighbors for lights
                if cell is not None and cell >= 0:
                    neighbors = [
                        ans[y - 1][x] if y - 1 >= 0 else False,
                        ans[y + 1][x] if y + 1 < self.rows else False,
                        ans[y][x - 1] if x - 1 >= 0 else False,
                        ans[y][x + 1] if x + 1 < self.cols else False,
                    ]
                    require(sum_bools(cell, neighbors))
                # Prevent lights from shining on each other and ensure each
                # non-black cell is lit
                if cell is None:
                    require(lit[y][x])
                    # Only the topmost cell of a vertical run handles the run.
                    if y == 0 or self.board.getCell(x, y - 1) is not None:
                        arr = []
                        litArr = []
                        yy = y
                        while yy < self.rows and self.board.getCell(x, yy) is None:
                            arr.append(ans[yy][x])
                            litArr.append(lit[yy][x])
                            yy += 1
                        require(at_most(1, arr))
                        for a in arr:
                            for l in litArr:
                                l.prove_if(a)
                    # Only the leftmost cell of a horizontal run handles it.
                    if x == 0 or self.board.getCell(x - 1, y) is not None:
                        arr = []
                        litArr = []
                        xx = x
                        while xx < self.cols and self.board.getCell(xx, y) is None:
                            arr.append(ans[y][xx])
                            litArr.append(lit[y][xx])
                            xx += 1
                        require(at_most(1, arr))
                        for a in arr:
                            for l in litArr:
                                l.prove_if(a)

        num_solutions = solve(quiet=True)
        # Floor division keeps the row index an int under true division too.
        solution = [
            utils.intify(ans[i // self.cols][i % self.cols])
            for i in range(self.cols * self.rows)
        ]
        return (num_solutions, solution)
예제 #19
0
파일: sudoku.py 프로젝트: kcaze/claspy
  def _solve(self):
    """Encode the 9x9 sudoku constraints, solve, and return the result.

    Every row, column and 3x3 box must hold pairwise different values in
    1..9, and every cell the board provides a value for is fixed to it.

    Returns:
      (num_solutions, solution) where ``solution`` is a flat, row-major list
      of 81 ``utils.intify`` values.
    """
    ans = utils.makeGrid(9,9,lambda: IntVar(1,9))

    for i in range(9):
      require_all_diff(ans[i])
      require_all_diff([ans[j][i] for j in range(9)])
      # Box i: floor division keeps both indices integral under true
      # division as well.
      require_all_diff([ans[j//3 + i//3*3][j%3 + i%3*3] for j in range(9)])

    for y in range(9):
      for x in range(9):
        given = self.board.getCell(x,y)
        if given is not None:
          require(ans[y][x] == given)

    num_solutions = solve(quiet=True)
    solution = [utils.intify(ans[i//9][i%9]) for i in range(81)]
    return (num_solutions, solution)
예제 #20
0
    def read_hmm_output(self, algorithm, hmm_csv_outfname, make_clusters=True, count_parameters=False, parameter_out_dir=None, plotdir=None):
        """Read the hmm csv output and collect per-line results.

        Args:
            algorithm: 'viterbi' selects the annotation branch; any other
                value is handled as forward (pair scores).
            hmm_csv_outfname: path of the csv file to read.
            make_clusters: when false, each forward score line is printed.
            count_parameters: when true, parameter counters are filled,
                written to ``parameter_out_dir`` and (unless
                ``self.args.no_plot``) plotted to ``plotdir``.
            parameter_out_dir: required when ``count_parameters`` is true.
            plotdir: required when ``count_parameters`` is true.

        Returns:
            A list of dicts. For viterbi: a copy of each kept line with
            'unique_id' set. Otherwise: ``{'id_a', 'id_b', 'score'}`` for
            every line that has exactly two ids.

        Raises:
            AssertionError: if a line reports errors other than 'boundary',
                or if required directories are missing.
        """
        print('    read output')
        if count_parameters:
            assert parameter_out_dir is not None
            assert plotdir is not None
        pcounter = ParameterCounter(self.germline_seqs) if count_parameters else None
        true_pcounter = ParameterCounter(self.germline_seqs) if (count_parameters and not self.args.is_data) else None
        perfplotter = PerformancePlotter(self.germline_seqs, plotdir + '/hmm/performance', 'hmm') if self.args.plot_performance else None

        n_processed = 0
        hmminfo = []
        with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
            reader = csv.DictReader(hmm_csv_outfile)
            last_key = None
            boundary_error_queries = []
            for line in reader:
                utils.intify(line, splitargs=('unique_ids', 'seqs'))
                ids = line['unique_ids']
                this_key = utils.get_key(ids)
                same_event = from_same_event(self.args.is_data, True, self.reco_info, ids)
                id_str = ''.join(['%20s ' % i for i in ids])

                # check for errors
                if last_key != this_key:  # if this is the first line for this set of ids (i.e. the best viterbi path or only forward score)
                    errors = line['errors']
                    if errors is not None and 'boundary' in errors.split(':'):
                        boundary_error_queries.append(':'.join([str(uid) for uid in ids]))
                    else:
                        # A missing (None) errors field means "no errors";
                        # calling len() on it used to raise TypeError.
                        assert errors is None or len(errors) == 0

                if algorithm == 'viterbi':
                    line['seq'] = line['seqs'][0]  # add info for the best match as 'seq'
                    line['unique_id'] = ids[0]
                    utils.add_match_info(self.germline_seqs, line, self.cyst_positions, self.tryp_positions, debug=(self.args.debug > 0))

                    if last_key != this_key or self.args.plot_all_best_events:  # if this is the first line (i.e. the best viterbi path) for this query (or query pair), print the true event
                        n_processed += 1
                        if self.args.debug:
                            print('%s   %d' % (id_str, same_event))
                        if line['cdr3_length'] != -1 or not self.args.skip_unproductive:  # if it's productive, or if we're not skipping unproductive rearrangements
                            # Copy the line; values already in the line win
                            # over the seeded 'unique_id', as before.
                            entry = {'unique_id': line['unique_ids'][0]}
                            entry.update(line)
                            hmminfo.append(entry)
                            if pcounter is not None:  # increment counters (but only for the best [first] match)
                                pcounter.increment(line)
                            if true_pcounter is not None:  # increment true counters
                                true_pcounter.increment(self.reco_info[ids[0]])
                            if perfplotter is not None:
                                perfplotter.evaluate(self.reco_info[ids[0]], line)

                    if self.args.debug:
                        self.print_hmm_output(line, print_true=(last_key != this_key), perfplotter=perfplotter)
                    line['seq'] = None
                    line['unique_id'] = None

                else:  # for forward, write the pair scores to file to be read by the clusterer
                    if not make_clusters:  # self.args.debug or
                        print('%3d %10.3f    %s' % (same_event, float(line['score']), id_str))
                    if line['score'] == '-nan':
                        print('    WARNING encountered -nan, setting to -999999.0')
                        score = -999999.0
                    else:
                        score = float(line['score'])
                    if len(ids) == 2:
                        hmminfo.append({'id_a':line['unique_ids'][0], 'id_b':line['unique_ids'][1], 'score':score})
                    n_processed += 1

                last_key = utils.get_key(ids)

        if pcounter is not None:
            pcounter.write(parameter_out_dir)
            if not self.args.no_plot:
                pcounter.plot(plotdir, subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
        if true_pcounter is not None:
            true_pcounter.write(parameter_out_dir + '/true')
            if not self.args.no_plot:
                true_pcounter.plot(plotdir + '/true', subset_by_gene=True, cyst_positions=self.cyst_positions, tryp_positions=self.tryp_positions)
        if perfplotter is not None:
            perfplotter.plot()

        print('  processed %d queries' % n_processed)
        if len(boundary_error_queries) > 0:
            print('    %d boundary errors (%s)' % (len(boundary_error_queries), ', '.join(boundary_error_queries)))

        return hmminfo
예제 #21
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _ratings(soup):
     """Return how many times employees have rated this company.

     Reads the first span with class 'notranslate' inside the page's first
     h3, strips surrounding whitespace and hands the text to ``intify``.
     """
     heading = soup.findAll('h3')[0]
     count_span = heading.findAll('span', {'class': 'notranslate'})[0]
     return intify(count_span.text.strip())
예제 #22
0
 def _size(soup):
     """Return the figures listed in the 'moreData margTop5 subtle' div.

     Every tt with class 'notranslate' inside the first matching div is
     converted with ``intify``; the results are returned as a list in
     document order.
     """
     container = soup.findAll('div', {'class': 'moreData margTop5 subtle'})[0]
     figures = []
     for tag in container.findAll('tt', {'class': 'notranslate'}):
         figures.append(intify(tag.text))
     return figures
예제 #23
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _reviews(soup):
     """Return the figure shown in the 'numReviews subtle' span.

     Reads the first span with class 'txtShadowWhite' inside the first span
     with class 'numReviews subtle' and passes its text to ``intify``.
     """
     wrapper = soup.findAll('span', {'class': 'numReviews subtle'})[0]
     figure = wrapper.findAll('span', {'class': 'txtShadowWhite'})[0]
     return intify(figure.text)
예제 #24
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _size(soup):
     """Collect the size figures from the page.

     Takes the first div with class 'moreData margTop5 subtle' and converts
     the text of each tt with class 'notranslate' inside it with ``intify``.
     Returns the converted values as a list, in document order.
     """
     size_box = soup.findAll('div', {'class': 'moreData margTop5 subtle'})[0]
     converted = []
     for entry in size_box.findAll('tt', {'class': 'notranslate'}):
         converted.append(intify(entry.text))
     return converted
예제 #25
0
파일: building.py 프로젝트: kcaze/claspy
    def _solve(self):
        """Encode the skyscraper constraints, solve, and return the result.

        ``self.board.excell`` is split, in order, into the clues for the top
        (``self.cols`` entries), bottom (``self.cols``), left (``self.rows``)
        and right (the remainder) edges. Each cell holds an integer in
        1..``self.cols``; rows and columns must be pairwise different. A cell
        is viewable from a side when it is greater than every cell between it
        and that side, and each clue that is not None fixes the number of
        viewable cells in its line.

        Returns:
            (num_solutions, solution) where ``solution`` is a flat, row-major
            list of ``utils.intify`` values, one per cell.
        """
        top = self.board.excell[0:self.cols]
        bottom = self.board.excell[self.cols:2 * self.cols]
        left = self.board.excell[2 * self.cols:2 * self.cols + self.rows]
        right = self.board.excell[2 * self.cols + self.rows:]

        ans = utils.makeGrid(self.cols, self.rows,
                             lambda: IntVar(1, self.cols))
        topViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        bottomViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        leftViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        rightViewable = utils.makeGrid(self.cols, self.rows, lambda: Atom())

        # All numbers in columns and rows are distinct
        for i in range(self.rows):
            require_all_diff(ans[i])
        for i in range(self.cols):
            require_all_diff([ans[j][i] for j in range(self.rows)])
        # Prove skyscraper viewability
        for y in range(self.rows):
            for x in range(self.cols):
                fromTop = True
                for k in range(y):
                    fromTop = fromTop & (ans[y][x] > ans[k][x])
                topViewable[y][x].prove_if(fromTop)
                fromBottom = True
                for k in range(y + 1, self.rows):
                    fromBottom = fromBottom & (ans[y][x] > ans[k][x])
                bottomViewable[y][x].prove_if(fromBottom)
                fromLeft = True
                for k in range(x):
                    fromLeft = fromLeft & (ans[y][x] > ans[y][k])
                leftViewable[y][x].prove_if(fromLeft)
                fromRight = True
                for k in range(x + 1, self.cols):
                    fromRight = fromRight & (ans[y][x] > ans[y][k])
                rightViewable[y][x].prove_if(fromRight)
        # Ensure skyscraper viewable numbers
        for x in range(self.cols):
            if top[x] is not None:
                require(
                    sum_bools(top[x],
                              [topViewable[y][x] for y in range(self.rows)]))
            if bottom[x] is not None:
                require(
                    sum_bools(bottom[x],
                              [bottomViewable[y][x]
                               for y in range(self.rows)]))
        for y in range(self.rows):
            if left[y] is not None:
                require(
                    sum_bools(left[y],
                              [leftViewable[y][x] for x in range(self.cols)]))
            if right[y] is not None:
                require(
                    sum_bools(right[y],
                              [rightViewable[y][x] for x in range(self.cols)]))

        num_solutions = solve(quiet=True)
        # Floor division keeps the row index an int under true division too.
        solution = [
            utils.intify(ans[i // self.cols][i % self.cols])
            for i in range(self.rows * self.cols)
        ]
        return (num_solutions, solution)
예제 #26
0
 def bucketIndex(self, target):
     """Locate the index of the bucket for the given node ID or infohash.

     The position is the ``bisect_left`` insertion point of
     ``intify(target)`` within ``self.buckets``.
     """
     key = intify(target)
     return bisect_left(self.buckets, key)
예제 #27
0
파일: ktable.py 프로젝트: cooiky/simDHT
 def in_range(self, target):
     """Tell whether ``intify(target)`` lies in ``[self.min, self.max)``."""
     value = intify(target)
     return self.min <= value < self.max
예제 #28
0
파일: ktable.py 프로젝트: Gitsyshk/simDHT
 def inRange(self, target):
     """Whether the target node ID falls within this range.

     The range is half-open: ``self.min`` is included, ``self.max`` is not.
     """
     number = intify(target)
     return self.min <= number < self.max
    def read_hmm_output(self,
                        algorithm,
                        hmm_csv_outfname,
                        make_clusters=True,
                        count_parameters=False,
                        parameter_out_dir=None,
                        plotdir=None):
        """Read the hmm csv output and collect per-line results.

        Args:
            algorithm: 'viterbi' selects the annotation branch; any other
                value is handled as forward (pair scores).
            hmm_csv_outfname: path of the csv file to read.
            make_clusters: when false, each forward score line is printed.
            count_parameters: when true, parameter counters are filled,
                written to ``parameter_out_dir`` and (unless
                ``self.args.no_plot``) plotted to ``plotdir``.
            parameter_out_dir: required when ``count_parameters`` is true.
            plotdir: required when ``count_parameters`` is true.

        Returns:
            A list of dicts. For viterbi: a copy of each kept line with
            'unique_id' set. Otherwise: ``{'id_a', 'id_b', 'score'}`` for
            every line that has exactly two ids.

        Raises:
            AssertionError: if a line reports errors other than 'boundary',
                or if required directories are missing.
        """
        print('    read output')
        if count_parameters:
            assert parameter_out_dir is not None
            assert plotdir is not None
        pcounter = ParameterCounter(
            self.germline_seqs) if count_parameters else None
        true_pcounter = ParameterCounter(self.germline_seqs) if (
            count_parameters and not self.args.is_data) else None
        perfplotter = PerformancePlotter(
            self.germline_seqs, plotdir +
            '/hmm/performance', 'hmm') if self.args.plot_performance else None

        n_processed = 0
        hmminfo = []
        with opener('r')(hmm_csv_outfname) as hmm_csv_outfile:
            reader = csv.DictReader(hmm_csv_outfile)
            last_key = None
            boundary_error_queries = []
            for line in reader:
                utils.intify(line, splitargs=('unique_ids', 'seqs'))
                ids = line['unique_ids']
                this_key = utils.get_key(ids)
                same_event = from_same_event(self.args.is_data, True,
                                             self.reco_info, ids)
                id_str = ''.join(['%20s ' % i for i in ids])

                # check for errors
                if last_key != this_key:  # if this is the first line for this set of ids (i.e. the best viterbi path or only forward score)
                    errors = line['errors']
                    if errors is not None and 'boundary' in errors.split(':'):
                        boundary_error_queries.append(':'.join(
                            [str(uid) for uid in ids]))
                    else:
                        # A missing (None) errors field means "no errors";
                        # calling len() on it used to raise TypeError.
                        assert errors is None or len(errors) == 0

                if algorithm == 'viterbi':
                    line['seq'] = line['seqs'][
                        0]  # add info for the best match as 'seq'
                    line['unique_id'] = ids[0]
                    utils.add_match_info(self.germline_seqs,
                                         line,
                                         self.cyst_positions,
                                         self.tryp_positions,
                                         debug=(self.args.debug > 0))

                    if last_key != this_key or self.args.plot_all_best_events:  # if this is the first line (i.e. the best viterbi path) for this query (or query pair), print the true event
                        n_processed += 1
                        if self.args.debug:
                            print('%s   %d' % (id_str, same_event))
                        if line['cdr3_length'] != -1 or not self.args.skip_unproductive:  # if it's productive, or if we're not skipping unproductive rearrangements
                            # Copy the line; values already in the line win
                            # over the seeded 'unique_id', as before.
                            entry = {'unique_id': line['unique_ids'][0]}
                            entry.update(line)
                            hmminfo.append(entry)
                            if pcounter is not None:  # increment counters (but only for the best [first] match)
                                pcounter.increment(line)
                            if true_pcounter is not None:  # increment true counters
                                true_pcounter.increment(self.reco_info[ids[0]])
                            if perfplotter is not None:
                                perfplotter.evaluate(self.reco_info[ids[0]],
                                                     line)

                    if self.args.debug:
                        self.print_hmm_output(
                            line,
                            print_true=(last_key != this_key),
                            perfplotter=perfplotter)
                    line['seq'] = None
                    line['unique_id'] = None

                else:  # for forward, write the pair scores to file to be read by the clusterer
                    if not make_clusters:  # self.args.debug or
                        print('%3d %10.3f    %s' % (
                            same_event, float(line['score']), id_str))
                    if line['score'] == '-nan':
                        print('    WARNING encountered -nan, setting to -999999.0')
                        score = -999999.0
                    else:
                        score = float(line['score'])
                    if len(ids) == 2:
                        hmminfo.append({
                            'id_a': line['unique_ids'][0],
                            'id_b': line['unique_ids'][1],
                            'score': score
                        })
                    n_processed += 1

                last_key = utils.get_key(ids)

        if pcounter is not None:
            pcounter.write(parameter_out_dir)
            if not self.args.no_plot:
                pcounter.plot(plotdir,
                              subset_by_gene=True,
                              cyst_positions=self.cyst_positions,
                              tryp_positions=self.tryp_positions)
        if true_pcounter is not None:
            true_pcounter.write(parameter_out_dir + '/true')
            if not self.args.no_plot:
                true_pcounter.plot(plotdir + '/true',
                                   subset_by_gene=True,
                                   cyst_positions=self.cyst_positions,
                                   tryp_positions=self.tryp_positions)
        if perfplotter is not None:
            perfplotter.plot()

        print('  processed %d queries' % n_processed)
        if len(boundary_error_queries) > 0:
            print('    %d boundary errors (%s)' % (
                len(boundary_error_queries), ', '.join(boundary_error_queries)))

        return hmminfo
예제 #30
0
 def _ratings(soup):
     """Return the number of times this company has been rated by employees.

     Looks at the first ``<h3>`` in ``soup``, takes the first ``<span>`` with
     class ``notranslate`` inside it, and hands that span's stripped text to
     ``intify``. Raises IndexError if either element is missing.
     """
     heading = soup.findAll('h3')[0]
     count_span = heading.findAll('span', {'class': 'notranslate'})[0]
     return intify(count_span.text.strip())
예제 #31
0
    def _solve(self):
        """Build the constraint model for ``self.board`` and solve it.

        Every cell gets an integer variable ``ans``; cells for which
        ``self.board.getCell`` returns a value are pinned to it. The remaining
        constraints tie each cell's value to a tree of same-valued cells
        (``flow``/``connected``/``counts``) and keep distinct trees with the
        same value from touching (``rootIdxs``).

        ``require``, ``cond``, ``sum_vars``, ``IntVar``, ``MultiVar``, ``Atom``,
        ``set_max_val`` and ``solve`` come from the solver library used by this
        file; the comments below assume ``require`` asserts its argument as a
        constraint -- confirm against that library.

        Returns:
            ``(num_solutions, solution)`` where ``num_solutions`` is whatever
            ``solve(quiet=True)`` returns and ``solution`` is the row-major
            flat list of ``utils.intify(ans[y][x])``.
        """
        set_max_val(self.rows * self.cols)
        ans = utils.makeGrid(self.cols, self.rows, lambda: IntVar())
        # Pin every cell that has a given value on the board.
        for y in range(self.rows):
            for x in range(self.cols):
                if self.board.getCell(x, y) != None:
                    require(ans[y][x] == self.board.getCell(x, y))
        # Count number of same neighbors for special restrictions on 1's, 2's, 3's, and 4's.
        # edgesHorizontal[y][x]: cell (x, y) equals its right neighbor.
        # edgesVertical[y][x]:   cell (x, y) equals the cell below it.
        edgesHorizontal = [[] for i in range(self.rows)]
        edgesVertical = [[] for i in range(self.rows - 1)]
        sameNeighbors = [[] for i in range(self.rows)]
        for y in range(self.rows):
            for x in range(self.cols):
                if y + 1 < self.rows:
                    edgesVertical[y].append(ans[y][x] == ans[y + 1][x])
                if x + 1 < self.cols:
                    edgesHorizontal[y].append(ans[y][x] == ans[y][x + 1])
        # sameNeighbors[y][x]: sum of the (up to four) equality edges touching
        # the cell.
        for y in range(self.rows):
            for x in range(self.cols):
                sameVars = []
                if y - 1 >= 0:
                    sameVars.append(edgesVertical[y - 1][x])
                if y + 1 < self.rows:
                    sameVars.append(edgesVertical[y][x])
                if x - 1 >= 0:
                    sameVars.append(edgesHorizontal[y][x - 1])
                if x + 1 < self.cols:
                    sameVars.append(edgesHorizontal[y][x])
                sameNeighbors[y].append(sum_vars(sameVars))
        for y in range(self.rows):
            for x in range(self.cols):
                # value 1  <=>  no equal neighbor
                require(~((ans[y][x] == 1) ^ (sameNeighbors[y][x] == 0)))
                # value 2  =>  exactly one equal neighbor
                require(~((ans[y][x] == 2) & (sameNeighbors[y][x] != 1)))
                # value 3  =>  one or two equal neighbors
                require(~((ans[y][x] == 3) & (sameNeighbors[y][x] != 1)
                          & (sameNeighbors[y][x] != 2)))
                # value 4  =>  not all four neighbors equal
                require(~((ans[y][x] == 4) & (sameNeighbors[y][x] == 4)))

        # flow[y][x] is '.' for a root cell, otherwise the direction of the
        # neighbor this cell points at.
        flow = utils.makeGrid(self.cols, self.rows,
                              lambda: MultiVar('.', '>', '<', 'v', '^'))
        # Require cells that flow into each other to have the same value.
        # A direction that would leave the grid is forbidden outright.
        for y in range(self.rows):
            for x in range(self.cols):
                if y - 1 >= 0:
                    require(
                        cond(flow[y][x] == '^', edgesVertical[y - 1][x], True))
                else:
                    require(flow[y][x] != '^')
                if y + 1 < self.rows:
                    require(cond(flow[y][x] == 'v', edgesVertical[y][x], True))
                else:
                    require(flow[y][x] != 'v')
                if x - 1 >= 0:
                    require(
                        cond(flow[y][x] == '<', edgesHorizontal[y][x - 1],
                             True))
                else:
                    require(flow[y][x] != '<')
                if x + 1 < self.cols:
                    require(
                        cond(flow[y][x] == '>', edgesHorizontal[y][x], True))
                else:
                    require(flow[y][x] != '>')
        # Require each cell connected to a root
        # A cell is provable as connected if it is a root, or if it points at
        # a neighbor that is itself connected.
        connected = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        for y in range(self.rows):
            for x in range(self.cols):
                require(connected[y][x])
                connected[y][x].prove_if(flow[y][x] == '.')
                if y - 1 >= 0:
                    connected[y][x].prove_if((flow[y][x] == '^')
                                             & connected[y - 1][x])
                if y + 1 < self.rows:
                    connected[y][x].prove_if((flow[y][x] == 'v')
                                             & connected[y + 1][x])
                if x - 1 >= 0:
                    connected[y][x].prove_if((flow[y][x] == '<')
                                             & connected[y][x - 1])
                if x + 1 < self.cols:
                    connected[y][x].prove_if((flow[y][x] == '>')
                                             & connected[y][x + 1])
        # Count each spanning tree and make sure the cell numbers match
        # counts[y][x] = 1 + counts of every neighbor that points at this cell.
        counts = utils.makeGrid(self.cols, self.rows, lambda: IntVar())
        for y in range(self.rows):
            for x in range(self.cols):
                count = 1
                if y - 1 >= 0:
                    count += cond((flow[y - 1][x] == 'v'), counts[y - 1][x], 0)
                if y + 1 < self.rows:
                    count += cond((flow[y + 1][x] == '^'), counts[y + 1][x], 0)
                if x - 1 >= 0:
                    count += cond((flow[y][x - 1] == '>'), counts[y][x - 1], 0)
                if x + 1 < self.cols:
                    count += cond((flow[y][x + 1] == '<'), counts[y][x + 1], 0)
                require(counts[y][x] == count)
                # root  <=>  the cell's count equals its value
                require(~((flow[y][x] == '.') ^ (counts[y][x] == ans[y][x])))
        # Ensure different groups with the same number don't touch
        # rootIdxs[y][x] is created as IntVar(0, y * cols + x); a cell is a
        # root iff its rootIdx equals its own row-major index, and two adjacent
        # cells share a rootIdx iff they share a value.
        rootIdxs = [[IntVar(0, y * self.cols + x) for x in range(self.cols)]
                    for y in range(self.rows)]
        for y in range(self.rows):
            for x in range(self.cols):
                require(~((flow[y][x] == '.')
                          ^ (rootIdxs[y][x] == y * self.cols + x)))
                if y - 1 >= 0:
                    require(~((ans[y][x] == ans[y - 1][x])
                              ^ (rootIdxs[y][x] == rootIdxs[y - 1][x])))
                if y + 1 < self.rows:
                    require(~((ans[y][x] == ans[y + 1][x])
                              ^ (rootIdxs[y][x] == rootIdxs[y + 1][x])))
                if x - 1 >= 0:
                    require(~((ans[y][x] == ans[y][x - 1])
                              ^ (rootIdxs[y][x] == rootIdxs[y][x - 1])))
                if x + 1 < self.cols:
                    require(~((ans[y][x] == ans[y][x + 1])
                              ^ (rootIdxs[y][x] == rootIdxs[y][x + 1])))

        num_solutions = solve(quiet=True)
        # Flatten the answer grid in row-major order.
        # NOTE(review): `i / self.cols` is used as a list index, so it must be
        # an int; that holds under Python 2 integer division, but Python 3
        # would need `//` -- confirm the target interpreter.
        solution = [
            utils.intify(ans[i / self.cols][i % self.cols])
            for i in range(self.rows * self.cols)
        ]
        return (num_solutions, solution)
예제 #32
0
파일: ktable.py 프로젝트: Gitsyshk/simDHT
 def bucketIndex(self, target):
     """
     Locate the index of the bucket that holds the given node ID or infohash.

     The target is converted with ``intify`` and its leftmost insertion point
     in ``self.buckets`` (via ``bisect_left``) is returned.
     """
     key = intify(target)
     position = bisect_left(self.buckets, key)
     return position
예제 #33
0
 def _normalize(range_, monthly):
     """Normalize: multiply both ends of a range by # months or by $1k.

     Each of the two values in ``range_`` is converted with ``intify`` and
     scaled by 12 when ``monthly`` is truthy, otherwise by 1000.
     """
     if monthly:
         factor = 12
     else:
         factor = 1000
     scaled = (intify(bound) * factor for bound in range_)
     # Unpacking enforces that range_ holds exactly two values.
     low, high = scaled
     return low, high
예제 #34
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _samples(soup):
     """Return the sample count shown in the page's ``rowCounts`` paragraph.

     Takes the first ``<p class="rowCounts">`` in ``soup``, reads the text of
     the first ``<tt>`` inside it, and converts that text with ``intify``.
     """
     first_row = soup.findAll('p', {'class': 'rowCounts'})[0]
     count_text = first_row.findAll('tt')[0].text
     return intify(count_text)
예제 #35
0
 def _reviews(soup):
     """Return the review count found in the page's ``numReviews`` span.

     Takes the first ``<span class="numReviews subtle">`` in ``soup``, then
     the first ``<span class="txtShadowWhite">`` inside it, and converts that
     inner span's text with ``intify``.
     """
     outer = soup.findAll('span', {'class': 'numReviews subtle'})[0]
     inner = outer.findAll('span', {'class': 'txtShadowWhite'})[0]
     return intify(inner.text)
예제 #36
0
파일: gd.py 프로젝트: exshin/glassdoor
 def _normalize(range_, monthly):
     """Normalize: multiply both ends of a range by # months or by $1k.

     The multiplier is 12 for monthly figures and 1000 otherwise; each value
     in ``range_`` goes through ``intify`` before being scaled.
     """
     multiplier = 1000
     if monthly:
         multiplier = 12
     # range_ must yield exactly two values for the unpacking to succeed.
     low, high = (intify(value) * multiplier for value in range_)
     return low, high
예제 #37
0
파일: ktable.py 프로젝트: cooiky/simDHT
 def bucket_index(self, target):
     """Return the ``bisect_left`` position of ``intify(target)`` in ``self.buckets``."""
     key = intify(target)
     return bisect_left(self.buckets, key)
예제 #38
0
 def _samples(soup):
     """Parse the sample count out of ``soup``.

     The count is the text of the first ``<tt>`` within the first
     ``<p class="rowCounts">``, converted with ``intify``.
     """
     paragraphs = soup.findAll('p', {'class': 'rowCounts'})
     tt_tags = paragraphs[0].findAll('tt')
     return intify(tt_tags[0].text)
예제 #39
0
파일: ktable.py 프로젝트: ryancmu/simDHT
 def inRange(self, target):
     """Whether the target node ID falls inside this range.

     True when ``self.min <= intify(target) < self.max`` (lower bound
     inclusive, upper bound exclusive).
     """
     lower = self.min
     value = intify(target)
     return lower <= value and value < self.max
예제 #40
0
def get_seqfile_info(fname,
                     is_data,
                     germline_seqs=None,
                     cyst_positions=None,
                     tryp_positions=None,
                     n_max_queries=-1,
                     queries=None,
                     reco_ids=None):
    """Return list of sequence info from files of several types.

    The file format is picked by substring match on ``fname``: ``.csv``
    (columns ``unique_id``/``seq``), ``.tsv`` (columns ``name``/``nucleotide``)
    or ``.fasta``/``.fa``/``.fastq``/``.fq`` (parsed with ``SeqIO.parse``).

    Args:
        fname: path of the sequence file.
        is_data: when false, every kept row is also stored in ``reco_info``
            and passed to ``utils.add_match_info``; the three germline
            arguments must then be supplied.
        germline_seqs, cyst_positions, tryp_positions: forwarded to
            ``utils.add_match_info``; must not be None when ``is_data`` is
            false.
        n_max_queries: stop after this many kept rows (and after this many
            fasta/fastq records); values <= 0 mean no limit.
        queries: if not None, keep only rows whose name is in it.
        reco_ids: if not None, keep only rows whose ``'reco_id'`` is in it.

    Returns:
        ``(input_info, reco_info)``, two OrderedDicts. ``input_info`` maps
        each sequence name to ``{'unique_id': ..., 'seq': ...}``;
        ``reco_info`` maps ``line['unique_id']`` to the full row and is empty
        when ``is_data`` is true.

    Raises:
        AssertionError: missing germline arguments for non-data input,
            unrecognized file format, or no rows kept.
    """
    if not is_data:
        assert germline_seqs is not None
        assert cyst_positions is not None
        assert tryp_positions is not None

    seqfile = None  # only the csv/tsv branches keep a file handle open
    if '.csv' in fname:
        delimiter = ','
        name_column = 'unique_id'
        seq_column = 'seq'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter=delimiter)
    elif '.tsv' in fname:
        delimiter = '\t'
        name_column = 'name'
        seq_column = 'nucleotide'
        seqfile = opener('r')(fname)
        reader = csv.DictReader(seqfile, delimiter=delimiter)
    elif '.fasta' in fname or '.fa' in fname or '.fastq' in fname or '.fq' in fname:
        name_column = 'unique_id'
        seq_column = 'seq'
        reader = []
        # Test the fastq suffixes first: '.fa' is a substring of '.fastq', so
        # checking '.fa' first would send every .fastq file to the fasta parser.
        ftype = 'fastq' if ('.fastq' in fname or '.fq' in fname) else 'fasta'
        for n_fasta_queries, seq_record in enumerate(
                SeqIO.parse(fname, ftype), 1):
            reader.append({
                name_column: seq_record.name,
                seq_column: str(seq_record.seq).upper(),
            })
            if n_max_queries > 0 and n_fasta_queries >= n_max_queries:
                break
    else:
        print('ERROR unrecognized file format %s' % fname)
        assert False

    input_info, reco_info = OrderedDict(), OrderedDict()
    n_queries = 0
    try:
        for line in reader:
            # presumably converts numeric columns of the row in place --
            # confirm against utils.intify
            utils.intify(line)
            # if command line specified query or reco ids, skip other ones
            if queries is not None and line[name_column] not in queries:
                continue
            if reco_ids is not None and line['reco_id'] not in reco_ids:
                continue

            input_info[line[name_column]] = {
                'unique_id': line[name_column],
                'seq': line[seq_column]
            }
            if not is_data:
                # NOTE(review): keyed by 'unique_id' rather than name_column;
                # the two coincide for csv/fasta input, but a tsv row would
                # need its own 'unique_id' column here.
                reco_info[line['unique_id']] = line
                utils.add_match_info(germline_seqs, line, cyst_positions,
                                     tryp_positions)
            n_queries += 1
            if n_max_queries > 0 and n_queries >= n_max_queries:
                break
    finally:
        # Release the csv/tsv handle even if a row raises.
        if seqfile is not None:
            seqfile.close()

    if len(input_info) == 0:
        print('ERROR didn\'t end up pulling any input info out of %s' % fname)
        assert False
    return (input_info, reco_info)
예제 #41
0
파일: hashi.py 프로젝트: kcaze/claspy
    def _solve(self):
        """Build the bridge-puzzle constraint model for ``self.board`` and solve it.

        ``horiAns[y][x]`` is the bridge count (0..2) on the segment between
        cells ``(x, y)`` and ``(x + 1, y)``; ``vertAns[y][x]`` is the same for
        the segment between ``(x, y)`` and ``(x, y + 1)``. Cells for which
        ``self.board.getCell`` returns a non-None value are treated as islands.

        ``require``, ``cond``, ``sum_vars``, ``IntVar``, ``Atom``,
        ``set_max_val`` and ``solve`` come from the solver library used by this
        file; the comments below assume ``require`` asserts its argument as a
        constraint -- confirm against that library.

        Returns:
            ``(num_solutions, solution)`` where ``num_solutions`` is whatever
            ``solve(quiet=True)`` returns and ``solution`` is a two-element
            list: the row-major flattened ``horiAns`` values followed by the
            row-major flattened ``vertAns`` values, each passed through
            ``utils.intify``.
        """
        set_max_val(3)
        horiAns = utils.makeGrid(self.cols - 1, self.rows,
                                 lambda: IntVar(0, 2))
        vertAns = utils.makeGrid(self.cols, self.rows - 1,
                                 lambda: IntVar(0, 2))
        # Require bridges extend all the way and don't intersect
        for y in range(self.rows):
            for x in range(self.cols - 1):
                # A used horizontal segment must, on each side, either end at
                # an island or continue into a neighboring segment with the
                # same count.
                require(cond(
                  horiAns[y][x] > 0,
                  ((self.board.getCell(x,y) != None) | (horiAns[y][x-1] == horiAns[y][x] if x-1 >= 0 else False)) &\
                  ((self.board.getCell(x+1,y) != None) | (horiAns[y][x+1] == horiAns[y][x] if x+1 < self.cols-1 else False)),
                  True))
                # A used segment needs at least one empty end cell, i.e. two
                # directly adjacent islands are never bridged.
                require(
                    cond(horiAns[y][x] > 0, (self.board.getCell(x, y) == None)
                         | (self.board.getCell(x + 1, y) == None), True))
        for y in range(self.rows - 1):
            for x in range(self.cols):
                # Same two rules for vertical segments.
                require(cond(
                  vertAns[y][x] > 0,
                  ((self.board.getCell(x,y) != None) | (vertAns[y-1][x] == vertAns[y][x] if y-1 >= 0 else False)) &\
                  ((self.board.getCell(x,y+1) != None) | (vertAns[y+1][x] == vertAns[y][x] if y+1 < self.rows-1 else False)),
                  True))
                require(
                    cond(vertAns[y][x] > 0, (self.board.getCell(x, y) == None)
                         | (self.board.getCell(x, y + 1) == None), True))
        # Require numbers to match up
        for y in range(self.rows):
            for x in range(self.cols):
                cell = self.board.getCell(x, y)
                # The four segments touching this cell (0 where the grid ends).
                connections = [
                    horiAns[y][x - 1] if x - 1 >= 0 else 0,
                    horiAns[y][x] if x < self.cols - 1 else 0,
                    vertAns[y - 1][x] if y - 1 >= 0 else 0,
                    vertAns[y][x] if y < self.rows - 1 else 0,
                ]
                if cell != None:
                    require(sum_vars(connections) == cell)
        # Disallow bridge intersections
        # For every interior empty cell at least one of its four adjacent
        # segments must be unused.
        for y in range(1, self.rows - 1):
            for x in range(1, self.cols - 1):
                if self.board.getCell(x, y) == None:
                    require((horiAns[y][x - 1] == 0) | (horiAns[y][x] == 0)
                            | (vertAns[y - 1][x] == 0) | (vertAns[y][x] == 0))
        # Require all islands to be connected
        # The first island met in row-major order is proven unconditionally;
        # every other island is proven via the nearest island in each of the
        # four directions.
        # NOTE(review): in the four prove_if calls below `&` binds tighter
        # than `>`, so each argument parses as `(isConnected[..] & xxxAns[..]) > 0`.
        # `isConnected[..] & (xxxAns[..] > 0)` was probably intended -- confirm
        # against the solver library's operator semantics.
        isConnected = utils.makeGrid(self.cols, self.rows, lambda: Atom())
        firstIsland = False
        for y in range(self.rows):
            for x in range(self.cols):
                # Truthiness test: a None (and also a 0) cell is not an island.
                if self.board.getCell(x, y):
                    require(isConnected[y][x])
                    if not firstIsland:
                        isConnected[y][x].prove_if(True)
                        firstIsland = True
                    # Left
                    for xx in range(x - 1, -1, -1):
                        if self.board.getCell(xx, y):
                            isConnected[y][x].prove_if(
                                isConnected[y][xx] & horiAns[y][x - 1] > 0)
                            break
                    # Right
                    for xx in range(x + 1, self.cols):
                        if self.board.getCell(xx, y):
                            isConnected[y][x].prove_if(
                                isConnected[y][xx] & horiAns[y][x] > 0)
                            break
                    # Top
                    for yy in range(y - 1, -1, -1):
                        if self.board.getCell(x, yy):
                            isConnected[y][x].prove_if(
                                isConnected[yy][x] & vertAns[y - 1][x] > 0)
                            break
                    # Down
                    for yy in range(y + 1, self.rows):
                        if self.board.getCell(x, yy):
                            isConnected[y][x].prove_if(
                                isConnected[yy][x] & vertAns[y][x] > 0)
                            break

        num_solutions = solve(quiet=True)
        # Flatten both segment grids in row-major order.
        # NOTE(review): the `/` results are used as list indices, so they must
        # be ints; that holds under Python 2 integer division, but Python 3
        # would need `//` -- confirm the target interpreter.
        solution = [
            [
                utils.intify(horiAns[i / (self.cols - 1)][i % (self.cols - 1)])
                for i in range((self.cols - 1) * self.rows)
            ],
            [
                utils.intify(vertAns[i / self.cols][i % self.cols])
                for i in range(self.cols * (self.rows - 1))
            ],
        ]
        return (num_solutions, solution)