def getHits(self, timestamp: int) -> int:
    """Return the number of hits in the past 5 minutes.

    ``self.t`` is searched as a sorted list of ``[time, total]`` pairs; the
    result is the difference between the totals found at ``timestamp`` and
    at ``timestamp - 300``.
    NOTE(review): presumably ``total`` is a running hit count -- confirm
    against the method that appends to ``self.t``.

    @param timestamp - The current timestamp (in seconds granularity).
    """
    def total_up_to(moment):
        # [moment, inf] sorts after every real entry stamped `moment`.
        idx = bisect(self.t, [moment, float('inf')]) - 1
        # idx == -1 means nothing was recorded at or before `moment`; the
        # original indexed self.t[-1] here and wrapped to the newest entry.
        return self.t[idx][1] if idx >= 0 else 0

    return total_up_to(timestamp) - total_up_to(timestamp - 300)
def blah(s, e):
    """Print the bisect positions of ``e`` in ``s``, then insert it three times.

    Prints the results of ``bisect_left`` (with default, ``lo`` and
    ``lo``/``hi`` arguments), ``bisect_right`` and ``bisect``, then calls
    ``insort_left``, ``insort_right`` and ``insort`` on ``s`` (mutating it)
    and prints the resulting list.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(bisect_left(s, e))
    print(bisect_left(s, e, 0))
    print(bisect_left(s, e, 0, len(s)))
    print(bisect_right(s, e))
    print(bisect(s, e))
    insort_left(s, e)
    insort_right(s, e)
    insort(s, e)
    print(s)
def getMachine(self, key):
    """Return the machine mapped to ``key`` on the hash ring.

    The key is hashed with ``self.hashfn``; the ring entry just after that
    hash in ``self._hring`` is chosen, wrapping to the first entry when the
    hash lies past the end. The entry is then looked up in ``self._hm``.
    """
    key_hash = self.hashfn(key)
    ring = self._hring
    slot = bisect(ring, key_hash)
    if slot >= len(ring):
        # Past the last ring position: wrap around to the start.
        slot = 0
    return self._hm[ring[slot]]
def findMin(end_dict=None, var=None):
    """Main algorithm for solving problem.

    @input 'end_dict': dictionary where keys are the unique packet end
        points and value is a list of associated starting points
        (defaults to no packets)
    @input 'var': [n, size cost, constant cost, #of packets]
        (defaults to [1, 1, 1, 1])
    @output: min time to download data of size var[0] rounded to 3
        decimals; also prints out this time (if it exists) formatted to
        3 decimals. Returns -1 when var[0] cannot be reached.
    """
    # None defaults replace the shared mutable default arguments.
    if end_dict is None:
        end_dict = {}
    if var is None:
        var = [1, 1, 1, 1]

    times = {0: float(0)}
    # sorted() works on Python 2 and 3; dict.keys().sort() is Python 2 only.
    ends = sorted(end_dict)
    for elem in ends:
        times[elem] = float("inf")
        for ii in end_dict[elem]:
            if ii == 0:
                # for any n, the best solution will always be (0,n)
                times[elem] = float(elem) / var[2] + 2 * var[1]
            else:
                ii_const = float(elem - ii) / var[2] + 2 * var[1]
                # Chain onto every already-solved end point in [ii, elem).
                for j in range(bisect_left(ends, ii), bisect_left(ends, elem)):
                    j_const = ii_const + times[ends[j]]
                    if j_const < times[elem]:
                        times[elem] = j_const

    if var[0] in times and times[var[0]] != float("inf"):
        print('{0:.3f}'.format(times[var[0]]))
        return round(times[var[0]], 3)
    return -1
def p_e():
    """Read strings ``s`` and ``t`` from stdin and print one integer.

    Prints -1 (and exits) when ``t`` contains a character missing from
    ``s``. Otherwise walks ``t`` over repeated copies of ``s`` and prints
    ``wraps * len(s) + position`` of the last matched character
    (positions are 1-based).
    """
    s = input()
    t = input()

    # 1-based positions of every character of s, in increasing order.
    positions = defaultdict(list)
    for idx, ch in enumerate(s, 1):
        positions[ch].append(idx)

    if any(ch not in positions for ch in t):
        print(-1)
        exit()

    wraps = 0
    current = -1
    for ch in t:
        spots = positions[ch]
        nxt = bisect(spots, current)
        if nxt == len(spots):
            # No later occurrence in this copy of s: start the next copy.
            wraps += 1
            current = spots[0]
        else:
            current = spots[nxt]
    print(wraps * len(s) + current)
def closest_sorted(a, x, k):
    """Return up to ``k`` elements of sorted list ``a`` closest to ``x``.

    Elements are returned nearest-first; on a distance tie the element on
    the right (the larger one) is taken first. ``a`` is not modified. When
    ``k`` exceeds ``len(a)`` every element is returned, and ``k <= 0``
    yields an empty list.
    """
    size = len(a)
    # Elements at index >= right are > x; elements at index <= left are <= x.
    right = bisect(a, x)
    left = right - 1
    result = []
    # Explicit bounds replace the original +/- sys.maxint sentinels, which
    # were inserted into the caller's list and are unavailable on Python 3.
    while len(result) < k and (left >= 0 or right < size):
        take_right = left < 0 or (
            right < size and abs(a[right] - x) <= abs(a[left] - x))
        if take_right:
            result.append(a[right])
            right += 1
        else:
            result.append(a[left])
            left -= 1
    return result
def __init__(self, waveReader, onsetSamples):
    """Split channel 0 into segments at onsets and link similar segments.

    Each value in ``onsetSamples`` is snapped back to the closest preceding
    negative-to-non-negative zero crossing; the samples between consecutive
    snapped onsets become ``Segment`` objects. Every segment then gets a
    ``neighbors`` list of segment indices whose similarity (from
    ``self.findNeighborMatrix()``) reaches a threshold chosen so that about
    ``TARGET_NUM_JUMPS`` matrix entries qualify, excluding segments within
    10 positions of itself.
    """
    self.samples = waveReader.channels[0]
    self.segments = []
    self.onsets = [0]

    samples = self.samples
    crossings = [i for i in range(len(samples) - 1)
                 if samples[i] < 0 and samples[i + 1] >= 0]
    for onset in onsetSamples:
        idx = bisect(crossings, onset) - 1
        # idx == -1 means no crossing precedes this onset. The original
        # indexed crossings[-1] and wrapped to the LAST crossing; such an
        # onset now falls back to sample 0, which is already in the list.
        if idx >= 0:
            self.onsets.append(crossings[idx])
    self.onsets = sorted(set(self.onsets))

    for start, stop in zip(self.onsets, self.onsets[1:]):
        self.segments.append(Segment(self.samples[start:stop]))

    simMatrix = self.findNeighborMatrix()
    # Work on a copy with self-similarity zeroed when picking the threshold.
    smCopy = array(simMatrix, copy=True)
    fill_diagonal(smCopy, 0)
    sims = sorted(list(smCopy.reshape(-1)), reverse=True)[:TARGET_NUM_JUMPS]
    SIMILARITY_THRESHOLD = sims[-1]
    # Same text as the old two-item print statement, valid on Python 2 and 3.
    print('Using similarity threshold =  %s' % (SIMILARITY_THRESHOLD,))

    for i in range(len(self.segments)):
        self.segments[i].neighbors = [
            j for j in range(len(simMatrix[i]))
            if simMatrix[i][j] >= SIMILARITY_THRESHOLD and abs(i - j) > 10]
def prkdtree(points, xylim, center=None, depth=0):
    """Recursively build a PRKDTreeNode tree over ``points``.

    The region ``xylim`` (``[[xmin, xmax], [ymin, ymax]]``) is halved at its
    midpoint along ``depth % len(points[0])``; points whose coordinate is
    <= the midpoint go left, the rest go right. Recursion stops at regions
    holding zero or one point. ``points`` is sorted in place along the
    splitting axis.
    """
    count = len(points)
    if count <= 1:
        return PRKDTreeNode(xyrange=xylim, center=center,
                            point=points[0] if count == 1 else None,
                            left=None, right=None)

    dims = len(points[0])
    axis = depth % dims
    split = (xylim[axis][1] + xylim[axis][0]) / 2.0

    points.sort(key=lambda pt: pt[axis])
    coords = [pt[axis] for pt in points]
    pivot = bisect(coords, split)  # includes all <=

    xmin, xmax = xylim[0][0], xylim[0][1]
    ymin, ymax = xylim[1][0], xylim[1][1]
    lower_range = [[xmin, xmax], [ymin, ymax]]
    lower_range[axis][1] = split
    upper_range = [[xmin, xmax], [ymin, ymax]]
    upper_range[axis][0] = split

    # Midpoints the children will split on, passed down as their centers.
    next_axis = (depth + 1) % dims
    lower_center = (lower_range[next_axis][0] + lower_range[next_axis][1]) / 2.0
    upper_center = (upper_range[next_axis][0] + upper_range[next_axis][1]) / 2.0

    left_child = prkdtree(points[:pivot], lower_range, lower_center, depth + 1)
    right_child = prkdtree(points[pivot:], upper_range, upper_center, depth + 1)
    return PRKDTreeNode(xyrange=xylim, center=split, point=None,
                        left=left_child, right=right_child)
def insertion_sort(A):
    """Sort ``A`` in place with binary insertion sort and return it.

    For each position ``j`` the insertion slot inside the sorted prefix
    ``A[:j]`` is found with ``bisect``; the elements from that slot up to
    ``j - 1`` move one place right and the element is dropped in.
    """
    for j in range(1, len(A)):
        item = A[j]
        slot = bisect(A, item, hi=j)
        # Shift A[slot:j] one step right in a single slice assignment.
        A[slot + 1:j + 1] = A[slot:j]
        A[slot] = item
    return A
def setValue(self, time, value):
    """Set the value stored for ``time``, keeping ``self.times`` sorted.

    If ``time`` is already present in ``self.times`` the matching entry of
    ``self.values`` is overwritten; otherwise ``time`` and ``value`` are
    inserted at the same sorted position in both lists.
    """
    i = bisect(self.times, time)
    # bisect() is bisect_right: an existing equal key sits at i - 1, never
    # at i. The original tested self.times[i], so it never matched and
    # inserted a duplicate key instead of overwriting.
    if i > 0 and self.times[i - 1] == time:
        self.values[i - 1] = value
    else:
        self.times.insert(i, time)
        self.values.insert(i, value)
def find_next_joystick_release_time(trigger_time, mission):
    """Return the time of the first button-4 release after ``trigger_time``.

    Scans ``mission.joy_msgs`` from the first message stamped after
    ``trigger_time`` for a message with ``buttons[4] == 1`` that is followed
    by one with ``buttons[4] == 0``, and returns that message's time. Falls
    back to the last message time when no such transition exists.
    """
    joy = mission.joy_msgs
    start_idx = bisect(joy.times, trigger_time)
    for idx in range(start_idx, len(joy) - 1):
        # The original compared msgs[idx + 1].buttons[4] with both 1 and 0,
        # which can never hold; a release is 1 at idx followed by 0.
        if joy.msgs[idx].buttons[4] == 1 and joy.msgs[idx + 1].buttons[4] == 0:
            # NOTE(review): returns the time of the last pressed message, as
            # the original return expression did; confirm whether
            # times[idx + 1] (first released message) is what callers want.
            return joy.times[idx]
    return joy.times[-1]
def f():
    """Read two integer lists from stdin and print one number per target.

    Input: a line ``M N``, then ``M`` targets, then ``N`` factors (one per
    line). Factors not smaller than the largest target are discarded; all
    pairwise products (each factor with itself and every later factor) are
    sorted, and for each target the smallest product >= target is printed
    minus the target.
    """
    target_count, factor_count = input().split()
    targets = [int(input()) for _ in range(int(target_count))]
    ceiling = max(targets)
    raw_factors = [int(input()) for _ in range(int(factor_count))]
    factors = [value for value in raw_factors if value < ceiling]

    products = sorted(
        later * first
        for pos, first in enumerate(factors)
        for later in factors[pos:])

    for target in targets:
        # bisect(products, target - 1) is the first product >= target.
        print(products[bisect(products, target - 1)] - target)
def bisect(targetWord, path='words.txt'):
    """Binary-search a sorted word file for ``targetWord``.

    Reads one word per line from ``path`` (default ``'words.txt'``), with
    surrounding whitespace stripped, and returns the matching word, or
    ``None`` when it is absent. The file must already be sorted in the
    order Python compares strings.
    """
    # The original measured the list before filling it, recursed forever
    # with an unchanged argument, referenced an undefined `i`, and never
    # closed the file.
    with open(path) as fin:
        words = [line.strip() for line in fin]

    low, high = 0, len(words) - 1
    while low <= high:
        mid = (low + high) // 2
        candidate = words[mid]
        if candidate == targetWord:
            return candidate
        if candidate < targetWord:
            low = mid + 1
        else:
            high = mid - 1
    return None
def solve():
    # Search the n-digit primes for a family obtained by replacing k digit
    # positions with the same digit 0-9, and print the members of the
    # family whose first member is smallest. Reads the module-level names
    # n, k and l and the helpers find_prime5 and combinations.
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; in particular confirm where the
    # `if len(family) == l` test and the `if cool: break` sit.
    primes = find_prime5(10 ** n)
    # Drop the primes that are not above 10 ** (n - 1).
    primes = primes[bisect(primes, 10 ** (n - 1)):]
    set_of_primes = set(primes)
    pow_of_10 = [10 ** i for i in range(n)]
    #mul_of_pow_of_10 = [[pow_of_10[i] * x for i in range(n)] for x in range(10)]
    ls = [x for x in range(n - 1, -1, -1)]
    def swap(n, combo, i):
        pass
    families = []
    # (masked, mask) patterns already examined, so each is tried once.
    processed = set()
    for p in primes:
        # ttsp[j] is the j-th digit of p (most significant first) already
        # multiplied by its place value.
        ttsp = []
        for i in range(n):
            ttsp.append(p % 10 * pow_of_10[i])
            p //= 10
        ttsp.reverse()
        cool = False
        for combo in combinations(ls, k):
            tsp = ttsp[:]
            # mask has a 1 at every replaced position; masked is p with
            # those positions zeroed.
            mask = 0
            for x in combo:
                mask += pow_of_10[n - x - 1]
                tsp[x] = 0
            masked = sum(tsp)
            if (masked, mask) in processed:
                continue
            else:
                processed.add((masked, mask))
            family = []
            for i in range(10):
                pp = masked + mask * i
                if pp in set_of_primes:
                    family.append(pp)
            if len(family) == l:
                families.append(family)
                cool = True
                break
        if cool:
            break
    if families:
        smallest = []
        for family in families:
            print(family)
            if not smallest or family[0] < smallest[0]:
                smallest = family
        #if len(family) == l:
        for prime in smallest:
            #print(prime, end=' ')
            print(prime)
def send_lines_at(time, channel, data):
    """Queue each line of ``data`` for ``channel`` at ``time`` plus the lag.

    Every newline-separated piece of ``data`` becomes a
    ``(time + viz_lag, channel, line)`` tuple; the tuples are spliced into
    the ``"data_channels_buffer"`` list held by ``DfGlobal()`` at the sorted
    position of ``(time + viz_lag, channel, "")``, and the merged list is
    stored back under the same key.
    """
    g = DfGlobal()
    pending = g["data_channels_buffer"]
    stamp = time + g["viz_lag"]
    entries = [(stamp, channel, piece) for piece in data.split("\n")]
    cut = bisect(pending, (stamp, channel, ""))
    g["data_channels_buffer"] = pending[:cut] + entries + pending[cut:]
def insertion_sort(A):
    """Sort ``A`` in place with binary insertion sort and return it.

    The insertion slot ``k`` for ``A[j]`` inside the sorted prefix ``A[:j]``
    is found with ``bisect``; elements ``A[k:j]`` are shifted one place
    right and the saved element is written to ``A[k]``.
    """
    for j in range(1, len(A)):
        key = A[j]
        k = bisect(A, key, 0, j)
        # Stop at k + 1. The original ran down to k, copying A[k - 1] into
        # A[k] (A[-1] when k == 0) only for it to be overwritten just below.
        for i in range(j, k, -1):
            A[i] = A[i - 1]
        A[k] = key
    return A
def solve(xs, field_length, num_ships, ship_length):
    """Return the 1-based index of the first shot that leaves too little room.

    The field is cells ``1..field_length``; ships of ``ship_length`` cells
    may not touch, so a free run of ``size`` cells holds
    ``(size + 1) // (ship_length + 1)`` ships. Shots in ``xs`` are applied
    in order, each splitting the free run it lands in.

    NOTE(review): the original body was syntactically invalid and used the
    undefined names ``bomed``, ``h``, ``q`` and ``p``. The return convention
    (index of the first shot after which fewer than ``num_ships`` ships
    fit, or -1 if that never happens) is reconstructed from the surviving
    helper and variables; confirm against the caller. Shots are assumed to
    lie within ``1..field_length``.
    """
    def num_accomodate(size):
        return (size + 1) // (ship_length + 1)

    # Sorted shot positions, with sentinels just outside both field ends.
    bombed = [0, field_length + 1]
    capacity = num_accomodate(field_length)
    for i, x in enumerate(xs, 1):
        pos = bisect(bombed, x)
        left, right = bombed[pos - 1], bombed[pos]
        if left == x:
            # Repeated shot on the same cell changes nothing.
            continue
        # Replace the run (left, right) by the two runs on either side of x.
        capacity -= num_accomodate(right - left - 1)
        capacity += num_accomodate(x - left - 1) + num_accomodate(right - x - 1)
        bombed.insert(pos, x)
        if capacity < num_ships:
            return i
    return -1
def nth(n):
    """Return a single digit selected by ``n``.

    Values below 10 are returned unchanged. Otherwise ``n - 1`` is located
    in the module-level sorted list ``base``; the offset past the preceding
    ``base`` entry selects a number starting from ``10 ** idx`` and one of
    its ``idx + 1`` digits.
    """
    if n < 10:
        return n
    n -= 1
    idx = bisect(base, n)
    # Offset into the block of (idx + 1)-digit numbers.
    number_offset, digit_offset = divmod(n - base[idx - 1], idx + 1)
    return int(str(10 ** idx + number_offset)[digit_offset])
def nth(n):
    """Return a single digit selected by ``n``.

    ``n < 10`` is returned as is. For larger ``n`` the module-level sorted
    list ``base`` is bisected with ``n - 1`` to get ``idx``; the remainder
    past ``base[idx - 1]`` is split into a number offset (added to
    ``10 ** idx``) and a digit index within that ``idx + 1``-digit number.
    """
    if n >= 10:
        shifted = n - 1
        idx = bisect(base, shifted)
        width = idx + 1
        remaining = shifted - base[idx - 1]
        digits = str(10 ** idx + remaining // width)
        return int(digits[remaining % width])
    return n
def GetAccTime(tick, tempoList, tpb, prefixSum):
    """Return the accumulated time in seconds at ``tick``.

    ``tempoList`` entries are indexed as ``[tempo, start_tick]``. The last
    entry whose start tick is <= ``tick`` is located, and the result is
    ``prefixSum`` at that entry plus ``mido.tick2second`` of the ticks
    elapsed since it, using that entry's tempo and ``tpb``.

    The list of start ticks is cached on the function as
    ``GetAccTime.tickTable``. The original built it once from the first
    ``tempoList`` ever passed and reused it for every later call; it is now
    rebuilt whenever a different list object, or one of a different length,
    is supplied.
    NOTE(review): a ``tick`` smaller than the first start tick makes the
    index -1 and wraps to the last entry; presumably the first entry always
    starts at tick 0 -- confirm.
    """
    table = getattr(GetAccTime, "tickTable", None)
    source = getattr(GetAccTime, "_tickTableSource", None)
    if table is None or source is not tempoList or len(table) != len(tempoList):
        table = [entry[1] for entry in tempoList]
        GetAccTime.tickTable = table
        GetAccTime._tickTableSource = tempoList

    index = bisect(table, tick)
    tempo, start_tick = tempoList[index - 1][0], tempoList[index - 1][1]
    return prefixSum[index - 1] + mido.tick2second(tick - start_tick, tpb, tempo)
def createrect(curdistincthlines):
    """Pair horizontal lines into closed four-sided ``Pattern`` paths.

    The lines are sorted, then each unused line is matched with a later
    unused line that has the same ``start.real`` and ``end.real`` and the
    next larger ``start.imag``. Each pair is joined by two ``MicroLine``
    sides and the lower line reversed. Returns the list of patterns built.
    """
    curdistinctpaths = []
    curdistincthlines = sorted(curdistincthlines)
    length = len(curdistincthlines)
    used = length * [0]
    start_y = attrgetter('start.imag')
    # Real lists, not map(): they are indexed and bisected below, which a
    # Python 3 map iterator does not support.
    curdistincthlinesy = [start_y(line) for line in curdistincthlines]
    for i in range(length):
        if used[i] != 0:
            continue
        upper = curdistincthlines[i]
        # Later lines covering exactly the same horizontal span as `upper`.
        intersectlines = []
        intersectlines_ind = []
        for k in range(i + 1, length):
            candidate = curdistincthlines[k]
            if (candidate.start.real == upper.start.real
                    and upper.end.real == candidate.end.real):
                intersectlines.append(candidate)
                intersectlines_ind.append(k)
        intersectlinesy = [start_y(line) for line in intersectlines]
        # First candidate strictly past this line's start.imag ...
        nextstart = bisect(intersectlinesy, curdistincthlinesy[i])
        lens = len(intersectlines)
        if nextstart >= lens:
            continue
        # ... and the end of the run of candidates sharing that start.imag.
        nextend = bisect(intersectlinesy, intersectlinesy[nextstart])
        if nextend > lens:
            continue
        for j in range(nextstart, nextend):
            # The span equality re-check of the original is implied by the
            # filter that built intersectlines, so only `used` is tested.
            if used[intersectlines_ind[j]] == 0 and used[i] == 0:
                UperLine = upper
                Lowerline = intersectlines[j]
                newPath = Pattern(
                    UperLine,
                    MicroLine(UperLine.end, Lowerline.end),
                    MicroLine(Lowerline.end, Lowerline.start),
                    MicroLine(Lowerline.start, UperLine.start))
                used[intersectlines_ind[j]] = 1
                used[i] = 1
                curdistinctpaths.append(newPath)
    return curdistinctpaths
def getValue(self, time):
    """Return the value at ``time`` on a periodic track, via ``lerp``.

    ``time`` is reduced modulo ``self.getPeriod()`` and the two keys
    bracketing it in ``self.times`` are passed to ``lerp`` together with
    their values. Returns ``None`` when the track has no keys.
    """
    if 0 == len(self.times):
        return None
    period = self.getPeriod()
    t = time % period
    i = bisect(self.times, t)
    if i == len(self.times):
        # After the last key: the next key is the first one, a period later.
        return lerp(self.times[i - 1], self.values[i - 1],
                    self.times[0] + period, self.values[0], t)
    if i == 0:
        # Before the first key: the previous key is the last one, a period
        # earlier. The original passed times[-1] unshifted, so the segment
        # ran backwards in time.
        return lerp(self.times[-1] - period, self.values[-1],
                    self.times[0], self.values[0], t)
    return lerp(self.times[i - 1], self.values[i - 1],
                self.times[i], self.values[i], t)
def xMasKranz(self):
    """Store the current advent week in the age SDL variable.

    Takes the D'ni time from ``PtGetDniTime()``, converts it to a UTC
    calendar date, counts how many entries of
    ``self.get_sunday_in_advent()`` are on or before that date, writes the
    count to index 0 of the ``xMasKranzsdl`` variable and logs it.
    """
    dnitime = PtGetDniTime()
    sdlName = xMasKranzsdl
    sdl = PtGetAgeSDL()
    sdl.setFlags(sdlName, 1, 1)
    sdl.sendToClients(sdlName)

    # Format once and split, instead of three separate strftime calls.
    stamp = time.strftime('%Y %m %d', time.gmtime(dnitime))
    yearNum, monthNum, dayNum = [int(part) for part in stamp.split()]

    dates = self.get_sunday_in_advent()
    today = datetime.date(yearNum, monthNum, dayNum)
    adventweek = bisect(dates, today)
    sdl.setIndex(sdlName, 0, adventweek)
    PtDebugPrint(('codxMas: Current Adventweek is %d' % (adventweek)))
def get_testing_result(self, contours, color_img, bw_img):
    """Classify size-filtered contours as text or shape.

    Contours whose area lies strictly between
    ``self.TEXT_AREA_THRESHOLD_LOWER`` and ``self.TEXT_AREA_THRESHOLD_UPPER``
    are kept, ordered by the left edge of their bounding box. A contour is
    flagged as text unless all four SVMs (``svm1``..``svm4``) predict a
    shape.

    Returns ``(filtered_contours, is_text_flags)``, where
    ``is_text_flags[i]`` belongs to ``filtered_contours[i]``.
    """
    # NOTE(review): a numpy .shape is usually (rows, cols), i.e.
    # (height, width); the names are kept as in the original -- confirm
    # what get_features expects.
    width, height = bw_img.shape
    is_text_flags = []
    contour_left = []  # sorted left bounds, used only to find insert slots
    filtered_contours = []
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if not (self.TEXT_AREA_THRESHOLD_LOWER < area < self.TEXT_AREA_THRESHOLD_UPPER):
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        # sort contours by their left bound
        i = bisect(contour_left, x)
        contour_left.insert(i, x)
        filtered_contours.insert(i, cnt)

        feature1, feature2, feature3 = self.get_features(cnt, width, height)
        crossings = self.get_horizontal_crossing(cnt, bw_img, color_img)
        feature1 = np.array([feature1], np.float32)
        feature2 = np.array([feature2], np.float32)
        feature3 = np.array([feature3], np.float32)
        feature4 = np.array(crossings, np.float32)
        is_shape1 = self.svm1.predict(feature1)
        is_shape2 = self.svm2.predict(feature2)
        is_shape3 = self.svm3.predict(feature3)
        is_shape4 = self.svm4.predict(feature4)
        is_text = not (is_shape3 and is_shape4 and is_shape1 and is_shape2)
        # Insert at the same slot as the contour. The original appended, so
        # flags stayed in input order while contours were sorted, and the
        # two returned lists did not line up.
        is_text_flags.insert(i, is_text)
    return filtered_contours, is_text_flags
from math import tan
from bisect import *
from rootsearch import *


def f(x):
    """Function whose roots are sought: x - tan(x)."""
    return x - tan(x)


# Search interval [a, b], scanned in steps of dx.
a, b, dx = 0, 20, 0.01
print('The roots are:')
while True:
    x1, x2 = rootsearch(f, a, b, dx)
    if x1 is None:
        # rootsearch found no further bracket in the interval.
        print('\ndone!')
        break
    # Resume the next search just past the bracket that was found.
    a = x2
    root = bisect(f, x1, x2, 1)
    if root is not None:
        print(root)
input('Press return to exit')
def FindPeakMutation(inputfile,outputfile,outputfileUTR3,tmp_ref_file,search_radius,promoter_radius,promoter_radius2,genome,adjacent,pwd,SNP,PromoterStop,NearestTwoDirection,UTR3):
    """Annotate every peak of ``inputfile`` against GENCODE transcripts.

    Reads the gene GTF plus the exon/intron/CDS/UTR band files for
    ``genome`` from the directory prefix ``pwd``, then writes one
    tab-separated annotation line per (peak, transcript) hit to
    ``outputfile``: overlaps (Exon, Intron, cds, utr, Promoter_internal),
    flanking protein-coding promoters within ``promoter_radius``, nearest
    protein-coding neighbours, and any transcript whose start lies within
    ``search_radius``. A strand-flipped copy of the GTF is always written to
    ``tmp_ref_file``; when ``UTR3`` is true it is used as the reference and
    the strand column is flipped back while copying ``outputfile`` to
    ``outputfileUTR3``.

    The flags ``adjacent``, ``SNP``, ``PromoterStop`` and
    ``NearestTwoDirection`` are compared with ``== True`` / ``== False``
    and decide when the search for a peak stops early.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; verify against the original file before relying on it. Python 2
    code (print statement, xrange, reduce builtin).
    """
    print 'python process start:', datetime.now()
    print 'Load Reference'
    #######################################################
    #update 05182015, reverse strand of reference gtf for UTR3
    fout_reftmp = open(tmp_ref_file, 'w')
    for line in open(pwd + 'gencode.' + genome + '.annotation_GENE_GTF.txt', 'r'):
        line = line.strip().split('\t')
        # Column 6 is the strand: swap '+' and '-', anything else becomes '.'.
        if line[6]=='+':
            line[6] = '-'
        elif line[6]=='-':
            line[6] = '+'
        else:
            line[6] = '.'
        for i in xrange(0,(len(line)-1)):
            fout_reftmp.write(line[i] + '\t')
        fout_reftmp.write(line[(len(line)-1)] + '\n')
    fout_reftmp.close()
    #######################################################
    # Formatted output.
    # The helpers below read a, b, m, n and the exona/introna/cdsa/utra
    # pairs from the enclosing function's scope at call time; their M
    # parameter is only used by out_string.
    def out_string(peak, M, record, comment):
        # Distance from M to the transcript begin: a on '+', b otherwise.
        if record[6] == '+':
            distance = a - M
        else:
            distance = b - M
        return out_string_v2(peak, distance, record, comment)
    def out_string_exon(peak, M, record, comment):
        '''
        Format an exon hit; the distance column is the exon's [start, end].

        Disabled overlap-length logic kept from the original for reference:
        if m<exona and n<exonb: distance = n-exona
        if m>=exona and n<exonb: distance = n-m
        if m>=exona and n>=exonb: distance = exonb-m
        if m<exona and n>=exonb: distance = exonb-exona
        '''
        distance = [exona,exonb]
        return out_string_v2(peak, distance, record, comment)
    def out_string_intron(peak, M, record, comment):
        '''
        Format an intron hit; the distance column is the intron's [start, end].

        Disabled overlap-length logic kept from the original for reference:
        if m<introna and n<intronb: distance = n-introna
        if m>=introna and n<intronb: distance = n-m
        if m>=introna and n>=intronb: distance = intronb-m
        if m<introna and n>=intronb: distance = intronb-introna
        '''
        distance = [introna,intronb]
        return out_string_v2(peak, distance, record, comment)
    def out_string_cds(peak, M, record, comment):
        distance = [cdsa,cdsb]
        return out_string_v2(peak, distance, record, comment)
    def out_string_utr(peak, M, record, comment):
        distance = [utra,utrb]
        return out_string_v2(peak, distance, record, comment)
    def out_string_v2(peak, distance, record, comment):
        # Builds: peak, peak length (n - m), distance, comment, then the
        # transcript's chromosome/TSS/TTS/strand/gene/source/transcript id.
        chromo, source, TSS, TTS, strand = record[0], record[1], record[3], record[4], record[6]
        gene_name = record[8].strip().split(';')[1].strip().split(' ')[1][1:-1]
        transcript_id = record[8].strip().split(';')[0].strip().split(' ')[1][1:-1]
        gnote = reduce(lambda x, y: x + '\t' + y, [chromo, TSS, TTS, strand, gene_name, source, transcript_id])
        return (peak + '\t' + str(n - m) + '\t' + str(distance) + '\t' + comment + '\t' + gnote)
    ######load exon, intron, geneid index information
    geneid_set =[]
    exonband =[]
    intronband =[]
    cdsband =[]
    utrband =[]
    # Each band file line holds a Python literal in its second tab field.
    for line in open(pwd + r'/GENCODE_' + genome + '_EXONinfo.txt','r'):
        line = ast.literal_eval(line.strip().split('\t')[1])
        exonband.append(line)
    for line in open(pwd + r'/GENCODE_' + genome + '_INTRONinfo.txt','r'):
        line = ast.literal_eval(line.strip().split('\t')[1])
        intronband.append(line)
    for line in open(pwd + r'/GENCODE_' + genome + '_CDSinfo.txt','r'):
        line = ast.literal_eval(line.strip().split('\t')[1])
        cdsband.append(line)
    for line in open(pwd + r'/GENCODE_' + genome + '_UTRinfo.txt','r'):
        line = ast.literal_eval(line.strip().split('\t')[1])
        utrband.append(line)
    for line in open(pwd + r'/GENCODE_' + genome + '_GENEID_index.txt','r'):
        line = ast.literal_eval(line.strip().split('\t')[0])
        geneid_set.append(line)
    #index gemeid_set
    # genemap: second item of each geneid_set entry -> its first item,
    # which is used below to index the band lists.
    genemap=dict()
    geneset=set()
    for i in xrange(len(geneid_set)):
        key=geneid_set[i][1]
        genemap[key] = geneid_set[i][0]
        geneset.add(key)
    print 'Check Reference files'
    ''' PURPOSE Read and parse transcript file. PARAMETERS TRANSCRIPT List of chromosomes; each chromosome itself is a list of transcript strand. TRANSCRIPTLeft List of chromosomes; each chromosome itself is a list of lower index of transcript strand. '''
    TRANSCRIPT = []
    TRANSCRIPTLeft = []
    transcript_chrm = {}
    ichrm = newChrmId = -1
    ##########update 05182015,UTR3 reference
    if UTR3 == False:
        reference_basedon_UTR = pwd + 'gencode.' + genome + '.annotation_GENE_GTF.txt'
    elif UTR3 == True:
        reference_basedon_UTR = tmp_ref_file
    ##########
    for line in open(reference_basedon_UTR, 'r'):
        line = line.strip().split('\t')
        # line[0][3:] drops the first three characters of the chromosome
        # name (presumably the 'chr' prefix -- confirm).
        if line[0][3:] in transcript_chrm:
            ichrm = transcript_chrm[ line[0][3:] ]
        else:
            newChrmId += 1
            ichrm = newChrmId
            transcript_chrm[ line[0][3:] ] = ichrm
            TRANSCRIPT.append([])
            TRANSCRIPTLeft.append([])
        TRANSCRIPT[ichrm].append( line )
        TRANSCRIPTLeft[ichrm].append( int(line[3]) )
    ''' PURPOSE Sort transcript by strand end id. PARAMETERS TRANSCRIPTRightSort List of chromosomes; each chromosome sorted by strand right location. TRANSCRIPTRight List of chromosomes; each chromosome itself is a list of right location. TRANSCRIPTRightId List of chromosomes; each chromosome itself is a list of exon id in the file. '''
    TRANSCRIPTRightSort = []
    TRANSCRIPTRight = []
    TRANSCRIPTRightID = []
    for transcript in TRANSCRIPT:
        TRANSCRIPTRightSort.append([])
        TRANSCRIPTRight.append([])
        TRANSCRIPTRightID.append([])
        for i in xrange(len(transcript)):
            TRANSCRIPTRightSort[-1].append( (int(transcript[i][4]), i) )
        TRANSCRIPTRightSort[-1].sort(key = lambda x: x[0])
        for key in TRANSCRIPTRightSort[-1]:
            TRANSCRIPTRight[-1].append(key[0])
            TRANSCRIPTRightID[-1].append(key[1])
    ''' PURPOSE Prepare lists of begin id & key for all genes and for protein genes. PARAMETERS TSBEGIN List of chromosome; each chromosome itself a list of (arrow begin index, line number in the transcript). TSBEGINKEY List of chromosome; each chromosome itself a list of (arrow begin index). TSLINEID List of chromosome; each chromosome itself a list of (line number in the transcript). PCBEGIN, PCBEGINKEY, PCLINEID Same as above, but for protein codings. '''
    TSBEGIN = []
    TSBEGINKEY = []
    TSLINEID = []
    PCBEGIN = []
    PCBEGINKEY = []
    PCLINEID = []
    for transcript in TRANSCRIPT:
        TSBEGIN.append([])
        TSBEGINKEY.append([])
        TSLINEID.append([])
        PCBEGIN.append([])
        PCBEGINKEY.append([])
        PCLINEID.append([])
        for i in xrange(len(transcript)):
            record = transcript[i]
            # Append to gene list.
            if record[6] == '+':
                begin = int(record[3])
            else:
                begin = int(record[4])
            TSBEGIN[-1].append( (begin, i) )
            # Append to protein coding gene list.
            if record[1] == 'protein_coding':
                PCBEGIN[-1].append( (begin, i) )
        TSBEGIN[-1].sort(key = lambda x: x[0])
        for key in TSBEGIN[-1]:
            TSBEGINKEY[-1].append(key[0])
            TSLINEID[-1].append(key[1])
        PCBEGIN[-1].sort(key = lambda x: x[0])
        for key in PCBEGIN[-1]:
            PCBEGINKEY[-1].append(key[0])
            PCLINEID[-1].append(key[1])
    ''' PURPOSE Parse the peak file. ALGORITHM (1) Identify exon; (2) Identify intron; CDS;UTR (3) Find nearest protein coding; (4) Find neighbors within a range. '''
    print 'fixed reference done:', datetime.now()
    fout = open(outputfile,'w')
    print 'Start Annotation'
    count = -1
    for line in open(inputfile, 'r'):
        # Neglect the comment line.
        count += 1
        if count == 0:
            # First input line is a header: echo its first four columns and
            # append the annotation column names.
            sline =line.strip().split('\t')
            fout.write(sline[0] + '\t' +sline[1] + '\t' +sline[2] + '\t' +sline[3] + '\t' )
            fout.write('PeakLength' + '\t' + 'peakMtoStart_Overlap' + '\t' + 'type' + '\t' + 'BidirenctionalRegion' + '\t')
            fout.write('Chr' + '\t' + 'TSS' + '\t' + 'TTS' + '\t' + 'strand' + '\t' + 'gene_name' + '\t' + 'source'+ '\t' + 'transID' + '\n')
            continue
        # ---------------------------------------
        # Parse the peak information (m < n).
        #
        #        middle
        #          |
        # ---m---------n---
        #
        # ---------------------------------------
        line = line.strip().split()
        pkchrm = line[1]
        if pkchrm[0:3].upper() == 'CHR':
            pkchrm = pkchrm[3:]
        m, n = int(line[2]), int(line[3])
        middle = (m + n) / 2
        peakLeft = m
        peakRight = n
        # The information about the peak to be printed.
        pkhd = reduce(lambda x, y: x + '\t' + y, line[0:4])
        # ---------------------------------------
        # Annotate exon;intron
        # ---------------------------------------
        # Check if the chromosome has been registered.
        if pkchrm not in transcript_chrm:
            print pkchrm, 'Chromosome not registered'
            continue
        else:
            ichrm = transcript_chrm[ pkchrm ]
        transcript = TRANSCRIPT[ichrm]
        transcriptLeft = TRANSCRIPTLeft[ichrm]
        transcriptRight = TRANSCRIPTRight[ichrm]
        transcriptRightID = TRANSCRIPTRightID[ichrm]
        # A set holding everything that has been marked.
        myNeighbor = set()
        # Find the search range.
        # setRight: transcripts whose right end is >= the peak's left edge.
        iMin = bisect_left(transcriptRight, peakLeft)
        setRight = set()
        for i in xrange(iMin, len(transcriptRight)):
            setRight.add(transcriptRightID[i])
        # NOTE(review): iMin indexes the right-end-sorted list but is reused
        # here as the lower search bound into transcriptLeft, which is in
        # file order -- confirm this is intended.
        iMax = bisect_right(transcriptLeft, peakRight, lo=iMin+1)
        setLeft = set(range(iMax))
        # Search the range.
        found_exon_protein = False
        found_intron_protein = False
        #search exon region
        for transcriptID in setRight.intersection(setLeft):
            record = transcript[transcriptID]
            a, b = int(record[3]), int(record[4])
            if n < a or m > b:
                continue
            else:
                geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
                if len(exonband[genemap[geneid_B]])>0:
                    for i in xrange(len(exonband[genemap[geneid_B]])):
                        exona=exonband[genemap[geneid_B]][i][0]
                        exonb=exonband[genemap[geneid_B]][i][1]
                        if n < exona or m > exonb:
                            pass
                        else:
                            fout.write(out_string_exon(pkhd, middle, record, 'Exon\tNA')+ '\n')
                            #Check when exon finds, make stop label for SNP and adjacent=True
                            if record[1] == 'protein_coding':
                                if adjacent == True or SNP == True:
                                    found_exon_protein = True
                            myNeighbor.add(transcriptID)
        #search intron region
        if found_exon_protein == False:
            for transcriptID in setRight.intersection(setLeft):
                record = transcript[transcriptID]
                a, b = int(record[3]), int(record[4])
                if n < a or m > b:
                    continue
                else:
                    geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
                    if len(intronband[genemap[geneid_B]])>0:
                        for i in xrange(len(intronband[genemap[geneid_B]])):
                            introna=intronband[genemap[geneid_B]][i][0]
                            intronb=intronband[genemap[geneid_B]][i][1]
                            if n < introna or m > intronb:
                                pass
                            else:
                                fout.write(out_string_intron(pkhd, middle, record, 'Intron\tNA')+ '\n')
                                #Check when intron finds, make stop label for adjacent=True
                                if record[1] == 'protein_coding':
                                    if adjacent == True:
                                        found_intron_protein = True
                                myNeighbor.add(transcriptID)
        #search cds region
        if found_exon_protein == False and found_intron_protein == False:
            for transcriptID in setRight.intersection(setLeft):
                record = transcript[transcriptID]
                a, b = int(record[3]), int(record[4])
                if n < a or m > b:
                    continue
                else:
                    geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
                    if len(cdsband[genemap[geneid_B]])>0:
                        for i in xrange(len(cdsband[genemap[geneid_B]])):
                            cdsa=cdsband[genemap[geneid_B]][i][0]
                            cdsb=cdsband[genemap[geneid_B]][i][1]
                            if n < cdsa or m > cdsb:
                                pass
                            else:
                                fout.write(out_string_cds(pkhd, middle, record, 'cds\tNA')+ '\n')
                                myNeighbor.add(transcriptID)
        #search utr region
        if found_exon_protein == False and found_intron_protein == False:
            for transcriptID in setRight.intersection(setLeft):
                record = transcript[transcriptID]
                a, b = int(record[3]), int(record[4])
                if n < a or m > b:
                    continue
                else:
                    geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
                    if len(utrband[genemap[geneid_B]])>0:
                        for i in xrange(len(utrband[genemap[geneid_B]])):
                            utra=utrband[genemap[geneid_B]][i][0]
                            utrb=utrband[genemap[geneid_B]][i][1]
                            if n < utra or m > utrb:
                                pass
                            else:
                                fout.write(out_string_utr(pkhd, middle, record, 'utr\tNA')+ '\n')
                                myNeighbor.add(transcriptID)
        #############################
        #search internal-promoter region
        if found_exon_protein == False and found_intron_protein == False:
            for transcriptID in setRight.intersection(setLeft):
                record = transcript[transcriptID]
                a, b = int(record[3]), int(record[4])
                if n < a or m > b:
                    continue
                else:
                    # Peak overlaps the transcript within promoter_radius2
                    # of its begin (a on '+', b on '-').
                    if record[6] == '+':
                        if m < a + promoter_radius2:
                            fout.write(out_string(pkhd, middle, record, 'Promoter_internal\tNA')+ '\n')
                            myNeighbor.add(transcriptID)
                    elif record[6] == '-':
                        if n > b - promoter_radius2:
                            fout.write(out_string(pkhd, middle, record, 'Promoter_internal\tNA')+ '\n')
                            myNeighbor.add(transcriptID)
        #####################################
        # If found protein_coding. Annotate the next peak.
        if found_exon_protein or found_intron_protein:
            continue
        # --------------------------------------------
        # Search promoter in protein coding genes.
        # --------------------------------------------
        pcbeginkey = PCBEGINKEY[ichrm]
        pclineid = PCLINEID[ichrm]
        # Protein-coding genes with index < pcid begin at or before middle.
        pcid = bisect(pcbeginkey, middle)
        # Find the left promoter.
        i = pcid - 1
        found_left_promoter, left_d = False, 0
        left_promoter = []
        while i >= 0 and not found_left_promoter and left_d < promoter_radius:
            record = transcript[ pclineid[i] ]
            a, b = int(record[3]), int(record[4])
            left_d = m - b
            if left_d < promoter_radius:
                if record[6] == '-':
                    found_left_promoter = True
                    left_promoter.append( record )
            i = i - 1
        # Print the non-nearest left promoters.
        for record in left_promoter[1:]:
            a, b = int(record[3]), int(record[4])
            #print out_string(pkhd, middle, record, 'Promotor_L\tN')
            fout.write(out_string(pkhd, middle, record, 'Promoter_L\tN')+ '\n')
        # Find the right promoter.
        i = pcid
        found_right_promoter, right_d = False, 0
        right_promoter = []
        while i < len(pcbeginkey) and not found_right_promoter and right_d < promoter_radius:
            record = transcript[ pclineid[i] ]
            a, b = int(record[3]), int(record[4])
            right_d = a - n
            if right_d < promoter_radius:
                if record[6] == '+':
                    found_right_promoter = True
                    right_promoter.append( record )
            i = i + 1
        # Print the non-nearest right promoters.
        for record in right_promoter[1:]:
            a, b = int(record[3]), int(record[4])
            #print out_string(pkhd, middle, record, 'Promotor_R\tN')
            fout.write(out_string(pkhd, middle, record, 'Promoter_R\tN') + '\n')
        # If promoters are found on both sides.
        if found_left_promoter and found_right_promoter:
            record = left_promoter[0]
            # a and b must be reassigned before each out_string call because
            # the helper reads them from this scope.
            a, b = int(record[3]), int(record[4])
            #print out_string(pkhd, middle, record, 'Promotor_L\tY')
            fout.write(out_string(pkhd, middle, record, 'Promoter_L\tY') + '\n')
            record = right_promoter[0]
            a, b = int(record[3]), int(record[4])
            #print out_string(pkhd, middle, record, 'Promotor_R\tY')
            fout.write(out_string(pkhd, middle, record, 'Promoter_R\tY')+ '\n')
        # Find the right nearest neighbor if no right promoter found.
        if found_left_promoter and not found_right_promoter:
            is_right_bidirectional = False
            i = pcid
            found_right = False
            while i < len(pcbeginkey) and not found_right:
                record = transcript[ pclineid[i] ]
                a, b = int(record[3]), int(record[4])
                if min(a, b) > n:
                    found_right = True
                    if record[6] == '+':
                        is_right_bidirectional = True
                        #print out_string(pkhd, middle, record, 'Nearest_R\tY')
                        fout.write(out_string(pkhd, middle, record, 'Nearest_R\tY') + '\n')
                    else:
                        #print out_string(pkhd, middle, record, 'Nearest_R\tN')
                        fout.write(out_string(pkhd, middle, record, 'Nearest_R\tN') + '\n')
                else:
                    i = i + 1
            record = left_promoter[0]
            a, b = int(record[3]), int(record[4])
            if is_right_bidirectional:
                #print out_string(pkhd, middle, record, 'Promotor_L\tY')
                fout.write(out_string(pkhd, middle, record, 'Promoter_L\tY') + '\n')
            else:
                #print out_string(pkhd, middle, record, 'Promotor_L\tN')
                fout.write(out_string(pkhd, middle, record, 'Promoter_L\tN') + '\n')
        # Find the left nearest neighbor if no left promoter found.
        if not found_left_promoter and found_right_promoter:
            is_left_bidirectional = False
            i = pcid - 1
            found_left = False
            while i >= 0 and not found_left:
                record = transcript[ pclineid[i] ]
                a, b = int(record[3]), int(record[4])
                if max(a, b) < m:
                    found_left = True
                    if record[6] == '-':
                        is_left_bidirectional = True
                        #print out_string(pkhd, middle, record, 'Nearest_L\tY')
                        fout.write(out_string(pkhd, middle, record, 'Nearest_L\tY') + '\n')
                    else:
                        #print out_string(pkhd, middle, record, 'Nearest_L\tN')
                        fout.write(out_string(pkhd, middle, record, 'Nearest_L\tN') + '\n')
                else:
                    i = i - 1
            record = right_promoter[0]
            a, b = int(record[3]), int(record[4])
            if is_left_bidirectional:
                #print out_string(pkhd, middle, record, 'Promotor_R\tY')
                fout.write(out_string(pkhd, middle, record, 'Promoter_R\tY') + '\n')
            else:
                #print out_string(pkhd, middle, record, 'Promotor_R\tN')
                fout.write(out_string(pkhd, middle, record, 'Promoter_R\tN') + '\n')
        # Stop here if any promoter is found and if PromoterStop index is True, else if no promoter is found/or PromoterStop index is False, contitue for further
        # search in search radius.
        if PromoterStop == False:
            found_left_promoter = PromoterStop
            found_right_promoter = PromoterStop
        elif PromoterStop == True:
            pass
        if found_left_promoter or found_right_promoter:
            continue
        # ----------------------------------------------------
        # Search nearest neighbor in protein coding genes.
        # ----------------------------------------------------
        # Find the left nearest transcript.
        i = pcid - 1
        found_left = False
        while i >= 0 and not found_left:
            lineL = pclineid[i]
            record = transcript[lineL]
            a, b = int(record[3]), int(record[4])
            if max(a, b) < m:
                found_left = True
            else:
                i = i - 1
        # Find the right nearest transcript.
        i = pcid
        found_right = False
        while i < len(pcbeginkey) and not found_right:
            lineR = pclineid[i]
            record = transcript[lineR]
            a, b = int(record[3]), int(record[4])
            if min(a, b) > n:
                found_right = True
            else:
                i = i + 1
        # Check if is bidirectional.
        if found_left and found_right:
            recordL = transcript[lineL]
            recordR = transcript[lineR]
            if recordL[6] == '-' and recordR[6] == '+':
                a, b = int(recordL[3]), int(recordL[4])
                myNeighbor.add(lineL)
                #print out_string(pkhd, middle, recordL, 'Nearest_L\tY')
                fout.write(out_string(pkhd, middle, recordL, 'Nearest_L\tY') + '\n')
                a, b = int(recordR[3]), int(recordR[4])
                myNeighbor.add(lineR)
                #print out_string(pkhd, middle, recordR, 'Nearest_R\tY')
                fout.write(out_string(pkhd, middle, recordR, 'Nearest_R\tY') + '\n')
            else:
                # Distances from middle to each neighbour's begin.
                a, b = int(recordL[3]), int(recordL[4])
                if recordL[6] == '+':
                    dL = a - middle
                else:
                    dL = b - middle
                a, b = int(recordR[3]), int(recordR[4])
                if recordR[6] == '+':
                    dR = a - middle
                else:
                    dR = b - middle
                if NearestTwoDirection == False:
                    if abs(dL) < abs(dR):
                        #print out_string_v2(pkhd, dL, recordL, 'Nearest_L\tN')
                        fout.write(out_string_v2(pkhd, dL, recordL, 'Nearest\tN') + '\n')
                        myNeighbor.add(lineL)
                    else:
                        #print out_string_v2(pkhd, dR, recordR, 'Nearest_R\tN')
                        fout.write(out_string_v2(pkhd, dR, recordR, 'Nearest\tN') + '\n')
                        myNeighbor.add(lineR)
                if NearestTwoDirection == True:
                    fout.write(out_string_v2(pkhd, dL, recordL, 'Nearest_L\tN') + '\n')
                    fout.write(out_string_v2(pkhd, dR, recordR, 'Nearest_R\tN') + '\n')
                    myNeighbor.add(lineL)
                    myNeighbor.add(lineR)
        elif found_left:
            myNeighbor.add(lineL)
            record = transcript[lineL]
            a, b = int(record[3]), int(record[4])
            myNeighbor.add(lineL)
            #print out_string(pkhd, middle, record, 'Nearest_L\tN')
            fout.write(out_string(pkhd, middle, record, 'Nearest\tN') + '\n')
        elif found_right:
            myNeighbor.add(lineR)
            record = transcript[lineR]
            a, b = int(record[3]), int(record[4])
            myNeighbor.add(lineR)
            #print out_string(pkhd, middle, record, 'Nearest_R\tN')
            fout.write(out_string(pkhd, middle, record, 'Nearest\tN')+ '\n')
        # -------------------------------------------
        # Print everything within searching radius.
        # -------------------------------------------
        tsbeginkey = TSBEGINKEY[ichrm]
        tslineid = TSLINEID[ichrm]
        lower_bound = max(0, middle - search_radius)
        upper_bound = min(tsbeginkey[-1], middle + search_radius)
        lower_id = bisect(tsbeginkey, lower_bound)
        upper_id = bisect(tsbeginkey, upper_bound, lo = lower_id)
        for key_id in xrange(lower_id, upper_id):
            line_id = tslineid[key_id]
            # Skip transcripts already reported by an earlier stage.
            if line_id not in myNeighbor:
                distance = middle - tsbeginkey[key_id]
                record = transcript[line_id]
                #print out_string_v2(pkhd, distance, record, 'Neighbor\tN')
                fout.write(out_string_v2(pkhd, distance, record, 'Neighbor\tN') + '\n')
    fout.close()
    ####update 05182015 UTR3
    fout_utr = open(outputfileUTR3,'w')
    if UTR3 == False:
        for line in open(outputfile,'r'):
            fout_utr.write(line)
    elif UTR3 == True:
        nline =0
        for line in open(outputfile,'r'):
            nline +=1
            if nline ==1:
                fout_utr.write(line)
            else:
                # Column 11 is the strand written by out_string_v2; flip it
                # back since the reference strands were reversed.
                line = line.strip().split('\t')
                if line[11]=='+':
                    line[11] = '-'
                elif line[11]=='-':
                    line[11] = '+'
                else:
                    line[11] = '.'
                for i in xrange(0,(len(line)-1)):
                    fout_utr.write(line[i] + '\t')
                fout_utr.write(line[(len(line)-1)] + '\n')
    fout_utr.close()
    ####
    print 'Finish Annotation'
    print 'python process end:', datetime.now()
print >> sys.stderr, "failed extracting acc info for %s" % r.id for file in os.listdir(igr_dirname): #acc = file[:file.find('.')] for r in SeqIO.parse(open(igr_dirname+'/'+file),'fasta'): m = rex.match(r.id) acc = m.group(1) i = acc.find('.') if i > 0: acc = acc[:i] ok_random = False if acc in ribo_acc_set: if no_ambiguous_code(r.seq.tostring()): start = int(m.group(2)) end = int(m.group(3)) if min_len <= end-start <= max_len: i = bisect(ribo_acc_set[acc],(start,end)) if i == 0: ok_random = end < ribo_acc_set[acc][0][0] elif i == len(ribo_acc_set[acc])-1: ok_random = ribo_acc_set[acc][-1][1] < start elif i <= len(ribo_acc_set[acc])-2: ok_random = ribo_acc_set[acc][i-1][1] < start and end < ribo_acc_set[acc][i][0] else: ok_random = ribo_acc_set[acc][i-1][1] < start else: ok_random = True if ok_random: print ">%s\n%s" % (r.id,r.seq.tostring())
from bisect import *

# Sieve over 0..300002: p[i] ends up truthy only for numbers congruent to
# 1 or 6 modulo 7 that have no smaller such number (>= 2) as a factor.
p = [1] * 300003
p[0] = p[1] = 0
for i in range(2, 300003):
    if p[i] and i % 7 in (1, 6):
        p[2*i::i] = [0] * len(p[2*i::i])
    else:
        p[i] = 0
p = [i for i in range(2, 300003) if p[i]]

# Ported from Python 2 (xrange / raw_input / print statement).
# For each input N (terminated by a line containing 1), print the sieve
# survivors that divide N.
while 1:
    N = int(input())
    if N == 1:
        break
    ans = [i for i in p[:bisect(p, N + 1)] if N % i == 0]
    print("%d: %s" % (N, " ".join(map(str, ans))))
#coding=utf-8
import bisect

# Keep a list sorted by using binary search.
# bisect(list, item[, lo[, hi]])
# `list` must already be sorted; lo and hi bound the searched slice and
# default to the whole list.

aray = [1, 2, 3, 4, 5]

# For an item equal to an existing one, return the index on its left ...
index = bisect.bisect_left(aray, 3)
# ... or the index on its right.
index = bisect.bisect_right(aray, 3)

# insort_* find the index with bisect and then call list.insert().
# For an equal item, insert on the left ...
bisect.insort_left(aray, 2.5)
# ... or on the right.
bisect.insort_right(aray, 3)
def add(self, value):
    """Insert ``value`` into this sorted sequence unless already present.

    ``self`` is bisected directly, so it must be a sorted, list-like object
    with an ``insert`` method.
    """
    i = bisect(self, value)
    # bisect() returns the slot after any equal element, so a duplicate can
    # only sit at i - 1. `!=` replaces the Python 2-only `<>` operator.
    if i == 0 or self[i - 1] != value:
        self.insert(i, value)
#!/usr/bin/env python
def bisect(alist, word):
    """Return the index of ``word`` in sorted ``alist``, or None if absent."""
    left = 0
    right = len(alist) - 1
    # `<=` so the final single-element window is still examined; with `<`
    # a match at the last remaining position was reported as missing.
    while left <= right:
        # Floor division keeps the index an int on Python 3 as well.
        now = (left + right) // 2
        if alist[now] == word:
            return now
        elif alist[now] > word:
            right = now - 1
        else:
            left = now + 1
    return None


word_list = []
with open('../data/words.txt') as dict_file:
    for e in dict_file:
        word_list.append(e.strip())
print(bisect(word_list, 'boy'))

# From here on the name `bisect` refers to the standard-library module,
# replacing the function above; its result is printed for comparison.
import bisect
print(bisect.bisect(word_list, 'boy') - 1)
from bisect import *

# bisect() returns the insertion point to the right of any equal element.
n = [1, 2, 3, 4, 5]
# Single-argument print(...) behaves the same on Python 2 and 3.
print(bisect(n, 1))
print(bisect(n, 5))
from bisect import *

# Ported from Python 2 (xrange / raw_input / print statement).
R = 200001
p = [1] * R
p[0] = p[1] = 0
# + 1 so the integer square root itself is sieved as well.
for i in range(2, int(R ** 0.5) + 1):
    if p[i]:
        p[2*i::i] = [0] * (len(p[2*i::i]))
p = [i for i in range(2, R) if p[i]]

K = int(input())
N = int(input())
ans = mx = 0
# H holds the digital roots of the current run of consecutive primes in
# which no digital root repeats.
H = []
for i in range(bisect(p, K), bisect(p, N)):
    # Reduce the prime to its digital root (repeated digit sum).
    s = str(p[i])
    while len(s) > 1:
        s = str(sum(map(int, list(s))))
    if s in H:
        # The run ending just before p[i] is complete: record it if it is
        # the longest so far (ties keep the larger starting prime).
        if len(H) > mx:
            mx, ans = len(H), p[i - len(H)]
        elif len(H) == mx:
            ans = max(ans, p[i - len(H)])
        # Drop the front of the run up to and including the repeated root.
        while s in H:
            H.pop(0)
    H.append(s)
# Account for the run still open when the primes are exhausted. Skipped
# when no prime fell in range, where the original indexed p with a stale i.
if H:
    if len(H) > mx:
        ans = p[i + 1 - len(H)]
    elif len(H) == mx:
        ans = max(ans, p[i + 1 - len(H)])
print(ans)
def grade(score, breakpoints=[60,70,80,90], grades="FDCBA"):
    """Map ``score`` to a letter grade.

    ``breakpoints`` is the sorted list of lower bounds for each grade after
    the first; a score equal to a breakpoint gets the higher grade.
    """
    # Number of breakpoints <= score selects the letter.
    return grades[bisect(breakpoints, score)]
def getValue(self, time):
    """Return the value of the last key at or before ``time`` (periodic).

    ``time`` is reduced modulo ``self.getPeriod()``; when it precedes every
    key the index becomes -1 and the last value is returned.
    """
    phase = time % self.getPeriod()
    slot = bisect(self.times, phase) - 1
    return self.values[slot]
def triplets(a, b, c):
    """Count distinct triplets (p, q, r) with p <= q and q >= r.

    ``p`` is drawn from ``a``, ``q`` from ``b`` and ``r`` from ``c``;
    duplicates inside each input are counted once.
    """
    lows = sorted(set(a))
    highs = sorted(set(c))
    total = 0
    for middle in set(b):
        # bisect() counts the elements <= middle on each side.
        total += bisect(lows, middle) * bisect(highs, middle)
    return total
def generate(self):
    """Yield ``list1[i] + list2[e]`` for each qualifying index pair.

    For every index ``i`` of ``self.list3``, ``e`` runs over the indices of
    ``self.list4`` whose element is strictly greater than ``list3[i]``
    (``list4`` is bisected, so it must be sorted).
    """
    for i, bound in enumerate(self.list3):
        first = bisect(self.list4, bound)
        for e in range(first, len(self.list4)):
            yield self.list1[i] + self.list2[e]
return primes from bisect import * times = input() params = [] m = 0 for i in range(times): a = input() m = max(m, a) params.append(a) ar = get_primes(m) ar.sort() ar2 = [] accu = 0 for i in ar: accu += i ar2.append(accu) for item in params: n = item index = bisect(ar, n) if index <= 0: print 0 else: print ar2[index - 1]