Пример #1
0
 def getHits(self, timestamp: int) -> int:
     """
     Return the number of hits in the past 5 minutes.

     ``self.t`` is searched with ``bisect`` as a sorted list of
     ``[timestamp, count]`` pairs; the result is the difference between the
     count stored at ``timestamp`` and the one stored at ``timestamp - 300``
     (the counts are presumably running totals -- confirm against the code
     that fills ``self.t``).

     @param timestamp - The current timestamp (in seconds granularity).
     @return The difference of the two counts; 0 when ``self.t`` is empty.
     """
     def hits_up_to(ts):
         # [ts, inf] sorts after every [ts, count] entry, so bisect lands
         # just past the last entry whose timestamp is <= ts.
         idx = bisect(self.t, [ts, float('inf')]) - 1
         # idx == -1 means no entry is that old: count it as zero instead of
         # letting the negative index wrap around to the newest entry.
         return self.t[idx][1] if idx >= 0 else 0

     return hits_up_to(timestamp) - hits_up_to(timestamp - 300)
Пример #2
0
def blah(s, e):
    """Print every bisect lookup of *e* in *s*, then insert *e* three times.

    The five lookups are printed one per line (``bisect_left`` with default,
    explicit ``lo`` and explicit ``lo``/``hi`` bounds, then ``bisect_right``
    and its alias ``bisect``).  Afterwards *e* is inserted into *s* in place
    with ``insort_left``, ``insort_right`` and ``insort``, and the resulting
    list is printed.

    Each ``print`` takes a single argument so the call form produces the same
    output on Python 2 and Python 3.
    """
    print(bisect_left(s, e))
    print(bisect_left(s, e, 0))
    print(bisect_left(s, e, 0, len(s)))
    print(bisect_right(s, e))
    print(bisect(s, e))

    insort_left(s, e)
    insort_right(s, e)
    insort(s, e)
    print(s)
Пример #3
0
def blah(s, e):
    """Show where *e* would go in sorted list *s*, then insert it three times.

    Prints, one per line, the results of ``bisect_left`` (default bounds,
    explicit ``lo``, explicit ``lo`` and ``hi``), ``bisect_right`` and
    ``bisect``.  Then *s* is mutated in place by ``insort_left``,
    ``insort_right`` and ``insort`` and printed.

    ``print`` is always called with exactly one argument, which behaves the
    same under Python 2 and Python 3.
    """
    # All lookups happen before any insertion, so they see the original list.
    positions = (
        bisect_left(s, e),
        bisect_left(s, e, 0),
        bisect_left(s, e, 0, len(s)),
        bisect_right(s, e),
        bisect(s, e),
    )
    for position in positions:
        print(position)

    for insert in (insort_left, insort_right, insort):
        insert(s, e)
    print(s)
Пример #4
0
 def getMachine(self,key):
     """Return the machine responsible for *key* on the hash ring.

     The key is hashed with ``self.hashfn``; the first ring position strictly
     greater than that hash is chosen, wrapping to position 0 when the hash
     is at or beyond the last ring entry.  The ring entry is then mapped to
     a machine through ``self._hm``.
     """
     key_hash = self.hashfn(key)
     ring = self._hring
     slot = bisect(ring, key_hash)
     # bisect never returns more than len(ring); that value means "past the
     # end", so wrap around to the start of the ring.
     if slot == len(ring):
         slot = 0
     return self._hm[ring[slot]]
Пример #5
0
def findMin(end_dict=None, var=(1, 1, 1, 1)):
    """Compute the minimum time to reach end point ``var[0]``.

    @input 'end_dict': dictionary whose keys are the unique packet end points
    and whose values are lists of the associated starting points.  ``None``
    (the default) is treated as an empty dictionary.
    @input 'var': indexable of four numbers.  Only ``var[0]`` (target end
    point), ``var[1]`` (added twice per packet) and ``var[2]`` (divisor
    applied to the packet length) are read here.
    @output: the minimum time for end point ``var[0]`` rounded to 3 decimals,
    or -1 when that end point is missing or unreachable.  When a time exists
    it is also printed with 3 decimals.
    """
    if end_dict is None:
        end_dict = {}

    overhead = 2 * var[1]
    rate = var[2]

    times = {0: float(0)}
    # sorted() works for dict keys on both Python 2 and Python 3.
    ends = sorted(end_dict)

    for elem in ends:
        times[elem] = float("inf")
        # Index of elem itself: only strictly smaller end points, whose
        # times are already final, may precede this packet.
        upper = bisect_left(ends, elem)
        for ii in end_dict[elem]:
            if ii == 0:
                # A packet starting at 0 needs no predecessor.
                times[elem] = float(elem) / rate + overhead
            else:
                ii_const = float(elem - ii) / rate + overhead
                # Chain onto any earlier end point lying at or after this
                # packet's start and before its end.
                for j in range(bisect_left(ends, ii), upper):
                    j_const = ii_const + times[ends[j]]
                    if j_const < times[elem]:
                        times[elem] = j_const

    target = var[0]
    if target in times and times[target] != float("inf"):
        print('{0:.3f}'.format(times[target]))
        return round(times[target], 3)
    return -1
Пример #6
0
def p_e():
    """Read strings ``s`` and ``t`` from stdin and print one integer.

    For every character of ``s`` its 1-based positions are recorded.  If any
    character of ``t`` never occurs in ``s`` the function prints -1 and
    exits.  Otherwise ``t`` is walked left to right: each character moves to
    its next position in ``s`` strictly after the current one, and when none
    is left the walk wraps to that character's first position and counts one
    more full pass over ``s``.  The printed value is
    ``passes * len(s) + final_position``.
    """
    s = input()
    t = input()

    positions = defaultdict(list)
    for index, ch in enumerate(s, 1):
        positions[ch].append(index)

    if any(ch not in positions for ch in t):
        print(-1)
        exit()

    passes = 0
    current = -1
    for ch in t:
        spots = positions[ch]
        nxt = bisect(spots, current)
        if nxt == len(spots):
            # No occurrence after `current`: start another pass over s.
            passes += 1
            current = spots[0]
        else:
            current = spots[nxt]

    print(passes * len(s) + current)
def closest_sorted(a, x, k):
    """Return the *k* elements of sorted list *a* that are closest to *x*.

    Elements are returned in the order they are picked: nearest first.  On a
    distance tie the element to the right of *x* is taken before the one to
    its left.  Elements equal to *x* count as lying to its left.

    *a* is not modified.  When *k* exceeds ``len(a)`` every element is
    returned; a non-positive *k* gives an empty list.
    """
    # First index whose element is strictly greater than x.
    right = bisect(a, x)
    left = right - 1

    wanted = min(k, len(a))
    result = []
    while len(result) < wanted:
        # Take from the right when the left side is exhausted, or when the
        # right candidate exists and is at least as close as the left one.
        take_right = left < 0 or (
            right < len(a) and abs(a[right] - x) <= abs(a[left] - x)
        )
        if take_right:
            result.append(a[right])
            right += 1
        else:
            result.append(a[left])
            left -= 1

    return result
Пример #8
0
    def __init__(self, waveReader, onsetSamples):
        """Cut the first channel into segments and link similar segments.

        waveReader   -- object whose ``channels[0]`` holds the samples used.
        onsetSamples -- iterable of sample indices; each one is snapped to a
                        zero crossing before being used as a cut point.

        After construction ``self.segments`` holds one ``Segment`` per pair
        of consecutive cut points and every segment has a ``neighbors`` list
        of indices of sufficiently similar segments.
        """
        self.samples = waveReader.channels[0]
        self.segments = []
        self.onsets = [0]
        
        # Indices where the signal goes from negative to non-negative.
        crossings = [i for i in xrange(len(self.samples) - 1) 
            if self.samples[i] < 0 and self.samples[i+1] >= 0]
        
        for onset in onsetSamples:
            # Snap to the last crossing at or before the onset.
            # NOTE(review): when no crossing precedes the onset the index is
            # -1, which selects the final crossing -- confirm this is intended.
            self.onsets.append(crossings[bisect(crossings, onset) - 1])

        # Drop duplicate cut points and put them in ascending order.
        self.onsets = sorted(list(set(self.onsets)))

        # One segment per pair of consecutive cut points; samples after the
        # last cut point are not turned into a segment.
        for i in xrange(len(self.onsets) - 1):
            s = Segment(self.samples[self.onsets[i]:self.onsets[i+1]])
            self.segments.append(s)

        simMatrix = self.findNeighborMatrix()

        # Work on a copy with a zeroed diagonal when choosing the threshold
        # (presumably numpy's array/fill_diagonal -- confirm the imports).
        smCopy = array(simMatrix, copy=True)
        fill_diagonal(smCopy, 0)

        # The threshold is the smallest of the TARGET_NUM_JUMPS largest
        # similarity values.
        sims = sorted(list(smCopy.reshape(-1)), reverse=True)[:TARGET_NUM_JUMPS]
        SIMILARITY_THRESHOLD = sims[-1]

        print 'Using similarity threshold = ', SIMILARITY_THRESHOLD

        # A neighbor must reach the threshold and be more than 10 segments
        # away from the segment itself.
        for i in xrange(len(self.segments)):
            self.segments[i].neighbors = [j for j in xrange(len(simMatrix[i])) 
                if simMatrix[i][j] >= SIMILARITY_THRESHOLD and abs(i-j) > 10]
Пример #9
0
def prkdtree(points, xylim, center=None, depth=0):
    """Recursively build a tree of ``PRKDTreeNode`` objects over *points*.

    points -- list of point tuples; it is sorted in place along the current
              axis at every level that has more than one point.
    xylim  -- ``[[xmin, xmax], [ymin, ymax]]`` bounds of this node's region.
    center -- split coordinate stored on leaf nodes (passed down by the
              parent call).
    depth  -- recursion depth; the split axis is ``depth % len(points[0])``.

    A region with zero or one point becomes a leaf.  Otherwise the region is
    cut at the midpoint of its extent along the current axis: points whose
    coordinate is <= the midpoint go left, the rest go right.
    """
    if len(points) == 1:
        return PRKDTreeNode(xyrange=xylim, center=center, point=points[0],
                            left=None, right=None)
    if len(points) == 0:
        return PRKDTreeNode(xyrange=xylim, center=center, point=None,
                            left=None, right=None)

    dims = len(points[0])
    axis = depth % dims
    split = (xylim[axis][1] + xylim[axis][0]) / 2.0

    points.sort(key=lambda pt: pt[axis])
    coords = [pt[axis] for pt in points]
    pivot = bisect(coords, split)  # number of points with coordinate <= split

    x_bounds = (xylim[0][0], xylim[0][1])
    y_bounds = (xylim[1][0], xylim[1][1])

    # Child regions: copies of this region clipped at the split value.
    left_range = [list(x_bounds), list(y_bounds)]
    left_range[axis][1] = split
    right_range = [list(x_bounds), list(y_bounds)]
    right_range[axis][0] = split

    # Midpoints of the children along the axis they will split on next.
    next_axis = (depth + 1) % dims
    left_center = (left_range[next_axis][0] + left_range[next_axis][1]) / 2.0
    right_center = (right_range[next_axis][0] + right_range[next_axis][1]) / 2.0

    left_child = prkdtree(points[:pivot], left_range, left_center, depth + 1)
    right_child = prkdtree(points[pivot:], right_range, right_center, depth + 1)
    return PRKDTreeNode(xyrange=xylim, center=split, point=None,
                        left=left_child, right=right_child)
def insertion_sort(A):
    """Sort list *A* in place with binary insertion sort and return it.

    For each position ``j`` the insertion point of ``A[j]`` inside the
    already sorted prefix ``A[:j]`` is found with ``bisect`` (to the right of
    equal elements), the elements in between are shifted one slot to the
    right, and the value is dropped into the gap.
    """
    for j in range(1, len(A)):
        current = A[j]
        slot = bisect(A, current, hi=j)
        # Shift A[slot:j] right by one in a single slice assignment.
        A[slot + 1:j + 1] = A[slot:j]
        A[slot] = current
    return A
Пример #11
0
    def setValue(self, time, value):
        """Store *value* at *time*, keeping ``self.times`` sorted.

        If *time* is already present its value is overwritten; otherwise the
        time and value are inserted at the matching position of
        ``self.times`` and ``self.values``.
        """
        i = bisect(self.times, time)

        # Assuming the stdlib bisect (an alias of bisect_right), i points
        # just past any entry equal to `time`, so an existing key can only be
        # at i - 1.  Testing self.times[i] would never match and would insert
        # a duplicate instead of overwriting.
        if i > 0 and self.times[i - 1] == time:
            self.values[i - 1] = value
        else:
            self.times.insert(i, time)
            self.values.insert(i, value)
Пример #12
0
def find_next_joystick_release_time(trigger_time, mission):
    """Return the time of the first button-4 release after *trigger_time*.

    Scans ``mission.joy_msgs`` starting at the first message whose time is
    greater than *trigger_time* and looks for a message with
    ``buttons[4] == 1`` that is directly followed by one with
    ``buttons[4] == 0``.  The time of the still-pressed message is returned.
    If no such transition exists the time of the last message is returned.
    """
    joy = mission.joy_msgs
    start_idx = bisect(joy.times, trigger_time)
    for idx in range(start_idx, len(joy) - 1):
        # Pressed now, released in the next message.  Comparing the same
        # message against both 1 and 0 could never be true.
        pressed_now = joy.msgs[idx].buttons[4] == 1
        released_next = joy.msgs[idx + 1].buttons[4] == 0
        if pressed_now and released_next:
            return joy.times[idx]
    return joy.times[-1]
Пример #13
0
def f():
    """Read two groups of integers from stdin and print one gap per target.

    The first line holds two counts ``M`` and ``N``.  ``M`` target values
    follow, one per line, then ``N`` factor values.  Factors that are not
    smaller than the largest target are discarded.  All pairwise products of
    the remaining factors (a factor may pair with itself) are sorted, and
    for every target, in input order, the smallest product that is >= the
    target is looked up and ``product - target`` is printed.
    """
    first, second = input().split()
    targets = [int(input()) for _ in range(int(first))]
    ceiling = max(targets)

    candidates = [int(input()) for _ in range(int(second))]
    factors = [value for value in candidates if value < ceiling]

    products = sorted(
        other * factor
        for start, factor in enumerate(factors)
        for other in factors[start:]
    )

    for target in targets:
        # bisect on target - 1 gives the first product >= target.
        print(products[bisect(products, target - 1)] - target)
Пример #14
0
def bisect(targetWord):
    """Look up *targetWord* in ``words.txt`` with a binary search.

    Every line of ``words.txt`` (whitespace stripped) is one word.  The
    search assumes the file is already in ascending order.

    Returns the matching word when it is present, otherwise ``None`` (also
    for an empty file).
    """
    # Local import: this function shadows the stdlib module's name at module
    # level, so the helper is pulled in under its own name here.
    from bisect import bisect_left

    # The context manager closes the file even if reading fails.
    with open('words.txt') as fin:
        words = [line.strip() for line in fin]

    pos = bisect_left(words, targetWord)
    if pos < len(words) and words[pos] == targetWord:
        return words[pos]
    return None
Пример #15
0
def solve():
    """Search n-digit primes for digit-replacement families and print them.

    Reads the module-level names ``n`` (number of digits), ``k`` (how many
    digit positions are replaced at once) and ``l`` (required family size).
    For every n-digit prime and every choice of ``k`` positions, the chosen
    digits are replaced by the same digit 0..9; the results that are n-digit
    primes form a family.  A family is recorded as soon as it has ``l``
    members.  All recorded families are printed, followed by the members of
    the family with the smallest first element, one per line.
    """
    # presumably find_prime5 returns the sorted primes below its argument --
    # verify; the bisect slice then keeps those above 10 ** (n - 1).
    primes = find_prime5(10 ** n)
    primes = primes[bisect(primes, 10 ** (n - 1)):]
    set_of_primes = set(primes)
    pow_of_10 = [10 ** i for i in range(n)]
    #mul_of_pow_of_10 = [[pow_of_10[i] * x for i in range(n)] for x in range(10)]
    # Digit positions n-1 .. 0, where position 0 is the most significant.
    ls = [x for x in range(n - 1, -1, -1)]

    # Unused placeholder.
    def swap(n, combo, i):
        pass
        
    families = []
    # (masked, mask) pairs already examined, so each pattern is tried once.
    processed = set()
    
    for p in primes:
        # Split p into place-value terms; after reverse() index 0 holds the
        # most significant digit's contribution.
        ttsp = []
        for i in range(n):
            ttsp.append(p % 10 * pow_of_10[i])
            p //= 10
        ttsp.reverse()
        
        # Set once a full family was found for this prime.
        cool = False
        for combo in combinations(ls, k):
            tsp = ttsp[:]
            # mask has a 1 at every replaced position; masked is the prime
            # with those positions zeroed.
            mask = 0
            for x in combo:
                mask += pow_of_10[n - x - 1]
                tsp[x] = 0
            masked = sum(tsp)
            if (masked, mask) in processed:
                continue
            else:
                processed.add((masked, mask))
            family = []
            for i in range(10):
                # Put digit i into every replaced position.
                pp = masked + mask * i
                if pp in set_of_primes:
                    family.append(pp)
                    # Stop at exactly l members; larger families are cut off.
                    if len(family) == l:
                        families.append(family)
                        cool = True
                        break
            if cool:
                break
                
    if families:
        smallest = []
        for family in families:
            print(family)
            if not smallest or family[0] < smallest[0]:
                smallest = family
        
        #if len(family) == l:                
        for prime in smallest:
            #print(prime, end=' ')
            print(prime)
Пример #16
0
def send_lines_at(time, channel,  data):
    """send_lines_at(time, channel, data)   send out data at time on channel

    *data* is split on newlines and every line is queued as a
    ``(time + viz_lag, channel, line)`` tuple in the global
    ``data_channels_buffer``.  The new tuples are placed where
    ``(time + viz_lag, channel, "")`` would be inserted by ``bisect``.  The
    buffer entry in the globals object is replaced by a new list.
    """
    g = DfGlobal()
    pending = g["data_channels_buffer"]
    stamp = time + g["viz_lag"]

    queued = [(stamp, channel, line) for line in data.split("\n")]
    cut = bisect(pending, (stamp, channel, ""))
    g["data_channels_buffer"] = pending[:cut] + queued + pending[cut:]
def insertion_sort(A):
    """Sort list *A* in place using binary insertion sort and return it.

    Each element ``A[j]`` is removed and re-inserted at the position that
    ``bisect`` reports for it within the sorted prefix ``A[:j]``, i.e. after
    any elements equal to it.
    """
    for j in range(1, len(A)):
        target = bisect(A, A[j], 0, j)
        # target <= j, so popping A[j] first does not move the insert slot.
        A.insert(target, A.pop(j))

    return A
def solve(xs, field_length, num_ships, ship_length):
    # NOTE(review): this snippet looks truncated -- it has no return and its
    # last statement is not valid Python.  The comments below only describe
    # what is visible.
    def num_accomodate(size):
        # Evaluates (size + 1) // (ship_length + 1); presumably the number of
        # ships that fit into `size` free cells with a gap between ships --
        # confirm against the problem statement.
        return (size + 1) // (ship_length + 1)

    # Sorted positions treated as occupied, starting with the two borders
    # 0 and field_length + 1.
    bombed = [0, field_length+1]
    holes = [field_length]

    for i, x in enumerate(xs, 1):
        # Index at which x would be inserted into `bombed`.
        pos = bisect(bombed, x)

        # NOTE(review): does not parse; `bomed` and `h` are not defined in
        # the visible code -- TODO restore from the original source.
        holes = bomed[max(q - p - 1, 0) for (p, q) in zip(h, h[1:])]
Пример #19
0
def nth(n):
    """Return one decimal digit selected by index *n*.

    Values below 10 are returned unchanged.  Otherwise ``n - 1`` is located
    in the module-level sorted sequence ``base`` (presumably cumulative
    digit counts such as 9, 189, ... -- verify where ``base`` is built).
    The bisect index selects the number ``10 ** idx + position`` and the
    digit of it that is returned.
    """
    if n < 10:
        return n

    offset = n - 1
    idx = bisect(base, offset)
    width = idx + 1  # used as the digit count of each number in this group

    # How many whole numbers to skip past 10 ** idx, and which digit of the
    # resulting number is wanted.
    skipped, digit_pos = divmod(offset - base[idx - 1], width)
    return int(str(10 ** idx + skipped)[digit_pos])
Пример #20
0
def nth(n):
    """Look up a single decimal digit by its index *n*.

    For ``n < 10`` the index itself is returned.  For larger values the
    zero-based offset ``n - 1`` is searched in the module-level sorted
    sequence ``base`` (assumed to hold group boundaries -- TODO confirm),
    which yields the group index ``idx``.  The offset inside the group picks
    a number starting from ``10 ** idx`` and one digit within it.
    """
    if n < 10:
        return n

    zero_based = n - 1
    idx = bisect(base, zero_based)
    digits_per_number = idx + 1

    within_group = zero_based - base[idx - 1]
    number = 10 ** idx + within_group // digits_per_number
    digit_index = within_group % digits_per_number
    return int(str(number)[digit_index])
Пример #21
0
def GetAccTime(tick, tempoList, tpb, prefixSum):
    """Return the accumulated time in seconds at MIDI position *tick*.

    tick      -- absolute tick position to convert.
    tempoList -- sequence of entries where ``entry[0]`` is passed to
                 ``mido.tick2second`` as the tempo and ``entry[1]`` is the
                 tick at which that tempo starts (ascending).
    tpb       -- ticks per beat, forwarded to ``mido.tick2second``.
    prefixSum -- ``prefixSum[i]`` is added as the time already elapsed at
                 the start of tempo entry ``i``.

    The list of start ticks is cached on the function object.  The cache is
    rebuilt whenever a different list object, or a list whose length has
    changed, is passed; caching the first list forever would return times
    computed from the wrong tempo map.

    NOTE(review): if *tick* is smaller than the first start tick, the index
    becomes -1 and the last tempo entry is used -- confirm callers never do
    that.
    """
    cache_is_stale = (
        getattr(GetAccTime, "tickSource", None) is not tempoList
        or len(GetAccTime.tickTable) != len(tempoList)
    )
    if cache_is_stale:
        GetAccTime.tickTable = [entry[1] for entry in tempoList]
        GetAccTime.tickSource = tempoList

    # Last tempo entry that starts at or before `tick`.
    index = bisect(GetAccTime.tickTable, tick) - 1
    tempo, start_tick = tempoList[index][0], tempoList[index][1]
    return prefixSum[index] + mido.tick2second(tick - start_tick, tpb, tempo)
Пример #22
0
def createrect(curdistincthlines):
    """Pair up horizontal lines with equal x-extent into closed patterns.

    curdistincthlines -- iterable of line objects with complex ``start`` and
    ``end`` attributes (``real`` is compared as the x coordinate, ``imag``
    is used as the y coordinate).

    Returns a list of ``Pattern`` objects, each built from an upper line, a
    lower line and the two ``MicroLine`` edges that connect their ends.
    Every line is used in at most one pattern.
    """
    curdistinctpaths = []
    curdistincthlines = sorted(curdistincthlines)
    length = len(curdistincthlines)
    # used[i] becomes 1 once line i is part of a pattern.
    used = length * [0]
    # NOTE: indexed and bisected below, so this relies on map() returning a
    # list (Python 2 behavior).
    curdistincthlinesy = map(attrgetter('start.imag'), curdistincthlines)
    for i in range(0, length):
        if used[i] == 0:
            # Later lines that share both x end points with line i.
            intersectlines = []
            intersectlines_ind = []
            for k in range(i + 1, length):
                if curdistincthlines[k].start.real == curdistincthlines[
                        i].start.real and curdistincthlines[
                            i].end.real == curdistincthlines[k].end.real:
                    intersectlines.append(curdistincthlines[k])
                    intersectlines_ind.append(k)
            intersectlinesy = map(attrgetter('start.imag'), intersectlines)
            # First candidate whose y is strictly greater than line i's y.
            # presumably intersectlinesy is ascending because of the sort
            # above -- verify the ordering defined on the line class.
            nextstart = bisect(intersectlinesy, curdistincthlinesy[i])
            lens = len(intersectlines)
            if nextstart >= lens:
                continue
            # One past the last candidate with the same y as that first one.
            nextend = bisect(intersectlinesy, intersectlinesy[nextstart])
            # bisect never returns more than len(), so this guard cannot fire.
            if nextend > lens:
                continue
            for j in range(nextstart, nextend):
                # used[i] is re-checked so line i is paired only once.
                if used[intersectlines_ind[j]] == 0 and used[i] == 0:
                    if curdistincthlines[i].start.real == intersectlines[j].start.real and \
                            curdistincthlines[
                                i].end.real == \
                            intersectlines[j].end.real:
                        UperLine = curdistincthlines[i]
                        Lowerline = intersectlines[j]
                        # Closed outline: upper line, edge between the two
                        # ends, lower line reversed, edge between the starts.
                        newPath = Pattern(
                            UperLine, MicroLine(UperLine.end, Lowerline.end),
                            MicroLine(Lowerline.end, Lowerline.start),
                            MicroLine(Lowerline.start, UperLine.start))
                        used[intersectlines_ind[j]] = 1
                        used[i] = 1
                        curdistinctpaths.append(newPath)
    return curdistinctpaths
Пример #23
0
    def getValue(self, time):
        """Return the value interpolated at *time* on a periodic track.

        Returns ``None`` when no key times are stored.  Otherwise *time* is
        reduced modulo ``self.getPeriod()`` and ``lerp`` is applied between
        the key before the reduced time and the key after it.  Past the last
        key, the first key shifted forward by one period is the right-hand
        end of the interpolation.
        """
        count = len(self.times)
        if count == 0:
            return None

        t = time % self.getPeriod()
        i = bisect(self.times, t)

        prev_time, prev_value = self.times[i - 1], self.values[i - 1]
        if i == count:
            # Wrap around: interpolate towards the first key of the next cycle.
            next_time = self.times[0] + self.getPeriod()
            next_value = self.values[0]
        else:
            next_time, next_value = self.times[i], self.values[i]

        return lerp(prev_time, prev_value, next_time, next_value, t)
Пример #24
0
 def xMasKranz(self):
     """Publish the current Advent week to the age SDL variable.

     The SDL variable named by ``xMasKranzsdl`` gets its flags set and is
     sent to clients.  The D'ni time is converted to a UTC calendar date,
     and the number of entries of ``self.get_sunday_in_advent()`` that are
     on or before that date is written to index 0 of the variable and
     logged.
     """
     dnitime = PtGetDniTime()
     sdl = PtGetAgeSDL()
     sdl.setFlags(xMasKranzsdl, 1, 1)
     sdl.sendToClients(xMasKranzsdl)

     now = time.gmtime(dnitime)
     dates = self.get_sunday_in_advent()
     today = datetime.date(now.tm_year, now.tm_mon, now.tm_mday)
     adventweek = bisect(dates, today)

     sdl.setIndex(xMasKranzsdl, 0, adventweek)
     PtDebugPrint(('codxMas: Current Adventweek is %d' % (adventweek)))
Пример #25
0
    def get_testing_result(self, contours, color_img, bw_img):
        """Classify size-filtered contours as text or shape.

        contours  -- iterable of OpenCV contours.
        color_img -- colour image, forwarded to ``get_horizontal_crossing``.
        bw_img    -- two-dimensional image; its shape is forwarded to
                     ``get_features``.

        Only contours whose area lies strictly between
        ``TEXT_AREA_THRESHOLD_LOWER`` and ``TEXT_AREA_THRESHOLD_UPPER`` are
        kept.  Returns ``(filtered_contours, is_text_flags)``: the kept
        contours ordered by the left edge of their bounding box, and a
        parallel list where ``is_text_flags[i]`` belongs to
        ``filtered_contours[i]``.  A contour counts as text unless all four
        SVMs predict "shape".
        """
        # NOTE(review): numpy shapes are (rows, cols), so these two names may
        # be swapped -- confirm what get_features expects.
        width, height = bw_img.shape

        is_text_flags = []

        # Left bounds of the kept contours, maintained in ascending order.
        contour_left = []
        filtered_contours = []

        for cnt in contours:
            area = cv2.contourArea(cnt)
            if area > self.TEXT_AREA_THRESHOLD_LOWER and area < self.TEXT_AREA_THRESHOLD_UPPER:
                x, y, w, h = cv2.boundingRect(cnt)

                # Sort contours by their left bound.
                i = bisect(contour_left, x)
                contour_left.insert(i, x)
                filtered_contours.insert(i, cnt)

                feature1, feature2, feature3 = self.get_features(cnt, width, height)

                crossings = self.get_horizontal_crossing(cnt, bw_img, color_img)
                feature1 = np.array([feature1], np.float32)
                feature2 = np.array([feature2], np.float32)
                feature3 = np.array([feature3], np.float32)
                feature4 = np.array(crossings, np.float32)

                is_shape1 = self.svm1.predict(feature1)
                is_shape2 = self.svm2.predict(feature2)
                is_shape3 = self.svm3.predict(feature3)
                is_shape4 = self.svm4.predict(feature4)

                is_text = not (is_shape3 and is_shape4 and is_shape1 and is_shape2)
                # Insert at the contour's own index.  Appending would leave
                # the flags in visiting order while the contours are in
                # left-bound order, so the two lists would not line up.
                is_text_flags.insert(i, is_text)

        return filtered_contours, is_text_flags
Пример #26
0
from math import tan
from bisect import *
from rootsearch import *


def f(x):
    """Function whose roots are searched: x - tan(x)."""
    return x - tan(x)


# Search interval [a, b] scanned in steps of dx.
a, b, dx = 0, 20, 0.01

print('The roots are:')

while True:
    # rootsearch reports the next bracket (x1, x2), or x1 = None when the
    # interval is exhausted.
    x1, x2 = rootsearch(f, a, b, dx)
    if x1 is None:
        print('\ndone!')
        break

    # Continue the next scan from the end of this bracket.
    a = x2
    root = bisect(f, x1, x2, 1)
    if root is not None:
        print(root)
input('Press return to exit')
def FindPeakMutation(inputfile,outputfile,outputfileUTR3,tmp_ref_file,search_radius,promoter_radius,promoter_radius2,genome,adjacent,pwd,SNP,PromoterStop,NearestTwoDirection,UTR3):
   print 'python process start:', datetime.now()
   print 'Load Reference'
   #######################################################
   #update 05182015, reverse strand of reference gtf for UTR3
   fout_reftmp = open(tmp_ref_file, 'w')
   for line in open(pwd + 'gencode.' + genome + '.annotation_GENE_GTF.txt', 'r'):
      line = line.strip().split('\t')
      if line[6]=='+':
         line[6] = '-'
      elif line[6]=='-':
         line[6] = '+'
      else:
         line[6] = '.'
      for i in xrange(0,(len(line)-1)):
         fout_reftmp.write(line[i] + '\t')
      fout_reftmp.write(line[(len(line)-1)] + '\n')
   fout_reftmp.close()
   #######################################################
   
   # Formatted output.
   def out_string(peak, M, record, comment):
      if record[6] == '+':
         distance = a - M
      else:
         distance = b - M
      return out_string_v2(peak, distance, record, comment)

   def out_string_exon(peak, M, record, comment):
      '''
      if m<exona and n<exonb:
         distance = n-exona
      if m>=exona and n<exonb:
         distance = n-m
      if m>=exona and n>=exonb:
         distance = exonb-m
      if m<exona and n>=exonb:
         distance = exonb-exona
      '''
      distance = [exona,exonb]
      return out_string_v2(peak, distance, record, comment)
   
   def out_string_intron(peak, M, record, comment):
      '''
      if m<introna and n<intronb:
         distance = n-introna
      if m>=introna and n<intronb:
         distance = n-m
      if m>=introna and n>=intronb:
         distance = intronb-m
      if m<introna and n>=intronb:
         distance = intronb-introna
      '''
      distance = [introna,intronb]
      return out_string_v2(peak, distance, record, comment)
   def out_string_cds(peak, M, record, comment):
      distance = [cdsa,cdsb]
      return out_string_v2(peak, distance, record, comment)
   def out_string_utr(peak, M, record, comment):
      distance = [utra,utrb]
      return out_string_v2(peak, distance, record, comment)   
   
   def out_string_v2(peak, distance, record, comment):
      chromo, source, TSS, TTS, strand = record[0], record[1], record[3], record[4], record[6]
      gene_name = record[8].strip().split(';')[1].strip().split(' ')[1][1:-1]
      transcript_id = record[8].strip().split(';')[0].strip().split(' ')[1][1:-1]
      gnote = reduce(lambda x, y: x + '\t' + y, [chromo, TSS, TTS, strand, gene_name, source, transcript_id])
      return (peak + '\t' + str(n - m) + '\t' + str(distance) + '\t' + comment + '\t' + gnote)
   
   ######load exon, intron, geneid index information
   geneid_set =[]
   exonband =[]
   intronband =[]
   cdsband =[]
   utrband =[]
   for line in open(pwd + r'/GENCODE_' + genome + '_EXONinfo.txt','r'):
      line = ast.literal_eval(line.strip().split('\t')[1])
      exonband.append(line)
   
   for line in open(pwd + r'/GENCODE_' + genome + '_INTRONinfo.txt','r'):
      line = ast.literal_eval(line.strip().split('\t')[1])
      intronband.append(line)
      
   for line in open(pwd + r'/GENCODE_' + genome + '_CDSinfo.txt','r'):
      line = ast.literal_eval(line.strip().split('\t')[1])
      cdsband.append(line)

   for line in open(pwd + r'/GENCODE_' + genome + '_UTRinfo.txt','r'):
      line = ast.literal_eval(line.strip().split('\t')[1])
      utrband.append(line)
      
   for line in open(pwd + r'/GENCODE_' + genome + '_GENEID_index.txt','r'):
      line = ast.literal_eval(line.strip().split('\t')[0])
      geneid_set.append(line)

   #index gemeid_set
   genemap=dict()
   geneset=set()
   for i in xrange(len(geneid_set)):
      key=geneid_set[i][1]
      genemap[key] = geneid_set[i][0]
      geneset.add(key)
   


   print 'Check Reference files'

   '''
   PURPOSE
      Read and parse transcript file.
   PARAMETERS
      TRANSCRIPT       List of chromosomes; each chromosome itself is a list of transcript strand.
      TRANSCRIPTLeft   List of chromosomes; each chromosome itself is a list of lower index of transcript strand.
   '''
   TRANSCRIPT = []
   TRANSCRIPTLeft = []
   transcript_chrm = {}
   ichrm = newChrmId = -1

   ##########update 05182015,UTR3 reference
   if UTR3 == False:
      reference_basedon_UTR = pwd + 'gencode.' + genome + '.annotation_GENE_GTF.txt'
   elif UTR3 == True:
      reference_basedon_UTR = tmp_ref_file
   ##########
   for line in open(reference_basedon_UTR, 'r'):
      line = line.strip().split('\t')
      if line[0][3:] in transcript_chrm:
         ichrm = transcript_chrm[ line[0][3:] ]
      else:
         newChrmId += 1
         ichrm = newChrmId
         transcript_chrm[ line[0][3:] ] = ichrm
         TRANSCRIPT.append([])
         TRANSCRIPTLeft.append([])
      TRANSCRIPT[ichrm].append( line )
      TRANSCRIPTLeft[ichrm].append( int(line[3]) )

   '''
   PURPOSE
      Sort transcript by strand end id.
   PARAMETERS
      TRANSCRIPTRightSort   List of chromosomes; each chromosome sorted by strand right location.
      TRANSCRIPTRight       List of chromosomes; each chromosome itself is a list of right location.
      TRANSCRIPTRightId     List of chromosomes; each chromosome itself is a list of exon id in the file.
   '''
   TRANSCRIPTRightSort = []
   TRANSCRIPTRight     = []
   TRANSCRIPTRightID   = []

   for transcript in TRANSCRIPT:

      TRANSCRIPTRightSort.append([])
      TRANSCRIPTRight.append([])
      TRANSCRIPTRightID.append([])

      for i in xrange(len(transcript)):
         TRANSCRIPTRightSort[-1].append( (int(transcript[i][4]), i) )

      TRANSCRIPTRightSort[-1].sort(key = lambda x: x[0])
      for key in TRANSCRIPTRightSort[-1]:
         TRANSCRIPTRight[-1].append(key[0])
         TRANSCRIPTRightID[-1].append(key[1])

   '''
   PURPOSE
      Prepare lists of begin id & key for all genes and for protein genes.
   PARAMETERS
      TSBEGIN      List of chromosome; each chromosome itself a list of (arrow begin index, line number in the transcript).
      TSBEGINKEY   List of chromosome; each chromosome itself a list of (arrow begin index).
      TSLINEID     List of chromosome; each chromosome itself a list of (line number in the transcript).

      PCBEGIN, PCBEGINKEY, PCLINEID       Same as above, but for protein codings.
   '''
   TSBEGIN    = []
   TSBEGINKEY = []
   TSLINEID   = []

   PCBEGIN = []
   PCBEGINKEY = []
   PCLINEID = []

   for transcript in TRANSCRIPT:

      TSBEGIN.append([])
      TSBEGINKEY.append([])
      TSLINEID.append([])

      PCBEGIN.append([])
      PCBEGINKEY.append([])
      PCLINEID.append([])

      for i in xrange(len(transcript)):
         record = transcript[i]

         # Append to gene list.
         if record[6] == '+':
            begin = int(record[3])
         else:
            begin = int(record[4])
         TSBEGIN[-1].append( (begin, i) )

         # Append to protein coding gene list.
         if record[1] == 'protein_coding':
            PCBEGIN[-1].append( (begin, i) )

      TSBEGIN[-1].sort(key = lambda x: x[0])
      for key in TSBEGIN[-1]:
         TSBEGINKEY[-1].append(key[0])
         TSLINEID[-1].append(key[1])

      PCBEGIN[-1].sort(key = lambda x: x[0])
      for key in PCBEGIN[-1]:
         PCBEGINKEY[-1].append(key[0])
         PCLINEID[-1].append(key[1])

   '''
   PURPOSE
      Parse the peak file.
   ALGORITHM
      (1) Identify exon;
      (2) Identify intron; CDS;UTR
      (3) Find nearest protein coding;
      (4) Find neighbors within a range.
   '''
   print 'fixed reference done:', datetime.now()

   fout = open(outputfile,'w')
   print 'Start Annotation'

   count = -1
   for line in open(inputfile, 'r'):

      # Neglect the comment line.
      count += 1
      if count == 0:
         sline =line.strip().split('\t')
         fout.write(sline[0] + '\t' +sline[1] + '\t' +sline[2] + '\t' +sline[3] + '\t' )
         fout.write('PeakLength' + '\t' + 'peakMtoStart_Overlap' + '\t' + 'type' + '\t' + 'BidirenctionalRegion' + '\t')
         fout.write('Chr' + '\t' + 'TSS'  + '\t' + 'TTS' + '\t' + 'strand' + '\t' + 'gene_name'  + '\t' + 'source'+ '\t' + 'transID' + '\n')
         continue

      # ---------------------------------------
      # Parse the peak information (m < n).
      #
      #           middle
      #             |
      #     ---m---------n---
      #
      # ---------------------------------------
      line = line.strip().split()
      pkchrm = line[1]
      if pkchrm[0:3].upper() == 'CHR':
         pkchrm = pkchrm[3:]
      m, n = int(line[2]), int(line[3])
      middle = (m + n) / 2
      peakLeft  = m
      peakRight = n

      # The information about the peak to be printed.
      pkhd = reduce(lambda x, y: x + '\t' + y, line[0:4])

      # ---------------------------------------
      # Annotate exon;intron
      # ---------------------------------------

      # Check if the chromosome has been registered.
      if pkchrm not in transcript_chrm:
         print pkchrm, 'Chromosome not registered'
         continue
      else:
         ichrm = transcript_chrm[ pkchrm ]
         transcript = TRANSCRIPT[ichrm]
         transcriptLeft    = TRANSCRIPTLeft[ichrm]
         transcriptRight   = TRANSCRIPTRight[ichrm]
         transcriptRightID = TRANSCRIPTRightID[ichrm]
         
      # A set holding everything that has been marked.
      myNeighbor = set()
      
      # Find the search range.
      iMin = bisect_left(transcriptRight, peakLeft)

      setRight = set()
      for i in xrange(iMin, len(transcriptRight)):
         setRight.add(transcriptRightID[i]) 

      iMax = bisect_right(transcriptLeft, peakRight, lo=iMin+1)
      setLeft = set(range(iMax))

      # Search the range.
      found_exon_protein = False
      found_intron_protein = False

      #search exon region
      for transcriptID in setRight.intersection(setLeft):

         record = transcript[transcriptID]
         a, b = int(record[3]), int(record[4])

         if n < a or m > b:
            continue
         else: 
            geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]            
            if len(exonband[genemap[geneid_B]])>0:
               for i in xrange(len(exonband[genemap[geneid_B]])):
                  exona=exonband[genemap[geneid_B]][i][0]
                  exonb=exonband[genemap[geneid_B]][i][1]
                  if n < exona or m > exonb:
                     pass
                  else:
                     fout.write(out_string_exon(pkhd, middle, record, 'Exon\tNA')+ '\n')

                     #Check when exon finds, make stop label for SNP and adjacent=True
                     if record[1] == 'protein_coding':
                        if adjacent == True or SNP == True:
                           found_exon_protein = True
         myNeighbor.add(transcriptID)
      #search intron region
      if found_exon_protein == False:
         for transcriptID in setRight.intersection(setLeft):
            record = transcript[transcriptID]
            a, b = int(record[3]), int(record[4])
            if n < a or m > b:
               continue
            else:
               geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
               if len(intronband[genemap[geneid_B]])>0:
                  for i in xrange(len(intronband[genemap[geneid_B]])):
                     introna=intronband[genemap[geneid_B]][i][0]
                     intronb=intronband[genemap[geneid_B]][i][1]
                     if n < introna or m > intronb:
                        pass
                     else:
                        fout.write(out_string_intron(pkhd, middle, record, 'Intron\tNA')+ '\n')

                        #Check when intron finds, make stop label for adjacent=True
                        if record[1] == 'protein_coding':
                           if adjacent == True:
                              found_intron_protein = True                  
            myNeighbor.add(transcriptID)
      #search cds region
      if found_exon_protein == False and found_intron_protein == False:
         for transcriptID in setRight.intersection(setLeft):
            record = transcript[transcriptID]
            a, b = int(record[3]), int(record[4])
            if n < a or m > b:
               continue
            else:
               geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
               if len(cdsband[genemap[geneid_B]])>0:
                  for i in xrange(len(cdsband[genemap[geneid_B]])):
                     cdsa=cdsband[genemap[geneid_B]][i][0]
                     cdsb=cdsband[genemap[geneid_B]][i][1]
                     if n < cdsa or m > cdsb:
                        pass
                     else:
                        fout.write(out_string_cds(pkhd, middle, record, 'cds\tNA')+ '\n')
            myNeighbor.add(transcriptID)            
      #search utr region
      if found_exon_protein == False and found_intron_protein == False:
         for transcriptID in setRight.intersection(setLeft):
            record = transcript[transcriptID]
            a, b = int(record[3]), int(record[4])
            if n < a or m > b:
               continue
            else:
               geneid_B=transcript[transcriptID][8].strip().split(';')[0][9:-1]
               if len(utrband[genemap[geneid_B]])>0:
                  for i in xrange(len(utrband[genemap[geneid_B]])):
                     utra=utrband[genemap[geneid_B]][i][0]
                     utrb=utrband[genemap[geneid_B]][i][1]
                     if n < utra or m > utrb:
                        pass
                     else:
                        fout.write(out_string_utr(pkhd, middle, record, 'utr\tNA')+ '\n')                                       
            myNeighbor.add(transcriptID)
#############################
      #search internal-promoter region
      if found_exon_protein == False and found_intron_protein == False:
         for transcriptID in setRight.intersection(setLeft):
            record = transcript[transcriptID]
            a, b = int(record[3]), int(record[4])
            if n < a or m > b:
               continue
            else:
               if record[6] == '+':
                  if m < a + promoter_radius2:
                     fout.write(out_string(pkhd, middle, record, 'Promoter_internal\tNA')+ '\n')
                     myNeighbor.add(transcriptID)
               elif record[6] == '-':
                  if n > b - promoter_radius2:
                     fout.write(out_string(pkhd, middle, record, 'Promoter_internal\tNA')+ '\n')
                     myNeighbor.add(transcriptID)                                                  
#####################################            



      # If found protein_coding. Annotate the next peak.
      if found_exon_protein or found_intron_protein:
         continue
        
      # --------------------------------------------
      # Search promoter in protein coding genes.
      # --------------------------------------------
      pcbeginkey = PCBEGINKEY[ichrm]
      pclineid   = PCLINEID[ichrm]

      pcid = bisect(pcbeginkey, middle)

      # Find the left promoter.
      i = pcid - 1
      found_left_promoter, left_d = False, 0
      left_promoter = []
      while i >= 0 and not found_left_promoter and left_d < promoter_radius:
         record = transcript[ pclineid[i] ]
         a, b = int(record[3]), int(record[4])
         left_d = m - b

         if left_d < promoter_radius:
            if record[6] == '-':
               found_left_promoter = True
               left_promoter.append( record )
            i = i - 1

      # Print the non-nearest left promoters.
      for record in left_promoter[1:]:
         a, b = int(record[3]), int(record[4])
         #print out_string(pkhd, middle, record, 'Promotor_L\tN')
         fout.write(out_string(pkhd, middle, record, 'Promoter_L\tN')+ '\n')
         
      # Find the right promoter.
      i = pcid
      found_right_promoter, right_d = False, 0
      right_promoter = []
      while i < len(pcbeginkey) and not found_right_promoter and right_d < promoter_radius:
         record = transcript[ pclineid[i] ]
         a, b = int(record[3]), int(record[4])
         right_d = a - n

         if right_d < promoter_radius:
            if record[6] == '+':
               found_right_promoter = True
               right_promoter.append( record )
            i = i + 1
      # Print the non-nearest right promoters.
      for record in right_promoter[1:]:
         a, b = int(record[3]), int(record[4])
         #print out_string(pkhd, middle, record, 'Promotor_R\tN')
         fout.write(out_string(pkhd, middle, record, 'Promoter_R\tN') + '\n')

      # If promoters are found on both sides.
      if found_left_promoter and found_right_promoter:
         record = left_promoter[0]
         a, b = int(record[3]), int(record[4])
         #print out_string(pkhd, middle, record, 'Promotor_L\tY')
         fout.write(out_string(pkhd, middle, record, 'Promoter_L\tY') + '\n')

         record = right_promoter[0]
         a, b = int(record[3]), int(record[4])
         #print out_string(pkhd, middle, record, 'Promotor_R\tY')
         fout.write(out_string(pkhd, middle, record, 'Promoter_R\tY')+ '\n')

        
      # Find the right nearest neighbor if no right promoter found.
      if found_left_promoter and not found_right_promoter:
         is_right_bidirectional = False
         i = pcid
         found_right = False
         while i < len(pcbeginkey) and not found_right:
            record = transcript[ pclineid[i] ]
            a, b = int(record[3]), int(record[4])
            if min(a, b) > n:
               found_right = True
               if record[6] == '+':
                  is_right_bidirectional = True
                  #print out_string(pkhd, middle, record, 'Nearest_R\tY')
                  fout.write(out_string(pkhd, middle, record, 'Nearest_R\tY') + '\n')
               else:
                  #print out_string(pkhd, middle, record, 'Nearest_R\tN')
                  fout.write(out_string(pkhd, middle, record, 'Nearest_R\tN') + '\n')
            else:
               i = i + 1
               
         record = left_promoter[0]
         a, b = int(record[3]), int(record[4])
         if is_right_bidirectional:
            #print out_string(pkhd, middle, record, 'Promotor_L\tY')
            fout.write(out_string(pkhd, middle, record, 'Promoter_L\tY') + '\n')
         else:
            #print out_string(pkhd, middle, record, 'Promotor_L\tN')
            fout.write(out_string(pkhd, middle, record, 'Promoter_L\tN') + '\n')
         
      # Find the left nearest neighbor if no left promoter found.
      if not found_left_promoter and found_right_promoter:
         is_left_bidirectional = False
         i = pcid - 1
         found_left = False
         while i >= 0 and not found_left:
            record = transcript[ pclineid[i] ]
            a, b = int(record[3]), int(record[4])
            if max(a, b) < m:
               found_left = True
               if record[6] == '-':
                  is_left_bidirectional = True
                  #print out_string(pkhd, middle, record, 'Nearest_L\tY')
                  fout.write(out_string(pkhd, middle, record, 'Nearest_L\tY') + '\n')
               else:
                  #print out_string(pkhd, middle, record, 'Nearest_L\tN')
                  fout.write(out_string(pkhd, middle, record, 'Nearest_L\tN') + '\n')
            else:
               i = i - 1

         record = right_promoter[0]
         a, b = int(record[3]), int(record[4])
         if is_left_bidirectional:
            #print out_string(pkhd, middle, record, 'Promotor_R\tY')
            fout.write(out_string(pkhd, middle, record, 'Promoter_R\tY') + '\n')
         else:
            #print out_string(pkhd, middle, record, 'Promotor_R\tN')
            fout.write(out_string(pkhd, middle, record, 'Promoter_R\tN') + '\n')

      # If PromoterStop is True and a promoter was found on either side, stop here and move on
      # to the next peak. If PromoterStop is False (or no promoter was found), continue with the
      # nearest-neighbor search within the search radius.
      if PromoterStop == False:
         found_left_promoter = PromoterStop
         found_right_promoter = PromoterStop
      elif PromoterStop == True:
         pass
         
      if found_left_promoter or found_right_promoter:
         continue

      # ----------------------------------------------------
      # Search nearest neighbor in protein coding genes.
      # ----------------------------------------------------

      # Find the left nearest transcript.
      i = pcid - 1
      found_left = False
      while i >= 0 and not found_left:
         lineL = pclineid[i]
         record = transcript[lineL]
         a, b = int(record[3]), int(record[4])
         if max(a, b) < m:
            found_left = True
         else:
            i = i - 1

      # Find the right nearest transcript.
      i = pcid
      found_right = False
      while i < len(pcbeginkey) and not found_right:
         lineR = pclineid[i]
         record = transcript[lineR]
         a, b = int(record[3]), int(record[4])
         if min(a, b) > n:
            found_right = True
         else:
            i = i + 1

      # Check if is bidirectional.
      if found_left and found_right:
         recordL = transcript[lineL]
         recordR = transcript[lineR]

         if recordL[6] == '-' and recordR[6] == '+':

            a, b = int(recordL[3]), int(recordL[4])
            myNeighbor.add(lineL)
            #print out_string(pkhd, middle, recordL, 'Nearest_L\tY')
            fout.write(out_string(pkhd, middle, recordL, 'Nearest_L\tY') + '\n')

            a, b = int(recordR[3]), int(recordR[4])
            myNeighbor.add(lineR)
            #print out_string(pkhd, middle, recordR, 'Nearest_R\tY')
            fout.write(out_string(pkhd, middle, recordR, 'Nearest_R\tY') + '\n')

         else:

            a, b = int(recordL[3]), int(recordL[4])
            if recordL[6] == '+':
               dL = a - middle 
            else:
               dL = b - middle

            a, b = int(recordR[3]), int(recordR[4])
            if recordR[6] == '+':
               dR = a - middle 
            else:
               dR = b - middle

            if NearestTwoDirection == False:
               if abs(dL) < abs(dR):
                  #print out_string_v2(pkhd, dL, recordL, 'Nearest_L\tN')
                  fout.write(out_string_v2(pkhd, dL, recordL, 'Nearest\tN') + '\n')
                  myNeighbor.add(lineL)
               else:
                  #print out_string_v2(pkhd, dR, recordR, 'Nearest_R\tN')
                  fout.write(out_string_v2(pkhd, dR, recordR, 'Nearest\tN') + '\n')
                  myNeighbor.add(lineR)

            if NearestTwoDirection == True:
               fout.write(out_string_v2(pkhd, dL, recordL, 'Nearest_L\tN') + '\n')
               fout.write(out_string_v2(pkhd, dR, recordR, 'Nearest_R\tN') + '\n')
               myNeighbor.add(lineL)
               myNeighbor.add(lineR)

      elif found_left:

         myNeighbor.add(lineL)
         record = transcript[lineL]
         a, b = int(record[3]), int(record[4])
         myNeighbor.add(lineL)
         #print out_string(pkhd, middle, record, 'Nearest_L\tN')
         fout.write(out_string(pkhd, middle, record, 'Nearest\tN') + '\n')

      elif found_right:

         myNeighbor.add(lineR)
         record = transcript[lineR]
         a, b = int(record[3]), int(record[4])
         myNeighbor.add(lineR)
         #print out_string(pkhd, middle, record, 'Nearest_R\tN')
         fout.write(out_string(pkhd, middle, record, 'Nearest\tN')+ '\n')


      # -------------------------------------------
      # Print everything within searching radius.
      # -------------------------------------------
      tsbeginkey  = TSBEGINKEY[ichrm]
      tslineid    = TSLINEID[ichrm]

      lower_bound = max(0, middle - search_radius)
      upper_bound = min(tsbeginkey[-1], middle + search_radius)

      lower_id = bisect(tsbeginkey, lower_bound)
      upper_id = bisect(tsbeginkey, upper_bound, lo = lower_id)

      for key_id in xrange(lower_id, upper_id):
         line_id = tslineid[key_id]
         if line_id not in myNeighbor:
            distance = middle - tsbeginkey[key_id]
            record = transcript[line_id]
            #print out_string_v2(pkhd, distance, record, 'Neighbor\tN')
            fout.write(out_string_v2(pkhd, distance, record, 'Neighbor\tN') + '\n')

   fout.close()

   ####update 05182015 UTR3
   fout_utr = open(outputfileUTR3,'w')
   if UTR3 == False:
      for line in open(outputfile,'r'):
         fout_utr.write(line)
   elif UTR3 == True:
      nline =0
      for line in open(outputfile,'r'):
         nline +=1
         if nline ==1:
            fout_utr.write(line)
         else:
            line = line.strip().split('\t')
            if line[11]=='+':
               line[11] = '-'
            elif line[11]=='-':
               line[11] = '+'
            else:
               line[11] = '.'
            for i in xrange(0,(len(line)-1)):
               fout_utr.write(line[i] + '\t')
            fout_utr.write(line[(len(line)-1)] + '\n')            
   fout_utr.close()   
   ####

   print 'Finish Annotation'
   print 'python process end:', datetime.now()
		print >> sys.stderr, "failed extracting acc info for %s" % r.id

# Print, in FASTA format on stdout, every IGR record that may be used as a
# random (background) sequence.  A record is accepted when its accession has
# no entry in ribo_acc_set, or when it passes no_ambiguous_code(), its length
# (end - start) lies in [min_len, max_len] and it overlaps neither of the two
# intervals that would surround it in ribo_acc_set[acc].
# NOTE(review): assumes ribo_acc_set[acc] is a sorted list of (start, end)
# tuples and that rex captures (accession, start, end) -- confirm upstream.
for file in os.listdir(igr_dirname):
	# Close each FASTA file as soon as its records have been consumed.
	with open(igr_dirname+'/'+file) as handle:
		for r in SeqIO.parse(handle,'fasta'):
			m = rex.match(r.id)
			acc = m.group(1)
			# Keep only the part of the accession before the first '.'.
			i = acc.find('.')
			if i > 0: acc = acc[:i]

			ok_random = False
			if acc in ribo_acc_set:
				if no_ambiguous_code(r.seq.tostring()):
					start = int(m.group(2))
					end = int(m.group(3))
					if min_len <= end-start <= max_len:
						regions = ribo_acc_set[acc]
						# i is the slot where (start, end) would be inserted:
						# regions[i-1] is the left neighbour, regions[i] the
						# right one (either may be missing at the list ends).
						i = bisect(regions,(start,end))
						left_clear = i == 0 or regions[i-1][1] < start
						right_clear = i == len(regions) or end < regions[i][0]
						ok_random = left_clear and right_clear
			else:
				ok_random = True
			if ok_random:
				print(">%s\n%s" % (r.id,r.seq.tostring()))

Пример #29
0
from bisect import *
# Sieve over 0..LIMIT-1.  A number survives when it is congruent to 1 or 6
# modulo 7 and is not a multiple of a smaller survivor; everything else is
# cleared.
LIMIT = 300003
alive = [1] * LIMIT
alive[0] = alive[1] = 0
for cand in xrange(2, LIMIT):
    if not alive[cand] or cand % 7 not in (1, 6):
        alive[cand] = 0
        continue
    # cand survives: strike out every proper multiple of it.
    alive[2*cand::cand] = [0] * len(alive[2*cand::cand])
p = [k for k in xrange(2, LIMIT) if alive[k]]

# Read integers from stdin until a 1 appears; for each one print the
# survivors (in increasing order) that divide it.
while True:
    N = int(raw_input())
    if N == 1:
        break
    # Only survivors up to N+1 are considered as divisors.
    upto = bisect(p, N+1)
    ans = [d for d in p[:upto] if N % d == 0]
    print("%d: %s" % (N, " ".join(str(d) for d in ans)))

Пример #30
0
#coding=utf-8
import bisect

'''
利用二分法维持sorted的list
bisect(list, item[, lo[, hi]]) list为sorted的, lo和hi为list的边界, 默认为整个list 
'''

# A sorted list to experiment on.
aray = list(range(1, 6))

# When an equal item is present, bisect_left returns the index on its left ...
index = bisect.bisect_left(aray, 3)
# ... and bisect (the same function as bisect_right) the index on its right.
index = bisect.bisect(aray, 3)

# The insort functions find the index as above and then insert the item there.
bisect.insort_left(aray, 2.5)   # goes to the left of any equal item
bisect.insort(aray, 3)          # same as insort_right: goes to the right of any equal item

Пример #31
0
 def add(self, value):
     """Insert *value* at its sorted position unless it is already present.

     The container itself is searched and modified, so it must support
     indexing and ``insert`` and must already be sorted.
     """
     i = bisect(self, value)
     # bisect returns the slot to the right of any equal item, so an existing
     # copy of value, if there is one, sits at i - 1.
     if i == 0 or self[i-1] != value:
         self.insert(i, value)
Пример #32
0
#!/usr/bin/env python

def bisect(alist, word):
    """Binary-search the sorted sequence *alist* for *word*.

    Returns an index at which *word* is stored, or None when it is absent
    (including when *alist* is empty).
    """
    left = 0
    right = len(alist) - 1
    # Use <= so the final remaining candidate (left == right) is examined too;
    # with < a one-element range was never checked.
    while left <= right:
        # Floor division keeps the midpoint an integer index.
        now = (left + right) // 2
        if alist[now] == word:
            return now
        elif alist[now] > word:
            right = now - 1
        else:
            left = now + 1
    return None

# Load the word file, one stripped entry per line; the file is closed as soon
# as it has been read.
# NOTE(review): both searches below presume the file is sorted -- confirm.
with open('../data/words.txt') as dict_file:
    word_list = [e.strip() for e in dict_file]

# Look the word up with the bisect() function defined in this script.
print(bisect(word_list, 'boy'))

# From here on the name `bisect` refers to the standard-library module and
# no longer to the function defined in this script.
import bisect

# bisect.bisect returns the insertion point to the right of any equal entry,
# so subtracting 1 gives the index of 'boy' when it is present.
print(bisect.bisect(word_list, 'boy') - 1)
Пример #33
0
from bisect import *

# Print the right-hand insertion point for the smallest and the largest
# element of a sorted list.
n = list(range(1, 6))
for probe in (1, 5):
    print(bisect(n, probe))
Пример #34
0
from bisect import *
# Sieve of Eratosthenes over 0..R-1; afterwards p is the sorted list of the
# numbers that were not crossed out.
R = 200001
p = [1]*R
p[0] = p[1] = 0
for i in xrange(2,int(R**0.5)):
    if p[i]: p[2*i::i] = [0]*(len(p[2*i::i]))
p = [i for i in xrange(2,R) if p[i]]
    
# Two integers are read from stdin, one per line.
K = int(raw_input())
N = int(raw_input())
# mx: longest window length recorded so far; ans: the entry of p reported for it.
ans = mx = 0
# H holds the reduced digit sums of the current run of consecutive entries of
# p; it never contains the same value twice.
H = []
# bisect is bisect_right, so the indices cover the entries of p that are
# greater than K and not greater than N.
for i in xrange(bisect(p,K),bisect(p,N)):
    # Replace the number by the sum of its digits until one character is left.
    s = str(p[i])
    while len(s) > 1:
        s = str(sum(map(int,list(s))))
    if s in H:
        # The current window ends just before index i, so its first entry is
        # p[i-len(H)].  A longer window replaces the record; a window of equal
        # length keeps the larger starting value.
        if   len(H)  > mx: mx,ans = len(H),p[i-len(H)]
        elif len(H) == mx: ans = max(ans, p[i-len(H)])
    # Drop entries from the front until the earlier copy of s is gone.
    while s in H:
        H.pop(0)
    H.append(s)
else:
    # The loop has no break, so this always runs once it finishes: score the
    # window that is still open (its first entry is p[i+1-len(H)]).  mx is not
    # updated here.
    # NOTE(review): if the range above is empty, i still holds a value from the
    # earlier code -- verify that this case cannot occur.
    if   len(H)  > mx: ans = p[i+1-len(H)]
    elif len(H) == mx: ans = max(ans, p[i+1-len(H)])
print ans
Пример #35
0
def grade(score, breakpoints=(60, 70, 80, 90), grades="FDCBA"):
    """Map a numeric *score* to a letter grade.

    *breakpoints* is a sorted sequence of lower bounds and *grades* supplies
    one more entry than *breakpoints*: a score below the first breakpoint gets
    ``grades[0]``, and a score equal to or above breakpoint ``k`` gets at
    least ``grades[k + 1]``.  The default breakpoints are an immutable tuple
    so the shared default can never be modified by accident.
    """
    # bisect returns how many breakpoints are <= score.
    i = bisect(breakpoints, score)
    return grades[i]
Пример #36
0
    def getValue(self, time):
        """Return the entry of ``self.values`` in effect at *time*.

        *time* is first reduced modulo ``self.getPeriod()``.  The entry chosen
        is the one paired with the last element of ``self.times`` that is not
        greater than the reduced time.
        """
        phase = time % self.getPeriod()
        # bisect gives the slot to the right of phase; the entry in effect is
        # the one just before that slot.
        slot = bisect(self.times, phase) - 1
        return self.values[slot]
Пример #37
0
def triplets(a, b, c):
    """Count triples (p, q, r) of distinct values with p <= q >= r.

    p is drawn from *a*, q from *b* and r from *c*; duplicates inside each
    input are ignored.
    """
    lows = sorted(set(a))
    highs = sorted(set(c))
    total = 0
    for pivot in set(b):
        # bisect counts how many distinct values are <= pivot on each side.
        total += bisect(lows, pivot) * bisect(highs, pivot)
    return total
Пример #38
0
 def generate(self):
     """Yield ``list1[i] + list2[e]`` for every qualifying index pair.

     For each index i of ``list3``, e runs over the indices of ``list4``
     starting at the insertion point bisect reports for ``list3[i]``, i.e.
     past every ``list4`` element that is not greater than ``list3[i]``.
     """
     for idx in range(len(self.list3)):
         threshold = self.list3[idx]
         first = bisect(self.list4, threshold)
         for jdx in range(first, len(self.list4)):
             yield self.list1[idx] + self.list2[jdx]
Пример #39
0
	return primes

from bisect import *


# Read the number of queries, then one value per query, remembering the
# largest value seen in m.
# NOTE(review): the print statements below imply Python 2, where input()
# evaluates the typed text -- the values are presumably integers.
times = input()
params = []
m = 0
for i in range(times):
	a = input()
	m = max(m, a)
	params.append(a)

# get_primes is defined outside this excerpt; presumably it returns the primes
# up to m -- verify against its definition.
ar = get_primes(m)

# ar2[k] is the running total ar[0] + ... + ar[k] of the sorted list.
ar.sort()
ar2 = []
accu = 0
for i in ar:
	accu += i
	ar2.append(accu)

# For each query print the sum of the entries of ar that are <= the query
# value, or 0 when there are none.
for item in params:
	n = item 
	# bisect returns how many entries of ar are <= n.
	index = bisect(ar, n)

	if index <= 0:
		print 0
	else:
		print ar2[index - 1]