def rewrite_2(self, rooms):
    """Fill every empty room with the distance to its nearest gate, in place.

    Cells equal to ``0`` are gates and cells equal to ``2147483647`` are empty
    rooms; any other value is left untouched.  A breadth-first search is
    started from all gates at once, so the first time a room is reached is
    also via a shortest path.

    :type rooms: List[List[int]]
    :rtype: None -- ``rooms`` is modified in place.
    """
    if not rooms or not rooms[0]:
        return
    empty = 2147483647
    n_rows, n_cols = len(rooms), len(rooms[0])
    moves = ((1, 0), (-1, 0), (0, 1), (0, -1))

    # Level 0 of the search: every gate.
    frontier = [(r, c)
                for r in range(n_rows)
                for c in range(n_cols)
                if rooms[r][c] == 0]
    while frontier:
        reached = []
        for r, c in frontier:
            dist = rooms[r][c] + 1
            for dr, dc in moves:
                nr, nc = r + dr, c + dc
                # Only still-empty rooms are written, so each is set once.
                if (0 <= nr < n_rows and 0 <= nc < n_cols
                        and rooms[nr][nc] == empty):
                    rooms[nr][nc] = dist
                    reached.append((nr, nc))
        frontier = reached
def wordBreak(self, s, wordDict):
    """Return True when ``s`` can be split into words taken from ``wordDict``.

    Example: ``s = "lcoa"``, ``wordDict = ["lco", "lcoa", "coa"]`` gives the
    table ``[True, False, False, True, True]``: entry ``i`` says whether the
    prefix ``s[:i]`` can be segmented.

    :type s: str
    :type wordDict: List[str]
    :rtype: bool
    """
    # A set gives O(1) membership tests instead of scanning the list for
    # every candidate substring.
    words = set(wordDict)

    # ok[0] is the empty prefix, which is trivially segmentable.
    ok = [True]
    for end in range(1, len(s) + 1):
        # ok[start] must be checked as well: a match for s[start:end] only
        # counts when everything before it has already been segmented,
        # otherwise the leading characters would be left unmatched.
        ok.append(any(ok[start] and s[start:end] in words
                      for start in range(end)))
    return ok[-1]
def update_lr(lr, position):
    """Return a copy of ``lr`` without the entry for ``position``.

    ``lr`` is a list of ``(pos, (left, right))`` entries.  The entry whose
    ``pos`` equals ``position`` is dropped.  Walking right from it, each
    entry's ``left`` value is decremented until the first entry whose
    ``left`` is not positive; that entry and all later ones are kept as-is.
    Walking left from it, the same is done with the ``right`` value.

    :param lr: list of ``(pos, (left, right))`` entries.
    :param position: ``pos`` value of the entry to remove.
    :return: new list in the original order.
    :raises ValueError: if no entry has ``pos == position``.
    """
    # Debug output kept from the original implementation.
    print(lr)
    print(position)

    index = [entry[0] for entry in lr].index(position)

    right_side = []
    shrinking = True
    for entry in lr[index + 1:]:
        shrinking = shrinking and entry[1][0] > 0
        if shrinking:
            right_side.append((entry[0], (entry[1][0] - 1, entry[1][1])))
        else:
            right_side.append(entry)

    # Build the left part nearest-first with append and reverse it once,
    # instead of paying O(n) for insert(0, ...) on every entry.
    left_side = []
    shrinking = True
    for entry in reversed(lr[:index]):
        shrinking = shrinking and entry[1][1] > 0
        if shrinking:
            left_side.append((entry[0], (entry[1][0], entry[1][1] - 1)))
        else:
            left_side.append(entry)
    left_side.reverse()

    return left_side + right_side
def rob(self, nums):
    """Return the largest sum of non-adjacent values when ``nums`` is a ring.

    Idea (extension of problem 198): because the first and last house are
    neighbours, they can never both be robbed.  So solve the straight-line
    problem twice -- once without the last house, once without the first --
    and keep the better result.

    :type nums: List[int]
    :rtype: int
    """
    if not nums:
        return 0
    if len(nums) == 1:
        return nums[0]

    def best_in_line(houses):
        # prev / curr: best loot up to the previous / current house.
        prev = curr = 0
        for value in houses:
            prev, curr = curr, max(value + prev, curr)
        return curr

    return max(best_in_line(nums[:-1]), best_in_line(nums[1:]))
def minSteps(self, n):
    """Return the fewest copy-all / paste operations needed to reach ``n``.

    ``dp[total]`` is the minimum number of operations to hold ``total``
    characters, starting from one.  To reach ``total`` from a divisor
    ``block`` you copy once and paste ``total // block - 1`` times, which
    costs ``total // block`` operations on top of ``dp[block]``.

    :type n: int
    :rtype: int -- ``0`` for ``n <= 1`` (nothing to do).
    """
    if n <= 1:
        return 0

    dp = [0] * (n + 1)
    for total in range(2, n + 1):
        # Starting from the single character always works: 1 copy plus
        # total - 1 pastes.
        best = total
        # Proper divisors larger than 1 can never exceed total // 2.
        for block in range(2, total // 2 + 1):
            if total % block == 0:
                best = min(best, dp[block] + total // block)
        dp[total] = best
    return dp[n]
def update_lr(lr, position):
    """Drop the entry for ``position`` from ``lr`` and adjust its neighbours.

    ``lr`` holds ``(pos, (left, right))`` entries.  Entries after the removed
    one get ``left - 1`` until an entry with a non-positive ``left`` is met;
    entries before it (scanned from nearest to farthest) get ``right - 1``
    until an entry with a non-positive ``right`` is met.  Everything past
    such a stopping entry is returned unchanged.

    :param lr: list of ``(pos, (left, right))`` entries.
    :param position: ``pos`` of the entry to remove.
    :return: a new list, in the same order as ``lr``.
    :raises ValueError: if ``position`` is not present.
    """
    # Debug output kept from the original implementation.
    print(lr)
    print(position)

    def shrink_run(entries, slot):
        # Decrement component `slot` of the (left, right) pair for the
        # leading run of entries where that component is positive.
        adjusted = []
        active = True
        for entry in entries:
            if active and entry[1][slot] > 0:
                pair = list(entry[1][:2])
                pair[slot] -= 1
                adjusted.append((entry[0], tuple(pair)))
            else:
                active = False
                adjusted.append(entry)
        return adjusted

    index = [entry[0] for entry in lr].index(position)
    after = shrink_run(lr[index + 1:], 0)
    # Process the left neighbours nearest-first, then restore the original
    # order with one reversal (avoids repeated O(n) insert(0, ...)).
    before = shrink_run(lr[:index][::-1], 1)
    before.reverse()
    return before + after
def rewrite(self, rooms):
    """Write into each empty room its distance to the closest gate, in place.

    ``0`` marks a gate and ``2147483647`` an empty room; other values are
    never modified.  All gates are queued first, so the breadth-first search
    reaches every room along a shortest path.

    :type rooms: List[List[int]]
    :rtype: None -- ``rooms`` is modified in place.
    """
    from collections import deque

    if not rooms or not rooms[0]:
        return
    empty = 2147483647
    height, width = len(rooms), len(rooms[0])
    steps = ((0, 1), (0, -1), (1, 0), (-1, 0))

    queue = deque((i, j)
                  for i in range(height)
                  for j in range(width)
                  if rooms[i][j] == 0)
    while queue:
        i, j = queue.popleft()
        for di, dj in steps:
            ni, nj = i + di, j + dj
            # A room is overwritten only while still marked empty, so it is
            # queued at most once.
            if 0 <= ni < height and 0 <= nj < width and rooms[ni][nj] == empty:
                rooms[ni][nj] = rooms[i][j] + 1
                queue.append((ni, nj))
def test_create_blank(self):
    """A blank 600x600 image has the requested size and first colour channel."""
    side = 600
    color = (205, 205, 205)
    im = DataMatrixCreator.create_blank(side, side, color)

    self.assertTrue(len(im) == side)
    self.assertTrue(len(im[0]) == side)

    # Only the first channel of every pixel is compared with the colour.
    expected = color[0]
    rows, cols = im.shape[0], im.shape[1]
    for x in range(rows):
        for y in range(cols):
            self.assertTrue(im[x][y][0] == expected)
def count(ary):
    """Add the number of north-facing edges of ``ary`` to ``cnt[0]``.

    Every cell of the first row contributes its value (via ``sum``); in the
    remaining rows a cell equal to 1 contributes one when the cell directly
    above it is not 1.  ``cnt`` is a one-element list taken from the
    surrounding scope and is updated in place.
    """
    for row_idx, row in enumerate(ary):
        if row_idx == 0:
            cnt[0] += sum(row)
            continue
        above = ary[row_idx - 1]
        for col in range(len(ary[0])):
            if row[col] == 1 and above[col] != 1:
                cnt[0] += 1
def rewrite(self, matrix):
    """Return True when every cell equals its upper-left diagonal neighbour.

    :type matrix: List[List[int]]
    :rtype: bool
    """
    mismatch = any(
        matrix[row][col] != matrix[row - 1][col - 1]
        for row in range(1, len(matrix))
        for col in range(1, len(matrix[0]))
    )
    return not mismatch
def show(self):
    """
    Display the current board state in the terminal.

    You should not need to edit this.
    """
    for y in range(3):
        if y > 0:
            print("--+---+--")
        # A space for empty (0), an O for player 1, or an X for player 2;
        # the three cells of a row are separated by " | ".
        print(" | ".join(" OX"[self.get_square(x, y)] for x in range(3)))
def random_population():
    """
    Return a list of POP_SIZE individuals.

    Each individual is a string built from DNA_SIZE successive calls to
    random_char().
    """
    return [
        "".join(random_char() for _ in range(DNA_SIZE))
        for _ in range(POP_SIZE)
    ]
def rewrite(self, matrix):
    """Return the cells from which water can reach both oceans.

    The Pacific touches the top and left edges, the Atlantic the bottom and
    right edges.  Instead of following water downhill from every cell, the
    search starts at each ocean's border and climbs to neighbours of equal
    or greater height; cells reached from both borders are the answer.

    The flood fill uses an explicit stack, so large grids do not hit the
    interpreter's recursion limit.

    :type matrix: List[List[int]]
    :rtype: List[List[int]] -- ``[row, col]`` pairs in row-major order.
    """
    if not matrix or not matrix[0]:
        return []

    n_rows, n_cols = len(matrix), len(matrix[0])
    steps = ((1, 0), (-1, 0), (0, -1), (0, 1))

    def flood(border):
        # Each row must be an independent list, hence the comprehension.
        reached = [[False] * n_cols for _ in range(n_rows)]
        stack = []
        for r, c in border:
            if not reached[r][c]:
                reached[r][c] = True
                stack.append((r, c))
        while stack:
            r, c = stack.pop()
            for dr, dc in steps:
                nr, nc = r + dr, c + dc
                if (0 <= nr < n_rows and 0 <= nc < n_cols
                        and not reached[nr][nc]
                        and matrix[nr][nc] >= matrix[r][c]):
                    reached[nr][nc] = True
                    stack.append((nr, nc))
        return reached

    pacific = flood([(0, c) for c in range(n_cols)] +
                    [(r, 0) for r in range(n_rows)])
    atlantic = flood([(n_rows - 1, c) for c in range(n_cols)] +
                     [(r, n_cols - 1) for r in range(n_rows)])

    return [[r, c]
            for r in range(n_rows)
            for c in range(n_cols)
            if pacific[r][c] and atlantic[r][c]]
def rewrite(self, s):
    """Convert a Roman numeral to an integer.

    A symbol that is smaller than the symbol to its right is subtracted
    (e.g. the ``I`` in ``IV``); every other symbol is added.  An empty
    string yields ``0``.

    :type s: str
    :rtype: int
    :raises KeyError: if ``s`` contains a character that is not a Roman symbol.
    """
    roman = {
        'M': 1000,
        'D': 500,
        'C': 100,
        'L': 50,
        'X': 10,
        'V': 5,
        'I': 1,
    }
    total = 0
    right_value = 0  # value of the symbol to the right; none for the last one
    for symbol in reversed(s):
        value = roman[symbol]
        if value < right_value:
            total -= value
        else:
            total += value
        right_value = value
    return total
def rewrite(self, nums, k):
    """Return the maximum of every length-``k`` window of ``nums``.

    Example with ``nums = [1, 3, -1, -3, 5, 3, 6, 7]`` and ``k = 3``::

        Window position                Max
        ---------------               -----
        [1  3  -1] -3  5  3  6  7       3
         1 [3  -1  -3] 5  3  6  7       3
         1  3 [-1  -3  5] 3  6  7       5
         1  3  -1 [-3  5  3] 6  7       5
         1  3  -1  -3 [5  3  6] 7       6
         1  3  -1  -3  5 [3  6  7]      7

    so the result is ``[3, 3, 5, 5, 6, 7]``.

    :type nums: List[int]
    :type k: int
    :rtype: List[int] -- empty when ``k <= 0`` or ``k > len(nums)``.
    """
    from collections import deque

    if k <= 0 or not nums:
        return []

    result = []
    # Indices of candidates for the window maximum; their values are kept
    # in decreasing order, so the front is always the current maximum.
    window = deque()
    for idx, value in enumerate(nums):
        # Smaller values to the left can never be a maximum again.
        while window and nums[window[-1]] < value:
            window.pop()
        window.append(idx)
        # Drop the front once it has slid out of the window.
        if window[0] == idx - k:
            window.popleft()
        if idx >= k - 1:
            result.append(nums[window[0]])
    return result
def islandPerimeter(self, grid):
    """Return the perimeter of the land cells (value 1) in ``grid``.

    Every land cell starts with four edges.  Each pair of land cells that
    touch shares one edge, which removes one edge from both of them; only
    the upper and left neighbours are inspected so each pair is counted
    once.  This replaces rotating the whole grid four times and counting
    the north-facing edges of each rotation.

    :type grid: List[List[int]]
    :rtype: int
    """
    if not grid or not grid[0]:
        return 0

    perimeter = 0
    for r, row in enumerate(grid):
        for c, cell in enumerate(row):
            if cell != 1:
                continue
            perimeter += 4
            if r > 0 and grid[r - 1][c] == 1:
                perimeter -= 2
            if c > 0 and row[c - 1] == 1:
                perimeter -= 2
    return perimeter
def countComponents(self, n, edges):
    """Return the number of connected components of an undirected graph.

    Nodes are ``0 .. n-1``.  Each component is explored with an explicit
    stack, so long chains of nodes do not hit the recursion limit.

    :type n: int
    :type edges: List[List[int]]
    :rtype: int
    """
    neighbours = [[] for _ in range(n)]
    for a, b in edges:
        neighbours[a].append(b)
        neighbours[b].append(a)

    seen = [False] * n
    components = 0
    for start in range(n):
        if seen[start]:
            continue
        # A node not reached by any earlier search starts a new component.
        components += 1
        seen[start] = True
        stack = [start]
        while stack:
            node = stack.pop()
            for nxt in neighbours[node]:
                if not seen[nxt]:
                    seen[nxt] = True
                    stack.append(nxt)
    return components
def selectedChildren(self):
    """Returns a list of children that are selected."""
    # TODO: hideChildren for Hyperlinks?
    selection = self.querySelection()
    # The list was previously built and then discarded; it is now returned,
    # as the docstring promises.
    return [selection.getSelectedChild(i)
            for i in range(selection.nSelectedChildren)]
def rotateRight(self, head, k):
    """Rotate a singly linked list to the right by ``k`` places.

    The previous version moved the tail to the front one node at a time,
    ``k`` times (O(n * k)).  Here the list length is measured once, ``k`` is
    reduced modulo that length, and the list is cut and re-linked in a
    single pass (O(n)).

    :type head: ListNode
    :type k: int
    :rtype: ListNode -- new head, or ``None`` for an empty list.
    """
    if not head:
        return None
    if k <= 0:
        return head

    # Find the tail and the length.
    length = 1
    tail = head
    while tail.next:
        tail = tail.next
        length += 1

    shift = k % length
    if shift == 0:
        return head

    # The node that becomes the new tail sits length - shift - 1 steps in.
    new_tail = head
    for _ in range(length - shift - 1):
        new_tail = new_tail.next

    new_head = new_tail.next
    new_tail.next = None
    tail.next = head
    return new_head
def rewrite(self, prices):
    """Return the best profit from one buy followed by one sell.

    The profit of holding from day ``a`` to day ``b`` is the sum of the
    day-to-day differences in between, so the task is a maximum-subarray
    problem over those differences: keep a running sum, reset it to zero
    whenever it would go negative, and remember the largest value seen.

    :type prices: List[int]
    :rtype: int -- ``0`` when no profitable trade exists.
    """
    best = running = 0
    for today, yesterday in zip(prices[1:], prices):
        running = max(0, running + today - yesterday)
        best = max(best, running)
    return best
def nextPermutation(self, num):
    """Rearrange ``num`` in place into the next lexicographic permutation.

    Steps:

    1. Scan from the right for the first element that is smaller than its
       right neighbour (the pivot).  Everything after it is non-increasing.
       Example: in ``2 9 4 3 1`` the pivot is ``2``.
    2. Swap the pivot with the rightmost element greater than it:
       ``3 9 4 2 1``.
    3. Reverse the suffix so it becomes ascending: ``3 1 2 4 9``.

    When no pivot exists the list is the last permutation and is reset to
    ascending order.

    :type num: List[int]
    :rtype: List[int] -- the same list object (``None`` for empty input).
    """
    if not num:
        return

    pivot = len(num) - 2
    while pivot >= 0 and num[pivot] >= num[pivot + 1]:
        pivot -= 1

    if pivot >= 0:
        swap = len(num) - 1
        while num[swap] <= num[pivot]:
            swap -= 1
        num[pivot], num[swap] = num[swap], num[pivot]

    # The suffix is non-increasing, so reversing it sorts it ascending
    # without the cost of a real sort.
    num[pivot + 1:] = num[pivot + 1:][::-1]
    return num
def reverse(self, x):
    """Reverse the decimal digits of ``x``, keeping its sign.

    :type x: int
    :rtype: int -- ``0`` when the reversed value does not fit in a signed
        32-bit integer.
    """
    sign = -1 if x < 0 else 1
    result = sign * int(str(abs(x))[::-1])
    return result if -2 ** 31 <= result < 2 ** 31 else 0
def rewrite(self, s):
    """Reverse the order of the space-separated words of ``s`` in place.

    The whole list is reversed first, which puts the words in the right
    order but spells each one backwards; every word is then reversed again
    on its own.

    :type s: List[str] -- list of single characters.
    :rtype: None -- ``s`` is modified in place.
    """
    def flip(lo, hi):
        # Reverse s[lo..hi] (inclusive) in place.
        while lo < hi:
            s[lo], s[hi] = s[hi], s[lo]
            lo += 1
            hi -= 1

    size = len(s)
    flip(0, size - 1)

    word_start = 0
    # idx == size acts as a virtual trailing space that closes the last word.
    for idx in range(size + 1):
        if idx == size or s[idx] == ' ':
            flip(word_start, idx - 1)
            word_start = idx + 1
def maxProfit(self, prices, fee):
    """Return the best profit from any number of trades, paying ``fee`` per sale.

    Idea: track two running values.

    * ``hold`` -- best balance while owning a share.  Buying costs the price,
      so it starts at ``-prices[0]``.
    * ``cash`` -- best balance while owning nothing.  Selling adds the price
      to the balance held while owning and subtracts the fee.

    Both are updated from the previous day's values at once, so a purchase
    made today cannot be sold at today's price within the same step.

    :type prices: List[int]
    :type fee: int
    :rtype: int -- ``0`` for an empty price list.
    """
    if not prices:
        return 0

    hold = -prices[0]
    cash = 0
    for price in prices[1:]:
        hold, cash = max(hold, cash - price), max(cash, hold + price - fee)
    return cash
def partitionLabels(self, S):
    """Split ``S`` into as many parts as possible with no shared letters.

    Example input: ``S = "ababcbacadefegdehijhklij"`` -> ``[9, 7, 8]``.

    A part can be closed at index ``i`` once ``i`` is the last occurrence of
    every letter seen since the part started.

    :type S: str
    :rtype: List[int] -- the length of each part, in order.
    """
    last_seen = {ch: idx for idx, ch in enumerate(S)}

    sizes = []
    start = end = 0
    for idx, ch in enumerate(S):
        # The current part must stretch at least to this letter's last use.
        end = max(end, last_seen[ch])
        if idx == end:
            sizes.append(idx - start + 1)
            start = idx + 1
    return sizes
def countSubstrings(self, s):
    """Count the palindromic substrings of ``s`` by expanding around centres.

    Why ``2 * N - 1`` centres?  Every character can be the middle of an
    odd-length palindrome, and every gap between two neighbouring
    characters can be the middle of an even-length one.  For ``"abc"`` the
    centres are ``a (x) b (x) c``.

    :type s: str
    :rtype: int
    """
    size = len(s)
    total = 0
    for pivot in range(2 * size - 1):
        # Even pivots sit on a character (left == right); odd pivots sit
        # between two characters (right == left + 1).
        left = pivot // 2
        right = left + pivot % 2
        while left >= 0 and right < size and s[left] == s[right]:
            total += 1
            left -= 1
            right += 1
    return total
def areSentencesSimilar(self, words1, words2, pairs):
    """Return True when the two sentences are similar word by word.

    Two words match when they are equal or listed together in ``pairs`` (in
    either order).  Note that similarity is a 1:n relation -- one word may
    be paired with several others -- and it is not transitive.

    :type words1: List[str]
    :type words2: List[str]
    :type pairs: List[List[str]]
    :rtype: bool
    """
    if len(words1) != len(words2):
        return False

    # Store both orientations so a single set lookup answers each query.
    similar = set()
    for first, second in pairs:
        similar.add((first, second))
        similar.add((second, first))

    return all(a == b or (a, b) in similar
               for a, b in zip(words1, words2))
def range(*args):
    """
    Wrapper for range usage, returning an iterator regardless of whether
    python 2 or python 3 is used.

    Accepts the same ``(stop)``, ``(start, stop)`` and ``(start, stop, step)``
    call forms as the builtin and raises ``Error.Error`` for any other
    argument count or when the platform cannot be identified.
    """
    argc = len(args)
    if argc < 1 or argc > 3:
        err_msg = "ERROR: Anguilla range function takes 1, 2, or 3 arguments"
        raise Error.Error(msg=err_msg)

    start, stop, step = 0, 1, 1
    if argc == 1:
        stop = args[0]
    elif argc == 2:
        start, stop = args
    else:
        start, stop, step = args

    if platform.is_python2():
        return builtins.xrange(start, stop, step)
    if platform.is_python3():
        return builtins.range(start, stop, step)

    err_msg = "ERROR: Unknown python platform"
    raise Error.Error(msg=err_msg)
def isStrobogrammatic(self, num):
    """Return True when ``num`` reads the same after a 180-degree rotation.

    Rotation table::

        0 -> 0   (a leading 0 is rejected for multi-digit numbers)
        1 -> 1
        6 -> 9
        9 -> 6
        8 -> 8

    :type num: str
    :rtype: bool
    """
    rotated = {'0': '0', '1': '1', '6': '9', '9': '6', '8': '8'}

    if len(num) > 1 and num[0] == '0':
        return False

    # Walk inwards from both ends; ~i is the mirror index of i (-i - 1).
    # Going up to and including the middle index also covers the centre
    # digit of an odd-length number, which must rotate onto itself
    # (0, 1 or 8).
    for i in range((len(num) + 1) // 2):
        if rotated.get(num[i]) != num[~i]:
            return False
    return True
def monotoneIncreasingDigits(self, N):
    """Return the largest number <= ``N`` whose digits never decrease.

    Observation: for each position, from the most significant one, pick the
    largest digit ``d`` such that the number formed by the digits chosen so
    far followed by ``d`` repeated to the end still does not exceed ``N``.
    Repeating ``d`` gives the smallest monotone completion, so if even that
    is too large the digit has to go down.

    The digit count comes from ``len(str(N))`` rather than a floating-point
    logarithm, which fails for ``N == 0`` and can be off by one near powers
    of ten.

    :type N: int -- non-negative.
    :rtype: int
    """
    n_digits = len(str(N))
    prefix = 0  # value of the digits fixed so far
    for remaining in range(n_digits, 0, -1):
        repunit = int('1' * remaining)  # 11...1 with `remaining` digits
        base = prefix * 10 ** remaining
        digit = 9
        while digit > 0 and base + digit * repunit > N:
            digit -= 1
        prefix = prefix * 10 + digit
    return prefix
def maxCoins(self, nums):
    """Return the most coins obtainable by bursting all balloons.

    Approach:

    1. Pad the list with a 1 on both sides and think in open intervals
       ``(left, right)``.  If balloon ``k`` is the *last* one burst inside
       the interval, the total is
       ``nums[left] * nums[k] * nums[right]`` plus the best of
       ``(left, k)`` plus the best of ``(k, right)``.
    2. By the time ``k`` is burst everything between ``left`` and ``k`` and
       between ``k`` and ``right`` is already gone, which is why the two
       sub-intervals are independent.
    3. With a left and a right bound, an ``n x n`` table indexed by both
       holds the answers.

    The table is filled bottom-up by interval width.  The earlier memoised
    recursion treated a stored 0 as "not computed yet", so inputs containing
    zeros were recomputed over and over.

    :type nums: List[int]
    :rtype: int
    """
    balloons = [1] + list(nums) + [1]
    n = len(balloons)
    dp = [[0] * n for _ in range(n)]

    # Width 1 means adjacent bounds with nothing in between: stays 0.
    for width in range(2, n):
        for left in range(n - width):
            right = left + width
            edge = balloons[left] * balloons[right]
            best = 0
            for k in range(left + 1, right):  # k is the last balloon burst
                best = max(best,
                           edge * balloons[k] + dp[left][k] + dp[k][right])
            dp[left][right] = best
    return dp[0][n - 1]
def rewrite(self, nums):
    """Return the most coins obtainable by bursting all balloons.

    1. Solve sub-problems on open intervals of the padded list.
    2. For an interval ``1 x 1`` everything strictly between the two bounds
       must be burst; the balloon burst last is multiplied by both bounds.

    Results are memoised with ``None`` as the "not computed" marker, so a
    legitimate result of 0 is cached like any other value.

    :type nums: List[int]
    :rtype: int
    """
    padded = [1] + list(nums) + [1]
    size = len(padded)
    memo = [[None] * size for _ in range(size)]

    def best(i, j):
        # Adjacent bounds enclose no balloon.
        if j - i < 2:
            return 0
        if memo[i][j] is None:
            top = 0
            for k in range(i + 1, j):
                gain = padded[i] * padded[k] * padded[j]
                top = max(top, gain + best(i, k) + best(k, j))
            memo[i][j] = top
        return memo[i][j]

    return best(0, size - 1)
def run(self, set_name, processes=5):
    """Process the items of the run registered under ``set_name`` in batches.

    The run is looked up with ``self._get_run``; when one is found its items
    (from ``self._get_items``) are handed to ``self._run_batch`` in slices
    of ``self.batch_size``.

    :param set_name: identifier passed to ``self._get_run``.
    :param processes: worker count given to each ``ThreadPool``; the same
        value is also passed as the third positional argument of ``map``.
    :return: ``True`` once a found run has been processed; implicitly
        ``None`` when ``self._get_run`` returns something falsy.
    """
    run = self._get_run(set_name)
    if run:
        items = self._get_items(run)
        for i in xrange(0, len(items), self.batch_size):
            # NOTE(review): a new pool is created for every batch and is
            # never closed or joined here -- confirm worker threads are not
            # leaked.
            threads = ThreadPool(processes=processes)
            # NOTE(review): if ThreadPool is multiprocessing.pool.ThreadPool,
            # the third positional argument of map() is the chunk size, not
            # a worker count -- confirm this is intended.
            threads.map(self._run_batch, items[i:i+self.batch_size], processes)
            # presumably clears the ORM's per-connection query log between
            # batches -- verify against the `db` import.
            db.reset_queries()
        return True
def get_lrs(stalls):
    """Return ``(index - 1, (left, right))`` for every free stall.

    A stall is free when its value is 0.  Index 0 is never inspected.  For
    each free index ``left_s`` and ``right_s`` supply the two search
    results.  The list is ordered by ascending index.
    """
    found = []
    # Scan from the far end towards index 1, then flip the result once so
    # it comes out in ascending order.
    for idx in range(len(stalls) - 1, 0, -1):
        if stalls[idx] != 0:
            continue
        left = left_s(stalls, idx)    # Left search
        right = right_s(stalls, idx)  # Right search
        found.append((idx - 1, (left, right)))
    found.reverse()
    return found
def find_next(number):
    """Step below the first place where the digits of ``number`` decrease.

    The digits are scanned left to right.  At the first digit that is
    smaller than its predecessor, that digit and everything after it are
    replaced by zeros and 1 is subtracted from the resulting number.

    Returns ``None`` when the digits never decrease.
    """
    digits = [int(ch) for ch in str(number)]
    for idx in range(1, len(digits)):
        if digits[idx - 1] > digits[idx]:
            kept = ''.join(str(d) for d in digits[:idx])
            zeroed = '0' * (len(digits) - idx)
            return int(kept + zeroed) - 1
    return None
def solve(stalls, ppl):
    """Seat ``ppl`` people one after another and report on the last one.

    ``get_lrs`` provides the initial ``(pos, (left, right))`` entries and
    ``occupy`` is called once per person, returning the updated stalls, the
    position just taken and the updated entries.

    :param stalls: sequence in which the value 1 marks an occupied stall
        (see the all-ones check below).
    :param ppl: number of people to seat.
    :return: ``'0 0'`` when every stall is occupied afterwards; otherwise
        ``'<right> <left>'`` taken from the entry whose position equals the
        last position chosen; implicitly ``None`` if no entry matches.
    """
    lr = get_lrs(stalls)
    for i in xrange(ppl):
        # lr = get_lrs(stalls)
        stalls, pos, lr = occupy(lr, stalls)
        # print_stall(stalls)
    # NOTE(review): `pos` is only bound inside the loop, so ppl == 0 with a
    # non-empty `lr` would raise NameError below -- confirm callers always
    # pass ppl >= 1.
    _full = all([x == 1 for x in stalls])
    if _full:
        # print 'FULL'
        return '0 0'
    for x in lr:
        if x[0] == pos:
            # The pair is printed in reverse: second component first.
            return '{} {}'.format(x[1][1], x[1][0])
def delele_all_events_for_date(context, summary):
    """Delete events matching ``summary``, giving up after 10 rounds.

    Each round searches for the events again, opens and closes the 'Edit'
    menu to read the state of its 'Delete Appointment' item, and deletes
    the selected event while that item is enabled.  The loop stops as soon
    as the item is not enabled.
    """
    search_step = u'* Search for events which contain "%s"' % summary
    for _ in range(10):
        # Perform search
        context.execute_steps(search_step)

        # Find the first available appointment
        context.app.instance.menu('Edit').click()
        delete_item = context.app.instance.menu('Edit').menuItem('Delete Appointment')
        states = delete_item.getState().getStates()
        context.app.instance.menu('Edit').click()

        if pyatspi.STATE_ENABLED not in states:
            break
        context.execute_steps(u'* Delete the selected event')
def get_ngrams(sent_iterator, n):
    """
    Yield n-grams over the entire corpus, one tuple at a time.

    Sentence boundaries are respected: every sentence is padded with
    ``n - 1`` start tokens ``(None, "*")`` in front and one
    ``(None, "STOP")`` token at the end before its n-grams are cut.
    ``sent_iterator`` is an iterable whose elements are lists of tokens.
    """
    start_token = (None, "*")
    stop_token = (None, "STOP")
    for sent in sent_iterator:
        # Add boundary symbols to the sentence
        padded = [start_token] * (n - 1)
        padded.extend(sent)
        padded.append(stop_token)

        # Then extract n-grams, returning one at a time
        for begin in range(len(padded) - n + 1):
            yield tuple(padded[begin:begin + n])
def getPosition():
    """Capture one frame from camera 0 and save it as a PNG file.

    The file is written to the working directory as
    ``"gadi" + str(time.time()) + ".png"``.  The camera is released even
    when grabbing or writing a frame fails.

    :return: always ``1``.
    """
    cameraPort = 0
    rampFrames = 30
    camera = cv2.VideoCapture(cameraPort)
    try:
        # Ramp the camera - these frames will be discarded and are only used
        # to allow v4l2 to adjust light levels, if necessary
        for _ in range(rampFrames):
            getImage(camera)
        print("Taking image...")
        cameraCapture = getImage(camera)
        cv2.imwrite("gadi" + str(time.time()) + ".png", cameraCapture)
    finally:
        # Explicit release instead of relying on `del` and garbage
        # collection to free the device.
        camera.release()
    return 1
def delete_all_memos_containing(context, summary):
    """Delete memos matching ``summary``, giving up after 10 rounds.

    Each round searches again, opens and closes the 'Edit' menu to read the
    state of its 'Delete Memo' item and, while that item is enabled, clicks
    it, confirms the dialog and waits half a second.  The loop stops as
    soon as the item is not enabled.
    """
    search_step = u'* Search for "%s" memo' % summary
    for _ in range(10):
        # Perform search
        context.execute_steps(search_step)

        # Find the first available task
        context.app.instance.menu('Edit').click()
        delete_item = context.app.instance.menu('Edit').menuItem('Delete Memo')
        states = delete_item.getState().getStates()
        context.app.instance.menu('Edit').click()

        if pyatspi.STATE_ENABLED not in states:
            break

        context.app.instance.menu('Edit').click()
        context.app.instance.menu('Edit').menuItem('Delete Memo').click()
        confirm = context.app.instance.dialog(' ')
        confirm.button('Delete').click()
        sleep(0.5)
def select_memo(context, name):
    """Search for the memo ``name`` and select it with the Down key.

    After the search the heading of the current memo is compared with
    ``name``.  While it differs, <Down> is pressed to move on.  The search
    fails when the heading returns to the first memo's name (the list has
    wrapped around) or when 10 attempts did not find it.

    On success the matching heading is stored in ``context.selected_memo``;
    on failure ``context.assertion.assertFalse`` reports the problem.
    """
    context.execute_steps(u'* Search for "%s" memo' % name)
    first_memo_name = context.app.instance.child(roleName='heading').text

    # Assume failure so that running out of attempts is reported through
    # the assertion below instead of leaving `fail` unbound.
    fail = True
    for _ in range(10):
        selected_memo = context.app.instance.child(roleName='heading')
        if selected_memo.text == name:
            fail = False
            break
        dogtail.rawinput.keyCombo("<Down>")
        selected_memo = context.app.instance.child(roleName='heading')
        if first_memo_name == selected_memo.text:
            # Back at the first memo: every entry has been visited.
            break

    context.assertion.assertFalse(fail, "Can't find memo named '%s'" % name)
    context.selected_memo = selected_memo
def read_counts(self, corpusfile):
    """Load emission and n-gram counts from an iterable of count lines.

    Every line is split on single spaces.  The first field is the count
    (stored as a float) and the second the record type:

    * ``WORDTAG``: followed by tag and word; stored in
      ``self.emission_counts[(word, tag)]`` and the tag is added to
      ``self.all_states``.
    * ``<k>-GRAM``: followed by the k tags; stored in
      ``self.ngram_counts[k - 1][tags]``.

    Any previous state is discarded and ``self.n`` is reset to 3.
    """
    self.n = 3
    self.emission_counts = defaultdict(int)
    self.ngram_counts = [defaultdict(int) for _ in range(self.n)]
    self.all_states = set()

    for line in corpusfile:
        fields = line.strip().split(" ")
        count = float(fields[0])
        kind = fields[1]
        if kind == "WORDTAG":
            ne_tag, word = fields[2], fields[3]
            self.emission_counts[(word, ne_tag)] = count
            self.all_states.add(ne_tag)
        elif kind.endswith("GRAM"):
            order = int(kind.replace("-GRAM", ""))
            self.ngram_counts[order - 1][tuple(fields[2:])] = count
def getPosition():
    """Capture one frame from camera 0 and pass it to the state resolver.

    The camera is released even when grabbing a frame or resolving the
    state fails.

    :return: whatever ``state_resolver.is_in_frame`` returns for the
        captured frame.
    """
    cameraPort = 0
    rampFrames = 30
    camera = cv2.VideoCapture(cameraPort)
    try:
        # Ramp the camera - these frames will be discarded and are only used
        # to allow v4l2 to adjust light levels, if necessary
        for _ in range(rampFrames):
            getImage(camera)
        print("Taking image...")
        cameraCapture = getImage(camera)
        ipAns = state_resolver.is_in_frame(cameraCapture)
    finally:
        # Explicit release instead of relying on `del` and garbage
        # collection to free the device.
        camera.release()
    return ipAns
def train(self, corpus_file):
    """
    Count n-gram frequencies and emission probabilities from a corpus file.

    The corpus is read through ``simple_conll_corpus_iterator`` and
    ``sentence_iterator`` and cut into ``self.n``-grams by ``get_ngrams``.
    Counts are accumulated into ``self.ngram_counts`` and
    ``self.emission_counts``.
    """
    ngram_iterator = get_ngrams(
        sentence_iterator(simple_conll_corpus_iterator(corpus_file)), self.n)

    for ngram in ngram_iterator:
        # Sanity check: n-gram we get from the corpus stream needs to have
        # the right length.  (The message used to call len() with two
        # arguments, which raised TypeError instead of reporting the sizes.)
        assert len(ngram) == self.n, \
            "ngram in stream is %i, expected %i" % (len(ngram), self.n)

        tagsonly = tuple([ne_tag for word, ne_tag in ngram])  # retrieve only the tags
        for i in range(2, self.n + 1):  # Count NE-tag 2-grams..n-grams
            self.ngram_counts[i - 1][tagsonly[-i:]] += 1

        if ngram[-1][0] is not None:  # If this is not the last word in a sentence
            self.ngram_counts[0][tagsonly[-1:]] += 1  # count 1-gram
            self.emission_counts[ngram[-1]] += 1  # and emission frequencies

        # Need to count a single n-1-gram of sentence start symbols per sentence
        if ngram[-2][0] is None:  # this is the first n-gram in a sentence
            self.ngram_counts[self.n - 2][tuple((self.n - 1) * ["*"])] += 1
def __init__(self, input=None, label=None,
             n_ins=2, hidden_layer_sizes=[3, 3], n_outs=2,
             numpy_rng=None):
    """Stack sigmoid/RBM layer pairs and put a logistic regression on top.

    :param input: data fed to the first layer.
    :param label: targets given to the logistic-regression output layer.
    :param n_ins: size of the first layer's input.
    :param hidden_layer_sizes: output size of each hidden layer; must not be
        empty.  The default list is only read here, never modified.
    :param n_outs: size of the output layer.
    :param numpy_rng: random state shared by the hidden layers; a
        ``RandomState(1234)`` is created when omitted.
    """
    self.x = input
    self.y = label

    self.sigmoid_layers = []
    self.rbm_layers = []
    self.n_layers = len(hidden_layer_sizes)  # = len(self.rbm_layers)

    if numpy_rng is None:
        numpy_rng = numpy.random.RandomState(1234)

    assert self.n_layers > 0

    # construct multi-layer
    for depth, n_hidden in enumerate(hidden_layer_sizes):
        if depth == 0:
            # The first layer sees the raw input and is paired with a CRBM
            # (continuous-valued inputs).
            input_size = n_ins
            layer_input = self.x
            rbm_class = CRBM
        else:
            # Later layers are fed a sample from the previous hidden layer.
            input_size = hidden_layer_sizes[depth - 1]
            layer_input = self.sigmoid_layers[-1].sample_h_given_v()
            rbm_class = RBM

        sigmoid_layer = HiddenLayer(input=layer_input,
                                    n_in=input_size,
                                    n_out=n_hidden,
                                    numpy_rng=numpy_rng,
                                    activation=sigmoid)
        self.sigmoid_layers.append(sigmoid_layer)

        # W and the hidden bias are shared with the sigmoid layer.
        rbm_layer = rbm_class(input=layer_input,
                              n_visible=input_size,
                              n_hidden=n_hidden,
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b)
        self.rbm_layers.append(rbm_layer)

    # layer for output using Logistic Regression
    self.log_layer = LogisticRegression(
        input=self.sigmoid_layers[-1].sample_h_given_v(),
        label=self.y,
        n_in=hidden_layer_sizes[-1],
        n_out=n_outs)

    # finetune cost: the negative log likelihood of the logistic regression layer
    self.finetune_cost = self.log_layer.negative_log_likelihood()
def test_cdbn(pretrain_lr=0.01, pretraining_epochs=1000, k=1,
              finetune_lr=0.01, finetune_epochs=1000):
    """Train a CDBN on the normalised training CSV and print its RMSE.

    The first 38 columns of the CSV are used as network input and the next
    10 as the label.  After pre-training and fine-tuning, the network
    predicts on the training data itself; each output row is thresholded at
    half of its column maximum, read as a binary number (first column most
    significant), divided by 100 and used as a power of ten to obtain a
    revenue estimate.  That estimate is compared with column 43 of the
    un-normalised training CSV.

    :param pretrain_lr: learning rate for pre-training.
    :param pretraining_epochs: number of pre-training epochs.
    :param k: value forwarded to ``dbn.pretrain`` as ``k`` (it used to be
        ignored in favour of a hard-coded 1).
    :param finetune_lr: learning rate for fine-tuning.
    :param finetune_epochs: number of fine-tuning epochs.
    """
    # `with` closes the file even when parsing fails.
    with open('../data/NOT_VALID/train-numericdate-norm-cdbn.csv', 'r') as filez:
        data = csv.reader(filez, delimiter=',')
        alldata = [[float(value) for value in row] for row in data]

    xydata = numpy.array(alldata)
    x = xydata[:, 0:38]
    y = xydata[:, 38:48]

    rng = numpy.random.RandomState(123)

    # construct DBN
    dbn = CDBN(input=x, label=y, n_ins=38, hidden_layer_sizes=[30, 30, 30],
               n_outs=10, numpy_rng=rng)

    # pre-training (TrainUnsupervisedDBN)
    dbn.pretrain(lr=pretrain_lr, k=k, epochs=pretraining_epochs)

    # fine-tuning (DBNSupervisedFineTuning)
    dbn.finetune(lr=finetune_lr, epochs=finetune_epochs)

    # TODO predicting train dataset (for now)
    out = dbn.predict(x)

    print("INPUT")
    for x_ in x:
        print(list(x_))
    print("TARGET")
    for y_ in y:
        print(list(y_))
    print("PREDICTION")
    for o_ in out:
        print(list(o_))

    # normalize outputs: 1.0 where a value reaches half of its column maximum
    thr = out.max(axis=0) / 2
    out_norm = numpy.zeros((len(out), len(thr)))
    for i in range(len(out)):
        for j in range(len(out[i])):
            out_norm[i][j] = (out[i][j] >= thr[j])
    print(out_norm)

    # compute revenues
    rev = [math.pow(10,
                    numpy.sum(2 ** numpy.arange(len(out_)) * out_[::-1]) / 100)
           for out_ in out_norm]
    print(rev)

    # compute RMSE
    with open('../data/train-numericdate.csv', 'r') as filez2:
        data2 = csv.reader(filez2, delimiter=',')
        next(data2, None)  # skip the first row before reading the data rows
        alldata2 = numpy.array([row for row in data2])
    revenue = [int(value) for value in alldata2[:, 43]]
    print(revenue)
    print("RMSE = " + str(math.sqrt(mse(rev, revenue))))
def __iter__(self):
    """Yield ``self.size`` single bytes, starting at offset ``self.start``.

    The file is positioned once before reading; every item is obtained with
    ``read(self.file, 1)`` and must have length 1.
    """
    self.file.seek(self.start)
    remaining = self.size
    while remaining > 0:
        byte = read(self.file, 1)
        assert len(byte) == 1
        yield byte
        remaining -= 1
def quick_hist(arrs, range=None, nbins=None, weights=None, getPos=False):
    """
    N-dimensional histogram routine.

    Example:
    > xs=np.random.uniform(size=100); ys= np.random.uniform(size=100)
    > hh = quick_hist((xs,ys), range=[(0,1),(0,1)], nbins=[20,10])

    Arguments:
        arrs  -- tuple of N arrays, all of the same length
        range -- list of N (low, high) tuples; defaults to the (min, max)
                 of each input array
        nbins -- list of N bin counts; defaults to 10 bins per dimension

    Keywords:
        weights -- weighting for the histogram (same length as the inputs)
        getPos  -- also return the 1D vector of the positions within the
                   flattened histogram (-1 if the point is outside the range)

    Returns the histogram with shape ``nbins``; with ``getPos`` a tuple
    ``(histogram, positions)``.

    A value is counted in bin ``floor((value - low) * nbins / (high - low))``
    when that index lies in ``[0, nbins)``; values equal to ``high`` fall
    outside.

    Raises ValueError when the lengths of ``nbins``, ``range``, the input
    arrays or ``weights`` do not agree.

    A compiled ``scipy.weave`` kernel is tried first; when it is not
    available a message is printed and an equivalent numpy implementation
    is used.
    """
    nd = len(arrs)
    if range is None:
        # Each dimension gets the extent of its own array (this used to
        # take the extent of the first array for every dimension).
        range = [(arr.min(), arr.max()) for arr in arrs]
    if nbins is None:
        nbins = [10] * nd

    if len(nbins) != nd:
        raise ValueError('The array of nbins MUST have the same length as the number of input data vectors')
    if len(range) != nd:
        raise ValueError('The array of ranges MUST have the same length as the number of input data vectors')

    nx = len(arrs[0])
    for curarr in arrs:
        if len(curarr) != nx:
            raise ValueError('All the input arrays MUST have the same length!')
    if weights is not None:
        if len(weights) != nx:
            raise ValueError('The weights array MUST have the same length as the input arrays')

    # convert all the bins into integers
    nbins = [int(_tmp) for _tmp in nbins]

    poss = np.zeros((nx,), dtype=np.int64)    # flattened bin index per point
    ind = np.ones_like(arrs[0]).astype(bool)  # True while a point is in range

    # Row-major strides: the last dimension varies fastest.
    mults = []
    stride = 1
    for cur_nbins in reversed(nbins):
        mults.append(stride)
        stride *= cur_nbins
    mults.reverse()

    code1 = """
    int i, cur_pos;
    double curfac = cur_nbins * 1./ (cur_range1-cur_range0);
    for (i=0; i<nx; i++)
    {
        cur_pos = (int)floor( ( cur_arr(i)-cur_range0) * curfac);

        if ((cur_pos>=0 ) && (cur_pos<cur_nbins))
        {
            poss(i)=poss(i)+cur_pos*cur_mult;
        }
        else
        {
            ind(i)=false;
        }
    }"""
    # The weave kernels read these local variables by name, so the names
    # below must stay in sync with the lists passed to inline().
    for cur_arr, cur_range, cur_nbins, cur_mult in zip(arrs, range, nbins, mults):
        cur_arr = np.ascontiguousarray(cur_arr, dtype=np.float64)
        cur_range0 = float(cur_range[0])
        cur_range1 = float(cur_range[1])
        try:
            scipy.weave.inline(code1,
                ['cur_range0', 'cur_range1', 'cur_nbins', 'poss', 'ind', 'nx',
                 'cur_arr', 'cur_mult'],
                type_converters=scipy.weave.converters.blitz)
        except Exception:
            print("Sorry the compiled version didn't work :(")
            cur_pos = (cur_arr - cur_range0) * (cur_nbins * 1. / (cur_range1 - cur_range0))
            cur_pos = np.floor(cur_pos).astype(np.int64)
            ind &= ((cur_pos >= 0) & (cur_pos < cur_nbins))
            poss += cur_pos * cur_mult

    poss = poss[ind]
    newlen = len(poss)
    if weights is None:
        weightsind = None
        weights_str = '1'
    else:
        # Weights of the points that survived the range cut, aligned with poss.
        weightsind = np.asarray(weights)[ind]
        weights_str = 'weightsind(i)'
    if not getPos:
        del ind

    res = np.zeros(np.array(nbins, dtype=np.int64).prod())

    code = """
    int i;
    for (i=0; i<newlen; i++)
    {
        res(poss(i)) = res(poss(i)) + %s;
    }""" % weights_str

    try:
        if weights is None:
            scipy.weave.inline(code, ['res', 'poss', 'newlen'],
                type_converters=scipy.weave.converters.blitz)
        else:
            scipy.weave.inline(code, ['res', 'poss', 'newlen', 'weightsind'],
                type_converters=scipy.weave.converters.blitz)
    except Exception:
        print("Sorry the compiled version didn't work :(")
        if newlen:
            # bincount accumulates per flattened bin in one vectorised call.
            # The filtered weights are used so they line up with poss (the
            # old loop indexed the unfiltered weights).
            res += np.bincount(poss, weights=weightsind, minlength=len(res))

    if not getPos:
        return res.reshape(nbins)
    else:
        H = np.zeros(len(ind), dtype=np.int64) - 1
        H[ind] = poss
        return res.reshape(nbins), H
def __iter__(self):
    """Yield ``self[0]`` through ``self[self.number_of_words - 1]`` in order."""
    index = 0
    total = self.number_of_words
    while index < total:
        yield self[index]
        index += 1
def __init__(self, n=3):
    """Create empty count tables for an n-gram model.

    :param n: order of the largest n-gram; must be at least 2.

    ``ngram_counts`` holds one counter per order (index ``k - 1`` for
    k-grams); ``emission_counts`` and ``all_states`` start out empty.
    """
    assert n >= 2, "Expecting n>=2."
    self.n = n
    self.all_states = set()
    self.emission_counts = defaultdict(int)
    self.ngram_counts = [defaultdict(int) for _ in range(n)]