def testUriChain():
    from serializers import obj2json
    uri = "testObjects/67a01e17-9e8e-4d99-973b-170d74110a4b"
    ret = uriProcessingChain(uri, uriChain)
    ret = obj2json(ret)
    pp(ret)
    ret = type(ret)
def getPicID(self, profile_id):
    url = "https://m.facebook.com/app_scoped_user_id/" + profile_id
    self.browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'
    )]
    html = self.browser.open(url).read()
    #pp(html)
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO.StringIO(html), parser)
    ids = tree.xpath("//div[@class='bl bm']/a/@href")
    if not ids:
        ids = tree.xpath("//div[@class='bk bl']/a/@href")
    if not ids:
        ids = tree.xpath("//div[@class='de']/a/@href")
    if not ids:
        ids = tree.xpath("//div[@class='bi cv']/a/@href")
    isID = re.search(r'id=[0-9]*', ids[0], re.M | re.I)
    if isID:
        pid = isID.group()
        pp(pid)
        return pid.split('=')[1]
    else:
        isID = re.search(r'profile_id=[0-9]*', ids[0], re.M | re.I)
        pid = isID.group()
        return pid.split('=')[1]
def getFromJson(number):
    import json
    f = open(str(number) + "/flft.json").read()
    Flftfiles = json.loads(f)
    from collections import defaultdict
    all_flft = defaultdict(int)
    Flfts = []
    for count, flft_gram in enumerate(Flftfiles):
        flftGroup = flft_gram[0]
        file = flft_gram[1]
        flftGroup2 = []
        for flft in flftGroup:
            if remove(flft):
                continue
            flft = re.sub(u"第二百二五十三条", u"第二百五十三条", flft)
            flft = re.sub(u"第三十二条(五)", u"第三十二条", flft)
            flft = re.sub(u"第一审民商事案件标准的通知》第三条", u"第三条", flft)
            all_flft[re.sub(u"^.*?第", u"第", flft)] += 1
            flftGroup2.append(flft)
            # all_flft[re.sub(u"第.*?条", u"", flftGroup[idx])] += 1
            # all_flft[flftGroup[idx]] += 1
        Flfts.append([flftGroup2, file])
        if (count + 1) % 500 == 0:
            print "进度:", (count * 1.0 + 1) / 11953
    pp(all_flft)
    f = open(str(number) + "/flftDC.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
def getTestObjectByApi(log=False):
    url = 'http://192.168.99.100:8081/test-man/api/v0/testObjects?filter=abc'
    resp = requests.get(url)
    testObjects = json.loads(resp.text)
    testObjectUri = testObjects[0]['uri']
    resp = requests.get(testObjectUri)
    testObject = json.loads(resp.text)
    if log:
        pp(testObject)
    return testObject
def make_tag():
    if confirm(yellow("Tag this release?"), default=True):
        print(green("The last 5 tags were: "))
        tags = local('git tag | tail -n 20', capture=True)
        pp(sorted(tags.split('\n'), compare_versions, reverse=True))
        prompt("New release tag in the format vX.Y[.Z]?", 'tag',
               validate=VERSION_PATTERN)
        local('git tag -as %(tag)s' % env)
        local('git push --tags origin', capture=True)
        local('git fetch --tags origin', capture=True)
def testSet(self):
    set1 = set(['John', 'Jane', 'Jack', 'Janice'])
    pp(set1)
    expected = '''
[
    "Jane",
    "Janice",
    "John",
    "Jack"
]
'''.strip()
    self.assertEqual(pp_str(set1), expected)
def compare2Std():
    import json
    f2 = open("Flft_55000_2.json").read()
    Flftfiles2 = json.loads(f2)
    for count, flft_gram in enumerate(Flftfiles2[8000:8500]):
        flftGroup = flft_gram[0]
        for c, flft in enumerate(flftGroup):
            flft = re.sub(u"第.*?条$", u"", flft)
            stdFlft = getStdFlft(flft)
            stdFlft2 = getStdFlft2(flft)
            if stdFlft != stdFlft2:
                pp([flft, stdFlft, stdFlft2])
def testObject(self):
    class MyClass(object):
        def __str__(self):
            return "<MyClass>"
    ls = list([1, MyClass()])
    pp(ls)
    expected = '''
[
    1,
    "<MyClass>"
]
'''.strip()
    self.assertEqual(pp_str(ls), expected)
def testClassifier(x_train, y_train, x_test, y_test, clf):
    """
    this method will first train the classifier on the training data
    and will then test the trained classifier on test data. Finally
    it will report some metrics on the classifier performance.

    Parameters
    ----------
    x_train: np.ndarray
        train data matrix
    y_train: list
        train data label
    x_test: np.ndarray
        test data matrix
    y_test: list
        test data label
    clf: sklearn classifier object implementing fit() and predict() methods

    Returns
    -------
    metrics: list
        [training time, testing time, recall and precision for every class,
         macro-averaged F1 score]
    """
    #metrics = []
    start = dt.now()
    clf.fit(x_train, y_train)
    end = dt.now()
    print 'training time: ', (end - start)

    # add training time to metrics
    #metrics.append(end-start)

    start = dt.now()
    yhat = clf.predict(x_test)
    end = dt.now()
    print 'testing time: ', (end - start)

    # add testing time to metrics
    #metrics.append(end-start)

    print 'classification report: '
    #print classification_report(y_test, yhat)
    pp(classification_report(y_test, yhat))

    print 'f1 score'
    print f1_score(y_test, yhat, average='weighted')

    print 'accuracy score'
def get_np():
    connects = []
    for cols in make_list():
        connect = []
        for col in cols:
            if col["pos"] == "名詞":
                connect.append(col["surface"])
            else:
                if len(connect) > 1:
                    connects.append("".join(connect))
                connect = []
        if len(connect) > 1:
            connects.append("".join(connect))
    np = set(connects)
    pp(sorted(np, key=connects.index))
def find_gunosy_accounts():
    api = tweepy.API()
    gunosy_accounts = set()
    for tweet in tweepy.Cursor(api.search, q=search_query, rpp=100,
                               result_type="recent", include_entities=True,
                               lang="ja").items():
        for entity in tweet.entities['urls']:
            url = entity['expanded_url']
            result = url_analyzer.search(url)
            if result:
                gunosy_accounts.add(result.group(1))
    gunosy_accounts = list(gunosy_accounts)
    print "----use following accounts-----"
    pp(gunosy_accounts)
    return gunosy_accounts
def test():
    '''
    test verbena and rose, the modified apps
    '''
    apps = [
        'verbena',
        'rose',
    ]
    with settings(warn_only=True):
        with virtualenv():
            with cd("%s/arxer" % DIR):
                for app in apps:
                    result = run('./manage.py test %s' % app)
                    if result.failed and not confirm("Tests failed. Continue anyway?"):
                        abort("Aborting at request of user")
                    else:
                        from prettyprint import pp
                        pp(result)
def solve():
    import json
    f2 = open("Flft_55000_2.json").read()
    Flftfiles2 = json.loads(f2)
    f11 = open("Flft_55000_11.json").read()
    Flftfiles11 = json.loads(f11)
    for count, flft_gram in enumerate(Flftfiles2[8000:8500]):
        flftGroup = flft_gram[0]
        flftGroup11 = Flftfiles11[count][0]
        if len(flftGroup) != len(flftGroup11):
            continue
        for c, flft in enumerate(flftGroup):
            flft = re.sub(u"第.*?条$", u"", flft)
            flft11 = re.sub(u"第.*?条$", u"", flftGroup11[c])
            flft11 = re.sub(u"\(.*?\)$", u"", flft11)
            stdFlft = getStdFlft(flft)
            if flft11 != stdFlft:
                pp([flft, stdFlft, flft11])
def testNestedSet(self):
    set1 = list([6, set([2, 1, 3]), 5, [3, 1, 2], None])
    pp(set1)
    expected = '''
[
    6,
    [
        1,
        2,
        3
    ],
    5,
    [
        3,
        1,
        2
    ],
    null
]
'''.strip()
    self.assertEqual(pp_str(set1), expected)
def apiRoot(request, uri):
    obj = uriProcessingChain(uri, uriChain)
    print obj
    if None == obj:
        response = HttpResponse()
        response.status_code = 404
        return response
    ret = obj2json(obj)
    objType = type(obj)
    print 'objType', objType
    pp(ret)
    injector = None
    if obj in uriInjectors:
        injector = uriInjectors[obj]
    elif objType in uriInjectors:
        injector = uriInjectors[objType]
    ret = injectUri(injector, ret, getUriPrefixByRequest(request))
    ret = json.dumps(ret, ensure_ascii=False)
    response = HttpResponse(ret)
    return response
def writeFlft(number):
    from collections import defaultdict
    all_flft = defaultdict(int)
    file_dir = u"E:/毕业设计/素材/9015/"
    f = open(str(number) + "/files.json").read()
    Files = json.loads(f)
    file_num = len(Files)
    Flfts = []
    for count, file in enumerate(Files):
        flftGroup = getFlft(file_dir + file)
        for idx, flft in enumerate(flftGroup):
            flftGroup[idx] = flft
            flftGroup[idx] = normalize_flft(flftGroup[idx])
            all_flft[re.sub(u"第.*?条", u"", flftGroup[idx])] += 1
            # all_flft[flftGroup[idx]] += 1
        Flfts.append([flftGroup, file])
        if (count + 1) % 500 == 0:
            print "进度:", (count * 1.0 + 1) / file_num
    pp(all_flft)
    f = open(str(number) + "/flft.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
def messagegen(self):
    jsonStr = self.getChat(self.minTimestampMs)
    responseItems = []
    try:
        responseItems = json.loads(jsonStr)
    except ValueError:
        pass

    if 'result' not in responseItems:
        self.errorcount += 1
        self.adjust_sleep(0)
        if self.errorcount > self.MAX_ERRORS:
            print("error counter exceeded, exiting...")
            raise UnexpectedResultException(jsonStr)
            # sys.exit(1)
        if 'error' in responseItems:
            print(responseItems)
        else:
            self.errorcount += 1
    else:
        self.errorcount = 0
        responseItemsOrderedAsc = responseItems['result']
        responseItemsOrderedAsc.reverse()
        for message in responseItemsOrderedAsc:
            yield message
            self.minTimestampMs = message[1] + 1
        prettyprint.pp(responseItems)
        tm = time.localtime(self.minTimestampMs / 1000.0)
        print("%04d/%02d/%02d %02d:%02d:%02d" % (tm.tm_year, tm.tm_mon, tm.tm_mday,
                                                 tm.tm_hour, tm.tm_min, tm.tm_sec))
        counts = len(responseItemsOrderedAsc)
        self.adjust_sleep(counts)
        print("counts: %d" % len(responseItemsOrderedAsc))
        print("sleep_sec: %d" % self.sleep_sec)
def main():
    """ Main Function loop """
    mydevice = Device()
    mydevice.connect(ASA_CREDENTIALS)
    myroutingtable = ASARoutingTable()
    myroutingtable.setroutingtable(mydevice)
    print "\nPrint Routing Table"
    myroutingtable.printtable()
    print "\nWhat interface does {0} exit".format(TRACEDATA['destip'])
    print myroutingtable.whatinterface(TRACEDATA['destip'])
    mypackettracer = PacketTracer()
    print "\nRunning Packet Trace"
    output = mypackettracer.runpackettracer(mydevice, TRACEDATA)
    _, results = mypackettracer.processtracer(output)
    pp(results)
def getFromJson4(dir):
    import json
    f = open(str(dir) + "/flftDC.json").read()
    Flftfiles = json.loads(f)
    from collections import defaultdict
    all_flft = defaultdict(int)
    Flfts = []
    for count, flft_gram in enumerate(Flftfiles):
        flftGroup = flft_gram[0]
        file = flft_gram[1]
        flftGroup2 = []
        for flft in flftGroup:
            ft = re.sub(u"^.*?第", u"第", flft)
            flft = transfer.getStdFlft(flft)
            all_flft[flft + ft] += 1
            flftGroup2.append(flft + ft)
        Flfts.append([flftGroup2, file])
        if (count + 1) % 500 == 0:
            print "进度:", (count * 1.0 + 1) / len(Flftfiles)
    pp(all_flft)
    f = open(str(dir) + "/flftStd.json", "w")
    data = json.dumps(Flfts, ensure_ascii=False)
    f.write(data.encode('utf-8'))
def genLevel(ws, remains, callback):
    cellAndValues = map(lambda x: (x, getMergedCellPresent(ws, x).value), remains)
    callback(cellAndValues)


def pushLevels(levelDict, cellAndValues):
    currentDict = levelDict
    for cell, value in cellAndValues:
        if value not in currentDict:
            currentDict[value] = {}
        currentDict = currentDict[value]


def travesalLevels(ws, tl, rb, onCell):
    levels = ws[tl:rb]
    for row in levels:
        genLevel(ws, row, onCell)


if __name__ == '__main__':
    print 'here'
    import sys
    wb = load_workbook(filename="../test.xlsx", read_only=False)
    ws = wb.worksheets[0]
    levelTypes = getLevelTypes(ws, 'B6', 'D7')
    levelDict = {}
    travesalLevels(ws, 'B8', 'D419', partial(pushLevels, levelDict))
    pp(levelDict)
# -*- coding: utf-8 -*-
import os
import sys
import random
import MeCab
import wakati
from prettyprint import pp, pp_str

if __name__ == "__main__":
    src = open("test.txt", "r").read()
    wordlist = wakati.parse(src)
    pp(wordlist)
    #ret = wakati.parse("今日は、いい天気ですね。明日はどんな天気でしょうか?")
    #pp(ret)
rules = [
    (shouldHavePriority, u"优先级必须确定"),
    (shouldHaveDeadline, u"高优先级任务必须有截止时间"),
    (partial(shouldHaveOwner, tf), u"任务必须有负责人"),
    (None, u"高优先级任务必须有工时"),
    (None, u"必须有所属项目"),
    (None, u"如果父任务有截止时间,任务截止时间不得超出父项目"),
    (None, u"任务应该有描述"),
]

availableRules = filter(lambda x: x[0] != None, rules)
results = []
with CacheUsers(users) as cu:
    ui = UserInfo()
    #for task in filter(filters[0][0], tasks):
    for task in tasks:
        violations = []
        for rule in availableRules:
            if False == rule[0](task):
                violations.append(rule[1])
        if len(violations) > 0:
            tid = task['id']
            results.append({
                'id': tid,
                'title': tf.longTitle(tid),
                'violations': violations,
                'owner': tf.owner(tid),
                'author': tf.author(tid)})
pp(results)
    def countDictWords(self):
        return len(self.vocab)

    def wakachi(self, text):
        w = self.mecab.parse(text)
        return w.split()


if __name__ == '__main__':
    ## source activate root
    ## pip install prettyprint
    from prettyprint import pp
    dictfile = 'dict.dat'
    dict = MyDict(dictfile)
    #dict.loadict()
    lids = dict.loadtext('jp.txt')
    dict.savedict()
    #print lids
    #print dict.lids2words(lids)
    print dict.countDictWords()
    i = 0
    for ids in lids:
        jwords = ','.join(dict.ids2words(ids))
        iwords = ','.join(map(str, ids))
        s = str(i) + '\t' + jwords + '\t' + iwords
        pp(s)
        i += 1
def append_mecab():
    with open(filename) as data, open(output, "w") as out:
        mc = MeCab.Tagger()
        out.write(mc.parse(data.read()))


def make_list():
    with open(output) as f:
        sentences = []
        for line in f:
            splited = line.split("\t")
            if len(splited) < 2:
                break
            factor = splited[1].split(",")
            sentence = {
                'surface': splited[0],
                'base': factor[6],
                'pos': factor[0],
                'pos1': factor[1]
            }
            sentences.append(sentence)
            if factor[1] == "句点":
                yield sentences
                sentences = []


if __name__ == "__main__":
    # append_mecab()
    pp(make_list())
            srvc = host.get_service('tcp', port)
            if srvc is None:
                continue
            record[host.ip][srvc.name] = int(port)
        self.shared_queue.put(
            {
                self.target: record
            }
        )
    except:
        # We don't know what we caught.
        raise
    finally:
        # Don't let a failure hang any waiting threads.
        self.alive = False


if __name__ == '__main__':
    import prettyprint
    import time
    import os, sys

    if not os.geteuid() == 0:
        sys.exit("\nOnly a root user can run this\n")

    sh_queue = Queue.Queue()
    psc = PortScanner('window-specialist.com', shared_queue=sh_queue)
    while psc.alive:
        time.sleep(0.1)
    prettyprint.pp(sh_queue.get(block=False))
def make_release(release=None):
    """Based on the deployment type and any arguments from the command line,
    determine the proper identifier for the commit to deploy. If a tag is
    required (e.g. when in the production app environment), the deploy must be
    coming from the master branch, and cannot proceed without either creating
    a new tag or specifying an existing one.

    Requires the env keys:
        allow_no_tag - whether or not to require the release to be tagged in git
        default_revision - the commit ref for HEAD
    """
    require('allow_no_tag')
    require('default_revision')
    env.release = release
    env.tagged = False
    if not env.release or env.release == 'latest_tag':
        if not env.allow_no_tag:
            branch = utils.branch()
            if branch != "master":
                abort("Make sure to checkout the master branch and merge in the"
                      " development branch before deploying to production.")
        local('git checkout master', capture=True)
        description = local('git describe master' % env, capture=True
                            ).rstrip('\n')
        if '-' in description:
            env.latest_tag = description[:description.find('-')]
        else:
            env.latest_tag = description
        if not re.match(env.version_pattern, env.latest_tag):
            env.latest_tag = None
        env.release = env.release or env.latest_tag
        env.commit = 'HEAD'
        if not env.allow_no_tag:
            if confirm(yellow("Tag this release?"), default=False):
                require('master_remote')
                from prettyprint import pp
                print(green("The last 5 tags were: "))
                tags = local('git tag | tail -n 20', capture=True)
                pp(sorted(tags.split('\n'), utils.compare_versions,
                          reverse=True))
                prompt("New release tag in the format vX.Y[.Z]?", 'tag',
                       validate=env.version_pattern)
                require('commit')
                local('git tag -s %(tag)s %(commit)s' % env)
                local('git push --tags %(master_remote)s' % env, capture=True)
                env.tagged = True
                env.release = env.tag
                local('git fetch --tags %(master_remote)s' % env, capture=True)
            else:
                print(green("Using latest tag %(latest_tag)s" % env))
                env.release = env.latest_tag
        else:
            make_head_commit()
            env.release = env.head_commit
            print(green("Using the HEAD commit %s" % env.head_commit))
    else:
        local('git checkout %s' % env.release, capture=True)
        env.tagged = re.match(env.version_pattern, env.release)
    make_pretty_release()
}
u = User(**ukw)
print repr(u)
print str(u)

### Create a new Account object and add to User object
# One way to do it
aname = 'Budget'
a = Account(username=u.user, accountname=aname)
u.add_account(a)  # Returns a

# Another way to do it
aname = 'Salary'
a = u.new_account(accountname=aname)

# These should be changed, so that Account objects return something useful
print repr(a)
print str(a)

### Invoke __get__() for property User.accounts
print u.accounts
# u.accounts.pop(0)  # Direct access to the _accounts property.
pp(u.accounts)
    testObject = getTestObjectByApi()
    url = testObject['categories']
    resp = requests.get(url)
    return json.loads(resp.text)


def getOrphanTestCategories():
    url = 'http://192.168.99.100:8081/test-man/api/v0/testCategories/orphans'
    resp = requests.get(url)
    return json.loads(resp.text)


def putTestObjectCategory():
    testObject = getTestObjectByApi()
    category = getOrphanTestCategories()[0]
    url = testObject['categories']
    resp = requests.put(url, json=[category['id']])
    return json.loads(resp.text)


#pp(putTestObjectCategory())
#pp(getTestObjectCategories())
#pp(getOrphanTestCategories())
#pp(getTestObjectCategories())
#obj = getTestObjectsByApi()
#pp(obj)
#obj = getTestObjectByApi()
#pp(obj)
obj = getTestObjectCategories()
pp(obj)
#TODO: filename/(path?) generation in its own structure, distinguishing host-related (yml files) from list-based (dhcpd.conf) etc.
#TODO: possibly take hn out of writeFile and pass a different parameter depending on tpl/tpl-type?!


def writeFile(hn, contens):
    if tpl == 'auto-install':
        out_filename['cloud-config'] = 'auto-install' + '.@@hn@@.yml'
    filename = out_filename[tpl_type].replace('@@hn@@', hn)
    out_file = os.path.join(deploydir, filename)
    print "create " + out_file
    with open(out_file, 'w+') as f:
        f.write(contens)


# main
cfg = cfg_defaults.copy()

# for testing only one possibility: use given tpl and tpl_type
# TODO: handle values/options from cmdline args and create single host related
# outputs as well as hostlist based outputs
content = createObjectFromHostCfg(hn, tpl, tpl_type)
print "\nOUTPUT:\n"
pp(content)
writeFile(hn, content)
# cut -f1 hightemp.txt | sort | uniq -c | sort -r
import sys
import csv
from prettyprint import pp

if __name__ == '__main__':
    argv = sys.argv
    if len(argv) != 2:
        # check that an argument was given;
        # if not, print a usage message and exit
        print('Usage: python %s filename1 ...' % argv[0])
        quit()

    file_path = argv[1]
    word_set = set()
    with open(file_path) as f:
        reader = csv.reader(f, delimiter="\t")
        for row in reader:
            word_set.add(row[0].encode("utf-8"))

    pp("文字種類 : ")
    pp(word_set)
    pp("文字数:" + str(len(word_set)))
        cover=js['cover'],
        description=js['description'].encode('utf-8'),
        ratings=js['ratings'],
        reviews=js['reviews'],
        title=js['title'],
        url=js['url'],
        outlinks=js['outlinks'])
    book.add_authors(js['authors'])
    book.add_userreviews(js['userreviews'])
    # book.authors_url = authors_urls
    # for userrev in js['userreviews']:
    #     book.add_userreview(userrev['userName'], userrev['userReview'],
    #                         userrev['userReviewDate'], userrev['userURL'])
    book.id = idx
    book.save()

# s = Search(es).index('book-index').doc_type('book').query("match", description='prince')
s = Search(es).index('book-index').doc_type('book')
response = s.execute()
print response.success(), response.hits.total

for res in response:
    print res._meta.score
    print res.title
    print res.description.encode('utf-8')
    # pp(res.outlinks)
    pp(res.userreviews_userName)
    pp(res.userreviews_userReview)
    print ''

# print response.to_dict()
def testClassifier(x_train, y_train, x_test, y_test, clf):
    """
    this method will first train the classifier on the training data
    and will then test the trained classifier on test data. Finally
    it will report some metrics on the classifier performance.

    Parameters
    ----------
    x_train: np.ndarray
        train data matrix
    y_train: list
        train data label
    x_test: np.ndarray
        test data matrix
    y_test: list
        test data label
    clf: sklearn classifier object implementing fit() and predict() methods

    Returns
    -------
    metrics: list
        [training time, testing time, recall and precision for every class,
         macro-averaged F1 score]
    """
    #metrics = []
    start = dt.now()
    clf.fit(x_train, y_train)
    end = dt.now()
    print 'training time: ', (end - start)

    # add training time to metrics
    #metrics.append(end-start)

    start = dt.now()
    yhat = clf.predict(x_test)
    end = dt.now()
    print 'testing time: ', (end - start)

    # add testing time to metrics
    #metrics.append(end-start)

    print 'classification report: '
    # print classification_report(y_test, yhat)
    pp(classification_report(y_test, yhat))

    print 'f1 score'
    print f1_score(y_test, yhat, average='macro')

    print 'accuracy score'
    accuracy = accuracy_score(y_test, yhat)
    print accuracy
    #metrics.append(accuracy)

    #precision = precision_score(y_test, yhat, average=None)
    #recall = recall_score(y_test, yhat, average=None)

    # add precision and recall values to metrics
    #for p, r in zip(precision, recall):
    #    metrics.append(p)
    #    metrics.append(r)

    #add macro-averaged F1 score to metrics
    #metrics.append(f1_score(y_test, yhat, average='macro'))

    print 'confusion matrix:'
    print confusion_matrix(y_test, yhat)

    # plot the confusion matrix
    plt.imshow(confusion_matrix(y_test, yhat), interpolation='nearest')
    plt.show()
    return accuracy
                raise Exception(msg.format(cnt, row, self.destination_ip))
            if len(hops) > 0:
                # This hop told us who it was.
                trace_globs[count+1] = tuple(hops)
            else:
                # This hop doesn't like traceroute.
                trace_globs[count+1] = None
        return trace_globs


class whois(object):
    def __init__(self, ip):
        pass


def test_trace_route(domain='www.google.com'):
    """Test the trace route object on domain.
    """
    import time
    queue = Queue.Queue()
    trc = TraceRoute(domain, queue)
    time.sleep(15)
    return queue.get(block=True, timeout=30)


if __name__ == '__main__':
    import prettyprint
    prettyprint.pp(test_trace_route())
# coding: utf-8
import MeCab
from NLP_30 import make_list
from collections import Counter
from prettyprint import pp


def get_counter():
    counter = Counter()
    for cols in make_list():
        counter.update([col["surface"] for col in cols])
    word_list = counter.most_common()
    return word_list


if __name__ == "__main__":
    pp(get_counter())
            return self.pyg.record_by_name(hostname)
        else:
            raise Exception('Undefined error')


@singleton
class GeoIp(object):
    def __init__(self):
        pass

    def distance(self, record_a, record_b):
        if not ('latitude' in record_a and 'longitude' in record_a):
            raise Exception('latitude/longitude not in {}'.format(record_a))
        if not ('latitude' in record_b and 'longitude' in record_b):
            raise Exception('latitude/longitude not in {}'.format(record_b))
        point_a = (record_a['latitude'], record_a['longitude'])
        point_b = (record_b['latitude'], record_b['longitude'])
        return geopy.distance.vincenty(point_a, point_b).miles


def test_ip_address():
    """A function to test the ip_address object.
    """
    ipa = IPAddress()
    record = ipa.approx_geograph(hostname='www.google.com')
    return record


if __name__ == '__main__':
    import prettyprint
    prettyprint.pp(test_ip_address())
    labellist.append(erritem)
    msgidlist = []
    for msgtxt in erritem['message']:
        ## make the text explicitly unicode so wakachi can process it
        ## ex) cmapeerdが停止したためプロセスを再起動します
        w = u'' + msgtxt
        words = dict.wakachi(w.encode('utf-8'))
        ids = dict.words2ids(words)
        ## ex) [113, 63, 11, 9, 13, 104, 6, 7, 66, 8, 9, 10]
        #y = train.predict(ids)
        y = train.predict(ids[::-1])
        wordid = y.data.argmax(1)[0]
        ## ex) 84
        msgidlist.append(wordid)
    analizeRnnInputList.append(msgidlist)
    pp(erritem)
    pp(msgidlist)

## ex) [[65,66],[40,86,84]]
pp(analizeRnnInputList)

## save the list of accepted message IDs
## flatten nested lists like [[83], [65, 66], ...] into [83, 65, 66, ...] and deduplicate
fsave(acceptFile, list(set([flatten for inner in analizeRnnInputList for flatten in inner])))

print "training...learning"
trainData = analizeRnnInputList
config = ConfigParser.SafeConfigParser()
config.read(configFile)
# -*- encoding: utf-8 -*-
import re
from prettyprint import pp
from knock20 import load_json_file, get_record

if __name__ == "__main__":
    data = load_json_file("jawiki-country.json")
    text = get_record(data, "イギリス")
    r = re.compile(u"\{\{基礎情報 国([\s\S]+)\}\}")
    pp(r.findall(text))

    field_dic = {}
    field_flag = False
    for row in text:
        if row.find("{{基礎情報") != -1:
            field_flag = True
            continue
        if field_flag:
            data = row.split(" = ")
            field_dic[data[0].replace("|", "")] = data[1]
            if row == "{{"
    while counter < max_packets:
        # receive a packet
        pkt_obj = receive_raw_packet(sock)
        if pkt_obj and pkt_obj.protocol == UDP_PROTO:
            if pkt_obj.data:
                # Host name lookup
                # This is a sent packet.
                if pkt_obj.data[:2].__repr__() not in pkt_seen:
                    print 's', pkt_obj.data[2:].__repr__()
                    pkt_seen[pkt_obj.data[:2].__repr__()] = True
                    PENDING_DNS_REQUESTS[pkt_obj.src_port] = pkt_obj.data[:2].__repr__()
                if pkt_obj.dest_port in PENDING_DNS_REQUESTS:
                    print 'd', pkt_obj.data[10:]
        counter += 1
    pp(pkt_seen)
    pp(PENDING_DNS_REQUESTS)
    print 'TEST RECEIVE RAW COMPLETE\n\n'


if __name__ == '__main__':
    import time
    import os, sys

    if not os.geteuid() == 0:
        sys.exit("\nOnly a root user can run this\n")

    #dnsp = DNSParser()
    test_receive_raw_packet()
    #dnsp.alive.clear()
    # time.sleep(5)
    # test_receive_tcp_packet()
parser.add_argument('--sg', type=int, dest='sg', required=False, default=0)  # 0: cbow, 1: skipgram
parser.add_argument('--cbow_type', type=int, dest='cbow_type', required=False, default=1)  # 0: average_concat + syn1_doc, 1: average, 2: concatenate + syn1_doc, 3: concatenate, 4: average_concat
parser.add_argument('--skip_gram_type', type=int, dest='skip_gram_type', required=False, default=0)
parser.add_argument('--alpha', type=float, dest='alpha', required=False, default=0.025)
parser.add_argument('--alpha_doc', type=int, dest='alpha_doc', required=False, default=0.025)
parser.add_argument('--alpha_rate', type=int, dest='alpha_rate', required=False, default=0.025)
parser.add_argument('--alpha_flag', type=int, dest='alpha_flag', required=False, default=0)  # 0: decay the learning rate, 1: keep it fixed
parser.add_argument('--cbow_mean', type=int, dest='cbow_mean', required=False, default=1)  # 0: no, 1: average
parser.add_argument('--iteration', type=int, dest='iteration', required=False, default=20)  # number of training iterations
parser.add_argument('--average_flag', type=int, dest='average_flag', required=False, default=1)  # 0: sum of word_vec only, 1: sum of doc_vec and word_vec
parser.add_argument('--is_np_mean_syn1', type=int, dest='is_np_mean_syn1', required=False, default=0)  # 0: average the syn1 error, 1: sum the syn1 error (when cbow_type=0,2)
parser.add_argument('--is_using_word2vec', type=int, dest='is_using_word2vec', required=False, default=0)  # 1: use word2vec word vectors, 0: do not
parser.add_argument('--is_using_wiki', type=int, dest='is_using_wiki', required=False, default=0)  # 0: none, 1: wiki
parser.add_argument('--hs', type=int, dest='hs', required=False, default=0)  # 0: do not use hierarchical softmax, 1: use it
parser.add_argument('--sample', type=float, dest='sample', required=False, default=1e-5)  # 1e-5; subsample frequent words
parser.add_argument('--freeze_learn', type=int, dest='freeze_learn', required=False, default=0)  # 0: update word vectors, 1: do not update them when word2vec is used
parser.add_argument('--random_learn_flag', type=int, dest='random_learn_flag', required=False, default=0)  # 1: train in random order, 0: train documents in the given order
parser.add_argument('--n_gram_mode', type=int, dest='n_gram_mode', required=False, default=0)  # 0: no conversion, 1,2,3-gram
parser.add_argument('--null_vec_type', type=int, dest='null_vec_type', required=False, default=2)  # 0: zeros, 1: ones, 2: random
parser.add_argument('--skip_concat', type=int, dest='skip_concat', required=False, default=0)  # 0: do not skip, 1: skip null
parser.add_argument('--input', type=str, dest='input_file', required=False, default="INPUT.txt")  # file with one document per line

args = parser.parse_args()
args_dict = vars(args)
pp(args_dict)
make_paragraph_vector(**args_dict)
# -*- encoding: utf-8 -*-
import sys
import csv
from prettyprint import pp

if __name__ == '__main__':
    argv = sys.argv
    if len(argv) != 2:
        # check that an argument was given;
        # if not, print a usage message and exit
        print('Usage: python %s filename1 ...' % argv[0])
        quit()

    file_path = argv[1]
    data_frame = []
    with open(file_path) as f:
        reader = csv.reader(f, delimiter="\t")
        for row in reader:
            data_frame.append(row)

    data_frame = sorted(data_frame, key=lambda data: data[2], reverse=True)
    pp(data_frame)
def predict(self, data):
    inmsg = data.split('\n')
    #inmsg = ['エージェントが可能な変更を行った後動作を開始しました']
    log.debug('predict: predict data=%s', inmsg)

    dictfile = self.model_path + '/dict.dat'
    configFile = self.model_path + '/analog.ini'
    dict = MyDict(dictfile)
    dict.loadict()
    print 'dict count: %d' % dict.countDictWords()

    ## accepted message IDs
    acceptid = self.fload(self.model_path + '/accept.dat')
    print '**accept id:'
    print acceptid

    ## load the error definitions
    with open(self.model_path + '/error.json', 'r') as f:
        errjson = json.load(f, "utf-8")

    ## convert messages to RNN message IDs
    vocab = 10000
    dim = 100
    y = 94
    train = Trainer(vocab, dim, y)
    train.load(self.model_path + '/train')

    labellist = []  ## list of labels (output)
    #msglist = []  ## list of messages (keep used messages to build the ignore list)
    analizeRnnInputList = []
    analizeRnnLabelList = []
    msgidlist = []
    for msgtxt in inmsg:
        ## make the text explicitly unicode so wakachi can process it
        ## ex) cmapeerdが停止したためプロセスを再起動します
        #w = u'' + msgtxt
        #words = dict.wakachi(w.encode('utf-8'))
        words = dict.wakachi(msgtxt)
        ids = dict.words2ids(words)
        ## skip lines that contain only a newline
        if ids == []:
            continue
        ## ex) [113, 63, 11, 9, 13, 104, 6, 7, 66, 8, 9, 10]
        y = train.predict(ids[::-1])
        wordid = y.data.argmax(1)[0]
        ## ex) 84
        ## ignore messages that are not in the accepted error list
        if wordid in acceptid:
            msgidlist.append(wordid)
        else:
            print 'not accept id:%d, msg:%s' % (wordid, msgtxt)
            continue
        print '** msg predict %d, %2f' % (wordid, y.data[0][wordid])  ## show the probability of the selected id
    pp(inmsg)
    pp(msgidlist)

    ## no target messages
    if msgidlist == []:
        result = [{'score': 100, 'id': None, 'label': None}]
        return result

    ## predict the error case
    config = ConfigParser.SafeConfigParser()
    config.read(configFile)
    dim_in = config.getint('analize', 'dim_in')
    dim_mid = config.getint('analize', 'dim_mid')
    dim_out = config.getint('analize', 'dim_out')

    """
    train = AnazlizeTrainer(dim_in, dim_mid, dim_out)
    train.load(self.model_path + '/train_analize')
    y = train.predict(msgidlist[::-1])
    print y.data.argmax(1)[0]
    rank = y.data.argsort()[0]
    uprank = map(int, rank[::-1])
    print uprank
    #print y.data[0]
    """
    train = AnazlizeTrainer(dim_in, dim_mid, dim_out)
    train.load(self.model_path + '/train_analize')
    #y = train.predict(msgidlist[::-1])
    targetlist = msgidlist
    resultlist = np.zeros(dim_out)  ## array holding the maximum probability for each class
    print resultlist
    for i in range(len(targetlist)):
        target = targetlist[i:]
        y = train.predict(target[::-1])
        print target
        print y.data[0]
        for i in range(len(y.data[0])):
            if y.data[0][i] > resultlist[i]:
                resultlist[i] = y.data[0][i]
    print resultlist

    #print y.data.argmax(1)[0]
    #rank = y.data.argsort()[0]
    rank = resultlist.argsort()
    uprank = map(int, rank[::-1])
    print uprank
    #print y.data[0]

    result = []
    for i in uprank:
        print '%d, %2f' % (i, resultlist[i])
        item = {
            'score': round(float(resultlist[i]) * 100, 2),
            'id': i,
            'label': errjson[i]['label']
        }
        result.append(item)
    return result
# -*- encoding: utf-8 -*-
import re
from prettyprint import pp
from knock20 import load_json_file, get_record

if __name__ == "__main__":
    data = load_json_file("jawiki-country.json")
    text = get_record(data, "イギリス")
    r = re.compile("\[\[Category:(.+?)\]")
    m = r.findall(text)
    pp(m)
                             csrfToken, session)
    csrfToken = mock.getCsrfValue(resp.text)
    resp = mock.addTaskParent(theTask['phid'], parent, csrfToken, session)

    transactions = []
    try:
        if None != points and len(points.strip()) > 0:
            transaction = {'type': 'points', 'value': str(points)}
            transactions.append(transaction)
    except Exception, e:
        pass
    # if None != parent:
    #     transaction = {'type': 'parent', 'value': parent}
    #     transactions.append(transaction)
    if len(transactions) > 0:
        print 'transactions:'
        pp(transactions)
        fab.maniphest.edit(transactions=transactions,
                           objectIdentifier=theTask['objectName'])

    for subTask in subTasks:
        subTask['parent'] = 'T%s' % (taskId, )
        newTask(fab, **subTask)
    return theTask


def updateTask(fab, **args):
    # get the tid
    tid = args.pop('tid')
    if None == tid:
        return
    title = args.pop('task')
            if len(splited) < 2:
                break
            factor = splited[1].split(",")
            sentence = {
                'surface': splited[0],
                'base': factor[6],
                'pos': factor[0],
                'pos1': factor[1]
            }
            sentences.append(sentence)
            if factor[1] == "句点":
                yield sentences
                sentences = []


if __name__ == "__main__":
    np = set()
    np_test = []
    for cols in make_list():
        for i in range(1, len(cols) - 1):
            if cols[i]["surface"] == "の" \
                    and cols[i - 1]["pos"] == "名詞" \
                    and cols[i + 1]["pos"] == "名詞":
                np_test.append(cols[i - 1]["surface"] + cols[i]["surface"] + cols[i + 1]["surface"])
                np.add(cols[i - 1]["surface"] + cols[i]["surface"] + cols[i + 1]["surface"])
    pp(sorted(np, key=np_test.index))
pydir = os.path.dirname(os.path.abspath(__file__))
basedir = os.path.dirname(pydir)
confdir = os.path.join(basedir, "config")
tpldir = os.path.join(basedir, "tpl")

cfg_defaults = {}
hosts = {}

filename = "virt-install-cmd.xen.hjson"
in_file = os.path.join(tpldir, filename)
#print "read " + in_file
with open(in_file, 'r') as f:
    dict = hjson.load(f)

for k in dict['cfg'].keys():
    #print "key=%s\n" % k
    pp(dict['cfg'][k])

# cfg['initial-cluster-string'] = getCoreosInitialClusterString()
# cfg['install-img-path'] = getInstallImgPath()
# cfg['install-img-format'] = cfg['disks']['disk0']['img-format']
# cfg['install-bridge'] = cfg['nics']['nic0']['bridge']
# cfg['install-mac'] = cfg['nics']['nic0']['mac']