def lb():
    rr_dict = {}
    rr_set = set()
    kl = util.get_key_list()
    for key in kl:
        print(key)
        wl = util.get_list_from_file('D:\文档\项目\数据采集文档\词频分布//r//' + key + 'r.txt')
        i = 0
        r_set = set()
        while i < 8000 and i < len(wl):
            wwl = wl[i].split('\t')
            num = rr_dict.get(wwl[0])
            r_set.add(wwl[0])
            if num is None:
                rr_dict[wwl[0]] = int(wwl[1])
            else:
                rr_dict[wwl[0]] = int(num) + int(wwl[1])
            i += 1
        if len(rr_set) == 0:
            rr_set = rr_set | r_set
        else:
            rr_set = rr_set & r_set
    kl_list = []
    srl = []
    for ww in rr_set:
        num = rr_dict.get(ww)
        kl_list.append({'word': ww, 'num': num})
    kl_list.sort(key=lambda obj: obj.get('num'), reverse=True)
    for rrl in kl_list:
        srl.append(rrl['word'] + '\t' + str(rrl['num']))
    util.save_file('D:\文档\项目\数据采集文档\词频分布//' + 'result' + 'r.txt', srl)
def move_files(base_dir, instance, domain, target_dir, use_vanilla):
    """ Moves the domain and instance description files plus additional data files
    to the translation directory """
    definition_dir = target_dir + '/definition'
    data_dir = target_dir + '/data'

    # Copy the domain and instance file to the subfolder "definition" on the destination dir
    util.mkdirp(definition_dir)
    shutil.copy(instance, definition_dir)
    shutil.copy(domain, definition_dir)

    is_external_defined = os.path.isfile(base_dir + '/external.hxx')

    if is_external_defined and use_vanilla:
        raise RuntimeError("An external definitions file was found at '{}', but the runner script determined "
                           "that no external files were needed. Something is wrong.".format(base_dir + '/external.hxx'))

    if not use_vanilla:
        # The ad-hoc external definitions file - if it does not exist, we use the default.
        if is_external_defined:
            shutil.copy(base_dir + '/external.hxx', target_dir)
            if os.path.isfile(base_dir + '/external.cxx'):  # We also copy a possible cxx implementation file
                shutil.copy(base_dir + '/external.cxx', target_dir)
        else:
            default = tplManager.get('external_default.hxx').substitute()  # No substitutions for the default template
            util.save_file(target_dir + '/external.hxx', default)

    # Copy, if they exist, all data files
    origin_data_dir = base_dir + '/data'
    if os.path.isdir(origin_data_dir):
        for filename in glob.glob(os.path.join(origin_data_dir, '*')):
            if os.path.isfile(filename):
                shutil.copy(filename, data_dir)
def get_oracle(url):
    source = util.get_source(url)
    html = lxml.html.document_fromstring(source)
    html.make_links_absolute(url, resolve_base_href=True)
    util.save_file(lxml.html.tostring(html), 'oracle.html')
    util.screenshot('oracle.html', 'oracle.png')
    return html
def evaluate(MLResult, labelpath, resultArray, timeIndex, threshold):
    """
    Evaluate the results: return the F score of the prediction against the ground truth.
    """
    groundTruthArray = []
    TPArray = []
    num = 0
    for fileName in read_file(MLResult.system_path("mid_summary"), "TestFileNameList.txt"):
        with open(labelpath + fileName, "r") as f:
            fline = f.readlines()
            for line in fline:
                count = line.strip("\n")
                if int(count) == 1 and num in timeIndex:
                    groundTruthArray.append(num)
                num += 1
    TP = 0
    for i in resultArray:
        if i in groundTruthArray:
            TP += 1
            TPArray.append(i)
    FP = len(resultArray) - TP
    FN = len(groundTruthArray) - TP
    Precision = TP / float(TP + FP) if TP + FP != 0 else 1
    Recall = TP / float(TP + FN) if TP + FN != 0 else 1
    F = 0 if Recall + Precision == 0 else (2 * Recall * Precision) / (Recall + Precision)
    save_file(MLResult.system_path("result_summary"), "PRF.txt",
              [threshold, F, Precision, Recall, TP, FP, FN, "\n"], ",", "a")
    return F
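# For reference, a minimal worked example of the precision/recall/F formulas used in
# evaluate() above, with made-up counts (illustration only, not real pipeline output):
TP, FP, FN = 8, 2, 4
precision = TP / float(TP + FP)                           # 8 / 10 = 0.8
recall = TP / float(TP + FN)                              # 8 / 12 ≈ 0.667
f_score = 2 * precision * recall / (precision + recall)   # ≈ 0.727
print(precision, recall, f_score)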
def download_price(theater_id, movie_id):
    # http://bj.nuomi.com/pcindex/main/timetable?cinemaid=1c2e250a3e9691059ee32187&mid=9762&needMovieInfo=0&tploption=5&_=1448004690864#j-movie-list1
    url = "http://bj.nuomi.com/pcindex/main/timetable?cinemaid=%s&mid=%s&needMovieInfo=0" % (theater_id, movie_id)
    lfile = '%snuomi/price_%s_%s.html' % (download_dir, theater_id, movie_id)
    respHtml = browser.downad_and_save(url, lfile)
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)
    li = []
    dom_divs = soup.findAll('div', attrs={'class': 'list'})
    for day, div in enumerate(dom_divs):
        trs = div.findAll('tr')
        rows = []
        for tr in trs:
            tds = tr.findAll('td')
            if not tds:
                continue
            p = tds[3].find('span')
            pp = p.contents[0].split(';')[-1]
            order_url = completeInnerUrl("http://bj.nuomi.com/", tds[4].find('a')['href'])
            li.append(','.join([str(day), theater_id, movie_id, tds[0].contents[0].strip(),
                                tds[1].contents[0], pp.strip(), order_url]))
    csv_file = '%snuomi/price_%s_%s.csv' % (download_dir, theater_id, movie_id)
    save_file(csv_file, li)
def seed_html_fault(html, elements, prop, value):
    while elements:
        e = random.choice(elements)
        elements.remove(e)
        if prop in e.attrib:
            original_value = e.attrib[prop]
            e.attrib[prop] = value
            util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
            util.screenshot('test.html', 'test.png')
            e.attrib[prop] = original_value
        else:
            e.attrib[prop] = value
            util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
            util.screenshot('test.html', 'test.png')
            del e.attrib[prop]
        if not filecmp.cmp('oracle.png', 'test.png'):
            xpath = get_xpath(e)
            default_value = util.get_default_value('oracle.html', xpath, prop, css=False)
            return (xpath, default_value, value)
    return (None, None, None)
def timbra_xml(xml):
    log.info('Enviando a timbrar...')
    id_timbrado = util.get_epoch()
    ok, data = util.timbra_xml(atributos['emisor']['rfc'], xml, id_timbrado)
    if ok:
        name = '{}/{}.xml'.format(PATH['TIMBRADAS'], data['UUID'])
        util.save_file(name, data['XML'])
        log.info('Factura timbrada correctamente: {}'.format(name))
    return
def sim_user(self):
    simi_users = {}
    for i in self.train_data.keys():
        simi_users.setdefault(i, [])
    for user in self.train_data:
        scores = self.calulate_similarity(self.train_data, user)
        # using person as the key for its scores
        simi_users[user] = scores
    util.save_file(simi_users, save_file)
    self.simi_data = simi_users
def test():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    time.sleep(5)
    stream_test.close_mic()
    time.sleep(5)
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)
def start(self):
    logger = None  # ensure the handle exists even if open() fails
    try:
        logger = open('log', 'a')
        for attachId in range(self.initId + 1, self.endId):
            attachment = get_attach(self.rootUrl + str(attachId))
            if attachment:
                save_file(attachment)
                logging(logger, attachId)
    finally:
        if logger:
            logger.close()
def sim_item(self):
    simi_items = {}
    re_train_data = self.switch_key()
    # print(self.train_data)
    for i in re_train_data.keys():
        simi_items[i] = []
    for user in re_train_data:
        scores = self.calulate_similarity(re_train_data, user)
        simi_items[user] = scores
    util.save_file(simi_items, save_file)
    self.simi_data = simi_items
def change_property(htmlfile, xpath, prop, value):
    html = lxml.html.parse(htmlfile)
    elements = html.xpath(xpath)
    for e in elements:
        if 'style' in e.attrib:
            original_style = e.attrib['style']
            e.attrib['style'] += ';{0}:{1};'.format(prop, value)
        else:
            e.attrib['style'] = '{0}:{1};'.format(prop, value)
    util.save_file(lxml.html.tostring(html), 'temp.html')
    util.screenshot('temp.html', 'temp.png')
def change_property(htmlfile, xpath, prop, value):
    html = lxml.html.parse(htmlfile)
    elements = html.xpath(xpath)
    for e in elements:
        if "style" in e.attrib:
            original_style = e.attrib["style"]
            e.attrib["style"] += ";{0}:{1};".format(prop, value)
        else:
            e.attrib["style"] = "{0}:{1};".format(prop, value)
    util.save_file(lxml.html.tostring(html), "temp.html")
    util.screenshot("temp.html", "temp.png")
def download(self, id, path, name):
    """
    Download the file whose id == <id> and store it as path/name
    """
    request = self.service.files().get_media(fileId=id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Downloading: {}% ...".format(int(status.progress() * 100)))
    util.save_file(fh, path, name)
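# util.save_file above is project-specific and not shown here. A minimal sketch of what
# such a helper might look like for this particular call signature (an assumption, not
# the project's actual implementation):
import os

def save_file(fh, path, name):
    # Assumed helper: flush a downloaded io.BytesIO buffer to <path>/<name>.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, name), 'wb') as out:
        out.write(fh.getvalue())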
def build(self):
    print '[debug]: Starting [building] function'
    # database = zip(self.get_data()[0], self.get_video(), self.get_data()[1], self.get_desc(), self.get_mobile())
    database = {
        "Episodes": [{
            "title": a,
            "url": b,
            "thumb": c,
            "mobile": d
        } for a, b, c, d in zip(self.get_data()[0], self.get_video(),
                                self.get_data()[1], self.get_mobile())]
    }
    save_file(database, 'data/Season_23.json')
def la():
    kl = ['吐槽', '淡定', '自拍']
    for key in kl:
        rl = []
        srl = []
        wl = util.get_list_from_file('D:\文档\项目\数据采集文档\词频分布//' + key + '.txt')
        for w in wl:
            wwl = w.split('\t')
            if len(wwl[0]) > 1 and int(wwl[1]) > 10:
                rl.append({'word': wwl[0], 'num': int(wwl[1])})
        rl.sort(key=lambda obj: obj.get('num'), reverse=True)
        for rrl in rl:
            srl.append(rrl['word'] + '\t' + str(rrl['num']))
        util.save_file('D:\文档\项目\数据采集文档\词频分布//' + key + 'r.txt', srl)
def process(url, ads_xpath):
    test_dir = urlparse.urlparse(url).hostname
    if os.path.isdir(test_dir):
        shutil.rmtree(test_dir)
    os.mkdir(test_dir)
    if os.path.isfile(test_dir + 'description.txt'):
        os.remove(test_dir + 'description.txt')
    oracle_html = get_oracle(url)
    for xpath in ads_xpath:
        oracle_html = hide_advertisement(oracle_html, xpath)
    util.save_file(lxml.html.tostring(oracle_html, doctype=oracle_html.doctype), 'oracle.html')
    util.screenshot('oracle.html', 'oracle.png')
    total = 0
    total = process_css(test_dir, oracle_html, total)
    total = process_html(test_dir, oracle_html, total)
def record_file(self, filename, seconds=10, format=pyaudio.paInt16,
                channels=1, rate=16000, buffer=None):
    """ Record audio to the specified file """
    if buffer is None:
        buffer = self.chunk
    print ("开始录制文件:\n"
           "filename = %s\n"
           "record time = %s\n"
           "format = %s\n"
           "channels = %s\n"
           "rate = %s\n"
           "buffer = %s\n") % (filename, seconds, format, channels, rate, buffer)
    if filename and os.path.isfile(filename):
        print "文件名%s 已经存在" % filename
        return 1
    stream = self.audio.open(format=format, channels=channels, rate=rate,
                             input=True, frames_per_buffer=buffer)
    frames = []
    for _num in range(int(rate * 1.0 / buffer * seconds) + 3):
        data = stream.read(buffer)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    return save_file(b''.join(frames), filename,
                     self.audio.get_sample_size(format), channels, rate)
def record_file(self, filename, seconds=10, format=pyaudio.paInt16,
                channels=1, rate=16000, buffer=None):
    """ Record audio to the specified file """
    if buffer is None:
        buffer = self.chunk
    print ("开始录制文件:\n"
           "filename = %s\n"
           "record time = %s\n"
           "format = %s\n"
           "channels = %s\n"
           "rate = %s\n"
           "buffer = %s\n") % (filename, seconds, format, channels, rate, buffer)
    if filename and os.path.isfile(filename):
        print "文件名%s 已经存在" % filename
        return 1
    stream = self.audio.open(format=format, channels=channels, rate=rate,
                             input=True, frames_per_buffer=buffer)
    frames = []
    for _num in range(int(rate * 1.0 / buffer * seconds) + 3):
        data = stream.read(buffer)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    return save_file(b''.join(frames), filename,
                     self.audio.get_sample_size(format), channels, rate)
def app():
    st.markdown("""
# Create Strategy

Here you can create your strategy!
You will need to create a function called `strategy` that takes in a dictionary named
`bot_positions` (e.g. `{"Bot1": 1, "Bot2": 3}`) as argument and should return either
*"walk"* or *"sabotage"*. See the ***Example page*** for examples.

You can either:
""")

    with st.beta_expander("Write Code Directly"):
        bot_name = st.text_input(label="Bot Name")
        strategy_code = st.text_area(
            label="Strategy Code: ",
            value=inspect.cleandoc("""
                import random

                def strategy(bot_positions):
                    return random.choice(["walk", "sabotage"])
                """),
            height=320,
        )
        if st.button("Submit"):
            if bot_name:
                fp = util.save_code_to_file(code=strategy_code, filename=bot_name)
                util.validate_file(fp)
                st.success("File uploaded and validated successfully, "
                           "go to `Race Page` to run the Game")
            else:
                st.error("Please provide a name for the Bot")

    with st.beta_expander("Upload a file"):
        file_buffer = st.file_uploader(
            "Upload a strategy file (.py)",
            help="The filename will be used to name the Bot",
        )
        if file_buffer:
            fp = util.save_file(filename=file_buffer.name, filebytes=file_buffer.getvalue())
            util.validate_file(fp)
            st.success("File uploaded and validated successfully, "
                       "go to `Race Page` to run the Game")

    st.markdown(f"## Current Competitors:")
    competitors = util.build_all_bots()
    if competitors:
        st.markdown("\n".join([f"\t- {c}" for c in competitors]))
    else:
        st.markdown("no competitors saved yet")

    if st.button("Add example bots"):
        util.add_example_bots()
        st.experimental_rerun()
def process(url, ads_xpath):
    test_dir = urlparse.urlparse(url).hostname
    if os.path.isdir(test_dir):
        shutil.rmtree(test_dir)
    os.mkdir(test_dir)
    if os.path.isfile(test_dir + 'description.txt'):
        os.remove(test_dir + 'description.txt')
    oracle_html = get_oracle(url)
    for xpath in ads_xpath:
        oracle_html = hide_advertisement(oracle_html, xpath)
    util.save_file(
        lxml.html.tostring(oracle_html, doctype=oracle_html.doctype), 'oracle.html')
    util.screenshot('oracle.html', 'oracle.png')
    total = 0
    total = process_css(test_dir, oracle_html, total)
    total = process_html(test_dir, oracle_html, total)
def download_movie_playing():
    url = "http://bj.nuomi.com/movie/"
    lfile = '%smovies_playing.html' % (root_dir)
    respHtml = browser.downad_and_save(url, lfile)
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)
    li = []
    # <div class="section-item clearfix no-top-border">
    dom_movies = soup.find('div', attrs={'class': 'section-item clearfix no-top-border'})
    dom_a = dom_movies.findAll('a')
    for m in dom_a:
        # dom_a = m.find('a')
        uid = m['href'].split('/')[-1]
        uri = completeInnerUrl("http://bj.nuomi.com/", m['href'])
        name = m.contents[0]
        li.append("Movie,%s,%s,%s" % (uid, uri, name))
    csv_file = '%snuomi/movie_result.csv' % (download_dir)
    save_file(csv_file, li)
def download_mtheater_list(page_index=1, proxy=None):
    url = "http://t.dianping.com/movie/beijing/tab_cinema?pageno=%d" % (page_index)
    lfile = '%smtheater_list_%d.html' % (root_dir, page_index)
    respHtml = browser.downad_and_save(url, lfile, proxy)
    soup = BeautifulSoup(respHtml, fromEncoding=htmlCharset)
    li = []
    # <div class="index-cinema-list">
    #   <li class="item Fix">
    dom_theaterlist = soup.find('div', attrs={'class': 'index-cinema-list'})
    dom_mtheaters = dom_theaterlist.findAll('li', attrs={'class': 'item Fix'})
    for mt in dom_mtheaters:
        dom_a = mt.findAll('a')[1]
        uid = dom_a['href'].split('/')[-1]
        uri = completeInnerUrl("http://t.dianping.com/", dom_a['href'])
        name = dom_a.contents[0]
        li.append("MovieTheater,北京,%s,%s,%s" % (uid, uri, name))
    csv_file = '%smtheater_result_%d.csv' % (root_dir, page_index)
    save_file(csv_file, li)
def seed_css_fault(html, elements, prop, value):
    while elements:
        e = random.choice(elements)
        elements.remove(e)
        original_style = None
        if 'style' in e.attrib:
            original_style = e.attrib['style']
            e.attrib['style'] += ';{0}:{1};'.format(prop, value)
        else:
            e.attrib['style'] = '{0}:{1};'.format(prop, value)
        util.save_file(lxml.html.tostring(html, doctype=html.doctype), 'test.html')
        util.screenshot('test.html', 'test.png')
        if original_style is not None:
            e.attrib['style'] = original_style
        else:
            del e.attrib['style']
        if not filecmp.cmp('oracle.png', 'test.png'):
            xpath = get_xpath(e)
            default_value = util.get_default_value('oracle.html', xpath, prop)
            return (xpath, default_value, value)
    return (None, None, None)
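# Minimal driver sketch for seed_css_fault() above, assuming the get_oracle() helper and
# util/screenshot setup from this file; the URL, XPath and property/value pair below are
# placeholders chosen only for illustration.
html = get_oracle('http://example.com/')   # also writes oracle.html / oracle.png
elements = html.xpath('//div')             # candidate elements to mutate
xpath, default_value, value = seed_css_fault(html, elements, 'display', 'none')
if xpath is not None:
    print('Visible fault seeded at {}: {} -> {}'.format(xpath, default_value, value))
else:
    print('No element produced a visible difference from the oracle.')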
def seed_error(html, elements, prop, value):
    while elements:
        e = random.choice(elements)
        elements.remove(e)
        # print 'Trying ', e
        original_style = None
        if 'style' in e.attrib:
            original_style = e.attrib['style']
            e.attrib['style'] += ';{0}:{1};'.format(prop, value)
        else:
            e.attrib['style'] = '{0}:{1};'.format(prop, value)
        util.save_file(lxml.html.tostring(html), 'test.html')
        util.screenshot('test.html', 'test.png')
        if original_style:
            e.attrib['style'] = original_style
        else:
            del e.attrib['style']
        if not filecmp.cmp('oracle.png', 'test.png'):
            xpath = get_xpath(e)
            default_value = util.get_default_value('oracle.html', xpath, prop)
            return (xpath, default_value, value)
    return (None, None, None)
def move_files(base_dir, instance, domain, target_dir, use_vanilla):
    """ Moves the domain and instance description files plus additional data files
    to the translation directory """
    definition_dir = target_dir + '/definition'
    data_dir = target_dir + '/data'

    # Copy the domain and instance file to the subfolder "definition" on the destination dir
    util.mkdirp(definition_dir)
    shutil.copy(instance, definition_dir)
    shutil.copy(domain, definition_dir)

    is_external_defined = os.path.isfile(base_dir + '/external.hxx')

    if is_external_defined and use_vanilla:
        raise RuntimeError(
            "An external definitions file was found at '{}', but the runner script determined "
            "that no external files were needed. Something is wrong.".format(base_dir + '/external.hxx'))

    if not use_vanilla:
        # The ad-hoc external definitions file - if it does not exist, we use the default.
        if is_external_defined:
            shutil.copy(base_dir + '/external.hxx', target_dir)
            if os.path.isfile(base_dir + '/external.cxx'):
                # We also copy a possible cxx implementation file
                shutil.copy(base_dir + '/external.cxx', target_dir)
        else:
            # No substitutions for the default template
            default = tplManager.get('external_default.hxx').substitute()
            util.save_file(target_dir + '/external.hxx', default)

    # Copy, if they exist, all data files
    origin_data_dir = base_dir + '/data'
    if os.path.isdir(origin_data_dir):
        for filename in glob.glob(os.path.join(origin_data_dir, '*')):
            if os.path.isfile(filename):
                shutil.copy(filename, data_dir)
def train_test_videos(path_train_violence, path_test_violence,
                      path_train_raw_nonviolence, path_train_new_nonviolence,
                      path_test_raw_nonviolence, path_test_new_nonviolence,
                      proportion_norm_videos, min_num_frames):
    """ Load the train-test split from the original dataset """
    train_names = []
    train_labels = []
    test_names = []
    test_labels = []
    train_bbox_files = []
    test_bbox_files = []
    train_names_violence = util.read_file(path_train_violence)
    train_names_new_nonviolence = util.read_file(path_train_new_nonviolence)
    train_names_raw_nonviolence = util.read_file(path_train_raw_nonviolence)
    test_names_violence = util.read_file(path_test_violence)
    test_names_new_nonviolence = util.read_file(path_test_new_nonviolence)
    test_names_raw_nonviolence = util.read_file(path_test_raw_nonviolence)

    ## Remove normal videos of short duration
    train_names_new_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
        train_names_new_nonviolence, min_num_frames)
    train_names_raw_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
        train_names_raw_nonviolence, min_num_frames)
    test_names_new_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE,
        test_names_new_nonviolence, min_num_frames)
    test_names_raw_nonviolence = remove_short_videos(
        constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE,
        test_names_raw_nonviolence, min_num_frames)

    new_split = False

    ### Train
    # print('Train names: ', len(train_names_violence))
    for tr_name in train_names_violence:
        train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE, tr_name))
        train_labels.append(1)
        video_name = re.findall(r'[\w\.-]+-', tr_name)[0][:-1]
        train_bbox_files.append(
            os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS, video_name + '.txt'))

    ## random normal samples
    negative_samples = []
    if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT):
        print('Creating Random Normal Train examples file...')
        num_new_samples = int(len(train_names) * proportion_norm_videos)
        train_names_new_nonviolence = random.choices(train_names_new_nonviolence, k=num_new_samples)
        train_names_raw_nonviolence = random.choices(train_names_raw_nonviolence,
                                                     k=len(train_names) - num_new_samples)
        if len(train_names_new_nonviolence) == 0:
            print('Using only raw non violence videos...')
        for neagtive_name in train_names_new_nonviolence:
            negative_samples.append(
                os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, neagtive_name))
            # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, neagtive_name))
            # train_bbox_files.append(None)
        for neagtive_name in train_names_raw_nonviolence:
            negative_samples.append(
                os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name))
        util.save_file(negative_samples, constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT)
        # train_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name))
        # train_bbox_files.append(None)
        new_split = True
    else:
        negative_samples = util.read_file(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TRAIN_SPLIT)

    for sample in negative_samples:
        train_names.append(sample)
        train_bbox_files.append(None)
    negative_labels = [0 for i in range(len(negative_samples))]
    train_labels.extend(negative_labels)
    NumFrames_train = [len(glob.glob1(train_names[i], "*.jpg")) for i in range(len(train_names))]

    ### Test
    for ts_name in test_names_violence:
        test_names.append(os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_VIOLENCE, ts_name))
        test_labels.append(1)
        video_name = re.findall(r'[\w\.-]+-', ts_name)[0][:-1]
        test_bbox_files.append(
            os.path.join(constants.PATH_VIOLENCECRIME2LOCAL_BBOX_ANNOTATIONS, video_name + '.txt'))

    negative_samples = []
    if not os.path.exists(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT):
        print('Creating Random Normal Test examples file...')
        num_samples = int(len(test_names) * proportion_norm_videos)
        test_names_new_nonviolence = random.choices(test_names_new_nonviolence, k=num_samples)
        test_names_raw_nonviolence = random.choices(test_names_raw_nonviolence,
                                                    k=len(test_names) - num_samples)
        for neagtive_name in test_names_new_nonviolence:
            negative_samples.append(
                os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_NEW_NONVIOLENCE, neagtive_name))
        for neagtive_name in test_names_raw_nonviolence:
            negative_samples.append(
                os.path.join(constants.PATH_UCFCRIME2LOCAL_FRAMES_RAW_NONVIOLENCE, neagtive_name))
        util.save_file(negative_samples, constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)
        new_split = True
    else:
        negative_samples = util.read_file(constants.PATH_FINAL_RANDOM_NONVIOLENCE_TEST_SPLIT)

    for sample in negative_samples:
        test_names.append(sample)
        test_bbox_files.append(None)
    negative_labels = [0 for i in range(len(negative_samples))]
    test_labels.extend(negative_labels)
    NumFrames_test = [len(glob.glob1(test_names[i], "*.jpg")) for i in range(len(test_names))]

    print('Train Split: ', len(train_names), len(train_labels), len(NumFrames_train), len(train_bbox_files),
          ', Test Split: ', len(test_names), len(test_labels), len(NumFrames_test), len(test_bbox_files))

    data = {
        'train_names': train_names,
        'train_labels': train_labels,
        'NumFrames_train': NumFrames_train,
        'train_bbox_files': train_bbox_files,
        'test_names': test_names,
        'test_labels': test_labels,
        'NumFrames_test': NumFrames_test,
        'test_bbox_files': test_bbox_files
    }
    return data, new_split
wg = nx.Graph()
pg = nx.Graph()
ii = 0
time_cost = []
# time_cost1 = []
print(time.strftime('%H:%M:%S', time.localtime(time.time())))
for sentence in s_list:
    # Build the graph for the whole sentence
    ll = util.input_filer(sentence)
    start = time.clock()
    wg = add_s2g(wg, ' '.join(ll))
    end = time.clock()
    tc = str(end - start)
    ii += 1
    if ii == 50:
        ii = 0
        print(tc)
        time_cost.append(tc)
        # time_cost1.append(tc1)
    # Only build the graph for the clauses that contain the keyword
    # for ss in ll:
    #     if ('淡定' in ss):
    #         pg = add_s2g(pg, ss)
print(time.strftime('%H:%M:%S', time.localtime(time.time())))
util.save_file('D:\semantic analysis\c_date/zw_record1.txt', time_cost)
def down_music(self, url, song_name, singer, suffix):
    response = self.session.get(url)
    util.save_file(response, 'files/' + song_name + '-' + singer + suffix)
def mplayer(data):
    import os
    import time
    filename = save_file(data)
    os.system("mplayer %s" % filename)
    time.sleep(1)
import time
time.sleep(5)
stream_test.close_mic()
time.sleep(5)
import util
data = "".join(stream_test.cache_frames)
util.save_file(data)
stream_test.play_stream(data)


def main():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    while 1:
        try:
            time.sleep(5)
        except KeyboardInterrupt, e:
            # time.sleep(5)
            stream_test.close_mic()
            break
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)


if __name__ == "__main__":
    main()
import util
data = "".join(stream_test.cache_frames)
util.save_file(data)
stream_test.play_stream(data)


def main():
    stream_test = StreamParser()
    stream_test.open_mic()
    import time
    while 1:
        try:
            time.sleep(5)
        except KeyboardInterrupt, e:
            # time.sleep(5)
            stream_test.close_mic()
            break
    import util
    data = "".join(stream_test.cache_frames)
    util.save_file(data)
    stream_test.play_stream(data)


if __name__ == "__main__":
    main()
def main():
    # parse the arguments
    arg_parser = ArgumentParser()
    spt.register_config_arguments(config, arg_parser, title='Model options')
    spt.register_config_arguments(spt.settings, arg_parser, prefix='tfsnippet',
                                  title='TFSnippet options')
    arg_parser.parse_args(sys.argv[1:])

    # print the config
    print_with_title('Configurations', pformat(config.to_dict()), after='\n')

    # open the result object and prepare for result directories
    model_file = (config.result_dir + "/" + os.path.basename(__file__).split(".py")[0] +
                  "_" + str(config.noExp) + ".model")
    dirName = os.path.basename(__file__).split(".py")[0] + "_" + str(config.noExp)
    results = MLResults(os.path.join(config.result_dir, dirName))
    results.save_config(config)  # save experiment settings
    results.make_dirs('train_summary', exist_ok=True)
    results.make_dirs('result_summary', exist_ok=True)
    results.make_dirs('mid_summary', exist_ok=True)
    # os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_number

    # input placeholders
    input_x = tf.placeholder(dtype=tf.float32, shape=(None,) + config.x_shape, name='input_x')
    learning_rate = spt.AnnealingVariable('learning_rate', config.initial_lr,
                                          config.lr_anneal_factor, min_value=1e-6)
    multi_gpu = MultiGPU(disable_prebuild=True)
    # multi_gpu = MultiGPU()

    # derive the training operation
    gradses = []
    grad_vars = []
    train_losses = []
    BATCH_SIZE = get_batch_size(input_x)
    for dev, pre_build, [dev_input_x] in multi_gpu.data_parallel(BATCH_SIZE, [input_x]):
        with tf.device(dev), multi_gpu.maybe_name_scope(dev):
            # derive the loss for initializing
            with tf.name_scope('initialization'), \
                    arg_scope([p_net, q_net], is_initializing=True), \
                    spt.utils.scoped_set_config(spt.settings, auto_histogram=False):
                init_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                init_chain = init_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x})
                init_loss = tf.reduce_mean(init_chain.vi.training.vimco())

            # derive the loss and lower-bound for training
            with tf.name_scope('training'), \
                    arg_scope([p_net, q_net], is_training=True):
                train_q_net = q_net(dev_input_x, n_z=config.train_n_samples)
                train_chain = train_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x})
                train_loss = (tf.reduce_mean(train_chain.vi.training.vimco()) +
                              tf.losses.get_regularization_loss())
                train_losses.append(train_loss)

            # derive the logits output for testing
            with tf.name_scope('testing'):
                test_q_net = q_net(dev_input_x, n_z=config.test_n_z)
                test_chain = test_q_net.chain(p_net, latent_axis=0, observed={'x': dev_input_x})
                # log_prob of X and each univariate time series of X
                log_prob = tf.reduce_mean(
                    test_chain.model['x'].distribution.log_prob(dev_input_x), 0)
                log_prob_per_element = tf.reduce_sum(log_prob)
                log_prob_per_element_univariate_TS = tf.reduce_sum(log_prob, [0, 1, 3])
                log_prob_per_element_univariate_TS_All = tf.reduce_sum(log_prob, [1, 3])

            # derive the optimizer
            with tf.name_scope('optimizing'):
                params = tf.trainable_variables()
                optimizer = tf.train.AdamOptimizer(learning_rate)
                grads = optimizer.compute_gradients(train_loss, params)
                for grad, var in grads:
                    if grad is not None and var is not None:
                        if config.grad_clip_norm:
                            grad = tf.clip_by_norm(grad, config.grad_clip_norm)
                        if config.check_numerics:
                            grad = tf.check_numerics(
                                grad, 'gradient for {} has numeric issue'.format(var.name))
                        grad_vars.append((grad, var))
                gradses.append(grad_vars)

    # merge multi-gpu outputs and operations
    [train_loss] = multi_gpu.average([train_losses], BATCH_SIZE)
    train_op = multi_gpu.apply_grads(grads=multi_gpu.average_grads(gradses),
                                     optimizer=optimizer,
                                     control_inputs=tf.get_collection(tf.GraphKeys.UPDATE_OPS))

    # sort the contribution of each univariate_TS of input
    SORT_UNIVARIATE_TS_INPUT = tf.placeholder(dtype=tf.float32, shape=(None, None),
                                              name='SORT_UNIVARIATE_TS_INPUT')
    SORT_UNIVARIATE_TS = tf.nn.top_k(SORT_UNIVARIATE_TS_INPUT, k=config.metricNumber).indices + 1

    # load the training and testing data
    print("=" * 10 + "Shape of Input data" + "=" * 10)
    x, time_indexs, x_test, time_indexs2 = load_matrix_allData(
        config.dataReadformat, config.datapathForTrain, config.datapathForTest,
        config.timeLength, config.metricNumber, "TrainFileNameList.txt",
        "TestFileNameList.txt", results, config.norm)

    x_test = x_test.reshape([-1, config.timeLength, config.metricNumber, 1])
    print("Test:", x_test.shape)
    if config.batchTest:
        test_flow = DataFlow.arrays([x_test], config.test_batch_size)  # DataFlow is iterator
        del x_test
    x_train, x_val = split_numpy_array(x, portion=config.VALID_PORTION)
    x_train = x_train.reshape([-1, config.timeLength, config.metricNumber, 1])
    x_val = x_val.reshape([-1, config.timeLength, config.metricNumber, 1])
    train_flow = DataFlow.arrays([x_train], config.batch_size, shuffle=False, skip_incomplete=True)
    val_flow = DataFlow.arrays([x_val], config.test_batch_size)
    print("Note:", config.x_dim, ", x_dim = size of datapoint = timeLength * metricNumber")
    print("Input data shape:", x.shape, "Train data shape:", x_train.shape,
          "Validation data shape:", x_val.shape)
    del x_train, x_val, x

    # training part
    with spt.utils.create_session().as_default() as session:
        spt.utils.ensure_variables_initialized()
        saver = CheckpointSaver(tf.trainable_variables(), model_file)
        if os.path.exists(model_file):
            # load the parameters of trained model
            saver.restore_latest()
        else:
            # initialize the network
            while True:
                breakFlag = 0
                for [x] in train_flow:
                    INITLOSS = session.run(init_loss, feed_dict={input_x: x})
                    print('Network initialized, first-batch loss is {:.6g}.'.format(INITLOSS))
                    if np.isnan(INITLOSS) or np.isinf(INITLOSS) or INITLOSS > 10**5:
                        pass
                    else:
                        breakFlag = 1
                        break
                if breakFlag:
                    break

            # train the network
            with train_flow.threaded(10) as train_flow:
                with spt.TrainLoop(
                        params,
                        var_groups=['q_net', 'p_net'],
                        max_epoch=config.max_epoch,
                        max_step=config.max_step,
                        summary_dir=(results.system_path('train_summary')
                                     if config.write_summary else None),
                        summary_graph=tf.get_default_graph(),
                        early_stopping=True) as loop:
                    trainer = spt.Trainer(loop, train_op, [input_x], train_flow,
                                          metrics={'loss': train_loss},
                                          summaries=tf.summary.merge_all(spt.GraphKeys.AUTO_HISTOGRAM))
                    # anneal the learning rate
                    trainer.anneal_after(learning_rate,
                                         epochs=config.lr_anneal_epoch_freq,
                                         steps=config.lr_anneal_step_freq)
                    validator = spt.Validator(loop, train_loss, [input_x], val_flow)
                    trainer.evaluate_after_epochs(validator, freq=10)
                    trainer.log_after_epochs(freq=1)
                    trainer.run()
            saver.save()

        # save the training information
        firWrite = True
        num = 0
        time0 = time.time()
        for [x_train] in train_flow:
            if config.savetrainDS:
                # log prob of each metric of each instance
                log_prob_per_element_univariate_TS_list_item_Train = (
                    session.run(log_prob_per_element_univariate_TS_All,
                                feed_dict={input_x: x_train}))
                log_prob_per_element_univariate_TS_list_Train = \
                    log_prob_per_element_univariate_TS_list_item_Train
                log_prob_per_element_list_Train = np.sum(
                    np.array(log_prob_per_element_univariate_TS_list_item_Train), axis=1).tolist()
                if firWrite:
                    save_file(results.system_path("train_summary"), "OutlierScores_metric.txt",
                              log_prob_per_element_univariate_TS_list_Train)
                    save_file(results.system_path("train_summary"), "OutlierScores.txt",
                              log_prob_per_element_list_Train)
                else:
                    save_file(results.system_path("train_summary"), "OutlierScores_metric.txt",
                              log_prob_per_element_univariate_TS_list_Train, "\n", "a")
                    save_file(results.system_path("train_summary"), "OutlierScores.txt",
                              log_prob_per_element_list_Train, "\n", "a")
                firWrite = False
            num += 1
            if num % 1000 == 0:
                print("-----Train %s >>>>>:Sum time of batch instances:%s"
                      % (num, float(time.time() - time0) / float(num)))
        del train_flow, val_flow

        # online test
        time2 = time.time()
        log_prob_per_element_list, log_prob_per_element_univariate_TS_list = [], []
        if config.batchTest:
            num = 0
            for [x_test] in test_flow:
                if config.savetestDS:
                    # log prob of each metric of each instance
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS_All,
                                    feed_dict={input_x: x_test}))
                    log_prob_per_element_univariate_TS_list += \
                        log_prob_per_element_univariate_TS_list_item.tolist()
                    log_prob_per_element_list += np.sum(
                        np.array(log_prob_per_element_univariate_TS_list_item), axis=1).tolist()
                num += 1
                if num % 200 == 0:
                    print("-----Test %s >>>>>:Sum time of batch instances:%s"
                          % (num, float(time.time() - time2) / float(num)))
        else:
            num = 1
            for batch_x in x_test:
                if config.savetestTS:
                    log_prob_per_element_list_item = (
                        session.run(log_prob_per_element, feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_list.append(log_prob_per_element_list_item)
                if config.savetestDS:
                    log_prob_per_element_univariate_TS_list_item = (
                        session.run(log_prob_per_element_univariate_TS,
                                    feed_dict={input_x: [batch_x]}))
                    log_prob_per_element_univariate_TS_list.append(
                        log_prob_per_element_univariate_TS_list_item)
                    log_prob_per_element_list.append(
                        sum(log_prob_per_element_univariate_TS_list_item))
                if num % 200 == 0:
                    print("-----Test>>>>>:%d, average time of each instance:%s"
                          % (num, float(time.time() - time2) / float(num)))
                num += 1

        # get the label file name and its line cnt number
        allLabelFileNameLineCntList = get_machineID(results, config.labelpath)

        print("No of OutlierScores for all dataPoint:(%s):" % len(log_prob_per_element_list))
        if config.savetestDS:
            save_file(results.system_path("result_summary"), "OutlierScores_metric.txt",
                      cat_List(allLabelFileNameLineCntList, log_prob_per_element_univariate_TS_list))
        save_file(results.system_path("result_summary"), "OutlierScores.txt",
                  cat_List(allLabelFileNameLineCntList, log_prob_per_element_list))

        if config.evaluation:
            # Preparation for the history two-metric results
            twoMetricScore = read_file(results.system_path("train_summary"), "OutlierScores_metric.txt")
            ave_twoMetricScore = np.mean(np.array(twoMetricScore), axis=0).tolist()
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Average score of each univariate time series", "\n"], ",")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ave_twoMetricScore + ["\n"], ",", "a")
            save_file(results.system_path("result_summary"), "PRF.txt",
                      ["Threshold", "F", "Precision", "Recall", "TP", "FP", "FN", "\n"], ",", "a")

            # get the sorted item of each metric by change score
            twoMetricScoreList = cal_scoreChanges(log_prob_per_element_list,
                                                  ave_twoMetricScore,
                                                  log_prob_per_element_univariate_TS_list)
            MetricResult = session.run(SORT_UNIVARIATE_TS,
                                       feed_dict={SORT_UNIVARIATE_TS_INPUT: twoMetricScoreList})
            save_file(results.system_path("result_summary"), "MetricResult.txt",
                      cat_List(allLabelFileNameLineCntList, MetricResult))

            # POT evaluation
            POT_TH = pot_eval(
                read_file(results.system_path("train_summary"), "OutlierScores.txt", "float"),
                config.q, config.level)
            resultArray, outlierLabelfileNameLineCntList = cal_binaryResult(
                log_prob_per_element_list, POT_TH, time_indexs2,
                config.saveMetricInfo, allLabelFileNameLineCntList)
            evaluate(results, config.labelpath, resultArray, time_indexs2, POT_TH)

    # print the final metrics and close the results object
    print_with_title('Results', results.format_metrics(), before='\n')
    results.close()

    interpretation_hit_ratio(truth_filepath=config.interpret_filepath,
                             prediction_filepath=os.path.join(config.result_dir, dirName,
                                                              "result_summary", "MetricResult.txt"))
# Used to add weights to newly added words
import util

slist = util.get_list_from_file('D://semantic analysis//分词//词库//ok//result_dict.txt')
rlist = []
for ll in slist:
    rlist.append(ll + ' 3')
util.save_file('D://semantic analysis//分词//词库//ok//result_dict1.txt', rlist)
def count_word(it, w_dict):
    for node in it:
        num = w_dict.get(node)
        if num is None:
            w_dict[node] = 1
        else:
            w_dict[node] = num + 1
    return w_dict


klist = ['正能量', '完爆', '扯淡', '达人', '接地气', '纠结', '吐槽', '淡定', '自拍']
txt_dir = 'D:\semantic analysis\c_date//'
jieba.set_dictionary("D://fc\dict1.txt")
jieba.initialize()
for key in klist:
    r_dict = {}
    r_list = []
    print(key)
    # Get the list of files for this keyword
    f_list = util.get_file_list(txt_dir + key, '.txt')
    for file in f_list:
        w_list = util.get_list_from_file(txt_dir + key + '//' + file)
        rl = get_word_list(w_list)
        count_word(rl, r_dict)
    # Save the results
    for (k, v) in r_dict.items():
        r_list.append(k + '\t' + str(v))
    util.save_file('D://fc//result//' + key + '.txt', r_list)
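# Quick self-contained check of count_word() above, using an assumed token list
# (illustration only):
tokens = ['吐槽', '淡定', '吐槽']
print(count_word(tokens, {}))  # -> {'吐槽': 2, '淡定': 1}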
def get_qrlogin_pic(self):
    heads = {}
    url = self._get_polling_url('ptqrshow')
    response = self.session.get(url)
    util.save_file(response, "login_qr.png")
# Filter useless characters out of Weibo posts
def input_filer(temp):
    # Strip URLs-of-origin and @mentions
    temp = re.sub("(分享自@\S+)", '', temp)
    temp = re.sub("(@\S+)", '', temp)
    # Compile the regular expression into a Pattern object
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    # Match the text with the Pattern and collect all runs of Chinese characters
    match = pattern.findall(temp)
    return ' '.join(match)


rr_list = []
with open('2013-02-03.txt', 'r', encoding='utf-8') as r_file:
    r_list = r_file.readlines()
for sentence in r_list:
    sentence = sentence.strip().rstrip('\n')
    if sentence is not None and sentence != '':
        rr_list.append(input_filer(sentence))
util.save_file('result.txt', rr_list)

# jieba.load_userdict("D:\semantic analysis\分词\词库\导出结果\dict1.txt")
# jieba.set_dictionary("D:\semantic analysis\分词\词库\导出结果\dict1.txt")
# jieba.initialize()
# seg_list = jieba.cut(test_str, cut_all=False)
# print("Default Mode: " + "/ ".join(seg_list))  # accurate mode
# print(" ".join(seg_list))
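# Small illustration of input_filer() above on a made-up Weibo-style sentence
# (the sample text is an assumption, not data from 2013-02-03.txt):
sample = '今天天气不错 http://t.cn/abc123 分享自@某某用户 哈哈'
print(input_filer(sample))
# Mentions, URLs and other non-Chinese characters are dropped, leaving the
# space-joined runs of Chinese characters: '今天天气不错 哈哈'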