def _getFeeds(feeds, feed_list):
    """Collect feed data from request response."""
    # Unwrap the 'feed' key if the response is wrapped in one
    feeds = feeds['feed'] if 'feed' in feeds else feeds
    if 'data' in feeds:
        for feed in feeds['data']:
            Log.info('Extracting feed data: ' + feed['id'])
            message = feed['message'] if 'message' in feed else ''
            link = feed['link'] if 'link' in feed else ''
            shares = _getShares(feed)
            comments_count = _getComments(feed)
            reactions_count = _getReactions(feed)
            feed_list.append(
                (feed['id'], message, link, shares, feed['created_time'],
                 comments_count, reactions_count))
    # Check whether the feed has a next page
    if 'paging' in feeds and 'next' in feeds['paging']:
        feeds_url = feeds['paging']['next']
        feed_list = _getFeeds(_getRequest(feeds_url), feed_list)
    return feed_list

def rp5_download(location, beginDate, endDate, source, fileName=None,
                 dirPath=None, extract=True):
    # Mandatory arguments to the rp5Interface.rp5_download API:
    #   location  -> (string) Name of the place to download the weather data for.
    #   beginDate -> (string) Start date in the format dd.mm.yyyy
    #   endDate   -> (string) End date in the format dd.mm.yyyy
    #   source    -> (string) Data source from the rp5.in portal, either "metar" or "archive"
    # Optional arguments to the rp5Interface.rp5_download API:
    #   fileName  -> (string) Name of the file without any extension
    #   dirPath   -> (string) Full directory path where the file is to be downloaded. Default: rp5_ddmmYYYY
    #   extract   -> (bool) Whether to extract the downloaded .xls.gz file. Default: True
    if fileName is not None:
        fileName = fileName + '.xls.gz'
    rp5 = RP5Interface(location=location, beginDate=beginDate, endDate=endDate,
                       source=source, fileName=fileName, dirPath=dirPath)
    fullPath = rp5.download_date()
    if extract:
        fileFullPath = fullPath.rsplit('.', 1)[0]
        with gzip.open(fullPath, 'rb') as f_in:
            with open(fileFullPath, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        Log.info("Extracted file at - {}".format(fileFullPath))

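# Usage sketch (not part of the original module): a hypothetical call to
# rp5_download as documented above. The location, dates and file name are
# placeholders; RP5Interface, gzip, shutil and Log are assumed to be set up
# by the surrounding module.
if __name__ == '__main__':
    rp5_download(location='Bengaluru',
                 beginDate='01.01.2020',
                 endDate='07.01.2020',
                 source='archive',
                 fileName='bengaluru_weather',
                 extract=True)
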
def setTimeInterval(self, time_delta=1):
    """Set time interval in days for data extraction. DEFAULT: 1 day."""
    self.time_delta = timedelta(days=time_delta)
    Log.info('Time delta: %i day(s)' % time_delta)

def _browser_exit(self, exit_status=0):
    if self._browser is not None:
        self._browser.close()
    if exit_status != 0:
        Log.error(self._exit_msg)
    else:
        Log.info(self._exit_msg)
    sys.exit(exit_status)

def _browser_create(self):
    try:
        if self._browser is not None:
            Log.info("Closing existing browser instance")
            self._browser.close()
        Log.info("Creating browser instance ...")
        self._browser = webdriver.Firefox()
    except Exception:
        Log.fatal("Some error occurred while creating browser instance!")

def init(self):
    """Start crawling."""
    Log.info('Crawling initiated...')
    data = self._getTarget()
    df = pd.DataFrame(data)
    df.to_json('data/data.json')
    Log.info('Crawling finished!')

def _select_data_source(self):
    if self._browser is not None:
        if (self._rp5source.lower() == "metar"):
            Log.info("Getting METAR data")
            dataLinkElem = self._browser.find_element_by_id('metar_link')
        elif (self._rp5source.lower() == "archive"):
            Log.info("Getting ARCHIVE data")
            dataLinkElem = self._browser.find_element_by_id('archive_link')
        else:
            self._exit_msg = "Something went wrong, exiting!"
            self._browser_exit(1)
        dataLinkElem.click()

def run_listener(q):
    log = Log(__name__, level='INFO')
    log.info('Run AMQP listener until ctrl-c input\n {0}'.format(q))

    def thread_func(worker, id):
        worker.start()

    def signal_handler(signum, stack):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    worker = AMQPWorker(queue=q)
    task = WorkerThread(worker, 'amqp')
    tasks = WorkerTasks(tasks=[task], func=thread_func)
    tasks.run()
    tasks.wait_for_completion(timeout_sec=-1)

def download_date(self):
    self._browser_create()
    self._search_loc()
    self._select_loc()
    DataSource.setSource(self._rp5source)
    self._rp5source = DataSource.getSource()
    self._select_data_source()
    if (self._rp5source.lower() == "metar"):
        downTabElem = self._browser.find_element_by_id('tabMetarDLoad')
    elif (self._rp5source.lower() == "archive"):
        downTabElem = self._browser.find_element_by_id('tabSynopDLoad')
    else:
        self._exit_msg = "Rp5 source is not one of the available options, please retry. Exiting!"
        self._browser_exit(1)
    downTabElem.click()
    time.sleep(3)
    beginDateElem = self._browser.find_element_by_id('calender_dload')
    beginDateElem.clear()
    Log.info("Start Date is - {}".format(self._beginDate))
    beginDateElem.send_keys(self._beginDate)
    endDateElem = self._browser.find_element_by_id('calender_dload2')
    endDateElem.clear()
    Log.info("End Date is - {}".format(self._endDate))
    endDateElem.send_keys(self._endDate)
    generateDownloadElem = self._browser.find_elements_by_class_name(
        'archButton')[1]
    generateDownloadElem.click()
    time.sleep(5)
    try:
        downloadElem = self._browser.find_element_by_link_text('Download')
        fileUrl = downloadElem.get_attribute('href')
        Log.info("File URL is - " + fileUrl)
    except selenium.common.exceptions.NoSuchElementException:
        self._exit_msg = "Download link not found, exiting ..."
        self._browser_exit(1)
    fullPath = download_data(fileUrl, self._fileName, self._dirPath)
    Log.info("Downloading Data to {} ...".format(fullPath))
    Log.info("rp5 data downloaded successfully for {} location from {} to {}".format(
        self._loc, self._beginDate, self._endDate))
    self._browser.close()
    return fullPath

class DB:
    def __init__(self):
        self.log = Log()
        try:
            self.conn = connect(host=host,
                                port=port,
                                user=user,
                                password=password,
                                db=db,
                                charset='utf8',
                                cursorclass=cursors.DictCursor)
        except OperationalError as e:
            self.log.error("Mysql Error %d: %s" % (e.args[0], e.args[1]))

    # Wrapper for SELECT statements
    def select(self, table_name, table_data):
        if table_data['where'] == '':
            # print("no WHERE clause")
            real_sql = "select " + table_data['fields'] + " from " + table_name
        else:
            keys = table_data['where']
            # print(keys)
            where_str = 'where'
            for key, value in keys.items():
                where_str = where_str + ' ' + key + ' = ' + '\'' + value + '\'' + ' and'
            where_str = where_str[:-4]
            real_sql = ("select " + table_data['fields'] + " from " +
                        table_name + ' ' + where_str)
        self.log.info(real_sql)
        # print(table_name)
        cur = self.conn.cursor()
        cur.execute(real_sql)
        results = cur.fetchall()
        return results

    # Wrapper for INSERT statements
    def insert(self, table_name, table_data):
        keys = table_data
        # print(keys)
        str1, str2 = '', ''
        for key, value in keys.items():
            str1 = str1 + key + ','
            str2 = str2 + value + ','
        str1 = str1[:-1]
        str2 = str2[:-1]
        real_sql = ("insert into " + table_name + " (" + str1 + ")" +
                    " values " + "(" + str2 + ")")
        self.log.info(real_sql)
        # print(table_name)
        cur = self.conn.cursor()
        cur.execute(real_sql)

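# Usage sketch (not part of the original module), illustrating the expected
# shape of table_data for the wrappers above. Table and column names are made
# up; note that insert() expects string values already quoted as SQL literals.
if __name__ == '__main__':
    db = DB()
    rows = db.select('users', {'fields': 'id, name',
                               'where': {'name': 'alice'}})
    db.insert('users', {'name': "'bob'", 'age': "'30'"})
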
def __init__(self, gse, merge_cols=True, percentile=.75):
    """Initialize filter. Requires populated gse.

    Args:
        gse: GSE instance associated with row_iter
        merge_cols: bool if to merge columns if able
        percentile: float 0<x<=1 of top percent by std to keep
    """
    # 1. Require that GSE is populated and is of the correct type.
    # ==========
    if not gse.populated:
        raise geo.NotPopulatedError("%s must be populated to filter rows." % gse)
    if gse.type != "eQTL":
        raise geo.StudyTypeMismatch("%s must be type 'eQTL', not '%s'." %
                                    (gse, gse.type))

    # 2. Set attributes.
    # ==========
    self.gse = gse
    self.col_titles = self.gse.col_titles[:]
    self.col_map = None
    self.rows_filtered = []
    self.rows_per_gene = {}
    self.row_stats = {}
    self.merge_cols = merge_cols
    self.percentile = percentile

    # 3. Get column map for column merging.
    # ==========
    n_samples = len(self.gse.samples)
    n_uniques = len(self.gse.subject_gsms)
    # If there are more samples than unique subjects, then create a column map.
    if self.merge_cols and n_samples > n_uniques:
        self.col_map = self._make_col_map()
        rx_str = self.gse.parameters['rx_gsm_subject_str']
        Log.info(("Created column merge map for %s (%d samples to %d subjects)" +
                  " with rx '%s'") %
                 (self.gse, n_samples, n_uniques, rx_str))
        # Verify that the column merge map is reasonable (num uniques + 1 for the ID column).
        if len(self.col_map) != n_uniques + 1:
            Log.warning("Column merge map has %d classes, expected %d in %s." %
                        (len(self.col_map), n_uniques, self))
    # No column merging scheme can exist. Do not create a col_map.
    else:
        # Retrieve the regular expression used.
        rx_str = self.gse.parameters['rx_gsm_subject_str']
        Log.info("No column merge map created for %s using rx '%s'. Merge_cols flag is %s" %
                 (self.gse, rx_str, self.merge_cols))

def _getRequest(url):
    """Send HTTP request to url and return the response."""
    try:
        request_result = requests.get(url, headers={
            'Connection': 'close'
        }).json()
        Log.info('Sent request to: %s' % url)
        time.sleep(0.01)
    except Exception:
        Log.error('URL not found!')
        sys.exit()
    return request_result

def __init__(self, location, beginDate, endDate, source, fileName=None,
             dirPath=None):
    Log.info("Selenium Version - {}".format(selenium.__version__))
    self._loc = location
    self._beginDate = beginDate
    self._endDate = endDate
    self._exit_msg = ""
    self._rp5source = source
    self._fileName = fileName
    self._dirPath = dirPath
    self._browser = None

def run_listener(q, timeout_sec=3):
    log = Log(__name__, level='INFO')
    log.info('Run AMQP listener until ctrl-c input\n {0}'.format(q))

    def thread_func(worker, id):
        worker.start()

    def signal_handler(signum, stack):
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    worker = AMQPWorker(queue=q)
    task = WorkerThread(worker, 'amqp')
    tasks = WorkerTasks(tasks=[task], func=thread_func)
    tasks.run()
    tasks.wait_for_completion(timeout_sec)

class MyUnit(unittest.TestCase):
    # def setUpClass(cls):
    #     cls.db = DB()
    #     # cls.log = Log()
    #     # cls.excel = Excel()
    #
    # def tearDownClass(cls):
    #     cls.log.info('success')

    def setUp(self):
        self.db = DB()
        self.log = Log()
        self.excel = Excel()

    def tearDown(self):
        self.log.info(self.result)

def _processFeed(feed):
    """Turns feed content into dictionary."""
    # Log process
    Log.info('Processing feed: ' + feed[0])
    # Create feed content dictionary
    feed_content = {
        'id': feed[0],
        'message': feed[1],
        'link': feed[2],
        'shares': feed[3],
        'created_time': feed[4],
        'comments_count': feed[5],
        'reactions_count': feed[6]
    }
    return feed_content

def read(self):
    """Return a file-pointer-like object to this resource.

    Returns:
        iter: file-pointer-like str line iterator (uncompressed)
    """
    # Attempt to retrieve from cache if possible.
    if self.read_cache:
        fp = self._fetch_from_cache()
    else:
        fp = None

    if fp:
        Log.info("Fetched %s from cache." % self.url)
        return fp
    else:
        Log.info("Downloading %s from network." % self.url)
        # From HTTP, fetch request and populate self with response.
        http_fp = self.fetch()
        # If compressed, wrap the HTTP handle in a gzip decompressor.
        if self.headers and "content-encoding" in self.headers and \
                self.headers["content-encoding"] == "gzip":
            zip_fp = gzip.GzipFile(fileobj=http_fp)
            fp = zip_fp
        else:
            fp = http_fp

        # Return download iterator from the (decompressed) HTTP handle.
        if self.write_cache:
            cache = self.cache_name
        else:
            cache = None

        # Get expected download size in bytes.
        if self.headers and 'content-length' in self.headers:
            try:
                size = int(self.headers['content-length'])
            except (ValueError, TypeError):
                size = None
        else:
            size = None

        return DownloadIter(fp, cache=cache, size=size,
                            report=self.report_status, finalize=self.finalize)

def run_server(addr, port):
    global task
    log = Log(__name__, level='INFO')
    log.info('Run httpd server until ctrl-c input')

    def shutdown(task):
        task.worker.stop()
        task.running = False

    def start(httpd, id):
        httpd.start()

    def signal_handler(signum, stack):
        log.info('Sending shutdown to httpd server')
        thread.start_new_thread(shutdown, (task,))

    signal.signal(signal.SIGINT, signal_handler)
    server = Httpd(port=int(port), address=addr)
    task = WorkerThread(server, 'httpd')
    worker = WorkerTasks(tasks=[task], func=start)
    worker.run()
    worker.wait_for_completion(timeout_sec=-1)  # run forever

class SynchronizeData():
    def __init__(self):
        # self.time = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%dT%H:%M:%SZ')
        # self.ssh = paramiko.SSHClient()
        self.log = Log()
        self.db = pymysql.connect(host="172.16.129.40",
                                  port=3306,
                                  user="******",
                                  password="******",
                                  charset='utf8')
        self.cursor = self.db.cursor()

    # def connection_service(self):
    #     '''Connect to the Linux server.'''
    #     self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # allow connecting to hosts not in know_hosts
    #     self.ssh.connect(hostname='172.16.129.40', port=8800, username='******', password='******')  # connect to the server
    #     sftp_client = self.ssh.open_sftp()
    #     mysqld_log = sftp_client.open("/var/log/mysqld.log")  # file path
    #     l = mysqld_log.readlines()  # read all log contents
    #     row_list = [x.strip() for x in l if x.strip() != '']
    #     error_list = []
    #     for log_list in row_list:
    #         if '1733' in log_list:
    #             error_list.append(log_list)
    #     return error_list

    def synchronize(self):
        self.cursor.execute("show slave status")
        result = self.cursor.fetchall()  # fetch the slave status fields
        self.db.commit()
        try:
            if 'no' in result[0]:
                print("Replication link is down, restarting the sync process")
                self.log.info("Data sync has a problem, restarting the sync process")
                self.cursor.execute("start slave")
                self.db.commit()
            else:
                print("Replication is running, status is normal")
                self.log.info("Replication is running normally, no error log")
            # self.ssh.close()  # close the connection
        except Exception as e:
            print(e)
            self.db.rollback()  # roll back on exception

def _search_loc(self):
    if self._browser is not None:
        self._browser.get("http://rp5.in/")
        searchElem = self._browser.find_element_by_id('searchStr')
        searchElem.clear()
        Log.info("Searching for location - {}".format(self._loc))
        searchElem.send_keys(self._loc)
        searchButtonElem = self._browser.find_element_by_id('searchButton')
        searchButtonElem.click()
        try:
            self._browser.find_element_by_class_name('searchResults')
        except selenium.common.exceptions.NoSuchElementException:
            self._exit_msg = "No search results. Please try searching for another location."
            self._browser_exit(1)
        except Exception:
            self._exit_msg = "An unexpected error occurred while searching."
            self._browser_exit(1)

def _getTarget(self):
    """Get data from target."""
    # Log task start time
    start_time = time.time()
    Log.info('Task started.')
    # Set time interval
    since = datetime.strftime(datetime.now() - self.time_delta, '%Y-%m-%d')
    until = datetime.strftime(datetime.now(), '%Y-%m-%d')
    # Get list of feed ids from target
    feeds_url = ('https://graph.facebook.com/v%s/' % self.version + self.target +
                 '/?fields=feed.since(' + since + ').until(' + until +
                 '){id,message,link,shares,created_time,comments.summary(true), reactions.summary(true)}&' +
                 self.token)
    feed_list = _getFeeds(_getRequest(feeds_url), [])
    # Get message, comments and reactions from feed
    data = []
    if feed_list:
        data = [_processFeed(feed) for feed in feed_list]
    # Get time cost
    cost_time = time.time() - start_time
    # Log task end time and time cost
    Log.info('Task finished.')
    Log.info('Time Cost: ' + str(cost_time))
    return data

def run_server(addr, port):
    global task
    log = Log(__name__, level='INFO')
    log.info('Run httpd server until ctrl-c input')

    def shutdown(task):
        task.worker.stop()
        task.running = False

    def start(httpd, id):
        httpd.start()

    def signal_handler(signum, stack):
        log.info('Sending shutdown to httpd server')
        thread.start_new_thread(shutdown, (task,))

    signal.signal(signal.SIGINT, signal_handler)
    server = Httpd(port=int(port), address=addr)
    task = WorkerThread(server, 'httpd')
    worker = WorkerTasks(tasks=[task], func=start)
    worker.run()
    worker.wait_for_completion(timeout_sec=-1)  # run forever

def detect_system_language(log: logger.Log):
    db: Dict[str, Union[bool, list, str]] = utils.access_db()

    if not db['has_asked_language']:
        language_codes: dict = {read_localization_files()[lang]['code']: lang for lang in langs[1:]}
        system_locale: str = locale.windows_locale[ctypes.windll.kernel32.GetUserDefaultUILanguage()]
        system_language_code: str = system_locale.split('_')[0]
        is_brazilian_port: bool = system_locale == 'pt_BR'

        if system_language_code in language_codes or is_brazilian_port:
            system_language: str = language_codes[system_language_code]
            can_localize: bool = True
        else:
            if system_language_code != 'en':
                log.error(f"System locale {system_locale} is not localizable")
            can_localize = False

        if can_localize and settings.get('language') != system_language:
            log.info(f"System language ({system_locale}, {system_language}) != settings language ({settings.get('language')}), asking to change")
            db['has_asked_language'] = True
            utils.access_db(db)
            system_language_display: str = 'Português Brasileiro' if is_brazilian_port else system_language

            # this is intentionally not localized
            changed_language: str = messagebox.askquestion(
                f"TF2 Rich Presence {launcher.VERSION}",
                f"Change language to your system's default ({system_language_display})?")
            log.debug(f"Changed language: {changed_language}")

            if changed_language == 'yes':
                settings.change('language', system_language)

class Runner(RunnerInterface):
    def __init__(self):
        super().__init__()
        self.store = Store()
        self.torr_searcher = TorrSearch()
        self.loop = asyncio.get_event_loop()
        self.queue = asyncio.Queue()
        self.bot = TgBot(self.queue)
        self.log = Log(__name__)

    async def background_updater(self):
        await asyncio.sleep(5.0)
        self.log.debug("Start update after 5 seconds")
        while True:
            await asyncio.sleep(10)
            movies = await self.store.get_movies()
            self.log.debug(f"Search for {movies}")
            for movie in movies:
                self.log.debug(f"Find '{movie['title']}' for users: {movie['watchers']}")
                result = await self.torr_searcher.search_word(movie['title'])
                self.log.debug(f"Result: {result}")
                if result:
                    message = self.format_films(movie['title'], result)
                    for watcher in movie['watchers']:
                        await self.bot.send_message(watcher, message)
                    await self.store.create_or_update_movie(movie=movie['title'], is_active=False)

    @staticmethod
    def format_films(search_str, films):
        msg = f'The following releases were found for the query "{search_str}":\n'
        for i in films[:6]:
            msg += f"---\n{i['date']} | {i['size']} | {i['name']}\n"
        return msg

    async def process_messages(self):
        while True:
            item = await self.queue.get()
            if item is not None:
                self.log.info(item)
                if not isinstance(item, dict) or 'type' not in item.keys():
                    self.log.error(f"Get incorrect object from tgbot: {item}")
                    continue  # skip malformed items instead of crashing below
                if item['type'] == 'start':
                    await self.store.create_or_update_user(item['content']['from'])
                elif item['type'] == 'deactivate':
                    await self.store.create_or_update_user(item['content']['from'], is_active=False)
                elif item['type'] == 'ignore':
                    watcher = item['content']['from']['id']
                    movie = item['content']['text'][8:].strip()
                    self.log.debug(f"User {watcher} trying ignore: {movie}")
                    await self.store.ignore_movie(movie=movie, watcher=watcher)
                    answer = f"You are unsubscribed from '{movie}' search."
                    await self.bot.send_message(watcher, answer)
                elif item['type'] == 'list':
                    watcher = item['content']['from']['id']
                    movies = await self.store.get_movies(telegram_id=watcher)
                    results = '\n'.join([i['title'] for i in movies])
                    answer = f"You are waiting for:\n{results}"
                    await self.bot.send_message(watcher, answer)
                elif item['type'] == 'message':
                    movie = item['content']['text'].strip()
                    watcher = item['content']['from']['id']
                    if movie.startswith('/'):
                        answer = "Incorrect command. Use /help for additional information."
                    else:
                        if await self.store.get_users(telegram_id=watcher):
                            await self.store.create_or_update_movie(movie=movie, watcher=watcher)
                            answer = f"Title '{movie}' was added"
                        else:
                            answer = 'You need to /start chatting with the bot before making requests.'
                    await self.bot.send_message(watcher, answer)
                else:
                    self.log.error(f"Unknown type from item: {item}")

    def prepare(self):
        self.loop.create_task(self.process_messages())
        self.loop.create_task(self.background_updater())

    def run(self):
        self.prepare()
        # Bot exec runs the loop forever
        self.bot.run()

    async def search_digital(self, keywords):
        pass

    async def search_bd(self):
        pass

    async def search_torrent(self, keywords):
        return await self.torr_searcher.search_word(keywords)

def close(self):
    """Close any open file pointers, close and finalize cache file."""
    # Ignore repeated calls to close().
    if self.closed:
        Log.info("Redundant call to close(), ignored for %s." % self)
        return
    else:
        Log.info("Closing %s..." % self)

    # Handle finalize requests to complete the download to the buffer.
    if self.finalize:
        if not self.completed and self.cache:
            Log.info("Finalizing download of %s." % self)
            # Read the remaining buffer unconditionally. Use the iterator if reporting.
            if self.report:
                while True:
                    try:
                        self.next()
                    except StopIteration:
                        break
            else:
                self.read()
                # If not closed in the previous read(), try another read().
                if not self.closed:
                    # This closes self since the previous read flushed the buffer.
                    self.read()
                if not self.closed:
                    Log.warning("Close sequence not completed as expected for %s." % self)
        # Exit: prior reads in the finalize process already closed self.
        return

    # self.buffer.close() causes bugs with FTP. Python sockets clean up after
    # themselves in garbage collection, so just remove the reference to buffer.
    # self.buffer.close()
    self.buffer = None
    self.fp_out.close()

    if self.completed:
        Log.info("Download complete. %d bytes read." % (self.bytes_read))
        # Finalize cache.
        if self.cache:
            os.rename(self.tmp_filepath, self.dest_filepath)
            Log.info("Cache finalized as '%s'." % (self.dest_filepath))
    else:
        Log.info("Download closed before completion. %d bytes read." %
                 (self.bytes_read))
        # Flush cache.
        if self.cache:
            os.remove(self.tmp_filepath)
            Log.info("Incomplete cache '%s' deleted." % (self.tmp_filepath))

    # Flag self as closed to prevent redundant .close() calls.
    self.closed = True

"""信号处理,退出程序 """ tornado.ioloop.IOLoop.instance().stop() logger.info('Msg-delivery stopped!') signal.signal(signal.SIGTERM, quit_app) signal.signal(signal.SIGINT, quit_app) if __name__ == "__main__": #init port = 8776 includes = None opts, argvs = getopt.getopt(sys.argv[1:], "c:p:h") for op, value in opts: if op == '-c': includes = value elif op == '-p': port = int(value) elif op == '-h': Usage() if not includes: Usage() confs = init_application(includes) logger.info("Msg-delivery initialized!") #main timer = timer_procedure.msgTimer() application = tornado.web.Application([(r"^/([^\.|]*)(?!\.\w+)$",MainHandler,dict(timer=timer)),], log_function=log_request) application.listen(port) logger.info("Msg-delivery start to Loop!") tornado.ioloop.IOLoop.instance().start()
log.info("Starting Media Center Version", version, "(" + date + ")") def manageLogFileSize(max_size): # Simple log file rollover when > 50K bytes if os.path.exists(log.file): if os.stat(log.file).st_size > int(max_size): log.rotateLogFile() if __name__ == '__main__': logVersionAndDate() preferences = Preferences() preferences.readConfig() manageLogFileSize(preferences.log.max_size) # Start the chrome browser in the background. browser_pid = startChrome() #print("BROWSER:", browser_pid) websocket_server.browser_pid = browser_pid #print("'startChrome' is commented out for testing purposes") # Start the websocket server (host) websocket_server.startServer() log.info("Main Exit") sys.exit()
def __init__(self, token, version):
    """Crawler instance takes version and token as parameters."""
    self.version = version
    self.token = token
    Log.info('New Crawler: ver. %s' % version)

def get_rows(self):
    """Return filtered row iterator.

    CLEAN THIS UP. It may be best to break this into multiple filters?
    Fix to return [str]

    Returns:
        *[str] of filtered rows of data split by columns
    """
    Log.info("Initiated filter %s for rows of %s" % (self, self.gse))
    if self.col_map:
        Log.info("self.col_map exists. Merge %d to %d columns for %s" %
                 (len(self.col_titles), len(self.col_map), self))
    else:
        Log.info("No col_map. Will not merge %d columns for %s." %
                 (len(self.col_titles), self))

    # 0. Determine best gene name column in case GENE_SYMBOL does not exist.
    # ==========
    gene_symbol_name = None
    # Traverse column names in preferred order.
    for name in geo.GPL.EQTL_GENE_NAME_LIST:
        # Skip columns without assignments.
        if self.gse.platform.special_cols[name] is None:
            continue
        # Choose the first column that has an acceptable assignment.
        else:
            actual_column_name = self.gse.platform.special_cols[name]
            gene_symbol_name = name
            break

    # Verify that a column was chosen to identify the row.
    if gene_symbol_name:
        Log.info("Selected column '%s=>%s' to best represent gene name for %s." %
                 (gene_symbol_name, actual_column_name, self.gse.platform))
    else:
        raise MalformedFilterError("Cannot select gene symbol column from %s" %
                                   (self.gse.platform))

    # 1. Update column titles accounting for merged columns.
    # ==========
    if self.col_map:
        self.col_titles = self._merge_cols(self.col_titles, merge_titles)
    # Insert generated column titles (AFTER merging columns).
    # self.col_titles[0] should always be "ID_REF".
    col_titles_prefix = ["ID_REF", gene_symbol_name, "NUM_VALUES", "MEAN", "STD"]
    self.col_titles = col_titles_prefix + self.col_titles[1:]
    Log.info("Added %s, NUM_VALUES, MEAN, STD to col titles for %s." %
             (gene_symbol_name, self))

    # Open new temporary file. XXX RENAME
    filepath = temp_file_name("%s.rowmerge" % self.gse.id)
    fp_out = open(filepath, "w")

    # 2: @DATAPASS 1: Merge columns, add gene symbol, filter non-genes.
    # ==========
    Log.info(("Started filter 1 in %s for %s: find and add gene, merge cols. " +
              "(This may take a while.)") % (self, self.gse))
    num_rows = 0
    for row in self.gse.get_rows():
        # TODO: Add status reporting to console.
        num_rows += 1
        # Determine gene symbol for this row. Filter if no gene symbol exists.
        row_id = row[0]  # Row ID should always be the first entry in a row.
        gene_sym = self.gse.platform.get_column(row_id, gene_symbol_name)
        if not gene_sym:
            self.rows_filtered.append(row_id)
            continue  # skip this row
        else:
            self.rows_per_gene.setdefault(gene_sym, set()).add(row_id)

        # Merge columns using column mapping of series matrix columns.
        # Also, transform row values into floats (or None).
        if self.col_map:
            # XXX _merge_cols is slow, perhaps due to float conversions.
            row = self._merge_cols(row, merge_floats)
        else:
            row = map(get_float, row)

        # Compute mean and standard deviation of all non-ID columns.
        # Check for None specifically since a valid value could be 0.
        filtered_row = filter(lambda x: x is not None, row[1:])
        std = calc_std(filtered_row)
        mean = calc_mean(filtered_row)
        num_values = len(filtered_row)
        # Store row statistics.
        self.row_stats[row_id] = \
            {'num_values': num_values, 'mean': mean, 'std': std}
        # Insert (gene_sym, size, mean, std) into second column.
        row = [row_id, gene_sym, num_values, mean, std] + row[1:]

        # Write row to temporary file.
        # TODO: I may want to compress my row by converting it to a pickle.
        # Pickling a list of floats uses 2/3 space and takes 1/2 compute time.
fp_out.write("\t".join(map(str, row))) fp_out.write("\n") fp_out.close() # Log results of filter pass 1 # ========== n = len(self.rows_filtered) n_gene_rows = num_rows-n mean_rows_per_gene = float(num_rows-n)/len(self.rows_per_gene) if num_rows != self.gse.est_num_row: Log.warning("Num rows read(%d) not num rows expected(%d) for %s" % \ (num_rows, self.gse.est_num_row, self)) Log.info(("Filter 1 complete for %s. " + \ "%d of %d (%.2f%%) rows removed for no gene symbol. %d rows remain.") % \ (self, n, num_rows, (n/float(num_rows))*100, n_gene_rows)) Log.info("Number of unique genes: %d, %.1f mean num rows per gene." % \ (len(self.rows_per_gene), mean_rows_per_gene)) # 3: Choose representative genes from self.row_stats and self.rows_per_gene # ========== # select all rows for a gene. If a gene selected_row_ids = [] for gene, row_ids in self.rows_per_gene.items(): # If only a single row for this gene exists, choose it. if len(row_ids) == 1: best_row_id = row_ids.pop() # Else, choose row with the highest mean value. else: s = sorted(row_ids, key=lambda x: self.row_stats[x]['mean']) best_row_id = s[-1] # Add this row_id to the accepted list selected_row_ids.append(best_row_id) n_single_gene_rows = len(selected_row_ids) Log.info("Selected %d of %d rows for %d genes by maximum row mean." % \ (n_single_gene_rows, n_gene_rows, len(self.rows_per_gene))) # Sort row_ids by row standard deviation in decreasing order. selected_row_ids.sort(key=lambda x: self.row_stats[x]['std'], reverse=True) # Select top percentile by std. Convert type to set for easier membership tests. x = int(len(selected_row_ids)*self.percentile) selected_row_ids = set(selected_row_ids[:x]) threshold_num_rows = len(selected_row_ids) assert(x == threshold_num_rows) Log.info("Selected top %d%% of rows (%d of %d) by standard deviation." % (self.percentile*100, threshold_num_rows, n_single_gene_rows)) # FINAL PASS: YIELD FILTERED LINES # =========== # Open temporary file generated in first pass. fp = open(filepath, "r") # Yield (modified) column titles. yield self.col_titles[:] # For each line, only yield if the row_id is in the selected_row_ids list. num_yielded_rows = 0 for line in fp: row = line.strip().split("\t") row_id = row[0] if row_id in selected_row_ids: num_yielded_rows += 1 yield row # All lines yielded. Check number of lines yielded with expected value. if num_yielded_rows != threshold_num_rows: Log.warning("%d yielded rows != %d expected number of rows." % \ (num_yielded_rows, threshold_num_rows)) else: Log.info("Filter complete. yielded %d rows." % (num_yielded_rows))
class Rutor(TorrentInterface):
    def __init__(self):
        self.log = Log(__name__)
        self.session = aiohttp.ClientSession()
        self._init_variables()

    def _init_logging(self):
        pass

    def _init_variables(self):
        self._rutor = dict()
        self._rutor['protocols'] = ['http', 'https']
        self._rutor['host'] = ['rutor.is', 'rutor.info', '6tor.net']
        self._rutor['search_string'] = '/search/'
        self._rutor['search_keyword'] = '/search/0/0/100/0/'
        self._rutor['search_words'] = ''

    async def fetch_url(self, url):
        try:
            async with self.session.get(url, allow_redirects=False) as resp:
                if resp.status != 200:
                    self.log.info(f"Got code {resp.status} from {url}")
                    await asyncio.sleep(2.0)
                    return None
                return await resp.text()
        except client_exceptions.ClientConnectionError as e:
            self.log.info(e)
            await asyncio.sleep(2.0)
            return None

    def _generate_links(self, search_str, method='search_string'):
        links = list()
        for host in self._rutor['host']:
            for proto in self._rutor['protocols']:
                links.append(f"{proto}://{host}{self._rutor[method]}{search_str}")
        return links

    @staticmethod
    def parse(html_text):
        tree = html.fromstring(html_text)
        elements = tree.xpath('//table[@width]//tr')
        results = list()
        for e in elements:
            data = e.xpath('./td//text()')
            link = e.xpath('.//a/@href')
            if len(data) == 7:
                element = {
                    "date": data[0],
                    "name": data[2],
                    "size": data[3],
                    "link": link[2]
                }
            elif len(data) == 8:
                element = {
                    "date": data[0],
                    "name": data[2],
                    "size": data[4],
                    "link": link[2]
                }
            else:
                continue
            results.append(element)
        return results

    async def search(self, search_str):
        futures = [self.fetch_url(link) for link in self._generate_links(search_str)]
        self.log.debug(f"Generated links: {' '.join(self._generate_links(search_str))}")
        return await self.run_search(futures)

    async def run_search(self, futures):
        done, pending = await asyncio.wait(futures, return_when=asyncio.FIRST_COMPLETED)
        for future in pending:
            future.cancel()
        try:
            html_page = done.pop().result()
        except Exception:
            return None
        # !!! Make run in executor
        return self.parse(html_page)

    async def search_keywords(self, keywords):
        if isinstance(keywords, list):
            keywords = ' '.join(keywords)
        futures = [self.fetch_url(link)
                   for link in self._generate_links(search_str=keywords, method='search_keyword')]
        self.log.debug(f"Generated links: "
                       f"{' '.join(self._generate_links(search_str=keywords, method='search_keyword'))}")
        return await self.run_search(futures)

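# Usage sketch (not part of the original module): drives Rutor.search from a
# plain asyncio entry point. The search string is a placeholder, and reaching
# the mirror hosts listed above is assumed.
if __name__ == '__main__':
    async def _demo():
        rutor = Rutor()
        try:
            print(await rutor.search('some movie title'))
        finally:
            await rutor.session.close()

    asyncio.get_event_loop().run_until_complete(_demo())
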
def setTarget(self, target):
    """Set target FB page for Crawler instance."""
    self.target = target
    Log.info('Target: %s' % target)

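# Usage sketch (not part of the original module): assumes the crawler methods
# in this collection (__init__, setTarget, setTimeInterval, init) belong to one
# Crawler class and that a Graph API token string in the form expected by
# _getTarget is available. The token, API version and page name are placeholders.
if __name__ == '__main__':
    crawler = Crawler('access_token=<YOUR_TOKEN>', '3.2')
    crawler.setTarget('SomePublicPage')   # FB page to crawl
    crawler.setTimeInterval(7)            # look back 7 days
    crawler.init()                        # writes data/data.json
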
@classmethod
def setSource(cls, sourceStr):
    # Fall back to ARCHIVE if the given source string is not a known option.
    cls.__source = getattr(cls.__SOURCES, sourceStr.upper(),
                           cls.__SOURCES.ARCHIVE)
    Log.info("Setting Data source as {}".format(cls.__source))

# return json.load(res)


def get(url, data=None):
    if data:
        response = requests.get(url, data=data)
    else:
        response = requests.get(url)
    if not response.ok:
        raise Exception('http error: ' + url + ' ' + str(response.status_code))
    res = response.content.decode('unicode_escape')
    return json.loads(res)


if __name__ == '__main__':
    log.info('program started:------------------------------------')
    info_url = config.httpInfo + 'info'
    finish_url = config.httpInfo + 'finish'
    while True:
        person_info = {}
        # body_info = {}
        try:
            result = get(info_url)
            if result['has_record'] == 1:
                log.info('process record: %s' % result['data'])
                record_id = result['data']['id']
                person_info['person_id'] = result['data']['bbiid']
                person_info['height'] = result['data']['height']
                person_info['weight'] = result['data']['weight']
                person_info['name'] = result['data']['name']
                person_info['body_id'] = result['data']['body_id']
class TL:
    def __init__(self):
        self.tlc = TestlinkAPIClient(url, key)
        self.log = Log()

    # Format conversion: strip HTML markup from TestLink field text
    def __changeformat(self, oldformat):
        newformat = oldformat.replace('&quot;', '"').replace('<p>', '').replace(
            '</p>', '').replace('\n', '').replace('\t', '')
        return newformat

    # Get the id and name of every top-level test suite
    def get_testsuite_idname(self):
        projects = self.tlc.getProjects()
        animbus = projects[0]
        topSuites = self.tlc.getFirstLevelTestSuitesForTestProject(animbus['id'])
        suite = topSuites[0]
        for suite in topSuites:
            print('suite_id' + suite['id'], 'suite_name' + suite['name'])

    # Create a test suite
    # def create_testsuite(self, project_id, test_suite_name, test_suite_describe, father_id):
    #     if father_id == "":
    #         self.tlc.createTestSuite(project_id, test_suite_name, test_suite_describe)
    #     else:
    #         self.tlc.createTestSuite(project_id, test_suite_name, test_suite_describe, parentid=father_id)

    # Get a test case
    def get_testcase(self, testcase_id):
        self.log.info("Start fetching the test case")
        testcase_list = []
        testcase = self.tlc.getTestCase(testcase_id)
        for i in testcase:
            self.log.info("Fetched preconditions: " + self.__changeformat(i.get('preconditions')))
            testcase_list.append(self.__changeformat(i.get('preconditions')))
            for m in i.get('steps'):
                expected_results = self.__changeformat(m.get("expected_results"))
                actions = self.__changeformat(m.get("actions"))
                step_number = self.__changeformat(m.get("step_number"))
                # testcase_list.append(step_number)
                testcase_list.append(actions)
                testcase_list.append(expected_results)
                self.log.info("Step: " + step_number + " action: " + actions +
                              " expected result: " + expected_results)
        return testcase_list

    # Get the ids of all test cases under the given test plan
    def get_alltestcaseid_for_testplanid(self, testplanid):
        testcase_id = []
        testplan = self.tlc.getTestCasesForTestPlan(testplanid)
        for i in testplan.keys():
            self.log.info("Test case id " + i + " under test plan id " +
                          str(testplanid) + " added to the list")
            testcase_id.append(i)
        return testcase_id

    # Get the ids of all test cases under the given test suite
    def get_alltestcaseid_for_testsuiteid(self, testsuiteid):
        testcase_id = []
        testsuite = self.tlc.getTestCasesForTestSuite(testsuiteid, 0, "")
        for i in testsuite:
            self.log.info("Test case id " + i['id'] + " (name: " + i['name'] +
                          ") under test suite id " + str(testsuiteid) + " added to the list")
            testcase_id.append(int(i['id']))
        return testcase_id

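# Usage sketch (not part of the original module): assumes `url` and `key` point
# at a reachable TestLink instance; the test plan id below is a placeholder.
if __name__ == '__main__':
    tl = TL()
    for case_id in tl.get_alltestcaseid_for_testplanid(12345):
        print(tl.get_testcase(case_id))
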
        if not self.exist_t(evt.t):
            # Persist the timestamp
            self.save_t(evt.t)
            logger.info("Save a event-timer![ TIME:%s ]" % evt.t)
            # Register the timed task
            try:
                callback = functools.partial(self.timeout_callback, evt.t)
                tornado.ioloop.IOLoop.instance().add_timeout(int(evt.t), callback)
            except Exception as e:
                logger.error("Tornado add timeout error! [ ERROR:%s ]" % e)
            logger.info("Set event-timer's callback![ TIME:%s ]" % evt.t)
        # Persist the event (with an expiry time)
        self.save_evt(evt)
        logger.info("Save a event-list![ KEY:%s ]" % (REDIS_EVT_LST_PREFIX + evt.t))

    def timeout_callback(self, t):
        logger.debug("Event-timer callback![ TIMER:%s NOW:%s ]" % (t, time.time()))
        # Process all events scheduled for time t
        key = REDIS_EVT_LST_PREFIX + t
        list_len = self.R.llen(key)
        logger.debug("Scan event list![ KEY:%s TOTAL:%d ]" % (key, list_len))
        for i in range(list_len):
            item = self.R.lpop(key)
            logger.debug("Pop a event from list![ KEY:%s EVT:%s ]" % (key, item))
            if not item:
                continue
            dct = ujson.loads(item)
            evt = msgEvent().from_dict(dct)
            msg_procedure.process(evt)