def get_service_props(service_name):
    ''' Get the properties for an installation service

    Generate a dictionary of the service properties as available for the
    specified service_name.

    Input:   service_name - An AI service name or None if supplying configfile
    Return:  A dictionary of relevant properties for the specified
             service_name. Returns None, if service config file does not exist

    '''
    logging.log(com.XDEBUG,
                '**** START service_config.get_service_props ****')

    cfgp = _read_config_file(service_name)
    if cfgp is None:
        return None

    props = dict()
    if cfgp.has_section(SERVICE):
        section_dict = dict(cfgp.items(SERVICE))
        props.update(section_dict)

    for prop in props:
        logging.log(com.XDEBUG, '  property: %s=%s', prop, props[prop])
    return props
def addExtras(self):
    """Add packages generated by the sources but not in any seed."""
    self.structure.addExtra()
    self._newSeed("extra")

    logging.log(self.PROGRESS, "Identifying extras ...")

    found = True
    while found:
        found = False
        sorted_srcs = list(self.all_srcs)
        sorted_srcs.sort()
        for srcname in sorted_srcs:
            for pkg in self.sources[srcname]["Binaries"]:
                if pkg not in self.packages:
                    continue
                if self.packages[pkg]["Source"] != srcname:
                    continue
                if pkg in self.all:
                    continue
                if pkg in self.hints and self.hints[pkg] != "extra":
                    logging.warning("Taking the hint: %s", pkg)
                    continue

                self.seed["extra"].append(pkg)
                self._addPackage("extra", pkg, "Generated by " + srcname,
                                 second_class=True)
                found = True
def connectionMade(self):
    logging.log(self.getLogLevel(), "HTTP connection made.")
    self.sendRequest()
    self.sendHeaders()
    if self.command == 'POST':
        self.sendPostData()
def get_aliased_services(service_name, recurse=False):
    ''' Get list of services aliased to service_name
    Input:   service name
             recurse - True if recursion desired, to get aliases of aliases...
                       False if only aliases of specified service name desired
    Returns: list of aliased service names or empty list if there are
             no aliases

    '''
    logging.log(com.XDEBUG, "get_aliased_services: %s, recurse %s",
                service_name, recurse)
    aliases = list()
    all_svc_data = get_all_service_props()
    for svc_data in all_svc_data.values():
        if (PROP_ALIAS_OF in svc_data and
                svc_data[PROP_ALIAS_OF] == service_name):
            aliases.append(svc_data[PROP_SERVICE_NAME])
    if recurse:
        taliases = list()
        for alias in aliases:
            taliases.extend(get_aliased_services(alias, recurse))
        aliases.extend(taliases)

    logging.log(com.XDEBUG, "aliases=%s" % aliases)
    return aliases
def grow(self):
    """Grow the seeds."""
    for seedname in self.seeds:
        logging.log(self.PROGRESS,
                    "Resolving %s dependencies ...", seedname)
        if self.structure.branch is None:
            why = "%s seed" % seedname.title()
        else:
            why = ("%s %s seed" %
                   (self.structure.branch.title(), seedname))

        # Check for blacklisted seed entries.
        self.seed[seedname] = self._weedBlacklist(
            self.seed[seedname], seedname, False, why)
        self.seedrecommends[seedname] = self._weedBlacklist(
            self.seedrecommends[seedname], seedname, False, why)

        # Note that seedrecommends are not processed with
        # recommends=True; that is reserved for Recommends of packages,
        # not packages recommended by the seed. Changing this results in
        # less helpful output when a package is recommended by an inner
        # seed and required by an outer seed.
        for pkg in self.seed[seedname] + self.seedrecommends[seedname]:
            self._addPackage(seedname, pkg, why)

        for rescue_seedname in self.seeds:
            self._rescueIncludes(seedname, rescue_seedname,
                                 build_tree=False)
            if rescue_seedname == seedname:
                # only rescue from seeds up to and including the current
                # seed; later ones have not been grown
                break
        self._rescueIncludes(seedname, "extra", build_tree=False)

    self._rescueIncludes(self.supported, "extra", build_tree=True)
def handle_event(self, sock, fd, event):
    # handle events and dispatch to handlers
    if sock:
        logging.log(shell.VERBOSE_LEVEL, 'fd %d %s', fd,
                    eventloop.EVENT_NAMES.get(event, event))
    if sock == self._server_socket:
        if event & eventloop.POLL_ERR:
            # TODO
            raise Exception('server_socket error')
        try:
            logging.debug('accept')
            conn = self._server_socket.accept()
            TCPRelayHandler(self, self._fd_to_handlers,
                            self._eventloop, conn[0], self._config,
                            self._dns_resolver, self._is_local)
        except (OSError, IOError) as e:
            error_no = eventloop.errno_from_exception(e)
            if error_no in (errno.EAGAIN, errno.EINPROGRESS,
                            errno.EWOULDBLOCK):
                return
            else:
                shell.print_exception(e)
                if self._config['verbose']:
                    traceback.print_exc()
    else:
        if sock:
            handler = self._fd_to_handlers.get(fd, None)
            if handler:
                handler.handle_event(sock, event)
        else:
            logging.warn('poll removed fd')
def setup(self, X, num_centers, alpha, save_to='dec_model'):
    sep = X.shape[0]*9/10
    X_train = X[:sep]
    X_val = X[sep:]
    ae_model = AutoEncoderModel(self.xpu, [X.shape[1], 500, 500, 2000, 10],
                                pt_dropout=0.2)
    if not os.path.exists(save_to+'_pt.arg'):
        ae_model.layerwise_pretrain(X_train, 256, 50000, 'sgd', l_rate=0.1,
                                    decay=0.0,
                                    lr_scheduler=mx.misc.FactorScheduler(20000, 0.1))
        ae_model.finetune(X_train, 256, 100000, 'sgd', l_rate=0.1, decay=0.0,
                          lr_scheduler=mx.misc.FactorScheduler(20000, 0.1))
        ae_model.save(save_to+'_pt.arg')
        logging.log(logging.INFO, "Autoencoder Training error: %f" % ae_model.eval(X_train))
        logging.log(logging.INFO, "Autoencoder Validation error: %f" % ae_model.eval(X_val))
    else:
        ae_model.load(save_to+'_pt.arg')
    self.ae_model = ae_model

    self.dec_op = DECModel.DECLoss(num_centers, alpha)
    label = mx.sym.Variable('label')
    self.feature = self.ae_model.encoder
    self.loss = self.dec_op(data=self.ae_model.encoder, label=label, name='dec')
    self.args.update({k: v for k, v in self.ae_model.args.items()
                      if k in self.ae_model.encoder.list_arguments()})
    self.args['dec_mu'] = mx.nd.empty((num_centers, self.ae_model.dims[-1]), ctx=self.xpu)
    self.args_grad.update({k: mx.nd.empty(v.shape, ctx=self.xpu)
                           for k, v in self.args.items()})
    self.args_mult.update({k: k.endswith('bias') and 2.0 or 1.0 for k in self.args})
    self.num_centers = num_centers
def get_texts(self):
    if self.dictionary is None:
        logging.log(1, 'No dictionary set! Call set_dictionary() first.')
        sys.exit()
    for line in open(self.textfile):
        #yield self.dictionary.doc2bow(line.split())
        yield line.split()
def _sweep_timeout(self):
    # tornado's timeout memory management is more flexible than we need
    # we just need a sorted last_activity queue and it's faster than heapq
    # in fact we can do O(1) insertion/remove so we invent our own
    if self._timeouts:
        logging.log(shell.VERBOSE_LEVEL, 'sweeping timeouts')
        now = time.time()
        length = len(self._timeouts)
        pos = self._timeout_offset
        while pos < length:
            handler = self._timeouts[pos]
            if handler:
                if now - handler.last_activity < self._timeout:
                    break
                else:
                    if handler.remote_address:
                        logging.warn('timed out: %s:%d' %
                                     handler.remote_address)
                    else:
                        logging.warn('timed out')
                    handler.destroy()
                    self._timeouts[pos] = None  # free memory
                    pos += 1
            else:
                pos += 1
        if pos > TIMEOUTS_CLEAN_SIZE and pos > length >> 1:
            # clean up the timeout queue when it gets larger than half
            # of the queue
            self._timeouts = self._timeouts[pos:]
            for key in self._handler_to_timeouts:
                self._handler_to_timeouts[key] -= pos
            pos = 0
        self._timeout_offset = pos
def download_list(list_to_download, folder):
    if GTK:
        progress.set_text(Locale.DOWNLOAD_PROCESS % len(list_to_download))
        stopped.clear()
        stopEvent.clear()
        logging.log(logging.DEBUG, 'creating daemon thread')
        thread = threading.Thread(target=get_images, args=(
            list_to_download, folder, stopEvent, stopped, badURLs, progress,
            checkbox.get_active()))
        thread.setDaemon(True)
        logging.log(logging.DEBUG, 'starting daemon thread')
        thread.start()

        def wait_while_done():
            thread.join(0.49)
            if stopped.is_set() or not thread.is_alive():
                stop_button_click(None)
            else:
                gobject.timeout_add(500, wait_while_done)

        wait_while_done()
    else:
        get_images(list_to_download, folder, None, None, badURLs, None,
                   len(sys.argv) == 3 and sys.argv[2] == '-skip')
def reset(self):
    logging.log(5, "stack reset")
    self.strings = []
    self.start = 999999999   # start pos of first element
    self.end = 0             # end position of last element
    self.charLen = 0         # length of whole stack in characters
    self.tainted = False     # has ASCII-cleaning been used to populate any element in the stack?
def log_message(self, plugin_name, msg, level):
    message = '[%s] -> %s' % (plugin_name, msg)
    logging.log(level, message)
    self.show_status_message(message, True if level == logging.ERROR else False)
    log_plugin = self.plugin_handler.get_plugin('Log')
    if log_plugin:
        log_plugin.ui.add_log_message(time.time(), level, plugin_name, msg)
def log_list(msg, items, level=logging.INFO):
    """ Record a list of values with a message

    This would ordinarily be a simple logging call but we want to keep the
    length below the 1024-byte syslog() limitation and we'll format things
    nicely by repeating our message with as many of the values as will fit.
    Individual items longer than the maximum length will be truncated.
    """
    max_len = 1024 - len(msg % "")
    cur_len = 0
    cur_items = list()
    while items:
        # truncate overly long items, as promised in the docstring
        i = items.pop()[:max_len]
        if cur_len + len(i) + 2 > max_len:
            logging.log(level, msg % ", ".join(cur_items))
            cur_len = 0
            cur_items = list()
        cur_items.append(i)
        cur_len += len(i) + 2
    logging.log(level, msg % ", ".join(cur_items))
def mediaitem_hash_exists(input_hash):
    mi = MediaItem.query.filter_by(hash_cd=input_hash).first()
    if mi:
        logging.log(logging.INFO, str(mi))
        return True
    else:
        return False
def extract_and_attach_metadata(mediaitem, filepath):
    if mediaitem.media_type_cd == 100:
        try:
            media_file = open(filepath, 'rb')
            tags = exifread.process_file(media_file, details=False)
            org_date_tag = tags.get('EXIF DateTimeOriginal')
            org_date = datetime.now()
            if org_date_tag:
                org_date = datetime.strptime(str(org_date_tag),
                                             '%Y:%m:%d %H:%M:%S')
            else:
                org_date_tag = tags.get('EXIF DateTimeDigitized')
                if org_date_tag:
                    org_date = datetime.strptime(str(org_date_tag),
                                                 '%Y:%m:%d %H:%M:%S')
                else:
                    org_date_tag = os.stat(filepath).st_birthtime
                    if org_date_tag:
                        org_date = datetime.fromtimestamp(org_date_tag)
                    else:
                        org_date_tag = os.stat(filepath).st_ctime
                        if org_date_tag:
                            org_date = datetime.fromtimestamp(org_date_tag)
            mediaitem.origin_date = org_date
        except:
            logging.error('failed to extract metadata for: ' + str(mediaitem))
    file_size = os.stat(filepath).st_size
    mediaitem.file_size = file_size
    logging.log(logging.DEBUG,
                str(mediaitem) + ' - set file size = ' + str(file_size))
def plot_single_peak(filename, model, quartiles=False):
    '''Plot a single peak and its quartiles'''
    with open(filename, "rb") as data:
        aPeak = pickle.load(data)
    fig, ax = plt.subplots()
    #print (aPeak.distribution)
    #plt.tight_layout()
    plt.plot(aPeak.distribution, label=" ")
    hoehe = np.max(aPeak.distribution)
    logging.log(20, aPeak.pd)
    # also plot the quartiles
    if quartiles:
        plt.plot([aPeak.pd[1][0] * 10, aPeak.pd[1][0] * 10], [0, hoehe],
                 color="lightblue")
        plt.plot([aPeak.pd[1][1] * 10, aPeak.pd[1][1] * 10], [0, hoehe],
                 color="lightblue")
        plt.plot([aPeak.pd[1][2] * 10, aPeak.pd[1][2] * 10], [0, hoehe],
                 color="lightblue")
    plt.axis([0.0, 2400, 0.0, 0.03])
    if model == "2s":
        plt.title("ps: " + str(aPeak.params[0]) + " pm: " + str(aPeak.params[1]))
    if model == "3a":
        plt.title("pmm: " + str(aPeak.params[0]) + " pml: " + str(aPeak.params[2]) +
                  " paa: " + str(aPeak.params[4]) + " pll: " + str(aPeak.params[8]))
    plt.xlabel("Retentionszeit / s")
    plt.ylabel("Signalintensität")
    ax.set_xticklabels([0, 50, 100, 150, 200])
    plt.legend(title="Lage " + str(round(aPeak.pd[0], 4)) + " Breite " +
               str(round(aPeak.pd[2], 2)) + " Schiefe " + str(round(aPeak.pd[3], 2)))
    plt.show()
    return
def init(self, **kwargs):
    self._init.setdefault('board_id', None)
    self._init.setdefault('avoid_download', False)
    if self._init['board_id'] and int(self._init['board_id']) >= 0:
        self._sidev = SiUSBDevice.from_board_id(self._init['board_id'])
    else:
        # search for any available device
        devices = GetUSBBoards()
        if not devices:
            raise IOError('Can\'t find USB board. Connect or reset USB board!')
        else:
            logging.info('Found USB board(s): {}'.format(', '.join(
                ('%s with ID %s (FW %s)' % (device.board_name,
                                            filter(type(device.board_id).isdigit, device.board_id),
                                            filter(type(device.fw_version).isdigit, device.fw_version)))
                for device in devices)))
            if len(devices) > 1:
                raise ValueError('Please specify ID of USB board')
            self._sidev = devices[0]
    if 'bit_file' in self._init.keys():
        if 'avoid_download' in self._init.keys() and self._init['avoid_download'] is True and self._sidev.XilinxAlreadyLoaded():
            logging.info("FPGA already programmed, skipping download")
        else:
            if os.path.exists(self._init['bit_file']):
                bit_file = self._init['bit_file']
            elif os.path.exists(os.path.join(os.path.dirname(self.parent.conf_path), self._init['bit_file'])):
                bit_file = os.path.join(os.path.dirname(self.parent.conf_path), self._init['bit_file'])
            else:
                raise ValueError('No such bit file: %s' % self._init['bit_file'])
            logging.info("Programming FPGA: %s..." % (self._init['bit_file']))
            status = self._sidev.DownloadXilinx(bit_file)
            logging.log(logging.INFO if status else logging.ERROR,
                        'Success!' if status else 'Failed!')
    else:
        if not self._sidev.XilinxAlreadyLoaded():
            raise ValueError('FPGA not initialized, bit_file not specified')
        else:
            logging.info("Programming FPGA: bit_file not specified")
def fetch_public_ip(self):
    logging.info("Getting public IP address")
    response = requests.get("http://ip.42.pl/raw")
    self.ip = response.text.strip()
    if response.status_code != 200 or len(self.ip) == 0:
        raise Exception("Could not find public IP")
    logging.info("Found IP " + self.ip)
def do_GET(self):
    global SKIP
    global FILTER
    global TEMPLATE
    global ITERATION
    global GENERATOR

    self.send_response(200)
    self.send_header('Content-Type', 'text/html')
    self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate')
    self.send_header('Pragma', 'no-cache')
    self.end_headers()

    if self.path == '/fuzzing':
        try:
            if SKIP != 0:
                for i in range(0, SKIP):
                    GENERATOR.next()
                    ITERATION += 1
                SKIP = 0
            value = GENERATOR.next()
            while re.search(FILTER, value):
                value = GENERATOR.next()
                ITERATION += 1
            logging.log(60, ITERATION)
            self.wfile.write(TEMPLATE % value)
            ITERATION += 1
        except StopIteration:
            self.wfile.write('DONE!')
            server.stop()
def debugRDKitMol(rdmol, level=logging.INFO):
    """
    Takes an rdkit molecule object and logs some debugging information
    equivalent to calling rdmol.Debug() but uses our logging framework.
    Default logging level is INFO but can be controlled with the `level` parameter.
    Also returns the message as a string, should you want it for something.
    """
    import tempfile
    import os
    my_temp_file = tempfile.NamedTemporaryFile()
    try:
        old_stdout_file_descriptor = os.dup(sys.stdout.fileno())
    except:
        message = "Can't access the sys.stdout file descriptor, so can't capture RDKit debug info"
        print message
        rdmol.Debug()
        return message
    os.dup2(my_temp_file.fileno(), sys.stdout.fileno())
    rdmol.Debug()
    os.dup2(old_stdout_file_descriptor, sys.stdout.fileno())
    my_temp_file.file.seek(0)
    message = my_temp_file.file.read()
    message = "RDKit Molecule debugging information:\n" + message
    logging.log(level, message)
    return message
def on_message(self, message):
    """Evaluates the function pointed to by json-rpc."""
    json_rpc = json.loads(message)
    logging.log(logging.DEBUG, json_rpc)

    if self.pool is None:
        self.pool = multiprocessing.Pool(processes=args.workers)

    # Spawn a process to protect the server against segfaults
    async = self.pool.apply_async(_worker_process, [json_rpc])
    try:
        result = async.get(timeout=args.timeout)
        error = 0
    except multiprocessing.TimeoutError:
        result = ("File format conversion timed out! This is due "
                  "either to a large input file or a segmentation "
                  "fault in the underlying open babel library.")
        error = 1
        self.pool.terminate()
        self.pool = multiprocessing.Pool(processes=args.workers)
    except Exception:
        result = traceback.format_exc()
        error = 1
    logging.log(logging.DEBUG, result)
    self.write_message(json.dumps({'result': result, 'error': error,
                                   'id': json_rpc['id']},
                                  separators=(',', ':')))
def process_request(self, request, spider):
    ua = random.choice(self.user_agent_list)
    if ua:
        # print the user agent currently in use
        print 'curr user agent: %s' % ua
        log.log(level=log.INFO, msg='Current UserAgent: %s' % ua)
        request.headers.setdefault('User-Agent', ua)
def _write_config_file(service_name, cfg):
    ''' Write out the passed in cfg for an installation service
    Input:   service_name - An AI service name or None if supplying configfile
             cfg - A ConfigParser object with the current config
    Raises:  OSError if problem creating service dir

    '''
    logging.log(com.XDEBUG,
                '**** START service_config._write_config_file ****')
    svcdir = os.path.join(AI_SERVICE_DIR_PATH, service_name)
    try:
        os.makedirs(svcdir)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    cfgpath = os.path.join(svcdir, CFGFILE)
    logging.log(com.XDEBUG, 'writing config file: %s', cfgpath)

    # .config file should be created with right permissions
    orig_umask = os.umask(0022)
    with open(cfgpath, 'w') as cfgfile:
        cfg.write(cfgfile)
    os.umask(orig_umask)
def is_client(client_id):
    ''' Find out if client exists
    Input:   clientid ('01aabbccaabbcc')
    Returns: True if client exists
             False otherwise
    Raises:  ServiceCfgError if service missing .config file

    '''
    logging.log(com.XDEBUG, "**** START service_config.is_client: %s ****",
                client_id)
    exists = False
    all_svc_names = get_all_service_names()
    for svc in all_svc_names:
        cfg = _read_config_file(svc)
        if cfg is None:
            raise ServiceCfgError(_("\nMissing configuration file for "
                                    "service: %s\n" % svc))
        if CLIENTS not in cfg.sections():
            continue
        clients = dict(cfg.items(CLIENTS))
        # cfgparser changes client_id to lower
        if client_id.lower() in clients:
            exists = True
    logging.log(com.XDEBUG, 'client exists: %s', exists)
    return exists
def remove_client_from_config(service_name, client_id):
    ''' Remove client entry from .config file
    Input:   service name
             client_id of entry to remove
    Raises:  ServiceCfgError if service missing .config file

    '''
    logging.log(com.XDEBUG,
                "**** START service_config.remove_client_from_config: %s "
                "%s ****", service_name, client_id)
    cfg = _read_config_file(service_name)
    if cfg is None:
        raise ServiceCfgError(_("\nMissing configuration file for "
                                "service: %s\n" % service_name))
    if CLIENTS not in cfg.sections():
        return
    clients = cfg.options(CLIENTS)
    if client_id.lower() in clients:
        cfg.remove_option(CLIENTS, client_id.lower())
    # if last client deleted, remove section
    if not cfg.options(CLIENTS):
        cfg.remove_section(CLIENTS)

    _write_config_file(service_name, cfg)
def get_clients(service_name):
    ''' Get info on all clients of a service
    Input:   service name
    Returns: dictionary of clients, key is clientid (01aabbccaabbcc)
             and value is dict of client data (see find_client)
    Raises:  ServiceCfgError if service missing .config file

    '''
    logging.log(com.XDEBUG, "**** START service_config.get_clients: %s ****",
                service_name)
    cfg = _read_config_file(service_name)
    if cfg is None:
        raise ServiceCfgError(_("\nMissing configuration file for service: "
                                "%s\n" % service_name))
    clients = dict()
    if CLIENTS not in cfg.sections():
        return clients
    rawclients = dict(cfg.items(CLIENTS))
    for client in rawclients:
        data = rawclients[client]
        value = ast.literal_eval(data)
        clients[client.upper()] = value
    logging.log(com.XDEBUG, 'clients are %s', clients)
    return clients
def find_client(client_id):
    ''' Get info on a particular client
    Input:   clientid ('01aabbccaabbcc')
    Returns: tuple consisting of service_name of client and dict of client
             data, both None if client does not exist.
             Client data can include:
                 FILES: [list of files to remove when deleting client]
                 BOOTARGS: comma separated string of client specific boot
                           args <property>=<value>,
    Raises:  ServiceCfgError if service missing .config file

    '''
    logging.log(com.XDEBUG, "**** START service_config.find_client: %s ****",
                client_id)
    service = None
    files = None
    for svc in get_all_service_names():
        cfg = _read_config_file(svc)
        if cfg is None:
            raise ServiceCfgError(_("\nMissing configuration file for "
                                    "service: %s\n" % svc))
        if CLIENTS not in cfg.sections():
            continue
        clients = dict(cfg.items(CLIENTS))
        # cfgparser changes client_id to lower
        if client_id.lower() in clients:
            data = clients[client_id.lower()]
            files = ast.literal_eval(data)
            service = svc
            break
    logging.log(com.XDEBUG, 'service is %s, files are %s', service, files)
    return (service, files)
def do_POST(self):
    "post decides on the path which handler it should call."
    try:
        logging.log(1, 'POST request: %s %s' % (self.path,
                                                self.client_address[0]))
        self.handle_proxy_interface('POST')
    except Exception, e:
        logging.exception('do_POST failed with %r' % e)
def add_client_info(service_name, clientid, clientdata):
    '''add client info to the service configuration file
    Input:   service_name - service name
             clientid - clientid of client (01aabbccaabbcc)
             clientdata - dict of client data (see find_client)
    Raises:  ServiceCfgError if service missing .config file

    '''
    logging.log(com.XDEBUG, '**** START service_config.add_client_info ****')
    logging.log(com.XDEBUG, ' service=%s, clientid=%s, clientdata=%s',
                service_name, clientid, clientdata)
    cfg = _read_config_file(service_name)
    if cfg is None:
        raise ServiceCfgError(_("\nMissing configuration file for service: "
                                "%s\n" % service_name))
    if CLIENTS not in cfg.sections():
        cfg.add_section(CLIENTS)
    # add the client
    cfg.set(CLIENTS, clientid, clientdata)

    _write_config_file(service_name, cfg)
def get_ffts(self):
    ffts = []
    for i in range(0, self.frame_count()):
        logging.info("Generating fft num " + str(i))
        f = self.frame(i)
        ffts.append(f)
    return ffts
def set_animating(self, status):
    logging.log(logging.INFO, "Setting animating to: %s" % status)
    self.animating = status
def user_parse(self, response): # inspect_response(response, self) user_item = UserItem() # 用户信息 item = response.meta['item'] # 记录日志 logging.log(logging.INFO, '用户id :' + item['author_id']) user_item['user_id'] = item['author_id'] for sel in response.xpath( '//div[@id="content"]/table[@class="profile_table"][1]/tr'): td_data = sel.xpath('.//td/text()').extract() if '性别' in td_data[0]: user_item['gender'] = self.get_gender( td_data[1]) if len(td_data) > 1 else 0 if '等级' in td_data[0]: user_item['bbs_level'] = td_data[1] if len(td_data) > 1 else 0 if '社团' in td_data[0]: user_item['associations'] = td_data[1] if len( td_data) > 1 else 0 if '现金' in td_data[0]: hupu_property_data = td_data[1] if len(td_data) > 1 else 0 property = re.findall(r'^(\d+).*$', hupu_property_data) user_item['hupu_property'] = property[0] if len( property) > 0 else 0 if '在线' in td_data[0]: online_time_data = td_data[1] if len(td_data) > 1 else 0 online_time = re.findall(r'^(\d+).*$', online_time_data) user_item['online_time'] = online_time[0] if len( online_time) > 0 else 0 if '注册' in td_data[0]: user_item['reg_time'] = td_data[1] if len(td_data) > 1 else 0 if '最后' in td_data[0]: user_item['last_login'] = td_data[1] if len(td_data) > 1 else 0 if '自我' in td_data[0]: user_item['self_introduction'] = td_data[1] if len( td_data) > 1 else '' # common_path = response.xpath('//div[@id="content"]/table[@class="profile_table"][1]') # 档案中 ,有些包含地区有些没有,造成资料行数不一致 # tr_count = len(response.xpath('//div[@id="content"]/table[@class="profile_table"][1]/tr').extract()) # is_offset = 1 if tr_count > 8 else 0 # profile = response.xpath('//div[@id="content"]/table[@class="profile_table"]/tr/td/text()').extract() # user_item['gender'] = profile[1] if len(profile[1]) > 0 else '保密' # gender_res = common_path.xpath('.//tr[1]/td/text()').extract() # gender_val = gender_res[1] if len(gender_res) > 1 else '保密' # user_item['gender'] = self.get_gender(gender_val) # user_item['bbs_reputation'] = 0 # 社区等级 # level_tr = 3 if is_offset else 2 # bbs_level_res = common_path.xpath('.//tr[$val]/td/text()', val=level_tr).extract() # user_item['bbs_level'] = bbs_level_res[1] if len(bbs_level_res) > 1 else 0 # 所属社团 # associations_tr = 4 if is_offset else 3 # associations_res = common_path.xpath('.//tr[$val]/td/text()', val=associations_tr).extract() # user_item['associations'] = associations_res[1] if len(associations_res) > 1 else 0 # 社区资产 # property_tr = 5 if is_offset else 4 # hupu_property_res = common_path.xpath('.//tr[$val]/td/text()', val=property_tr).extract() # hupu_property_data = hupu_property_res[1] if len(hupu_property_res) > 1 else 0 # property = re.findall(r'^(\d+).*$', hupu_property_data) # user_item['hupu_property'] = property[0] if len(property) > 0 else 0 # 在线时间 # online_time_tr = 6 if is_offset else 5 # online_time_res = common_path.xpath('.//tr[$val]/td/text()', val=online_time_tr).extract() # online_time_data = online_time_res[1] if len(online_time_res) > 1 else 0 # online_time = re.findall(r'^(\d+).*$', online_time_data) # user_item['online_time'] = online_time[0] if len(online_time) > 0 else 0 # reg_time_tr = 7 if is_offset else 6 # reg_time_res = common_path.xpath('.//tr[$val]/td/text()', val=reg_time_tr).extract() # user_item['reg_time'] = reg_time_res[1] if len(reg_time_res) > 1 else 0 # # last_login_tr = 8 if is_offset else 7 # last_login_res = common_path.xpath('.//tr[$val]/td/text()', val=last_login_tr).extract() # user_item['last_login'] = last_login_res[1] if len(last_login_res) > 1 else 0 # # introduction_tr = 9 if is_offset 
else 8 # self_introduction_res = common_path.xpath('.//tr[$val]/td/text()', val=introduction_tr).extract() # user_item['self_introduction'] = self_introduction_res[1] if len(self_introduction_res) > 1 else '' # 喜欢的事情 common_path_favorite = response.xpath( '//div[@id="content"]/table[@class="profile_table"][2]') favorite_sport_res = common_path_favorite.xpath( './/tr[1]/td/text()').extract() user_item['favorite_sport'] = favorite_sport_res[1] if len( favorite_sport_res) > 1 else '' # 最喜欢的联赛 favorite_league_res = common_path_favorite.xpath( './/tr[2]/td/text()').extract() user_item['favorite_league'] = favorite_league_res[1] if len( favorite_league_res) > 1 else '' favorite_team_res = common_path_favorite.xpath( './/tr[3]/td/text()').extract() user_item['favorite_team'] = favorite_team_res[1] if len( favorite_team_res) > 1 else '' # yield user_item user_info_other_url = 'https://my.hupu.com/' + item['author_id'] # user_info_other_url = 'https://my.hupu.com/' + '268318221130217' # 4822766296690 52011257892977 189695810822085 yield scrapy.Request(user_info_other_url, meta={'user_item': user_item}, callback=self.user_other_parse, cookies=self.cookie_dict)
def article_parse(self, response): # inspect_response(response, self) comment_item = CommentItem() # 高亮回复 item = response.meta['item'] # inspect_response(response, self) title = response.xpath('//div[@class="bbs-hd-h1"]/h1/text()').extract() item['uid'] = item['author_id'] time_data = response.xpath( '//div[@class="floor-show"]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/span[@class="stime"]/text()' ).extract() post_time = time_data[0] if time_data else '' # 发帖 的24小时 时间, item['post_hour'] = time.strptime( post_time, '%Y-%m-%d %H:%M').tm_hour if post_time else 0 post_from_data = response.xpath( '//div[@class="floor-show"]/div[@class="floor_box"]/table/tbody/tr/td/div[@class="quote-content"]/small' ).xpath('string(.)').extract() post_from_str = post_from_data[0] if len(post_from_data) > 0 else '' logging.info(item['article_id'] + '来源:' + post_from_str) if re.search(r'iPhone', post_from_str): post_from = 'iPhone' elif re.search(r'Android', post_from_str): post_from = 'Android' elif re.search(r'm\.hupu\.com', post_from_str ): # https://bbs.hupu.com/21750357.html 发自 m.hupu.com post_from = 'wap' # 手机网页上 else: post_from = 'web' item['post_from'] = post_from content = response.xpath( '//div[@class="floor-show"]/div[@class="floor_box"]/table/tbody/tr/td/div[@class="quote-content"]' ).xpath('string(.)').extract() item['article_content'] = content[0] if content else 0 images = response.xpath( '//div[@class="floor-show"]/div[@class="floor_box"]/table/tbody/tr/td/div[@class="quote-content"]' ).xpath('.//a/@href').extract() images2 = response.xpath( '//div[@class="floor-show"]/div[@class="floor_box"]/table/tbody/tr/td/div[@class="quote-content"]' ).xpath('.//img/@src').extract() all_img = images + images2 item['all_images'] = json.dumps(all_img) highlights_re = response.xpath( '//div[@class="w_reply clearfix"]/div[@id="readfloor"]/div[@class="floor"]/@id' ).extract() # 高亮回复id if item: for comment_id in highlights_re: # user_url = response.xpath('//div[@id=47406]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/a/@href').extract_first() comment_item['article_id'] = item['article_id'] comment_item['comment_id'] = comment_id comment_item['comment_username'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/a/text()', val=comment_id).extract_first() comment_item['comment_create_time'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/span[@class="stime"]/text()', val=comment_id).extract_first() comment_item['comment_uid'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/span/@uid', val=comment_id).extract_first() comment_item['comment_content'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/table/tbody/tr/td/text()', val=comment_id).extract_first() if len(comment_item['comment_content']) < 3: comment_item['comment_content'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/table/tbody/tr/td', val=comment_id).xpath('string(.)').extract_first() logging.log(logging.INFO, '用户评论 :' + comment_item['comment_content']) comment_item['highlights_num'] = response.xpath( '//div[@id=$val]/div[@class="floor_box"]/div[@class="author"]/div[@class="left"]/span/span//span/text()', val=comment_id).extract_first() yield comment_item item['highlights_re'] = ','.join(highlights_re) artcile_post_time = response.xpath( 
'//div[@class="floor-show"]/div[@class="floor_box"]/div[@class="author"]//div[@class="left"]/span[@class="stime"]/text()' ).extract() item['article_post_time'] = artcile_post_time[ 0] if artcile_post_time else '' # 文章详情页里面的 发帖时间 yield item user_info_url = 'https://my.hupu.com/' + item['author_id'] + '/profile' # user_info_url = 'https://my.hupu.com/' + '52011257892977' + '/profile' # 4822766296690 52011257892977 189695810822085 yield scrapy.Request(user_info_url, meta={'item': item}, callback=self.user_parse, cookies=self.cookie_dict)
import logging

LOG_FORMAT = "%(asctime)s=====%(levelname)s+++++%(message)s"
logging.basicConfig(filename="tulingxueyuan.log", level=logging.DEBUG,
                    format=LOG_FORMAT)

logging.log(logging.DEBUG, "This is a debug log")
logging.log(logging.WARNING, "This is a warning log")
def logToRoot(message, *args, **kwargs):
    logging.log(level_num, message, *args, **kwargs)
def get_questions_list_page(self, url, params, keyword): """ 知乎搜索出来的列表页,其中包含问答类信息和文章类信息,所以在函数中页做出了适当的判断 :param url: :param params: 参数 :return: """ headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', # 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'zh-CN,zh;q=0.9', 'Cache-Control': 'max-age=0', 'Upgrade-Insecure-Requests': '1', 'cookie': '_zap=cd474fe5-5293-4e25-ab70-819460f8d305; _xsrf=2a505282-ea51-4636-b146-c278f149958b; d_c0="AABTV9V_GBGPTg9rjZFaDmhprKiKQ1WJO20=|1586481899"; _ga=GA1.2.76494546.1586481899; _gid=GA1.2.1240789277.1586481899; _gat_gtag_UA_149949619_1=1; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1586481899; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1586481899; KLBRSID=4efa8d1879cb42f8c5b48fe9f8d37c16|1586481901|1586481899; capsion_ticket="2|1:0|10:1586481901|14:capsion_ticket|44:NTg0YzcwYmI4NGU3NGMxOGE4YThkMmMyOWI1ZDNjYzM=|250368312e63b3fdedde6029018c0c38c063d9e6a45dc281e40fccdc2948a474"', # 'referer': 'https://www.zhihu.com/search?q=%E5%AE%9D%E9%A9%AC&range=1w&type=content', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36' # 'User-Agent': '{}'.format(random.choice(self.user_agent)) } # print({'https': self.ip.strip()}) try: response = requests.get(url, headers=headers, params=params, proxies=self.proxies, timeout=120) except requests.exceptions.ProxyError: self.get_questions_list_page(url, params, keyword) return logger.log(31, '正在抓取主链接: ' + response.url) if response.text != None: data = response.content.decode() data = json.loads(data) if data['data']: # 判断获取的json数据中的data['data']的value列表是否为空,可以间接判断是否还有下一页数据 if len(data['data']) > 1: data_list = data['data'][1:] else: data_list = data['data'] for news in data_list: try: # print(news) try: question_title = news['highlight'][ 'title'].replace('<em>', '').replace('</em>', '') except: question_title = '' news_type = news['object']['type'] # 时间判断 if news_type == 'answer': # 问答类信息 answers_url = news['object']['url'] question_url = news['object']['question']['url'] try: topic_time_all = self.get_topic_time( question_url) except: continue # topic_time_all = '2019-03-04 01:03:30' question_id = question_url.split('/')[-1] view_url = 'https://www.zhihu.com/question/' + question_id views = self.get_view(view_url) # 获取浏览量 url = 'https://www.zhihu.com/api/v4/questions/{}/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics&limit=20&offset={}&sort_by=created'.format( question_id, '0') # 传入页面的url source_url = 'https://www.zhihu.com/question/{}/answers/created'.format( str(question_id)) if url not in self.set_list: # 对url进行简单的去重,避免重复的工作量 self.get_answers_page(url, question_title, source_url, keyword, views, topic_time_all) self.set_list.append(url) else: logging.log(31, '重复的url......') elif news_type == 'article': # 文章类信息 item = {} content = news['object']['content'] # item['type'] = '文章' item['platform'] = '知乎' crt_time = news['object']['created_time'] # #转换成localtime 
time_local = time.localtime(float(crt_time)) # 转换成新的时间格式(2016-05-05 20:28:54) dt = time.strftime( "%Y-%m-%d %H:%M:%S", time_local) # "%Y-%m-%d %H:%M:%S" date = dt.split(' ')[0] news_time = dt.split(' ')[1] item['date'] = date item['time'] = news_time author = news['object']['author']['name'] item['author'] = author.replace('<em>', '') item['title'] = question_title # content = news['content'].replace('<p>', '').replace('</p>', '').replace('<br>', '') content = etree.HTML(content) content = content.xpath('.//p//text()') content = ''.join(content) item['content'] = content articles_url = news['object']['url'].split('/')[-1] item[ 'url'] = 'https://zhuanlan.zhihu.com/p/{}'.format( str(articles_url)) item['is_topics'] = '是' item['floor'] = 0 item['keyword'] = keyword comments_count = news['object']['comment_count'] item['comments_count'] = comments_count item['views'] = '' likes = news['object']['voteup_count'] item['likes'] = str(likes) topic_id = articles_url item['topic_id'] = topic_id item['author_id'] = news['object']['author']['id'] item['topic_date'] = date item['topic_time'] = news_time item['content_id'] = topic_id item['reposts_count'] = '' item['file_code'] = '47' # 做时间判断部分--------------- 这个部分区分于另外一个部分 get_news_time = time.mktime( time.strptime(date, "%Y-%m-%d")) end_time = time.mktime( time.strptime(self.end_time, "%Y-%m-%d")) if self.start_time != '': start_time = time.mktime( time.strptime(self.start_time, "%Y-%m-%d")) else: start_time = time.mktime( time.strptime('2010-1-1', "%Y-%m-%d")) if float(get_news_time) < float(start_time): pass if float(start_time) <= float( get_news_time) <= float(end_time): # print('爬取正文数据中.....') # print(item) # self.write_news_jsonfile(item) # if topic_id not in self.set_list: if not self.redis_example.sismember( 'nike_dayli_zhihu', topic_id): self.queue.put(item) self.redis_example.sadd( 'nike_dayli_zhihu', topic_id) # self.set_list.append(topic_id) # print(self.queue.get()) if int(comments_count) > 0: comment_id = news['object']['id'] comment_url = 'https://www.zhihu.com/api/v4/articles/{}/root_comments?include=data%5B*%5D.author%2Ccollapsed%2Creply_to_author%2Cdisliked%2Ccontent%2Cvoting%2Cvote_count%2Cis_parent_author%2Cis_author&order=normal&limit=20&offset=0&status=open'.format( str(comment_id)) comment_source_url = 'https://zhuanlan.zhihu.com/p/{}'.format( str(comment_id)) self.floor_num = 1 self.get_comment_info( comment_url, question_title, comment_source_url, keyword, topic_id, dt) else: logging.log(31, '时间:' + str(date)) print('数据时间不符合') except: logger.error(traceback.format_exc()) is_end = data['paging']['is_end'] if not is_end: next_url = data['paging']['next'] self.get_questions_list_page(next_url, params, keyword) else: # print(response.text) logging.log(31, '数据为空.......')
def parser(self,user,data): if data.find('disco') != -1: if data.find('strobe') != -1: animation = threading.Thread(target=self.disco_strobe) animation.daemon = True animation.start() elif data.find('fire') != -1: animation = threading.Thread(target=self.disco_fire) animation.daemon = True animation.start() elif data.find('alternate') != -1: animation = threading.Thread(target=self.disco_alternate) animation.daemon = True animation.start() elif data.find('chase') != -1: animation = threading.Thread(target=self.disco_chase) animation.daemon = True animation.start() else: animation = threading.Thread(target=self.disco) animation.daemon = True animation.start() return True # flickr strobe if data.find('strobe') != -1: self.strobe() return True if data.find('randomcolor') != -1: rgb = twitch_bot_utils.convertcolor("random", self.random_color) self.allleds(rgb[0], rgb[1], rgb[2], 10) return True m = re.search('(\w+)\((.+(?:\|[a-zA-Z0-9#]+)*)\)', data, re.IGNORECASE) if m: logging.log(logging.DEBUG, "regex passed") parts = m.group(2).split("|") if m.group(1).lower() == "chase": if len(parts) > 0: while len(parts) > 6: parts.pop(6) for part in parts: rgb = twitch_bot_utils.convertcolor(part, self.random_color) if rgb: num = round(6 / len(parts)) self.chase(rgb[0], rgb[1], rgb[2], int(num)) time.sleep(1) else: logging.log(logging.ERROR, "Invalid color: %s" % part) self.modedefault() return True else: logging.log(logging.ERROR, "Not enough colors to chase!") if m.group(1).lower() == "centerchase": if len(parts) > 0: while len(parts) > 6: parts.pop(6) for part in parts: rgb = twitch_bot_utils.convertcolor(part, self.random_color) if rgb: num = round(6 / len(parts)) self.centerchase(rgb[0], rgb[1], rgb[2], int(num)) time.sleep(1) else: logging.log(logging.ERROR, "Invalid color: %s" % part) self.modedefault() return True else: logging.log(logging.ERROR, "Not enough colors to centerchase!") if m.group(1).lower() == "bounce": if len(parts) > 0: while len(parts) > 6: parts.pop(6) for part in parts: rgb = twitch_bot_utils.convertcolor(part, self.random_color) if rgb: num = round(6 / len(parts)) self.bounce(rgb[0], rgb[1], rgb[2], int(num)) time.sleep(1) else: logging.log(logging.ERROR, "Invalid color: %s" % part) self.modedefault() return True else: logging.log(logging.ERROR, "Not enough colors to bounce!") if m.group(1).lower() == "cycle": if len(parts) > 0: while len(parts) > 6: parts.pop(6) for part in parts: rgb = twitch_bot_utils.convertcolor(part, self.random_color) if rgb: num = round(6 / len(parts)) self.allleds(rgb[0], rgb[1], rgb[2], num) else: logging.log(logging.ERROR, "Invalid color: %s" % part) return True else: logging.log(logging.ERROR, "Not enough colors to cycle!") if len(parts) == 1: rgb = twitch_bot_utils.convertcolor(parts[0], self.random_color) if rgb: if m.group(1).lower() == "rgb": self.allleds(rgb[0], rgb[1], rgb[2], 10) time.sleep(1) if len(parts) == 2: rgb = twitch_bot_utils.convertcolor(parts[0], self.random_color) rgb2 = twitch_bot_utils.convertcolor(parts[1], self.random_color) if rgb: if rgb2: if m.group(1).lower() == "fire": self.fire(rgb[0], rgb[1], rgb[2], rgb2[0], rgb2[1], rgb2[2]) time.sleep(1) return True if m.group(1).lower() == "alternate": self.alternate(rgb[0], rgb[1], rgb[2], rgb2[0], rgb2[1], rgb2[2]) time.sleep(1) return True else: logging.log(logging.ERROR, "Invalid color: %s" % parts[1]) else: logging.log(logging.ERROR, "Invalid color: %s" % parts[0]) return True # html color keys (single color, no animation) # todo replace with color converter for key, value 
in sorted(twitch_bot_colors.colors.iteritems()): if data.find(key.lower()) != -1: self.set_animating(1) logging.log(logging.INFO, "key: %s value: %s : %s,%s,%s" % ( key, value, int("0x" + value[0:2], 0), int("0x" + value[2:4], 0), int("0x" + value[4:6], 0))) self.irc.msg("%s!!!" % key.upper()) self.ser.write( "#%c%c%c\xff!" % (int("0x" + value[0:2], 0), int("0x" + value[2:4], 0), int("0x" + value[4:6], 0))) self.user_wait(5) self.modedefault() return True
import logging

LOG_FORMAT = "%(asctime)s====%(levelname)s++++++%(message)s"
logging.basicConfig(filename="ken.log", level=logging.DEBUG, format=LOG_FORMAT)

logging.debug("this is a debug log")
logging.warning("this is a warning log")
logging.log(logging.DEBUG, "this is a debug log")
def _http_packets_logger(packet, method='GET'):
    if str(packet).find(method) > 0:
        logging.log(logging.INFO, _http_parser(packet, method))
def handleHello(data):
    helloResult = data
    logging.log(logging.DEBUG, f"flags={helloResult.status.flags}")

    global stoneHasBeenSetUp
    stoneHasBeenSetUp = helloResult.status.hasBeenSetUp
    logging.log(logging.INFO, f"stoneHasBeenSetUp={stoneHasBeenSetUp}")
rest = {
    'enabled': config.getboolean('rest', 'enabled'),
    'server': config.get('rest', 'server'),
    'port': config.getint('rest', 'port'),
    'user': config.get('rest', 'user'),
    'passwd': config.get('rest', 'passwd'),
    'userasjid': config.get('rest', 'userasjid'),
}
overridedefault = {}
for option in config.options('defaultnodeconfig'):
    overridedefault[option] = config.get('defaultnodeconfig', option)

pubsub = sleekpubsub.PublishSubscribe(xmpp, config.get('pubsub', 'dbfile'),
                                      settings, rest, overridedefault)
#pubsub.start("session_start")
#pubsub.subscribeNode('fb_node','panos@gic/spark')
#pubsub.publish('fb_node',ET.Element('sdsds'))
#pubsub.registerNodeType(sleekpubsub.jobnode)
#print "step check 1"
if xmpp.connect():
    xmpp.process(threaded=True)
    #xmpp.disconnect()
    logging.info("Saving...")
    pubsub.save()
    #sys.exit(retCode)
else:
    logging.log(logging.CRITICAL, "Unable to connect.")
def __onPluginLogMessage(logLevel, message):
    """Triggered when a plugin message is received"""
    logging.log(logLevel, str(message))
def write(self, msg):
    logging.log(self._logging_level, msg[:-1])
import logging

logging.basicConfig(level=logging.DEBUG)
logging.info('info')
logging.debug('debug')
logging.warning('warning')
logging.error('error')
logging.fatal('fatal')
logging.critical('critical')
logging.log(logging.INFO, 'log')
import logging
from time import time

from coffeehouse.lydia import LydiaAI
from coffeehouse.api import API
from telethon import events

from sedenbot.events import sedenify
from sedenbot import BOTLOG, BOTLOG_CHATID, CMD_HELP, bot
from sedenbot import LYDIA_API_KEY

logging.basicConfig(
    format='[%(levelname) 5s/%(asctime)s] %(name)s: %(message)s',
    level=logging.WARNING)

try:
    from sedenbot.moduller.sql_helper.lydia_sql import get_s, get_all_s, add_s, remove_s
except:
    # "Lydia database connection failed"
    logging.log(level=logging.WARNING,
                msg="Lydia veritabanı bağlantısı başarısız oldu")

# non-SQL mode
ACC_LYDIA = {}

if LYDIA_API_KEY:
    api_key = LYDIA_API_KEY
    api_client = API(api_key)
    lydia = LydiaAI(api_client)


@sedenify(outgoing=True, pattern="^.repcf$")
async def repcf(event):
    if event.fwd_from:
        return
    # "Processing..."
    await event.edit("İşleniyor...")
def log_message(msg, _object_name, level='DEBUG'):
    desired_level = getattr(logging, level.upper())
    logging.log(desired_level, LOGGING_MESSAGE_FORMAT % {
        '_object': _object_name,
        'msg': msg
    })
# pylint: disable=E1101
"""test checking use of the logging module
"""

__revision__ = ''

# Muck up the names in an effort to confuse...
import logging as renamed_logging
import os as logging

# Statements that should be flagged:
renamed_logging.warn('%s, %s' % (4, 5))
renamed_logging.exception('%s' % 'Exceptional!')
renamed_logging.log(renamed_logging.INFO, 'msg: %s' % 'Run!')

# Statements that should not be flagged:
renamed_logging.warn('%s, %s', 4, 5)
renamed_logging.log(renamed_logging.INFO, 'msg: %s', 'Run!')
renamed_logging.warn('%s' + ' the rest of a single string')
logging.warn('%s, %s' % (4, 5))
logging.log(logging.INFO, 'msg: %s' % 'Run!')
def tick(self):
    for elevator in self.elevators:
        elevator.tick()
        logging.log(logging.INFO, f"{self.time}: {elevator}")
    self.time += 1
def handle_jsonrpc(self, request_text):
    response = self._marshaled_dispatch(request_text, self.client_ip)
    logging.log(logging.INFO, 'Content-Type: application/json-rpc')
    logging.log(logging.INFO, 'Content-Length: %d' % len(response))
    sys.stdout.write(response)
def log_with_custom_levels():
    logging.log(logging.DEBUG - 1, Message('below debug'))
    logging.log(logging.INFO - 1, 'between debug and info')
    logging.log(logging.INFO + 1, 'between info and warning')
    logging.log(logging.WARNING + 5, 'between warning and error')
    logging.log(logging.ERROR * 100, 'above error')
def range(self):
    logging.log(logging.INFO, 'Laser on')
    super().range()
    logging.log(logging.INFO, 'Laser off')
def log_to_root(message, *args, **kwargs):
    logging.log(num, message, *args, **kwargs)
if command == 'register':
    destination = self.rfile.readline().strip()
    logging.log(logging.INFO, "Received request from %s for %s to %s",
                toolname, route, destination)
    red.hset(redis_key, route, destination)
    logging.log(logging.DEBUG, "Set redis key %s with key/value %s:%s",
                redis_key, route, destination)
    self.request.send('ok')
elif command == 'unregister':
    logging.log(logging.INFO, "Cleaning up request from %s for %s",
                toolname, route)
    red.hdel(redis_key, route)
    logging.log(logging.DEBUG, "Removed redis key %s with key %s",
                redis_key, route)
    self.request.send('ok')
else:
    logging.log(logging.ERROR, "Unknown command received: %s", command)
    self.request.send('fail')
self.request.close()


if __name__ == '__main__':
    logging.log(logging.INFO, "Starting server on port %s", PORT)
    server = SocketServer.ThreadingTCPServer((HOST, PORT), RouteRequestHandler)
    server.serve_forever()
def train_model(name: str, model_class: Type[nn.Module], graph: SparseGraph, model_args: dict, learning_rate: float, reg_lambda: float, idx_split_args: dict = { 'ntrain_per_class': 20, 'nstopping': 500, 'nknown': 1500, 'seed': 2413340114 }, stopping_args: dict = stopping_args, test: bool = False, device: str = 'cuda', torch_seed: int = None, print_interval: int = 10) -> Tuple[nn.Module, dict]: labels_all = graph.labels idx_np = {} idx_np['train'], idx_np['stopping'], idx_np['valtest'] = gen_splits( labels_all, idx_split_args, test=test) idx_all = {key: torch.LongTensor(val) for key, val in idx_np.items()} logging.log(21, f"{model_class.__name__}: {model_args}") if torch_seed is None: torch_seed = gen_seeds() torch.manual_seed(seed=torch_seed) logging.log(22, f"PyTorch seed: {torch_seed}") nfeatures = graph.attr_matrix.shape[1] nclasses = max(labels_all) + 1 model = model_class(nfeatures, nclasses, **model_args).to(device) reg_lambda = torch.tensor(reg_lambda, device=device) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) dataloaders = get_dataloaders(idx_all, labels_all) early_stopping = EarlyStopping(model, **stopping_args) attr_mat_norm_np = normalize_attributes(graph.attr_matrix) attr_mat_norm = matrix_to_torch(attr_mat_norm_np).to(device) epoch_stats = {'train': {}, 'stopping': {}} start_time = time.time() last_time = start_time for epoch in range(early_stopping.max_epochs): for phase in epoch_stats.keys(): if phase == 'train': model.train() # Set model to training mode else: model.eval() # Set model to evaluate mode running_loss = 0 running_corrects = 0 for idx, labels in dataloaders[phase]: idx = idx.to(device) labels = labels.to(device) optimizer.zero_grad() with torch.set_grad_enabled(phase == 'train'): log_preds = model(attr_mat_norm, idx) preds = torch.argmax(log_preds, dim=1) # Calculate loss cross_entropy_mean = F.nll_loss(log_preds, labels) l2_reg = sum( (torch.sum(param**2) for param in model.reg_params)) loss = cross_entropy_mean + reg_lambda / 2 * l2_reg if phase == 'train': loss.backward() optimizer.step() # Collect statistics running_loss += loss.item() * idx.size(0) running_corrects += torch.sum(preds == labels) # Collect statistics epoch_stats[phase]['loss'] = running_loss / len( dataloaders[phase].dataset) epoch_stats[phase]['acc'] = running_corrects.item() / len( dataloaders[phase].dataset) if epoch % print_interval == 0: duration = time.time() - last_time last_time = time.time() logging.info( f"Epoch {epoch}: " f"Train loss = {epoch_stats['train']['loss']:.2f}, " f"train acc = {epoch_stats['train']['acc'] * 100:.1f}, " f"early stopping loss = {epoch_stats['stopping']['loss']:.2f}, " f"early stopping acc = {epoch_stats['stopping']['acc'] * 100:.1f} " f"({duration:.3f} sec)") if len(early_stopping.stop_vars) > 0: stop_vars = [ epoch_stats['stopping'][key] for key in early_stopping.stop_vars ] if early_stopping.check(stop_vars, epoch): break runtime = time.time() - start_time runtime_perepoch = runtime / (epoch + 1) logging.log( 22, f"Last epoch: {epoch}, best epoch: {early_stopping.best_epoch} ({runtime:.3f} sec)" ) # Load best model weights model.load_state_dict(early_stopping.best_state) train_preds = get_predictions(model, attr_mat_norm, idx_all['train']) train_acc = (train_preds == labels_all[idx_all['train']]).mean() stopping_preds = get_predictions(model, attr_mat_norm, idx_all['stopping']) stopping_acc = (stopping_preds == labels_all[idx_all['stopping']]).mean() logging.log(21, f"Early stopping accuracy: {stopping_acc * 100:.1f}%") 
valtest_preds = get_predictions(model, attr_mat_norm, idx_all['valtest']) valtest_acc = (valtest_preds == labels_all[idx_all['valtest']]).mean() valtest_name = 'Test' if test else 'Validation' logging.log(22, f"{valtest_name} accuracy: {valtest_acc * 100:.1f}%") result = {} result['predictions'] = get_predictions(model, attr_mat_norm, torch.arange(len(labels_all))) result['train'] = {'accuracy': train_acc} result['early_stopping'] = {'accuracy': stopping_acc} result['valtest'] = {'accuracy': valtest_acc} result['runtime'] = runtime result['runtime_perepoch'] = runtime_perepoch return model, result
def initialize(self):
    logging.log(9, "Initializing parquet sink")
    self.writer = pq.ParquetWriter(self.path, self.schema, flavor="spark",
                                   compression=self.compression)
def get_personality(
    self,
    profileList,
):
    """
    Return Big Five predictions for all profiles in profileList

    Features are calculated for whole profileList at once.
    An exception is raised if no matching GloVe vectors could be found.
    Do prediction with previously saved models for each dimension
    and save to returnDict.

    Parameters
    ----------
    profileList : list, default=None
        List of Profile objects for which predictions should be carried out.

    Returns
    -------
    returnDict : dict
        Dictionary containing Big Five, word coverage, and word count results.
    """

    # feature pipeline
    pipeline = ModelApplication.glove_pipeline

    # calculate features
    try:
        features = pipeline.fit_transform(profileList)
    except NoGloveValueError:
        # this means, that the user's tweets
        # are not compatible with the used GloVe values
        # no words had a match -> therefore user not suitable
        eString = "User's tweets have no matching words."
        raise NotASuitableUserError(eString)

    # build return dict
    returnDict = {
        'big5_openness': None,
        'big5_conscientiousness': None,
        'big5_extraversion': None,
        'big5_agreeableness': None,
        'big5_neuroticism': None,
        'coverage': ModelApplication.featuresObj.coverageStatistics,
        'wordCount': ModelApplication.featuresObj.wordCounts
    }

    # for every big5 dimension apply prediction
    for dimension in self.big5List:
        model = getattr(ModelApplication, dimension)

        # apply prediction
        big5result = model.predict(features)

        # save result in returnDict
        # contains predictions for all profiles
        returnDict[dimension] = big5result

    # instead of print, do log
    logging.log(level=logging.INFO, msg="Finished prediction")

    return returnDict
def Logging(self, log_messages, context=None):
    yield beam_fn_api_pb2.LogControl()
    for log_message in log_messages:
        for log in log_message.log_entries:
            logging.log(self.LOG_LEVEL_MAP[log.severity], str(log))
def finalize(self):
    logging.log(9, "Finalizing parquet sink")
    self.writer.close()
def inject_verbose_info(self):
    logging.VERBOSE = 15
    logging.verbose = lambda x: logging.log(logging.VERBOSE, x)
    logging.addLevelName(logging.VERBOSE, "VERBOSE")
def log(sql, args=()):
    logging.info('SQL: %s' % sql)