def _write_data_chunk(self, key, chunk, offset, session):
    logger.info(
        'Key {key_id}: writing data chunk, offset {offset}, size {size}'.format(
            key_id=key,
            offset=offset,
            size=len(chunk),
        )
    )
    try:
        self.all_request(
            session.write_plain(
                elliptics.Id(key),
                json='',
                data=chunk,
                data_offset=offset,
            )
        )
    except EllipticsError as e:
        logger.error(
            'Key {key_id}: writing data chunk failed, offset {offset}: {error}'.format(
                key_id=key,
                offset=offset,
                error=e,
            )
        )
        raise
def initialize_project(self):
    for worker, master in zip(self.cluster_spec.workers,
                              self.cluster_spec.yield_masters()):
        state.env.host_string = worker
        run('killall -9 celery', quiet=True)
        for bucket in self.buckets:
            logger.info('Initializing remote worker environment')

            qname = '{}-{}'.format(master.split(':')[0], bucket)
            temp_dir = '{}-{}'.format(self.temp_dir, qname)

            r = run('test -d {}'.format(temp_dir), warn_only=True, quiet=True)
            if r.return_code == 0:
                if self.reuse_worker == 'true':
                    return
                logger.error('Worker env exists, but reuse not specified')
                sys.exit(1)

            run('mkdir {}'.format(temp_dir))
            with cd(temp_dir):
                run('git clone {}'.format(REPO))
            with cd('{}/perfrunner'.format(temp_dir)):
                run('virtualenv -p python2.7 env')
                run('PATH=/usr/lib/ccache:/usr/lib64/ccache/bin:$PATH '
                    'env/bin/pip install '
                    '--download-cache /tmp/pip -r requirements.txt')
def wait_for_indexes_to_become_online(self, host, index_name=None):
    # Poll until the indexes become online
    url = 'http://{}:8093/query/service'.format(host)
    data = {
        'statement': 'SELECT * FROM system:indexes'
    }
    if index_name is not None:
        data = {
            'statement': 'SELECT * FROM system:indexes WHERE name = "{}"'.format(index_name)
        }

    ready = False
    while not ready:
        time.sleep(10)
        resp = requests.Session().post(url=url, data=data)
        if resp.json()['status'] == 'success':
            results = resp.json()['results']
            for result in results:
                if result['indexes']['state'] == 'online':
                    ready = True
                else:
                    ready = False
                    break
        else:
            logger.error('Query: {} => Did not return a success!'.format(data['statement']))

    if index_name is None:
        logger.info('All Indexes: ONLINE')
    else:
        logger.info('Index: {} is ONLINE'.format(index_name))
def remove(self, key, from_groups=None):
    if not from_groups:
        raise InvalidDataError(
            'Groups are not properly defined for key "{0}"'.format(key))

    logger.info('Removing key %s from groups %s' % (key, from_groups))

    session = self.session.clone()
    session.add_groups(from_groups)

    eid = elliptics.Id(key)
    try:
        session.remove(eid).get()
    except elliptics.NotFoundError:
        # key is already removed from the destination groups
        logger.info('Key {0} is not present in group {1}, skipped'.format(key, from_groups))
    except elliptics.Error as e:
        # group is not available ("No such device or address: -6")
        logger.error('Key {0} is not present in group {1}: {2}'.format(key, from_groups, e))
        raise
    except Exception as e:
        logger.error('Key {0}: failed to remove from group {1}: {2}'.format(key, from_groups, e))
        raise ConnectionError('Failed to remove key %s: %s' % (key, e))
def regenerate_cluster_certificate(self, host_port):
    api = 'http://{}/controller/regenerateCertificate'.format(host_port)
    response = self.post(url=api)
    if not response.ok:
        # use a %s placeholder so the payload is actually interpolated by the logging call
        logger.error("Unable to regenerateCertificate: %s", response.content)
    else:
        logger.debug("Regenerated Certificate: %s", response.content)
def async_reader(conn, loop):
    data = conn.recv()
    logger.debug("RECV DATA: %s", data)
    if data is None:
        return

    if data == b'':
        if conn in FD_MAP:
            forward_server = FD_MAP[conn]
            loop.remove_reader(forward_server.fd)
            forward_server.shutdown()
            del FD_MAP[forward_server]
            del FD_MAP[conn]
        loop.remove_reader(conn.fd)
        conn.shutdown()
        return

    if conn not in FD_MAP:
        target_host = find_host(data)
        if target_host is None:
            return
        forward_server = ProxySocket.get_client(host=target_host, port=80)
        forward_server.connect()
        if forward_server and forward_server.fd != -1:
            FD_MAP[conn] = forward_server
            FD_MAP[forward_server] = conn
            loop.add_reader(forward_server.fd, async_reader, forward_server, loop)
            logger.info("Create a connection to %s", target_host)
        else:
            logger.error("FAIL to connect to target host {0}".format(target_host))

    FD_MAP[conn].sendall(data)
def process_message(self, ircmsg):
    msg = Message(ircmsg)

    # ignore anything from #speedrunslive to avoid flooding it accidentally
    if msg.channel == "#speedrunslive":
        pass

    # kill command to force disconnect the bot from the server
    # WARNING: the bot will not reconnect until manually reset
    elif msg.command in {"!kill", ".kill"}:
        logger.debug("Kill request detected from " + msg.sender.lower())
        if self.has_op(msg.sender):
            # actually kills the bot if the sender is privileged
            self.send("QUIT Kill requested by " + msg.sender + "\n")
            raise KillException

    else:
        for command in self.commands:
            try:
                command(self, msg)
            except NameException as e:
                logger.error(traceback.format_exc())
                message = "There was a problem looking up data for " + str(e) + ". "
                message += "Do they have an SRL profile?"
                self.sendmsg(msg.channel, message)
            except Exception as e:
                logger.error(traceback.format_exc())
                self.sendmsg(msg.channel, "Something weird happened...")
def update_document(p_cursor, source_doc, p_id_field, p_table):
    # Manage SQL parameters
    sql_fields = "({0})".format(",".join(source_doc.keys()))
    sql_update_fields_values = ",".join(
        ["{field}=%s".format(field=field) for field in source_doc.keys() if field != p_id_field])

    try:
        insert_sql = "INSERT INTO {table} {fields} SELECT {values}".format(
            table=p_table,
            fields=sql_fields,
            values=('%s,' * len(source_doc.values()))[:-1]
        )
        update_sql = "UPDATE {table} SET {update_fields_values} WHERE {id_field} = {id_value}".format(
            table=p_table,
            update_fields_values=sql_update_fields_values,
            id_field=p_id_field,
            id_value=source_doc[p_id_field]
        )
        sql = "WITH upsert AS ({update_sql} RETURNING *) {insert_sql} WHERE NOT EXISTS (SELECT * FROM upsert);".format(
            update_sql=update_sql,
            insert_sql=insert_sql
        )
        # parameter order matters: first the values bound by the UPDATE SET clause,
        # then the full set of values bound by the INSERT
        parameters = [source_doc[key] for key in source_doc.keys() if key != p_id_field] + \
                     [source_doc[key] for key in source_doc.keys()]
        p_cursor.execute(sql, parameters)
    except psycopg2.Error as e:
        logger.error("Document not inserted in PostgreSQL Database %s", source_doc)
        logger.error(e)
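# A minimal usage sketch for the upsert helper above. The "documents" table and the local
# connection parameters are illustrative assumptions only; they do not come from the original source.
import psycopg2

conn = psycopg2.connect(dbname='docs', user='postgres', password='secret', host='localhost')
with conn:
    with conn.cursor() as cur:
        # inserts the row if id 42 is absent, otherwise updates its non-id columns
        update_document(cur, {'id': 42, 'title': 'hello'}, 'id', 'documents')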
def main():
    size = 10000    # length of vector v
    ITER = 50       # number of iterations to run for each report

    comm = MPI.COMM_WORLD
    # size = comm.Get_size()

    # Configure and connect to the Kafka data backbone server.
    # This is done only once by the rank 0 MPI process.
    #
    if comm.rank == MASTER:

        # Parse the command line
        #
        try:
            options = parse_commandline()
        except Exception, ex:
            logger.error("Command line parsing error: %s", str(ex))
            comm.Abort(-1)

        # Connect to Kafka
        #
        kw_cfg = KafkaWriterConfig(Host=options.KafkaHost, Topic=options.KafkaTopic)
        try:
            kw = KafkaWriter(kw_cfg)
            kw.connect()
            logger.info('connecting to Kafka backbone via %s', kw_cfg.Host)
        except Exception, ex:
            logger.error("Error connecting to Kafka backbone via %s: %s",
                         kw_cfg.Host, str(ex))
            comm.Abort(-1)
def __init__(self, request, client_address, server):
    try:
        self.sqlite = DatabaseObject(DB_CONFIG['SQLITE'])
        self.table_name = 'proxy'
    except Exception, e:
        self.sqlite = ''
        logger.error('SQLite error: %s', e)
def resolve_dependencies(self):
    # update the definition from a reference, if there is one
    if self.content_reference is not None:
        if '#' != self.content_reference[:1]:
            raise Exception("Only relative 'contentReference' element definitions are supported right now")
        elem = self.element.profile.element_with_id(self.content_reference[1:])
        if elem is None:
            raise Exception("There is no element definition with id \"{}\", as referenced by {} in {}"
                            .format(self.content_reference, self.path, self.profile.url))
        self._content_referenced = elem.definition

    # resolve bindings
    if (self.binding is not None and self.binding.is_required
            and (self.binding.valueSet is not None
                 or self.binding.legacy_uri is not None
                 or self.binding.legacy_canonical is not None)):
        uri = self.binding.valueSet or self.binding.legacy_canonical or self.binding.legacy_uri
        if 'http://hl7.org/fhir' != uri[:19]:
            logger.debug("Ignoring foreign ValueSet \"{}\"".format(uri))
            return

        # remove version from canonical URI, if present, e.g. "http://hl7.org/fhir/ValueSet/name-use|4.0.0"
        if '|' in uri:
            uri = uri.split('|')[0]

        valueset = self.element.profile.spec.valueset_with_uri(uri)
        if valueset is None:
            logger.error("There is no ValueSet for required binding \"{}\" on {} in {}"
                         .format(uri, self.name or self.prop_name, self.element.profile.name))
        else:
            self.element.valueset = valueset
            self.element.enum = valueset.enum
def player_buy(self, player, coins, card_name):
    """ A player tries to buy a card from a buying pile. """
    card_class = self.piles[card_name]
    # enough money and cards left in the pile
    if coins >= card_class.cost:
        if card_class.qty_left > 0:
            card = card_class(self)
            if card.qty_left == 0:
                self.num_piles_gone += 1
            player.buy_card(card)
            for p in self.table:
                p.ntf_buy(player, card)
        else:
            # the client should not have sent a buy for that card
            logger.error(
                "player %s" % player.name
                + " wants to buy a %s" % card_class.name
                + " but there are %d left in the pile" % card_class.qty_left
            )
    else:
        # the client should not have sent a buy for that card
        logger.error(
            "player %s" % player.name
            + " wants to buy card %s" % card_class.name
            + " but he has only %d coins" % coins
        )
def main():
    args = parse_argument()
    try:
        if args.socks5[0] and args.socks5[1]:
            if args.proxy:
                logger.error('invalid proxy protocol count.')
                raise SystemExit
            socks.set_default_proxy(socks.SOCKS5, args.socks5[0], int(args.socks5[1]),
                                    True, args.socks5[2], args.socks5[3])
            socket.socket = socks.socksocket
    except Exception as e:
        logger.error('invalid socks5 proxy arguments.')
        raise SystemExit

    t = Thread(args.board, args.thread)
    if not args.downloading:
        thread_info = t.thread_info()
        logger.info('/{}/ - {} - {}'.format(args.board, thread_info['sub'],
                                            const.BOARDS[args.board]))
        logger.info('total images - {}'.format(thread_info['images']))
    else:
        downloader = Downloader(path=args.path, threads=args.threads,
                                timeout=args.timeout, is_thumb=args.thumb)
        q = t.detail_queue()
        downloader.fetch(q)
def monitor_index_state(self, host, index_name=None):
    logger.info('Monitoring index state')

    if index_name is None:
        statement = 'SELECT * FROM system:indexes'
    else:
        statement = 'SELECT * FROM system:indexes WHERE name = "{}"'.format(index_name)

    is_building = True
    while is_building:
        time.sleep(self.POLLING_INTERVAL)
        response = self.exec_n1ql_statement(host, statement)
        if response['status'] == 'success':
            for result in response['results']:
                if result['indexes']['state'] != 'online':
                    break
            else:
                is_building = False
        else:
            logger.error(response['status'])

    if index_name is None:
        logger.info('All Indexes: ONLINE')
    else:
        logger.info('Index "{}" is ONLINE'.format(index_name))
def check_response(self):
    "Error codes: https://dev.twitter.com/docs/error-codes-responses"
    try:
        api_call = (self.api_type,
                    int(self.status['x-rate-limit-remaining']),
                    int(self.status['x-rate-limit-limit']),
                    int(self.status['x-rate-limit-reset']),
                    self.status)
    # Sometimes we get a wrong answer from twitter, like an expiration date in 1981.
    # Retry after a pause. Need to check later whether the error is not
    # something done wrong here, but it seems wrong on their behalf.
    except KeyError:
        # api_call = (self.api_type, None, None, None, self.status)
        print(self.resp)
        logger.error(self.resp)
        logger.error(self.status)
        time.sleep(30)
        return self.create_URL()

    if self.status_code == 200:
        if 'error' in self.response:
            logger.error('Error in response: {}'.format(self.response))
            # non-existing resource
            if self.response['error'][0]['code'] == '34':
                return TwitterResponse(34, None, self.response_time, api_call)
            # rate limit for the specific resource
            elif self.response['error'][0]['code'] == '88':
                if self.control_pause:
                    logger.info('Pause - 88')
                    return TwitterResponse(88, None, self.response_time, api_call)
                else:
                    return self.pause_API()
        else:
            return TwitterResponse(200, self.response, self.response_time, api_call)

    # Supposedly not the right resource; seems to be raised when
    # trying to get information from a protected account
    elif self.status_code == 401:
        logger.error(self.response)
        return TwitterResponse(401, 'protected', self.response_time, api_call)

    elif self.status_code == 429:
        if self.control_pause:
            return TwitterResponse(429, None, self.response_time, api_call)
        else:
            return self.pause_API()

    elif self.status_code in [500, 502, 503, 504]:
        logger.error('Twitter Internal error pause for 30 sec: {}'.format(self.response))
        # print 'Check Response: Twitter Internal error: Pause for 30s'
        time.sleep(30)
        return self.create_URL()

    elif self.status_code == 404:
        return TwitterResponse(404, None, self.response_time, api_call)

    elif self.status_code == 403:
        return TwitterResponse(403, 'User suspended', self.response_time, api_call)

    else:
        return TwitterResponse(int(self.status_code), None, self.response_time, api_call)
def get(self, model):
    try:
        while self.lock[model]:
            time.sleep(0.5)
        return self.conn[model]
    except KeyError:
        logger.error('KeyError: unknown model name.')
def init(self, model, colume):
    try:
        cur = self.conn[model].cursor()
        cur.execute('CREATE TABLE item(%s)' % colume)
        self.conn[model].commit()
    except KeyError:
        logger.error('KeyError: unknown model name.')
def request_assignments(self):
    available_devices = self.get_available_devices()
    if len(available_devices) > 0:
        try:
            viable_requests = utils.get_viable_process_requests(
                self.host, self.token, method=self.method
            )
            for request in viable_requests['data']:
                # Request assignments for the number of available devices
                if len(available_devices) <= 0:
                    return
                # request ProcessRequest assignment
                logger.info(
                    "Requesting assignment of PR %s", str(request["id"])
                )
                utils.request_pr_assignment(
                    self.host, self.token, request['id'], method=self.method
                )
                available_devices.pop()
        except Exception:
            logger.error(
                "Error trying to request assignments", exc_info=True
            )
            return
def graph():
    query = request.args.get('q', None)
    _render = request.args.get('render', 'True')
    _render = False if _render == 'False' else True
    logger.debug('QUERY: {}'.format(query))
    start_timer = datetime.now()

    if query is not None and _render:
        start, end, expressions, data_frame = parse_query(query)
        if expressions[0].name == 'metric':
            chart = generate_graph(query, start, end, data_frame)
            table = ''
        elif expressions[0].name == 'log':
            chart = '{}'
            table = generate_table(start, end, data_frame)
        else:
            logger.error("Unknown expression type: %s" % str(type(expressions[0])))
            chart = '{}'
            table = ''
    else:
        chart = "{}"
        table = ''
        # query = ''

    end_timer = datetime.now()
    logger.debug('Took %0.02fs to process this request' % (end_timer - start_timer).total_seconds())

    return render('graph.html', 'Graph', chart=chart, table=table, q=query)
def create(self, user, name, descripton=u''):
    """
    Create a new group
    :param user: Owner
    :param name: Group name
    :param descripton: Group description
    :return: a group entity
    """
    assert user is not None and name, u"'user' and 'name' are required arguments"
    group = self.db.Group()
    group.user = user
    group.name = name
    group.description = descripton
    try:
        group.validate()
        group.save()
    except Exception as ex:
        logger.error(u"Group.create: %s", ex.message)
        return None
    return group
def rename(self, user, name, new_name):
    """
    Renames user's group. The method finds the group by name and tries to rename it.
    :param user: Owner
    :param name: Current group name
    :param new_name: New group name
    :return: Group with new name or None
    """
    assert user is not None and name and new_name, \
        u"'user', 'name' and 'new_name' are required parameters"
    group = self.single(user=user, name=name)
    if group is None:
        message = u'GroupService.rename: group {0} not found'.format(name)
        logger.error(message)
        return None
    group.name = new_name
    try:
        group.validate()
        group.save()
    except Exception as ex:
        logger.error(u"Group.rename: %s", ex.message)
        return None
    return group
def login(self):
    logger.info('login...')
    if os.path.isfile(COOKIE):
        self.cookieJar.load(ignore_discard=True, ignore_expires=True)
    else:
        self._verifycode = self._getverifycode()
        self.pswd = self._preprocess(self._pw, self._verifycode)
        self._headers.update({"Referer": "http://ui.ptlogin2.qq.com/cgi-bin/login?target=self&style=5&mibao_css=m_webqq&appid={0}&enable_qlogin=0&no_verifyimg=1&s_url=http%3A%2F%2Fweb.qq.com%2Floginproxy.html&f_url=loginerroralert&strong_login=1&login_state=10&t=20121029001".format(self.appid)})
        url = "http://ptlogin2.qq.com/login?u={0}&p={1}&verifycode={2}&aid={3}&u1=http%3A%2F%2Fweb.qq.com%2Floginproxy.html%3Flogin2qq%3D1%26webqq_type%3D10&h=1&ptredirect=0&ptlang=2052&from_ui=1&pttype=1&dumy=&fp=loginerroralert&action=3-25-30079&mibao_css=m_webqq&t=1&g=1".format(self.qq, self.pswd, self._verifycode[1], self.appid)
        res = self._request(url=url, cookie=True)
        if res.find("成功") != -1:        # response contains "成功" ("success")
            pass
        elif res.find("验证码") != -1:    # response contains "验证码" ("verification code")
            print("验证码错误")           # "captcha error"
            self._getverifycode()
            self.login()
        else:
            logger.error(res)
            raise Exception("登陆错误")   # "login error"

    self.cookies.update(dict([(x.name, x.value) for x in self.cookieJar]))

    tmp = self.get_login_info()
    logger.debug(tmp)
    if os.path.isfile(COOKIE) and tmp:  # cookie timeout
        self._login_info.update(tmp)
        self.name_info()
        self.__poll()
    else:
        self.login()
def run( arguments ):
    '''run( arguments )

    Parses and executes the given command-line `arguments`.

    Parameters:
    - arguments: A list of strings representing the command-line arguments to
      the ``restblog`` executable, e.g. ``sys.argv[1:]``
    '''
    names = getCommandNames()

    if arguments and arguments[0] in names:
        # We are invoking a subcommand, e.g. restblog list
        name = arguments[0]
        arguments = arguments[1:]
    else:
        # Running main command by itself, i.e. restblog
        name = 'restblog'

    try:
        command = loadCommandByName( name )
        command.run( arguments )
    except Exception, ex:
        logger.error( 'Unable to execute %(name)s command.', locals() )
        logger.error( 'Details: %(ex)s', locals() )
        print 'Type \'restblog %(name)s --help\' for usage.' % locals()
def image_download_handler(self):
    while True:
        # logger.info(self.url_queue.qsize())
        url_info = self.url_queue.get()
        query = url_info[0]
        url = url_info[1]
        order_number = url_info[2]
        flag = url_info[3]
        try:
            image_type = "." + url.split(".")[-1][:5]
            filename = query + str(order_number) + image_type
            self.download.down_load_image((query, filename, url,
                                           int(get_conf.find(("image", ))["try_cnt"])))
            if flag == 1:
                pass
            elif flag == 0:
                with global_lock:
                    with open(get_conf.find(("file", ))["finished_home"], "a+") as f:
                        f.write(query + "\n")
            else:
                self.url_queue.put(url_info)
                with global_lock:
                    with open(get_conf.find(("file", ))["finished_home"], "a+") as f:
                        fp1 = f.readlines()
                        if fp1[-1].strip() != query:
                            f.write(query + "\n")
                break
        except Exception as e:
            logger.error(e)
def _get_selected_file(directory):
    files = directory.get_files()
    if len(files) > 0:
        _print_files_list(files)
        return _select_file(files)
    else:
        logger.error("The input directory has no files\n")
def getBestArtistMatch(artist_name):
    with mb_lock:
        attempt = 0

        if any((c in set('!?*')) for c in artist_name):
            artist_name = '"' + artist_name + '"'

        while attempt < 10:
            try:
                artist_results = query.getArtists(webservice.ArtistFilter(query=artist_name, limit=1))
                if artist_results:
                    includes = webservice.ArtistIncludes(releases=(model.Release.TYPE_ALBUM,
                                                                   model.Release.TYPE_OFFICIAL))
                    artist = artist_results[0].artist
                    # Unfortunately, the search results do not contain release information,
                    # we have to query again for it...
                    return query.getArtistById(utils.extractUuid(artist.id), includes)
                else:
                    return None
                break
            except WebServiceError, e:
                logger.error('Attempt to query MusicBrainz for Artist %s failed: %s. Retrying in 10 seconds...' % (artist_name, e))
                attempt += 1
                time.sleep(10)
def __init__(self):
    """Create or get a new instance (singleton).
    """
    # kerneldict holds all configuration
    self._kerneldict = dict()

    cwd = os.path.dirname(os.path.realpath(__file__))
    config_files = glob.glob("{0}/configs/*.json".format(cwd))

    try:
        for f in config_files:
            logger.info("Loading kernel configurations from {0}".format(f))
            print("Loading kernel configurations from {0}".format(f))
            # load file into a dictionary
            json_data = open(f)
            data = json.load(json_data)
            kernel_name = data["kernel_name"]
            kernel_cfgs = data["kernel_configs"]
            logger.debug("Found MD kernel '{0}' configurations for {1}".format(kernel_name, kernel_cfgs.keys()))
            self._kerneldict[data["kernel_name"]] = data["kernel_configs"]
    except Exception, ex:
        logger.error("Error loading JSON file: {0}".format(str(ex)))
        raise
def post(self, group_id):
    """
    :param group_id:
    :return:
    """
    try:
        cards = json.loads(request.data)
        user = get_user()
        success = []
        for id in cards:
            card = db.Card.find_one({'_id': ObjectId(id)})
            if not card:
                break
            card.is_studying = False
            card.save()
            self._add_to_training(user, card)
            success.append(id)
        return {
            'data': success
        }
    except Exception as ex:
        logger.error(ex.message)
        api.abort(500)
def update_local_cache_groups(self):
    new_cache_groups = set()

    local_addresses = []
    local_ips = h.ips_set(self.hostname)
    logger.info('Local ips: {0}'.format(local_ips))
    for address in self.session.routes.addresses():
        if address.host in local_ips:
            local_addresses.append(address)

    for address in local_addresses:
        try:
            s = self.session.clone()
            s.set_direct_id(address)
            msre = s.monitor_stat(address,
                                  categories=elliptics.monitor_stat_categories.backend).get()[0]
            for backend in msre.statistics['backends'].itervalues():
                if backend['status']['state'] != 1:
                    continue
                backend_base_path = backend.get('backend', {}).get('config', {}).get('data', None)
                if backend_base_path and backend_base_path.startswith(self.cache_path_prefix):
                    new_cache_groups.add(backend['backend']['config']['group'])
        except Exception as e:
            logger.error('Failed to fetch monitor stat from address {0}: {1}\n{2}'.format(
                address, e, traceback.format_exc()))
            continue

    logger.info('Updated list of local cache groups (local addresses: {0}): {1}'.format(
        local_addresses, list(new_cache_groups)))

    self.__local_cache_groups = new_cache_groups
def added(self, id = None, page=0, play='false', f = None, date = None, dir = None): try: page = int(page) except: page = 0 limit = 17 if play == 'true' and id is not None: if date is None: id = None play = 'false' else: try: callingDate = datetime.strptime(date, "%a %b %d %H:%M:%S %Y %Z") currentDate = datetime.utcnow() - timedelta(seconds = 60) # Sat Feb 23 19:35:57 2013 GMT popcorn hour example call #callingDate = datetime.utcfromtimestamp(date) if callingDate < currentDate: id = None play = 'false' logger.warn('added', 'Play request time < now - 60 (%s < %s).', str(callingDate), str(currentDate)) except: id = None play = 'false' logger.error('added', 'Movie.py', 'Error converting UTC Javascript date for %s.', date) if id is not None: self.content.update_media_watched("movie",id) filters = () if f is not None: pFilter = Filter.ParseFilter(f) if pFilter is not None: filters += pFilter, movies=self.content.get_movies(page, limit, filters, 'm.added DESC') total = self.content.get_total_items("movie m", "m.id", None, Filter("movie", {"movie": "m"}, filters)) pPage = self.content.get_prev_page(page, limit, "movie", total) nPage = self.content.get_next_page(page, limit, "movie", total) filterLetters = self.content.get_available_letters("movie m", "m.file_name", None, None) tmpl = lookup.get_template("movies/movies.html") startOn = 'moviename_0' if id is not None and play == 'true': startOn = 'movieid_' + id elif dir is not None: startOn = 'moviename_' + str(len(movies)-1) return tmpl.render(movies = movies , prevPage = pPage , nextPage = nPage , totalPages = int(ceil(total/limit)) , page = page , play = play , selected = Filter.getFilterValue(Filter.FILTER_LETTER, filters) , filterUrl = "f=" + (f if f is not None else '') , filterLetters = filterLetters , pageName = 'added' , id = id , startOn = startOn)
def _check_valid_merge_map(self, merge_map: Dict[str, str]):
    # Check merge_map is not empty.
    if len(merge_map) == 0:
        logger.error(f'len(merge_map) == 0')
        raise Exception

    # Check that the object settings are valid.
    self._check_valid_object_settings()

    # Check that all of the classes in merge_map exist in the object settings.
    invalid_classes = []
    for src_class, dst_class in merge_map.items():
        if src_class not in self.obj_config.exported_object_classes:
            invalid_classes.append(src_class)
        if dst_class not in self.obj_config.exported_object_classes:
            invalid_classes.append(dst_class)
    if len(invalid_classes) > 0:
        logger.error(f'Found invalid classes in merge_map.')
        logger.error(f'Invalid classes: {invalid_classes}')
        logger.error(
            f'self.obj_config.exported_object_classes: {self.obj_config.exported_object_classes}'
        )
        raise Exception

    # Check that the maps are possible
    availability_map = {}
    erroneous_maps = {}
    for class_name in self.obj_config.exported_object_classes:
        availability_map[class_name] = None
    for src_class, dst_class in merge_map.items():
        if availability_map[src_class] is None:
            if availability_map[dst_class] is not None:
                logger.error(
                    f'Cannot map {src_class} to {dst_class} because {dst_class} is mapped to {availability_map[dst_class]}'
                )
                raise Exception
            availability_map[src_class] = dst_class
        else:
            if src_class not in erroneous_maps:
                erroneous_maps[src_class] = [dst_class]
            else:
                erroneous_maps[src_class].append(dst_class)
    if len(erroneous_maps) > 0:
        logger.error(f'Invalid merge_map: {merge_map}')
        for class_name, dst_class_names in erroneous_maps.items():
            logger.error(
                f'{class_name} is already mapped to {availability_map[class_name]}'
            )
            logger.error(
                f'Failed to map {class_name} to: {erroneous_maps[class_name]}'
            )
        raise Exception
def rescale_shift_until_valid(self, frame_shape: list, target_aspect_ratio: float, max_retry_count: int = 5) -> ConstantAR_BBox: result = self mode = 'c' pad_direction = 'height' frame_h, frame_w = frame_shape[:2] retry_count = -1 success = False backup = self.copy() while retry_count < max_retry_count: retry_count += 1 bounds, edge_orientation, new_rect = result.rescale_shift_bbox( frame_shape=frame_shape, target_aspect_ratio=target_aspect_ratio, pad_direction=pad_direction, mode=mode) [x_is_in_bounds, y_is_in_bounds] = bounds [is_left_edge, is_right_edge, is_top_edge, is_bottom_edge] = edge_orientation [new_xmin, new_ymin, new_xmax, new_ymax] = new_rect if x_is_in_bounds and y_is_in_bounds: success = True result = ConstantAR_BBox.from_list( [new_xmin, new_ymin, new_xmax, new_ymax]) break elif x_is_in_bounds and not y_is_in_bounds: new_ymin, new_ymax = 0, frame_h pad_direction = 'width' if is_left_edge and not is_right_edge: mode = 'cl' elif is_right_edge and not is_left_edge: mode = 'cr' else: mode = 'c' result = ConstantAR_BBox.from_list( [new_xmin, new_ymin, new_xmax, new_ymax]) elif not x_is_in_bounds and y_is_in_bounds: new_xmin, new_xmax = 0, frame_w pad_direction = 'height' if is_top_edge and not is_bottom_edge: mode = 'ct' elif is_bottom_edge and not is_top_edge: mode = 'cb' else: mode = 'c' result = ConstantAR_BBox.from_list( [new_xmin, new_ymin, new_xmax, new_ymax]) elif not x_is_in_bounds and not y_is_in_bounds: new_ymin, new_ymax = 0, frame_h new_xmin, new_xmax = 0, frame_w pad_direction = 'height' mode = 'c' result = ConstantAR_BBox.from_list( [new_xmin, new_ymin, new_xmax, new_ymax]) else: raise Exception if not success: logger.error( f"Couldn't obtain target aspect ratio within {max_retry_count} retries." ) raise Exception result.check_bbox_aspect_ratio(target_aspect_ratio=target_aspect_ratio) return result
def request(self, method, url, **kwargs):
    """
    Constructs and sends a :py:class:`requests.Request`.
    Returns :py:class:`requests.Response` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary or bytes to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'filename': file-like-objects`` for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data before giving up, as a float, or \
        a (`connect timeout, read timeout <user/advanced.html#timeouts>`_) tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Set to True by default.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param stream: (optional) whether to immediately download the response content. Defaults to ``False``.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    """
    # prepend url with hostname unless it's already an absolute URL
    url = self._build_url(url)
    logger.debug(" Start to {method} {url}".format(method=method, url=url))
    logger.debug(" kwargs: {kwargs}".format(kwargs=kwargs))

    # store meta data that is used when reporting the request to locust's statistics
    request_meta = {}

    # set up pre_request hook for attaching meta data to the request object
    request_meta["method"] = method
    request_meta["start_time"] = time.time()

    if "httpntlmauth" in kwargs:
        from requests_ntlm import HttpNtlmAuth
        auth_account = kwargs.pop("httpntlmauth")
        kwargs["auth"] = HttpNtlmAuth(
            auth_account["username"], auth_account["password"])

    response = self._send_request_safe_mode(method, url, **kwargs)
    request_meta["url"] = (response.history and response.history[0] or response) \
        .request.path_url

    # record the consumed time
    request_meta["response_time"] = int((time.time() - request_meta["start_time"]) * 1000)
    self._response_time = request_meta["response_time"]

    # get the length of the content, but if the argument stream is set to True, we take
    # the size from the content-length header, in order to not trigger fetching of the body
    if kwargs.get("stream", False):
        request_meta["content_size"] = int(response.headers.get("content-length") or 0)
    else:
        request_meta["content_size"] = len(response.content or "")

    request_meta["request_headers"] = response.request.headers
    request_meta["request_body"] = response.request.body
    request_meta["status_code"] = response.status_code
    request_meta["response_headers"] = response.headers
    request_meta["response_content"] = response.content

    logger.debug(" response: {response}".format(response=request_meta))

    try:
        response.raise_for_status()
    except RequestException as e:
        logger.error(" Failed to {method} {url}! exception msg: {exception}".format(
            method=method, url=url, exception=str(e)))
    else:
        logger.debug(
            """ status_code: {}, response_time: {} ms, response_length: {} bytes"""
            .format(request_meta["status_code"], request_meta["response_time"],
                    request_meta["content_size"]))

    return response
def inner(*args, **kwargs):
    try:
        return f(*args, **kwargs)
    except IndexError as error:
        logger.error(f"""IndexError Error: {error}
                     traceback : {traceback.format_exc()}""")
    except KeyError as error:
        logger.error(f"""KeyError Error: {error}
                     traceback : {traceback.format_exc()}""")
    except NameError as error:
        logger.error(f"""NameError Error: {error}
                     traceback : {traceback.format_exc()}""")
    except SyntaxError as error:
        logger.error(f"""SyntaxError Error: {error}
                     traceback : {traceback.format_exc()}""")
    except TypeError as error:
        logger.error(f"""TypeError Error: {error}
                     traceback : {traceback.format_exc()}""")
    except RuntimeError as error:
        logger.error(f"""RuntimeError Exception {error}
                     traceback : {traceback.format_exc()}""")
    except Exception as error:
        logger.error(f"""{f.__name__} requests UNEXPECTED FAILURE {error}
                     traceback : {traceback.format_exc()}""")
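# A minimal sketch of how an error-logging wrapper like inner() above is typically applied
# as a decorator. The outer catch_errors() function, fetch_item() and the logger wiring are
# hypothetical names added for illustration; only the inner() pattern comes from the snippet.
import functools
import logging
import traceback

logger = logging.getLogger(__name__)

def catch_errors(f):
    @functools.wraps(f)
    def inner(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception as error:
            # log the failure with its traceback instead of propagating it
            logger.error(f"{f.__name__} UNEXPECTED FAILURE {error}\n{traceback.format_exc()}")
    return inner

@catch_errors
def fetch_item(items, index):
    return items[index]

fetch_item([], 3)   # logs an IndexError with traceback instead of raising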
def run_import(type_doc=None, source_file=None): conf = json.load(open('./init-conf.json')) # Command line args arguments = docopt(__doc__, version=conf['version']) configure(conf['log']['level_values'][conf['log']['level']], conf['log']['dir'], conf['log']['filename'], conf['log']['max_filesize'], conf['log']['max_files']) # # Création du mapping # es_mappings = json.load(open('data/es.mappings.json')) # Connexion ES métier try: param = [{ 'host': conf['connectors']['elasticsearch']['host'], 'port': conf['connectors']['elasticsearch']['port'] }] es = Elasticsearch(param) logger.info('Connected to ES Server: %s', json.dumps(param)) except Exception as e: logger.error('Connection failed to ES Server : %s', json.dumps(param)) logger.error(e) # Création de l'index ES metier cible, s'il n'existe pas déjà index = conf['connectors']['elasticsearch']['index'] if not es.indices.exists(index): logger.debug("L'index %s n'existe pas : on le crée", index) body_create_settings = { "settings": { "index": { "number_of_shards": conf['connectors']['elasticsearch']['number_of_shards'], "number_of_replicas": conf['connectors']['elasticsearch']['number_of_replicas'] }, "analysis": { "analyzer": { "lower_keyword": { "type": "custom", "tokenizer": "keyword", "filter": "lowercase" } } } } } es.indices.create(index, body=body_create_settings) # On doit attendre 5 secondes afin de s'assurer que l'index est créé avant de poursuivre time.sleep(2) # Création des type mapping ES for type_es, properties in es_mappings['geopro'].items(): logger.debug("Création du mapping pour le type de doc %s", type_es) es.indices.put_mapping(index=index, doc_type=type_es, body=properties) time.sleep(2) # # Import des données initiales # # Objet swallow pour la transformation de données swal = Swallow() # Tentative de récupération des paramètres en argument type_doc = arguments['--type_doc'] if not type_doc else type_doc source_file = arguments['--source_file'] if not source_file else ( './upload/' + source_file) if arguments['--update']: if type_doc in [ 'referentiel_activites', 'referentiel_communes', 'communes', 'activites_connexes' ]: # Suppression des docs logger.debug("Suppression des documents de type %s", type_doc) p_es_conn.delete(index=p_es_index, doc_type=type_doc) time.sleep(3) # On lit dans un fichier if type_doc == "communes": reader = JsonFileio() swal.set_reader(reader, p_file=source_file) else: reader = CSVio() swal.set_reader(reader, p_file=source_file, p_delimiter=';') # On écrit dans ElasticSearch writer = ESio(conf['connectors']['elasticsearch']['host'], conf['connectors']['elasticsearch']['port'], conf['connectors']['elasticsearch']['bulk_size']) swal.set_writer(writer, p_index=conf['connectors']['elasticsearch']['index'], p_timeout=30) # On transforme la donnée avec la fonction swal.set_process(file_to_elasticsearch, p_type=type_doc, p_es_conn=es, p_es_index=conf['connectors']['elasticsearch']['index'], p_arguments=arguments) if arguments['--init']: logger.debug("Opération d'initialisation") elif arguments['--update']: logger.debug("Opération de mise à jour") else: logger.error("Type d'opération non défini") logger.debug("Indexation sur %s du type de document %s", conf['connectors']['elasticsearch']['index'], type_doc) swal.run(1) logger.debug("Opération terminée pour le type de document %s ", type_doc)
def __checker_init(self): self.__s = Solver() secret_list = [] mask_list = [] unimportant_list = [] variables = [] self.__variables_stable = {} self.__variables_transient = {} variables_activation = {} for node in self.__circuit.nodes(): if self.__circuit[node]['node_type'] == 'port': for label in self.__labels[str(node)]: variables += [label] label_type = label.split('_')[0] if label_type == 's': secret_list += [label] elif label_type == 'm': mask_list += [label] elif label_type == 'y': unimportant_list += [label] else: logger.error('Unknown label type of the label {} for the node {}'.format(label, node)) exit(-1) variables = sorted(set(variables) - set(unimportant_list)) secret_list = set(secret_list) mask_list = set(mask_list) self.__masks = mask_list self.__secrtes = secret_list indep_activation_sums = {} for share in self.__shares: indep_activation_sums[share] = Int('share_activation_sum_{}'.format(share)) indep_activation_sums['output'] = Int('share_activation_sum_output') for node in self.__circuit.nodes(): self.__variables_stable[node] = [Bool('{}_{}_stable'.format(v, node)) for v in variables] variables_activation[node] = Bool('activation_{}'.format(node)) if self.__mode == 'transient': self.__variables_transient[node] = [Bool('{}_{}_transient'.format(v, node)) for v in variables] shares_acivation = {} for share in self.__shares: shares_acivation[share] = {} for n in self.__shares[share]: shares_acivation[share][n] = Bool('activation_share_{}'.format(n)) shares_acivation['output'] = {} for o in self.__outputs: shares_acivation['output'][o] = Bool('activation_share_{}'.format(o)) for share in self.__shares: indep_activation_sums[share] = Sum([If(shares_acivation[share][n], 1, 0) for n in self.__shares[share]]) self.__s.add(indep_activation_sums[share] <= self.__order) self.__s.add(indep_activation_sums[share] >= 0) indep_activation_sums['output'] = Sum([If(shares_acivation['output'][n], 1, 0) for n in shares_acivation['output']]) self.__s.add(indep_activation_sums['output'] <= self.__order) self.__s.add(indep_activation_sums['output'] >= 0) variables_checking_gate = {} for var in variables: variables_checking_gate[var] = Bool('{}_checking_gate'.format(var)) lst = [] for share in shares_acivation: for node in shares_acivation[share]: try: ind = [str(i) for i in self.__variables_transient[int(node)]].index('{}_{}_transient'.format(var, node)) lst += [And(shares_acivation[share][node], self.__variables_transient[int(node)][ind])] except: ind = [str(i) for i in self.__variables_transient[node]].index('{}_{}_transient'.format(var, node)) lst += [And(shares_acivation[share][node], self.__variables_transient[node][ind])] variables_checking_gate[var] = self.__xor_list(lst) checking_secrets = [] checking_masks = [] for var in variables_checking_gate: v = '_'.join(str(var).split('_')[:2]) if v in secret_list: checking_secrets += [variables_checking_gate[var]] elif v in mask_list: checking_masks += [variables_checking_gate[var]] self.__s.add(And([Or(checking_secrets)] + [Not(v) for v in checking_masks]))
from cassandra.cluster import Cluster
from logger import logger
import json

try:
    cas_clusters = Cluster(['192.168.85.128', '192.168.85.130'])
    cas_db_session = cas_clusters.connect('naqaba')
    logger.info("Target nodes connected")
except Exception as e:
    logger.error(e)


def get_bus_locations(**kwrgs):
    cql_query = 'select lat, long, speed, cast(record_time as text) as rec_time ' \
                'from vehicle_locations where bus_id=' + "'%s'" % kwrgs['bus_id'] + ' ALLOW FILTERING;'
    data = []
    raw_data = cas_db_session.execute(cql_query)
    columns = raw_data.column_names
    for row in raw_data.current_rows:
        data.append(dict(zip(columns, row)))
    response = json.dumps(data)
    return response
def show(self, memory, event_handler, clear_page_and_print_title, menu_header):
    clear_page_and_print_title(BETTER_FOOD)
    ic()
    subcategory_name = memory["subcategory_name"]
    product = memory["product"]
    best_products = memory["best_products"]
    print()
    print(subcategory_name)
    print()
    print("Produit à remplacer: ", product.product_name, end=" ")
    print(" | Nutriscore: ", product.nutriscore, end=" | Marque: ")
    # Below, str form is possible because of __repr__ in Product class
    print(product.list_of_brands)
    print()
    print("Que souhaitez-vous faire ?")
    print(f"{menu_header}")
    if best_products:
        print("Ou Ajouter un produit en favori ?")
        print()
        for i, pair in enumerate(best_products):
            product = pair[0]
            print(f"[{INDEX_OF_FIRST_PROD + i}] {product}", end=" | ")
            print(f"Nutriscore: {product.nutriscore}", end=" | ")
            print(f"Marque: {product.list_of_brands}")
            print(f"    Ingrédients: {product.ingredients}")
            print(f"    Lieux de vente: {product.list_of_stores}")
            print("_______________________________")
            print()
    else:
        print("Il n'y a pas de produit de substitution pour cet", end=" ")
        print("article dans la base de données !")
        print()
    choice = input(": ")
    try:
        choice = int(choice)
        event_handler(
            SUBSTITUTE,
            GET_A_BETTER_FOOD_PAGE,
            choice,
        )
    except IndexError:
        event_handler(SUBSTITUTE, GET_A_BETTER_FOOD_PAGE, CHOICE_ERROR)
    except ValueError:
        event_handler(SUBSTITUTE, GET_A_BETTER_FOOD_PAGE, CHOICE_ERROR)
    except Exception as e:
        e_traceback = traceback.format_exc()
        logger.error(f"""
        {ERROR_COLOR}
        ******************************************
        {e_traceback}
        ******************************************
        {str(e)}""")
        sys.exit(ic())
    self.show(controller_food_categories, event_handler,
              clear_page_and_print_title, menu_header, get_food_cat_index)
def getGoodInfos(self, category, pageurl):
    try:
        logger.debug('pageurl: {}'.format(pageurl))
        result_datas = []
        timeout = 60
        startTime = datetime.now()
        endTime = datetime.now()
        while (endTime - startTime).seconds < timeout:
            try:
                page_source = self.getHtml(pageurl, self.header)
                pattern = re.compile(r'g_page_config = [\s\S]*?\};', re.M)
                page_infos = pattern.findall(page_source)[0]
                if page_infos.startswith('g_page_config = null;'):
                    raise ValueError
                page_infos = page_infos[16:-1]
                page_infos = page_infos.replace('false', 'False').replace(
                    'true', 'True')
                page_infos = ast.literal_eval(page_infos)
                break
            except Exception:
                endTime = datetime.now()
        else:
            raise TimeoutException('getGoodInfos timeout')

        goods_infos = page_infos.get('mods').get('itemlist').get(
            'data').get('auctions')
        for goods_info in goods_infos:
            resultData = {}
            resultData['CHANNEL'.lower()] = self.Channel
            resultData['KIND'.lower()] = category
            resultData['SITE'.lower()] = 's.taobao'
            resultData['STATUS'.lower()] = '01'
            try:
                good_id = goods_info['nid']
                resultData['PRODUCT_ID'.lower()] = good_id
                good_url = self.good_url.format(good_id)
                resultData['LINK'.lower()] = good_url
                good_img_big = goods_info['pic_url']
                resultData['MAIN_IMAGE'.lower()] = self.http_url.format(
                    good_img_big)
                resultData['NAME'.lower()] = goods_info['raw_title']
                resultData['DESCRIPTION'.lower()] = goods_info.get('nick')
                resultData['Currency'.lower()] = self.currency
                resultData['AMOUNT'.lower()] = goods_info['view_price']
                resultData['CREATE_TIME'.lower()] = time.strftime(
                    '%Y%m%d%H%M%S', time.localtime(time.time()))
                good_dealcnt = goods_info.get('view_sales', '0')
                pattern = re.compile(r'^\d+', re.M)
                good_dealcnt = int(pattern.findall(good_dealcnt)[0])
                resultData['DISPLAY_COUNT'.lower()] = good_dealcnt
                result_datas.append(resultData)
            except Exception, e:
                logger.error('error: {}'.format(e))
                logger.error('goods_info: {}'.format(goods_info))
                continue

        if len(goods_infos) != len(result_datas) or not result_datas:
            logger.error('len goods_infos: {},len result_datas: {}'.format(
                goods_infos, result_datas))
            logger.error('result_datas: {}'.format(result_datas))
            raise ValueError('get result_datas error')
        return result_datas
app.config['DATABASE_HOST'] = 'localhost'

try:
    # Connection loading
    # conn = psycopg2.connect("dbname='%s' user='******' host='%s' password='******'" % (app.config['DATABASE_DB'],
    #                                                                                    app.config['DATABASE_USER'],
    #                                                                                    app.config['DATABASE_HOST'],
    #                                                                                    app.config['DATABASE_PASSWORD']))
    conn = psycopg2.connect(database=app.config['DATABASE_DB'],
                            user=app.config['DATABASE_USER'],
                            password=app.config['DATABASE_PASSWORD'],
                            host=app.config['DATABASE_HOST'])
    cursor = conn.cursor(cursor_factory=RealDictCursor)
except:
    logger.error("DATABASE ACCESS INITIALIZATION ERROR")
    exit()

try:
    # Resources loading
    pharmacie_resource = Pharmacie(app, conn, cursor)
    merchandising_resource = Merchandising(app, conn, cursor)
    ventes_resource = Vente(app, conn, cursor)
    personnel_resource = Personnel(app, conn, cursor)
    salaire_resource = Salaire(app, conn, cursor)
except:
    logger.error("RESOURCES ACCESS INITIALIZATION ERROR")
    exit()

### Root REST API endpoint: display all available registered routes
def read_netcdf_file(pool, rainnc_net_cdf_file_path, source_id, variable_id, unit_id, tms_meta, fgt): """ :param pool: database connection pool :param rainnc_net_cdf_file_path: :param source_id: :param variable_id: :param unit_id: :param tms_meta: :return: rainc_unit_info: mm lat_unit_info: degree_north time_unit_info: minutes since 2019-04-02T18:00:00 """ if not os.path.exists(rainnc_net_cdf_file_path): logger.warning('no rainnc netcdf') print('no rainnc netcdf') else: """ RAINNC netcdf data extraction """ nnc_fid = Dataset(rainnc_net_cdf_file_path, mode='r') time_unit_info = nnc_fid.variables['XTIME'].units time_unit_info_list = time_unit_info.split(' ') lats = nnc_fid.variables['XLAT'][0, :, 0] lons = nnc_fid.variables['XLONG'][0, 0, :] lon_min = lons[0].item() lat_min = lats[0].item() lon_max = lons[-1].item() lat_max = lats[-1].item() print('[lon_min, lat_min, lon_max, lat_max] :', [lon_min, lat_min, lon_max, lat_max]) lat_inds = np.where((lats >= lat_min) & (lats <= lat_max)) lon_inds = np.where((lons >= lon_min) & (lons <= lon_max)) rainnc = nnc_fid.variables['RAINNC'][:, lat_inds[0], lon_inds[0]] times = nnc_fid.variables['XTIME'][:] # ts_start_date = datetime.strptime(time_unit_info_list[2], '%Y-%m-%dT%H:%M:%S') # ts_end_date = datetime.strptime(time_unit_info_list[2], '%Y-%m-%dT%H:%M:%S') + timedelta( # minutes=float(sorted(set(times))[-2])) # # start_date = datetime_utc_to_lk(ts_start_date, shift_mins=0).strftime('%Y-%m-%d %H:%M:%S') # end_date = datetime_utc_to_lk(ts_end_date, shift_mins=0).strftime('%Y-%m-%d %H:%M:%S') start_date = fgt end_date = fgt nnc_fid.close() diff = get_per_time_slot_values(rainnc) width = len(lons) height = len(lats) ts = Timeseries(pool) for y in range(height): for x in range(width): lat = float('%.6f' % lats[y]) lon = float('%.6f' % lons[x]) tms_meta['latitude'] = str(lat) tms_meta['longitude'] = str(lon) station_prefix = '{}_{}'.format(lat, lon) station_id = wrf_v3_stations.get(station_prefix) if station_id is None: add_station(pool=pool, name=station_prefix, latitude=lat, longitude=lon, description="WRF point", station_type=StationEnum.WRF) tms_id = ts.get_timeseries_id_if_exists(tms_meta) logger.info("Existing timeseries id: {}".format(tms_id)) if tms_id is None: tms_id = ts.generate_timeseries_id(tms_meta) logger.info('HASH SHA256 created: {}'.format(tms_id)) run = (tms_id, tms_meta['sim_tag'], start_date, end_date, station_id, source_id, variable_id, unit_id) try: ts.insert_run(run) except Exception: logger.error( "Exception occurred while inserting run entry {}". format(run)) traceback.print_exc() else: ts.update_latest_fgt(id_=tms_id, fgt=fgt) # to run forward # ts.update_start_date(id_=tms_id, start_date=fgt) data_list = [] # generate timeseries for each station for i in range(len(diff)): ts_time = datetime.strptime( time_unit_info_list[2], '%Y-%m-%dT%H:%M:%S') + timedelta( minutes=times[i + 1].item()) t = datetime_utc_to_lk(ts_time, shift_mins=0) data_list.append([ tms_id, t.strftime('%Y-%m-%d %H:%M:%S'), fgt, float(diff[i, y, x]) ]) push_rainfall_to_db(ts=ts, ts_data=data_list)
import requests
import os
from bs4 import BeautifulSoup
from tinydb import TinyDB, Query
from notifier import send_notification
from logger import logger

dir = os.path.dirname(os.path.realpath(__file__))
db = TinyDB(os.path.join(dir, 'india_database.json'))

try:
    html_doc = requests.get("https://www.mohfw.gov.in/").text
    soup = BeautifulSoup(html_doc, 'html.parser')
    rows = soup.find_all('tr')[1:-1]
except Exception as e:
    logger.error(e)  # Error in accessing the webpage

updations, insertions = [], []
try:
    for row in rows:
        info = [x.get_text().strip() for x in row.find_all('td')]
        if len(info) == 6:
            _, state, tot, fr, rec, died = info
            State = Query()
            matches = db.search(State.name == state)
            if matches:
                match = matches[0]
                updated, obj = False, {"name": state}
                tot_old, fr_old, rec_old, died_old = \
                    match["total_cases"], match["foreign_nationals"], \
def main(): args = parse_args() ctx = mx.gpu(args.gpu) # load_classifiers('model', ctx) load_classifier('model/bottle', ctx) # load_classifier('model/box', ctx) rgb_mean = [0, 0, 0] while True: try: if not runnable: logger.info('EXIT NOW!') break data = None for priority in range(COUNT_PRIORITY_QUEUES): queue = 'SNAPSHOT_CLASSIFICATION_INPUT_{}'.format(priority) data = dequeue(queue, False) if data: logger.info('Dequeued from {}: {}'.format(queue, data)) break if not data: time.sleep(0.1) continue image_key = data['image_file'] bboxes = data['bboxes'] context = data.get('context', None) classification_type = data.get('type', 1) # 1: bottle, 2: box output_queue = data['output_queue'] from_detectron = data.get('from_detectron', False) others_threshold = data.get('others_threshold', 0.7) logger.info('Processing {}...'.format(image_key)) result = { 'context': context, 'enqueue_at': time.time() } im = image_store.get_as_image(image_key) if im is None: continue if len(bboxes) == 0: result['detections'] = [] else: if not from_detectron: # 标注工具结果 rotate = data.get('rotate', 0) if rotate != 0: rot_mat = cv2.getRotationMatrix2D((im.shape[1] / 2, im.shape[0] / 2), rotate, 1) im = cv2.warpAffine(im, rot_mat, (im.shape[1], im.shape[0])) bboxes_ = [] for bbox in bboxes: bbox[0] = min(max(bbox[0], 0), im.shape[1]) bbox[1] = min(max(bbox[1], 0), im.shape[0]) bbox[2] = min(max(bbox[2], 0), im.shape[1]) bbox[3] = min(max(bbox[3], 0), im.shape[0]) if bbox[2] - bbox[0] > 0 and bbox[3] - bbox[1] > 0: bboxes_.append(bbox) bboxes = bboxes_ patches = [] patches += generate_patches(im, rgb_mean, bboxes) if H_FLIP_AUG: patches += generate_patches_h_flip(im, rgb_mean, bboxes) if SCALE_720_AUG: patches += generate_patches_resize(im, rgb_mean, bboxes, 720) cls_names, scores = classify_patches(patches, classification_type, 128, others_threshold) count_groups = len(patches) / len(bboxes) if count_groups > 1: cls_names, scores = merge_groups(cls_names, scores, count_groups) detections = [] for i, bbox in enumerate(bboxes): detections.append(bbox + [cls_names[i]] + [np.asscalar(scores[i])]) result['detections'] = detections else: # Detectron检出后进一步分类 patches = [] patches += generate_patches(im, rgb_mean, bboxes) if H_FLIP_AUG: patches += generate_patches_h_flip(im, rgb_mean, bboxes) if SCALE_720_AUG: patches += generate_patches_resize(im, rgb_mean, bboxes, 720) cls_names, scores = classify_patches(patches, classification_type, 128, others_threshold) count_groups = len(patches) / len(bboxes) if count_groups > 1: cls_names, scores = merge_groups(cls_names, scores, count_groups) cls_dets = {} for i, bbox in enumerate(bboxes): score = np.asscalar(scores[i]) cls_dets.setdefault(cls_names[i], []).append(bbox + [score]) if score < 0.95: save_patch(image_key, im, bbox, cls_names[i], score) # if classification_type != 1 and '其他' in cls_dets: # box中过滤“其他”,很有可能是错误检出 # del cls_dets['其他'] result['class_detections'] = cls_dets logger.info('Enqueued to {}: {}'.format(output_queue, result)) enqueue(output_queue, result) except Exception as e: logger.error(e, exc_info=True)
            raise ValueError('dealCategorys get no resultDatas ')
        format_select = 'SELECT ID,STATUS FROM {} WHERE CHANNEL="{{channel}}" AND PRODUCT_ID="{{product_id}}" ORDER BY CREATE_TIME DESC'
        good_datas = resultDatas
        select_sql = format_select.format(self.TABLE_NAME_PRODUCT)
        table = self.TABLE_NAME_PRODUCT
        replace_insert_columns = [
            'CHANNEL', 'KIND', 'SITE', 'PRODUCT_ID', 'LINK', 'MAIN_IMAGE',
            'NAME', 'DETAIL_IMAGE', 'DESCRIPTION', 'Currency', 'AMOUNT',
            'CREATE_TIME', 'DISPLAY_COUNT', 'STATUS'
        ]
        select_columns = ['ID', 'STATUS']
        return self._saveDatas(good_datas, table, select_sql,
                               replace_insert_columns, select_columns)
    except Exception, e:
        logger.error('dealCategorys error: {}'.format(e))

# @retry(stop_max_attempt_number=10, wait_fixed=2000)
# def __getHtmlselenium(self, url):
#     driver = None
#     try:
#         driver = webdriver.PhantomJS(executable_path=self.phantomjs_path)
#         # page load timeout
#         driver.set_page_load_timeout(30)
#         driver.set_script_timeout(30)
#         driver.get(url)
#         driver.implicitly_wait(10)
#         driver.find_element_by_xpath('//*[@id="mainsrp-itemlist"]/div/div/div[1]')
#         page = driver.page_source.encode('utf-8') if isinstance(driver.page_source, (str, unicode)) else driver.page_source
#         logger.debug('driver.page_source: {}'.format(page))
#         return page
def file_to_elasticsearch(p_docin, p_type, p_es_conn, p_es_index, p_arguments): doc = {} if p_type == "communes_pj": commune_pj = { 'code_localite_pj': p_docin[0], 'code_localite_insee': p_docin[1], 'code_localite_insee_pj': p_docin[2], 'libelle': p_docin[3], 'principale': True if p_docin[4] == "1" else False, } doc = [{"_id": p_docin[1], "_type": p_type, "_source": commune_pj}] return doc if p_type == "communes": tab_communes = [] for commune in p_docin['features']: code_commune = commune['properties']['code'] # Enrichissement de la commune avec le code localite Pages Jaunes try: es_doc_commune_pj = p_es_conn.get(id=code_commune, doc_type='communes_pj', index=p_es_index) except TransportError as e: logger.info( "Commune %s non présente dans le référentiel communes Pages Jaunes", code_commune) else: if es_doc_commune_pj and len(es_doc_commune_pj) > 0: code_localite_pj = es_doc_commune_pj['_source'][ 'code_localite_pj'] commune['properties']['code_pj'] = code_localite_pj tab_communes.append({ "_id": code_localite_pj, "_type": p_type, "_source": commune }) else: logger.info( "Code commune %s erroné dans le référentiel communes Pages Jaunes", code_commune) return tab_communes elif p_type == "regions": tab_regions = [] for region in p_docin['features']: tab_regions.append({ "_id": 'R' + region['properties']['code'], "_type": p_type, "_source": region }) return tab_regions elif p_type == "departements": tab_departements = [] for departement in p_docin['features']: tab_departements.append({ "_id": 'D' + departement['properties']['code'], "_type": p_type, "_source": departement }) return tab_departements elif p_type == "requetes": doc = p_docin['_source'] # Si localité : enrichissement de la donnée avec le centroide de la localité if 'typegeosimple' in doc and doc['typegeosimple'] == "L": try: code_commune_pj = doc['idlocalite'] es_commune = p_es_conn.get(id=code_commune_pj, doc_type='communes', index=p_es_index) except TransportError as e: logger.info( "Commune %s non présente dans le référentiel communes", code_commune_pj) else: if es_commune and len(es_commune) > 0: doc['position'] = { 'lat': es_commune['_source']['properties']['centroide_y'], 'lng': es_commune['_source']['properties']['centroide_x'] } else: logger.error( 'Erreur lors de la récupération de la commune %s', code_commune_pj) returned_doc = {"_type": p_type, "_source": doc} return [returned_doc]
        'longitude'  : longitude,
        'model'      : model,
        'version'    : version,
        'variable'   : variable,
        'unit'       : unit,
        'unit_type'  : unit_type
        }
    """
    try:
        config = json.loads(open('list_config.json').read())

        # source details
        if 'wrf_dir' in config and (config['wrf_dir'] != ""):
            wrf_dir = config['wrf_dir']
        else:
            logger.error("wrf_dir not specified in config file.")
            exit(1)

        if 'model' in config and (config['model'] != ""):
            model = config['model']
        else:
            logger.error("model not specified in config file.")
            exit(1)

        if 'version' in config and (config['version'] != ""):
            version = config['version']
        else:
            logger.error("version not specified in config file.")
            exit(1)

        if 'wrf_model_list' in config and (config['wrf_model_list'] != ""):
                '{} Incorrect format for key "{}". Should be {} but was '
                'a {}'.format(error_msg, key, req_types, key_type))

    # City and topic slugs must be lower case
    cities_topics = program_data.get('cities') + program_data.get('topics')
    for ct in cities_topics:
        if ct.lower() != ct:
            errors.append(
                '{} City/Topic slug {} must be lower cased.'.format(
                    error_msg, ct))
        elif ct not in all_cities_topics:
            errors.append('{} City/Topic {} not found in data. '
                          'Please add it to the correct yml file.'.format(
                              error_msg, ct))

    return errors


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Parse files to be checked')
    parser.add_argument('--directory',
                        help='The bootcamp directory to be validated')
    args = parser.parse_args()
    directory = args.directory
    errors = main(args.directory)
    if errors:
        logger.error('\n{}\n'.format('\n'.join(errors)))
        sys.exit(1)
    sys.exit(0)
def __init__(self, wallet_id, *args, **kwargs):
    super(WalletError, self).__init__(self.__doc__.lower(), *args, **kwargs)
    if self.log_message_template:
        logger.error(self.log_message_template.format(wallet_id))
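# A minimal sketch of the pattern used by the __init__ above: the class docstring doubles as the
# exception message and log_message_template drives what gets logged. The InsufficientFunds
# subclass and the logger wiring shown here are illustrative assumptions, not from the original source.
import logging

logger = logging.getLogger(__name__)

class WalletError(Exception):
    """Generic wallet error."""
    log_message_template = None

    def __init__(self, wallet_id, *args, **kwargs):
        super(WalletError, self).__init__(self.__doc__.lower(), *args, **kwargs)
        if self.log_message_template:
            logger.error(self.log_message_template.format(wallet_id))

class InsufficientFunds(WalletError):
    """Insufficient funds."""
    log_message_template = 'Wallet {} has insufficient funds'

try:
    raise InsufficientFunds('w-123')
except WalletError as e:
    print(e)   # -> "insufficient funds."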
    except:
        logger.info('Cannot parse hotel star num!')
        hotel.star = -1.0

    try:
        grade_temp = grade_pat.findall(content)[0]
        hotel.grade = grade_temp
    except Exception, e:
        logger.info('Cannot parse hotel grade num!' + str(e))
        hotel.grade = 'NULL'

    try:
        address_temp = address_pat.findall(content)[0]
        hotel.address = address_temp.strip()
    except Exception, e:
        logger.error('Can not parse hotel address!' + str(e))
        return []

    try:
        image_url_temp = image_url_pat.findall(content)
        # print image_url_temp
        image_url = ''
        if len(image_url_temp) > 0:
            for each_url in image_url_temp:
                image_url = image_url + each_url.strip() + '|'
            if len(image_url) > 10:
                hotel.img_items = image_url[:-1]
        else:
            logger.info('Can not parse hotel image urls!')
            hotel.img_items = 'NULL'
    except Exception, e:
async def plot_monitor(request):
    """
    Plot monitoring data.
    :param request:
    :return:
    """
    data = await request.post()
    logger.debug(f'Visualization API request parameters: {data}')
    host = data.get('host')             # client server IP
    start_time = data.get('startTime')  # start time of the monitoring data
    end_time = data.get('endTime')      # end time of the monitoring data
    type_ = data.get('type')            # visualization type: plot port results or system results
    port_pid = data.get('port')         # port number
    disk = data.get('disk')             # disk identifier
    try:
        if type_ == 'port':
            res = draw_data_from_db(host=host, port=port_pid, start_time=start_time,
                                    end_time=end_time, disk=disk)
            if res['code'] == 0:
                raise Exception(res['message'])
            res.update(
                master.get_gc(
                    host,
                    master.slaves['port'][master.slaves['ip'].index(host)],
                    f'getGC/{port_pid}'))
            return aiohttp_jinja2.render_template('figure.html', request, context={
                'img': res['img'], 'line75': res['line75'], 'line90': res['line90'],
                'line95': res['line95'], 'line99': res['line99'],
                'ygc': res['ygc'], 'ygct': res['ygct'], 'fgc': res['fgc'],
                'fgct': res['fgct'], 'fygc': res['fygc'], 'ffgc': res['ffgc']})
        if type_ == 'pid':
            res = draw_data_from_db(host=host, pid=port_pid, start_time=start_time,
                                    end_time=end_time, disk=disk)
            if res['code'] == 0:
                raise Exception(res['message'])
            res.update(
                master.get_gc(
                    host,
                    master.slaves['port'][master.slaves['ip'].index(host)],
                    f'getGC/{port_pid}'))
            return aiohttp_jinja2.render_template('figure.html', request, context={
                'img': res['img'], 'line75': res['line75'], 'line90': res['line90'],
                'line95': res['line95'], 'line99': res['line99'],
                'ygc': res['ygc'], 'ygct': res['ygct'], 'fgc': res['fgc'],
                'fgct': res['fgct'], 'fygc': res['fygc'], 'ffgc': res['ffgc']})
        if type_ == 'system':
            res = draw_data_from_db(host=host, start_time=start_time, end_time=end_time,
                                    system=1, disk=disk)
            if res['code'] == 0:
                raise Exception(res['message'])
            return aiohttp_jinja2.render_template('figure.html', request, context={
                'img': res['img'], 'line75': res['line75'], 'line90': res['line90'],
                'line95': res['line95'], 'line99': res['line99'],
                'ygc': -1, 'ygct': -1, 'fgc': -1, 'fgct': -1, 'fygc': -1, 'ffgc': -1})
    except Exception as err:
        logger.error(err)
        return aiohttp_jinja2.render_template('warn.html', request, context={'msg': err})
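# A minimal sketch of how the handler above could be wired into an aiohttp application
# with jinja2 templates; the route path, template directory and port are assumptions.
import jinja2
import aiohttp_jinja2
from aiohttp import web

def create_app():
    app = web.Application()
    # figure.html and warn.html are expected to live in this (assumed) template directory
    aiohttp_jinja2.setup(app, loader=jinja2.FileSystemLoader('templates'))
    app.router.add_post('/plot', plot_monitor)  # hypothetical route path
    return app

if __name__ == '__main__':
    web.run_app(create_app(), port=8080)  # assumed port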
                logger.error('page_source: {}'.format(page_source))
                raise ValueError('did not get valid data')
            format_select = r'SELECT ID FROM {} WHERE CHANNEL="{{channel}}" and LINK="{{link}}" ORDER BY CREATE_TIME DESC'
            good_datas = result_datas
            select_sql = format_select.format(self.TABLE_NAME_BANNER)
            table = self.TABLE_NAME_BANNER
            replace_insert_columns = [
                'CHANNEL', 'LINK', 'TITLE', 'MAIN_IMAGE', 'CREATE_TIME', 'STATUS'
            ]
            select_columns = ['ID']
            return self._saveDatas(good_datas, table, select_sql,
                                   replace_insert_columns, select_columns)
        except Exception, e:
            logger.error('Get home goods infos error: {}, retry it'.format(e))
            raise


def main():
    startTime = datetime.now()
    objCaptureShopee = CaptureShopee(useragent)
    # Fetch all category ids
    # objCaptureShopee.get_department()
    # Query goods info for every category and save it to the database
    objCaptureShopee.dealCategorys()
    # # Query the homepage recommended goods info and save it to the database
    # objCaptureShopee.dealHomeGoods()
    # print objCaptureShopee.getGoodInfos('aaaa','https://shopee.sg/Mobile-Gadgets-cat.8?page=0')
    # print objCaptureShopee.getHtml('https://shopee.sg/api/banner/get_list?type=activity',objCaptureShopee.header)
    # print objCaptureShopee.getHtmlselenium('https://shopee.sg')
def _fetch(url, dest_filename, multipart_chunksize):
    '''Download a single large file.'''
    t0 = time.time()

    # If no local filename was given, default to the last part of the URL
    official_filename = dest_filename if dest_filename else url.split('/')[-1]  # final filename
    temp_filename = official_filename + '.swp'  # temporary filename while the download is incomplete
    config_filename = official_filename + '.swp.cfg'  # config file storing the ETag etc. while the download is incomplete

    # Get the file size and ETag
    r = custom_request('HEAD', url, info='header message')
    if not r:  # on failure, r is None
        logger.error('Failed to get header message on URL [{}]'.format(url))
        return
    file_size = int(r.headers['Content-Length'])
    ETag = r.headers['ETag']
    logger.debug('[{}] file size: {} bytes, ETag: {}'.format(official_filename, file_size, ETag))

    # If the final file already exists
    if os.path.exists(official_filename):
        if os.path.getsize(official_filename) == file_size:  # and its size matches the remote file
            logger.warning('The file [{}] has already been downloaded'.format(official_filename))
            return
        else:  # sizes differ: warn that the target filename exists and must be handled manually rather than overwritten
            logger.warning('The filename [{}] already exists, but it does not match the remote file'.format(official_filename))
            return

    # First check whether the file supports Range downloads by requesting just the first byte
    headers = {'Range': 'bytes=0-0'}
    r = custom_request('HEAD', url, info='Range: bytes=0-0', headers=headers)
    if not r:  # on failure, r is None
        logger.error('Failed to get [Range: bytes=0-0] on URL [{}]'.format(url))
        return

    if r.status_code != 206:  # Range downloads are not supported
        logger.warning('The file [{}] does not support breakpoint retransmission'.format(official_filename))
        # Download the whole file from scratch ('wb' mode)
        with tqdm(total=file_size, unit='B', unit_scale=True, unit_divisor=1024, desc=official_filename) as bar:  # progress bar with live download speed
            r = custom_request('GET', url, info='all content', stream=True)
            if not r:  # on failure, r is None
                logger.error('Failed to get all content on URL [{}]'.format(url))
                return
            with open(temp_filename, 'wb') as fp:
                for chunk in r.iter_content(chunk_size=multipart_chunksize):
                    if chunk:
                        fp.write(chunk)
                        bar.update(len(chunk))
    else:  # Range downloads are supported
        # If the temporary file exists
        if os.path.exists(temp_filename):
            start = os.path.getsize(temp_filename)  # size of the temporary file
            if start >= file_size:  # the temporary file is invalid, delete it first
                os.remove(temp_filename)
            else:  # the temporary file looks valid (not handling a user deliberately creating a same-named, smaller temp file)
                if not os.path.exists(config_filename):  # if the config file does not exist
                    os.remove(temp_filename)
                else:  # the config file exists too, so check whether the ETag still matches
                    with open(config_filename, 'r') as fp:
                        cfg = json.load(fp)
                        if cfg['ETag'] != ETag:  # ETag changed
                            os.remove(temp_filename)

        # Check the temporary file again; if it no longer exists, download from the beginning
        if not os.path.exists(temp_filename):
            start = 0
            open(temp_filename, 'a').close()  # create an empty temporary file
            with open(config_filename, 'w') as fp:  # create the config file and write the ETag
                cfg = {'ETag': ETag}
                json.dump(cfg, fp)

        # Use the Range HTTP header to download only the remaining bytes ('ab' mode)
        logger.debug('[{}] download from [Range: bytes={}-]'.format(official_filename, start))
        headers = {'Range': 'bytes=%d-' % start}  # no need to add 1 to start
        with tqdm(total=file_size, initial=start, unit='B', unit_scale=True, unit_divisor=1024, desc=official_filename) as bar:  # progress bar with live download speed
            r = custom_request('GET', url, info='Range: bytes={}-'.format(start), headers=headers, stream=True)
            if not r:  # on failure, r is None
                logger.error('Failed to download [Range: bytes={}-] on URL [{}]'.format(start, url))
                return
            with open(temp_filename, 'ab') as fp:
                for chunk in r.iter_content(chunk_size=multipart_chunksize):  # the Range can be large, so still download chunk by chunk
                    if chunk:
                        fp.write(chunk)
                        bar.update(len(chunk))

    # After the whole file has been downloaded, rename the temporary file to the final name and delete the config file
    if os.path.getsize(temp_filename) == file_size:  # guard against network failures
        os.rename(temp_filename, official_filename)
        if os.path.exists(config_filename):
            os.remove(config_filename)
        logger.debug('[{}] downloaded'.format(official_filename))
        logger.debug('Cost {:.2f} seconds'.format(time.time() - t0))
    else:
        logger.error('Failed to download {}'.format(official_filename))
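# _fetch relies on a custom_request helper that is not shown in this excerpt. A minimal
# sketch of what it might look like, assuming it wraps requests.request with basic retry
# handling; the retry count and timeout are assumptions.
import requests

def custom_request(method, url, info='', retries=3, timeout=10, **kwargs):
    """Hypothetical wrapper: return the response on success, or None after repeated failures."""
    for attempt in range(1, retries + 1):
        try:
            r = requests.request(method, url, timeout=timeout, **kwargs)
            r.raise_for_status()
            return r
        except requests.RequestException as e:
            logger.warning('Attempt {}/{} for {} ({}) failed: {}'.format(attempt, retries, url, info, e))
    return None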
def execute_commands(conn, command_list, rest, host_ip, testName): global couchbaseConnection, version, runStartTime failure_count = 0 for command in command_list: # print 'command', command command_succeeded = True total_elapsed_time = 0.0 total_execution_time = 0.0 if 'index' in command: key = 'index' response = rest.exec_n1ql_stmnt(host_ip, command['index']) body = response.json() # json.loads(response.data.decode('utf8')) avg_elapsed = total_elapsed_time + get_time_in_millisec( body['metrics']['elapsedTime']) avg_execution = total_execution_time + get_time_in_millisec( body['metrics']['executionTime']) elif 'query' in command: key = 'query' query = generate_prepared_query(conn, command['query']) for i in range(0, command['execution_count']): response = conn.request('POST', '/query/service', fields=query, encode_multipart=False) response.read(cache_content=False) body = json.loads(response.data.decode('utf8')) total_elapsed_time = total_elapsed_time + get_time_in_millisec( body['metrics']['elapsedTime']) total_execution_time = total_execution_time + get_time_in_millisec( body['metrics']['executionTime']) avg_elapsed = float('{0:.2f}'.format(total_elapsed_time / command['execution_count'])) avg_execution = float('{0:.2f}'.format(total_execution_time / command['execution_count'])) log = key + ' {0} - average elapsed {1}, average execution time {2}.'.format( command[key], avg_elapsed, avg_execution) if avg_elapsed > (UPPER_BOUND * command['expected_elapsed_time']): log += ' Elapsed too long - expected {0}.'.format( command['expected_elapsed_time']) command_succeeded = False elif avg_elapsed < (LOWER_BOUND * command['expected_elapsed_time']): log += ' Elapsed too short - expected {0}.'.format( command['expected_elapsed_time']) command_succeeded = False if avg_execution > (UPPER_BOUND * command['expected_execution_time']): log += ' Execution too long - expected {0}.'.format( command['expected_execution_time']) command_succeeded = False elif avg_execution < (LOWER_BOUND * command['expected_execution_time']): log += ' Execution too short - expected {0}.'.format( command['expected_execution_time']) command_succeeded = False if command_succeeded: logger.info(log) else: failure_count = failure_count + 1 logger.error(log) val = { "actualValue": avg_execution, "expectedValue": command['expected_execution_time'], "build": version, "runStartTime": runStartTime, "pass": command_succeeded, "testMetric": command['queryDesc'], "testStartTime": time.strftime("%m/%d/%y-%H:%M:%S", time.strptime(time.ctime())), "testName": testName } key = runStartTime + '-' + testName + '-' + command['queryDesc'] couchbaseConnection.add(key, val) return failure_count == 0
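# execute_commands depends on get_time_in_millisec, which is not defined in this excerpt.
# A possible sketch, assuming the N1QL metrics report durations as Go-style strings such
# as '750.2ms', '1.5s' or '2m3.4s'; the unit coverage here is an assumption.
def get_time_in_millisec(duration):
    """Hypothetical parser that converts a duration string to milliseconds."""
    units = {'ms': 1.0, 's': 1000.0, 'm': 60000.0, 'h': 3600000.0}
    total, number, i = 0.0, '', 0
    while i < len(duration):
        ch = duration[i]
        if ch.isdigit() or ch == '.':
            number += ch
            i += 1
        else:
            # match 'ms' before falling back to the single-character units
            unit = 'ms' if duration[i:i + 2] == 'ms' else ch
            total += float(number) * units[unit]
            number, i = '', i + len(unit)
    return total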
def getGoodInfos(self, category, category_url, page_url): headers = { "cookie": "csrftoken=se23P6QTCViDCMbZuVNgZXs2rqohW4ZA", "referer": category_url, "x-csrftoken": "se23P6QTCViDCMbZuVNgZXs2rqohW4ZA" } result_datas = [] format_url = '{}-i.{}.{}' image_url = 'https://cfshopeesg-a.akamaihd.net/file/{}' try: iterms_infos = self.getHtml(page_url, self.header) iterms_infos = json.loads(iterms_infos)['items'] page_url = 'https://shopee.sg/api/v1/items/' data = {"item_shop_ids": iterms_infos} iterms_infos = self.getHtml(page_url, headers, data=json.dumps(data)) goods_infos = json.loads(iterms_infos) for goods_info in goods_infos: resultData = {} resultData['CHANNEL'.lower()] = self.Channel resultData['KIND'.lower()] = category resultData['SITE'.lower()] = 'category' resultData['STATUS'.lower()] = '01' good_title = goods_info.get('name') good_title = self.filter_emoji(good_title) good_title = good_title.encode('utf-8') resultData['NAME'.lower()] = good_title shopid = goods_info.get('shopid') itemid = goods_info.get('itemid') link_name = good_title.replace(' ', '-').replace( '100%', '100%25').replace(':', '').replace( '™', '%E2%84%A2').replace('15%', '15') link_after = format_url.format(link_name, shopid, itemid) good_link = urljoin(self.home_url, link_after) resultData['LINK'.lower()] = good_link # try: # self.getHtml(good_link, {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'}) # except Exception: # print good_link resultData['PRODUCT_ID'.lower()] = itemid resultData['MAIN_IMAGE'.lower()] = image_url.format( goods_info.get('image')) BeforePriceInfo = goods_info.get('price_before_discount', 0) resultData['Before_AMOUNT'.lower()] = self.format_price( BeforePriceInfo, 5) PriceInfo = goods_info.get('price', 0) resultData['AMOUNT'.lower()] = self.format_price(PriceInfo, 5) resultData['Currency'.lower()] = goods_info.get('currency') resultData['DISPLAY_COUNT'.lower()] = goods_info.get( 'show_discount') resultData['COMMEND_FLAG'.lower()] = '1' if goods_info.get( 'is_shopee_verified') else '0' resultData['CREATE_TIME'.lower()] = time.strftime( '%Y%m%d%H%M%S', time.localtime(time.time())) result_datas.append(resultData) if not result_datas: raise ValueError('get result_datas error') return result_datas except Exception, e: logger.error('getGoodInfos error:{}'.format(e)) logger.error('category: {},category_url: {}'.format( category, category_url)) raise
"./models/SVR_files/Model_SVR_rs_gridcv.sav") SVR_regr_CV_model_ts = joblib.load( "./models/SVR_files/Model_SVR_ts_gridcv.sav") SVR_regr_CV_model_ts_tscv = joblib.load( "./models/SVR_files/Model_SVR_ts_tscv.sav") logger.info("\nLoading Neural Nets\n") NN_regr_CV_model_rs_gridcv = joblib.load( "./models/NN_MLP_files/Model_MLP_rs_gridcv.sav") NN_regr_CV_model_ts_gridcv = joblib.load( "./models/NN_MLP_files/Model_MLP_ts_gridcv.sav") NN_regr_CV_model_ts_tscv = joblib.load( "./models/NN_MLP_files/Model_MLP_ts_tscv.sav") except: logger.error( "Something did not work! Could not load models! Execute script 4 again!" ) # print("NN_regr_CV_model_ts_tscv: ", NN_regr_CV_model_ts_tscv.__module__) # print(neural_net) # predict_test_df(NN_regr_CV_model_ts_tscv) predict_test_df(random_forest_ts_tscv, random_forest_rs_gridcv, random_forest_model_ts_gridcv, SVR_regr_CV_model_rs, SVR_regr_CV_model_ts, SVR_regr_CV_model_ts_tscv, NN_regr_CV_model_rs_gridcv, NN_regr_CV_model_ts_gridcv, NN_regr_CV_model_ts_tscv, catboost_rs, catboost_ts_gridcv, catboost_model_ts_tscv).to_sql("predicted_df", connection, if_exists="replace",
def main(table_list, partition_field, p0, days, save_local_dir):
    global root_dir
    # Argument validation
    # Validate the table list
    if not table_list:
        logger.error("The list of table names is empty")
        return
    # Validate the number of days
    try:
        days = int(days)
    except Exception as e:
        logger.error("days is not a valid integer")
        return
    # Validate the local save directory
    if not os.path.isdir(save_local_dir):
        logger.error("The given local directory does not exist")
        return
    # Validate the date
    try:
        datetime.datetime.strptime(p0, '%Y-%m-%d')
    except Exception as e:
        logger.error("p0 is not in the expected date format")
        return

    # How many lines the first file contains after splitting
    LINE_SIZE = 50000
    # LINE_SIZE = 5000

    # Working directory for file operations
    hdfs_file_operator_dir = "hdfs_file_temp"
    hdfs_file_operator_dir = root_dir + "/" + hdfs_file_operator_dir

    # HDFS path template
    HDFS_PATH_TEMPLE = "/user/hive/warehouse/leesdata.db/{tablename}/{partition_field}={p0}/"
    # HDFS_PATH_TEMPLE = "/user/hive/warehouse/leestest.db/{tablename}/{partition_field}={p0}/"

    # Process every table in the given list
    for tablename in table_list:
        # Substitute the table name into the HDFS path template
        hdfs_path_table = HDFS_PATH_TEMPLE.replace("{tablename}", tablename)
        hdfs_path_table = hdfs_path_table.replace("{partition_field}", partition_field)
        # Local directory for this table
        hdfs_file_table_dir = hdfs_file_operator_dir + "/" + tablename
        # Create a directory for the table under the temporary directory
        try:
            # Create the directory if it does not exist yet
            if not os.path.isdir(hdfs_file_table_dir):
                os.mkdir(hdfs_file_table_dir)
        except Exception as e:
            logger.error(e)
            logger.error("Failed to create the table storage directory: " + hdfs_file_table_dir)
            return

        # Process the data files for the requested number of days
        for index in range(days):
            # Which day's data
            ds = get_date(p0, index)
            # HDFS URL of that day's data files
            hdfs_path = hdfs_path_table.replace("{p0}", ds)
            logger.info("hdfs path url : " + hdfs_path)
            # Under the per-table directory, create a new directory named after the partition
            hdfs_file_table_partition_dir = hdfs_file_table_dir + "/" + ds
            try:
                # If the directory already exists, remove it
                if os.path.isdir(hdfs_file_table_partition_dir):
                    shutil.rmtree(hdfs_file_table_partition_dir)
                # Create the partition directory
                os.mkdir(hdfs_file_table_partition_dir)
            except Exception as e:
                logger.error(e)
                logger.error("Failed to create the date directory under the table storage directory: " + hdfs_file_table_partition_dir)
                return

            # Download and decompress the data files
            file_list = get_unzip_hdfs_file_from_dir(hdfs_path, hdfs_file_operator_dir)
            # If the partition contains files, split them; otherwise skip it
            if file_list:
                logger.info("hdfs file download and uncompressed success")
                logger.info("uncompressed dir is : " + hdfs_file_operator_dir)
                # Split the files
                split_file_from_dir(file_list, save_local_dir, LINE_SIZE)
                # Clean up the files downloaded into the temporary directory
                exec_sh("rm -rf " + hdfs_file_table_partition_dir, hdfs_file_table_dir)
                logger.info("split success table: " + tablename + " date: " + ds)
            else:
                logger.info(
                    "current partition has no data the table is : %s, partition : %s"
                    % (tablename, ds))
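# main() relies on a get_date helper that is not defined in this excerpt. A minimal
# sketch of what it presumably does, assuming it offsets the p0 date by a number of days
# and returns the same '%Y-%m-%d' format; the offset direction is an assumption.
import datetime

def get_date(p0, offset):
    """Hypothetical helper: the date `offset` days after p0, formatted as YYYY-MM-DD."""
    base = datetime.datetime.strptime(p0, '%Y-%m-%d')
    return (base + datetime.timedelta(days=offset)).strftime('%Y-%m-%d')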
def cmd_parser(): parser = OptionParser() parser.add_option('--download', dest='is_download', action='store_true', help='download doujinshi or not') parser.add_option('--id', type='int', dest='id', action='store', help='doujinshi id of nhentai') parser.add_option('--ids', type='str', dest='ids', action='store', help='doujinshi id set, e.g. 1,2,3') parser.add_option('--search', type='string', dest='keyword', action='store', help='keyword searched') parser.add_option('--page', type='int', dest='page', action='store', default=1, help='page number of search result') parser.add_option('--path', type='string', dest='saved_path', action='store', default='', help='path which save the doujinshi') parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5, help='thread count of download doujinshi') parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30, help='timeout of download doujinshi') parser.add_option('--proxy', type='string', dest='proxy', action='store', default='', help='use proxy, example: http://127.0.0.1:1080') args, _ = parser.parse_args() if args.ids: _ = map(lambda id: id.strip(), args.ids.split(',')) args.ids = set(map(int, filter(lambda id: id.isdigit(), _))) if args.is_download and not args.id and not args.ids and not args.keyword: logger.critical('Doujinshi id/ids is required for downloading') parser.print_help() raise SystemExit if args.id: args.ids = (args.id, ) if not args.ids else args.ids if not args.keyword and not args.ids: parser.print_help() raise SystemExit if args.threads <= 0: args.threads = 1 elif args.threads > 10: logger.critical('Maximum number of used threads is 10') raise SystemExit if args.proxy: import urlparse proxy_url = urlparse.urlparse(args.proxy) if proxy_url.scheme not in ('http', 'https'): logger.error('Invalid protocol \'{}\' of proxy, ignored'.format( proxy_url.scheme)) else: constant.PROXY = {proxy_url.scheme: args.proxy} return args
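# A brief sketch of how the parsed options might be consumed by a caller; the loop body
# only logs, because the project's real downloader entry point is not shown here.
if __name__ == '__main__':
    options = cmd_parser()
    if options.is_download:
        for doujinshi_id in sorted(options.ids):
            logger.info('Would download doujinshi {} to {!r} with {} threads'.format(
                doujinshi_id, options.saved_path, options.threads))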
def __init__(self, *args): logger.error(self.__class__.__name__) super().__init__(*args)
def write_cropped_json(src_img_path: str, src_json_path: str, dst_img_path: str, dst_json_path: str, bound_type='rect', verbose: bool = False): def process_shape(shape: Shape, bbox: BBox, new_shape_handler: ShapeHandler): points = [Point.from_list(point) for point in shape.points] contained_count = 0 for point in points: if bbox.contains(point): contained_count += 1 if contained_count == 0: return elif contained_count == len(points): pass else: logger.error( f"Found a shape that is only partially contained by a bbox.") logger.error(f"Shape: {shape}") logger.error(f"BBox: {bbox}") cropped_points = [ Point(x=point.x - bbox.xmin, y=point.y - bbox.ymin) for point in points ] for point in cropped_points: if point.x < 0 or point.y < 0: logger.error(f"Encountered negative point after crop: {point}") raise Exception new_shape = shape.copy() new_shape.points = [ cropped_point.to_list() for cropped_point in cropped_points ] new_shape_handler.add(new_shape) check_input_path_and_output_dir(input_path=src_img_path, output_path=dst_img_path) check_input_path_and_output_dir(input_path=src_json_path, output_path=dst_json_path) output_img_dir = get_dirpath_from_filepath(dst_img_path) annotation = LabelMeAnnotation(annotation_path=src_img_path, img_dir=dst_img_path, bound_type=bound_type) parser = LabelMeAnnotationParser(annotation_path=src_json_path) parser.load() bbox_list = [] for rect in parser.shape_handler.rectangles: numpy_array = np.array(rect.points) if numpy_array.shape != (2, 2): logger.error( f"Encountered rectangle with invalid shape: {numpy_array.shape}" ) logger.error(f"rect: {rect}") raise Exception xmin, xmax = numpy_array.T[0].min(), numpy_array.T[0].max() ymin, ymax = numpy_array.T[1].min(), numpy_array.T[1].max() bbox_list.append(BBox.from_list([xmin, ymin, xmax, ymax])) img = cv2.imread(src_img_path) img_h, img_w = img.shape[:2] for i, bbox in enumerate(bbox_list): bbox = BBox.buffer(bbox) new_shape_handler = ShapeHandler() for shape_group in [ parser.shape_handler.points, parser.shape_handler.rectangles, parser.shape_handler.polygons ]: for shape in shape_group: process_shape(shape=shape, bbox=bbox, new_shape_handler=new_shape_handler) new_shape_list = new_shape_handler.to_shape_list() if len(new_shape_list) > 0: img_rootname, json_rootname = get_rootname_from_path( dst_img_path), get_rootname_from_path(dst_json_path) dst_img_dir, dst_json_dir = get_dirpath_from_filepath( dst_img_path), get_dirpath_from_filepath(dst_json_path) dst_img_extension = get_extension_from_path(dst_img_path) dst_cropped_img_path = f"{dst_img_dir}/{img_rootname}_{i}.{dst_img_extension}" dst_cropped_json_path = f"{dst_json_dir}/{json_rootname}_{i}.json" write_cropped_image(src_path=src_img_path, dst_path=dst_cropped_img_path, bbox=bbox, verbose=verbose) cropped_labelme_ann = annotation.copy() cropped_labelme_ann.annotation_path = dst_cropped_json_path cropped_labelme_ann.img_dir = dst_img_dir cropped_labelme_ann.img_path = dst_cropped_img_path cropped_img = cv2.imread(dst_cropped_img_path) cropped_img_h, cropped_img_w = cropped_img.shape[:2] cropped_labelme_ann.img_height = cropped_img_h cropped_labelme_ann.img_width = cropped_img_w cropped_labelme_ann.shapes = new_shape_list cropped_labelme_ann.shape_handler = new_shape_handler writer = LabelMeAnnotationWriter(cropped_labelme_ann) writer.write() if verbose: logger.info(f"Wrote {dst_cropped_json_path}")
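# A minimal usage sketch for the cropping utility above; the file paths are placeholders.
write_cropped_json(
    src_img_path='data/scene0001.jpg',      # placeholder source image
    src_json_path='data/scene0001.json',    # placeholder LabelMe annotation
    dst_img_path='cropped/scene0001.jpg',   # per-crop outputs get an _<i> suffix
    dst_json_path='cropped/scene0001.json',
    bound_type='rect',
    verbose=True,
)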
def __init__(self, username): self.username = username logger.error(str(self))