def log_new_connnection(self, client_ip, user_agent, room_id, config_filename, config):
    connection_info = {}
    connection_info['room_id'] = room_id
    connection_info['config_filename'] = config_filename
    connection_info['config'] = config
    connection_info['client_ip'] = client_ip
    connection_info['client_ip_info'] = get_ip_information(client_ip)
    if user_agent:
        connection_info['user_agent'] = user_agent.string
        connection_info['user_platform'] = user_agent.platform
        connection_info['user_browser'] = user_agent.browser
    timestamp = time.time()
    timestamp_human_readable = datetime.datetime.fromtimestamp(
        timestamp).strftime('%H:%M:%S on %d/%m/%Y')
    connection_info['timestamp'] = timestamp
    connection_info['timestamp_human_readable'] = timestamp_human_readable
    connection_info_filename = os.path.join(self.log_folder, 'connection_info.json')
    tools.save_json(connection_info_filename, connection_info)
def save_drawing_to_file(self, drawing_data):
    drawing_folder = os.path.join(self.log_folder, DRAWING_FOLDERNAME)
    tools.ensure_dir(drawing_folder)
    drawing_files = tools.list_files(drawing_folder, ['*.json'])
    drawing_filename = os.path.join(
        drawing_folder, '{:04}.json'.format(len(drawing_files)))
    tools.save_json(drawing_filename, drawing_data)
    return drawing_filename
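# The tools.save_json helper is not shown in these snippets. Judging from the
# call sites above (path first, data second), a minimal sketch could look like
# the following; the indentation and encoding choices are assumptions, not the
# project's actual implementation.
import json

def save_json(path, data):
    # write `data` to `path` as pretty-printed JSON
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)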
def listener(event):
    if event.exception:
        global data
        global project_dir
        logger.info("[Job {} exited] {}".format(event.job_id, event.exception.message))
        logger.info("[data] {}".format(data))
        save_json(project_dir, "9 data", data)
    else:
        logger.info("[Crawl job running normally]")
def launch(item, command, no_cache=False):
    if not no_cache:
        # save command and increased clicks counter to the cache file;
        # we won't cache items from the user-defined menu
        if command not in cache:
            cache[command] = 1
        else:
            cache[command] += 1
        save_json(cache, cache_file)
    # run the command and quit
    subprocess.Popen('exec {}'.format(command), shell=True)
    Gtk.main_quit()
def save_learner_logs_to_file(self, learner_logs):
    learner_logs_folder = os.path.join(self.log_folder, LEARNER_LOGS_FOLDERNAME)
    tools.ensure_dir(learner_logs_folder)
    files = tools.list_files(learner_logs_folder, ['*.json'])
    learner_logs_filename = os.path.join(learner_logs_folder,
                                         '{:04}.json'.format(len(files)))
    tools.save_json(learner_logs_filename, learner_logs)
    return learner_logs_filename
def main_pipeline():
    '''
    Base logic here
    '''
    new_files = tools.check_input_dir()
    for file_name in new_files:
        p = Parser(dirs.INPUT_DIR + file_name)
        try:
            json_data = p.make_json()
            tools.save_json(json_data, file_name)
            save_to_db(json_data)
            tools.move_input_file(file_name, dirs.OK_DIR)
        except tools.MyLocalException:
            tools.move_input_file(file_name, dirs.ERR_DIR)
def loop():
    # check whether the end time has been reached
    global end_timestrap
    global request_points
    current_timestamp = get_timestamp()
    if end_timestrap is not None:  # an end time has been set
        if current_timestamp > end_timestrap:
            sched.shutdown()
    current_time = timestamp_to_time(current_timestamp)
    logger.info("[Crawl started] {}".format(current_time))
    # crawling work
    global data
    for frame, value in request_points.items():
        if value["extent"] is None:  # this map sheet has no imgs
            continue
        req_pnt = value["req_pnt"]
        imgs = do_get(*req_pnt)
        if imgs is None:
            logger.error("Map sheet {} failed: no imgs obtained at this position".format(frame))
            continue
        # process imgs
        for img in imgs:
            timestamp = img[1]  # timestamp
            time = timestamp_to_time(timestamp)  # time string
            # add to the data dict
            if frame not in data:
                data[frame] = OrderedDict()
            if time not in data[frame]:
                data[frame][time] = {
                    "timestamp": timestamp,
                    "time": time,
                    "url": img[0],
                    "extent": img[2],
                    "req_pnt": req_pnt,
                    "file_path": ""
                }
            else:
                pass
    print(data)
    save_json(project_dir, "9 data", data)
    download()      # download the images
    registration()  # register the images
    return
def registration():
    global data
    global data_rgs
    global params
    global logger
    rgs_dir = params["registration_dir"]
    for frame, frame_value in data.items():
        for time, time_value in frame_value.items():
            # original data
            origin_file_path = time_value["file_path"]  # path of the original file
            if origin_file_path == "":
                # not downloaded yet, skip
                continue
            origin_file_name = time_value["file_name"]  # name of the original file
            # registered data
            if frame not in data_rgs:
                data_rgs[frame] = OrderedDict()
            if time in data_rgs[frame]:
                # this item has already been registered
                continue
            # this item has not been registered yet
            rgs_file_name = "{} {}.png".format(frame, time)
            rgs_file_path = os.path.join(rgs_dir, rgs_file_name)  # path of the registered file
            if os.path.exists(rgs_file_path):  # registered file already exists
                continue
            rgs_data_item = do_rgs(time_value, rgs_file_path)
            if rgs_data_item is None:
                logger.error("[Registration failed!] {}".format(origin_file_path))
            else:
                # registration finished, store in data_rgs
                data_rgs[frame][time] = rgs_data_item
                save_json(project_dir, "9 data_rgs", data_rgs)  # save the registration info
def epoch_step(self, logs={}):
    for (k, v) in logs.items():
        l = self.H.get(k, [])
        # np.float32 values break JSON serialization, so cast to a plain float
        if not isinstance(v, float):
            v = round(float(v), 4)
        l.append(v)
        self.H[k] = l
    # write to file
    if self.json_path is not None:
        save_json(data=self.H, file_path=self.json_path)
    # save the training plots
    if len(self.H["loss"]) == 1:
        self.paths = {
            key: self.file_dir / (self.arch + f'_{key.upper()}')
            for key in self.H.keys()
        }
    if len(self.H["loss"]) > 1:
        # metric curves; train/valid keys are expected to come in pairs
        keys = [key for key, _ in self.H.items() if '_' not in key]
        for key in keys:
            N = np.arange(0, len(self.H[key]))
            plt.style.use("ggplot")
            plt.figure()
            plt.plot(N, self.H[key], label=f"train_{key}")
            plt.plot(N, self.H[f"valid_{key}"], label=f"valid_{key}")
            if self.add_test:
                plt.plot(N, self.H[f"test_{key}"], label=f"test_{key}")
            plt.legend()
            plt.xlabel("Epoch #")
            plt.ylabel(key)
            plt.title(f"Training {key} [Epoch {len(self.H[key])}]")
            plt.savefig(str(self.paths[key]))
            plt.close()
def download():
    global data
    global project_dir
    global logger
    # "original" folder
    # original_dir = os.path.join(project_dir, "0original")
    # if os.path.exists(original_dir) is False:
    #     os.mkdir(original_dir)
    original_dir = params["original_dir"]
    for frame, frame_value in data.items():
        # map sheet folder
        frame_dir = os.path.join(original_dir, frame)
        if os.path.exists(frame_dir) is False:
            os.mkdir(frame_dir)
        for time, time_value in frame_value.items():
            # already downloaded --> skip
            if time_value["file_path"] != "":
                continue
            # download
            url = time_value["url"]
            fn, fp = download_file(url, frame_dir)
            if fn is None:  # download failed
                logger.error("[Download failed!] {}".format(url))
                continue
            else:  # download finished
                time_value["file_path"] = fp
                time_value["file_name"] = fn
                # save the header file
                hdr_fn = os.path.splitext(fn)[0]  # header file name
                save_json(frame_dir, hdr_fn, time_value)
                # logger.info("[Download succeeded] {}".format(fp))
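# Hypothetical sketch of the crawler's save_json(dir, name, data) helper used
# by loop(), registration() and download() above; the ".json" suffix, the
# encoding and the returned path are assumptions inferred from the call sites
# (init_crawler() further below uses the return value as the saved file path).
import json
import os

def save_json(save_dir, name, data):
    save_path = os.path.join(save_dir, u"{}.json".format(name))
    with open(save_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    return save_path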
def query_user(user: str,
               config_file: str,
               start_date: str = arrow.get().format('YYYY-MM-DD'),
               end_date: str = arrow.get().shift(years=-10).format('YYYY-MM-DD'),
               time_sleep: float = 1.1):
    """Get Twitter information about user profiles

    Arguments:
        user {str} -- Screen name of the twitter user
        config_file {str} -- Config yml file

    Keyword Arguments:
        time_sleep {float} -- Time between requests (default: {1.1})
        start_date {str} -- Start date from which the twitter user is requested
            (default: {arrow.get().format('YYYY-MM-DD')})
        end_date {str} -- End date up to which the twitter user is requested
            (default: {arrow.get().shift(years=-10).format('YYYY-MM-DD')})

    Raises:
        AssertionError -- Raised when time_sleep is less than 1.1 secs
    """
    settings = Settings()._load_config(config_file)
    # Create a connection with Elastic
    if settings.elasticsearch_url is not None:
        es = Elasticsearch(settings.elasticsearch_url)
        logger.info(es.info())
    else:
        es = None
    # Check that time_sleep is at least 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum)")
        raise err
    api = twitter.Api(consumer_key=settings.CONSUMER_KEY,
                      consumer_secret=settings.CONSUMER_SECRET,
                      access_token_key=settings.ACCESS_TOKEN_KEY,
                      access_token_secret=settings.ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')
    user_query = "to:%s OR from:%s OR on:%s" % (user, user, user)
    logger.info("Scraping query on Twitter")
    df = scrape_twitter_by_date(user_query, start_date, end_date)
    if df is not None:
        lst_statuses_ids = df['STATUS_ID'].tolist()
        hydrataded_statuses = hydratate_status(api, lst_statuses_ids)
        # Save all jsons to file and load into Elastic
        logger.info("Processing Statuses from Twitter API to save jsons")
        for c_status_data in tqdm(hydrataded_statuses):
            cur_dict = Cut(c_status_data.AsDict())
            cur_id_str = cur_dict['id_str']
            # To ES: improved data
            cur_json = _prepare_json_status(cur_dict.data)
            # To FS: original data backup
            cur_json_backup = json.dumps(cur_dict.data, indent=4)
            save_json(cur_json_backup,
                      settings.status_json_backup + cur_id_str + ".json")
            if es is not None:
                logger.debug("Indexing: %s " % cur_id_str)
                es.index(index=settings.ELASTICSEARCH_STATUS_INDEX,
                         doc_type='status',
                         id=cur_id_str,
                         body=cur_json)
parser.add_argument("--topic_tree_url", help="url of Khan academy topic tree", action="store", required=True) parser.add_argument("--topic_tree_backup", help="path to backup of Khan academy topic tree", action="store", required=True) parser.add_argument("--youtube_ids_backup", help="path to backup of youtube ids", action="store", required=True) args = parser.parse_args() topic_tree_url = args.topic_tree_url topic_tree_backup = args.topic_tree_backup youtube_ids_backup = args.youtube_ids_backup # download topic tree if not already downloaded if os.path.isfile(topic_tree_backup): topic_tree = tools.load_json(topic_tree_backup) else: topic_tree = tools.download_topic_tree(topic_tree_url) tools.save_json(topic_tree_backup, topic_tree) youtube_ids = tools.get_youtube_ids(topic_tree) tools.save_dump(youtube_ids_backup, youtube_ids) tools.save_json(youtube_ids_backup + '.json', youtube_ids) tools.save_yaml(youtube_ids_backup + '.yaml', youtube_ids)
def initparam():
    """
    Initialize the parameters
    """
    params = OrderedDict()
    print("[Parameters received from the front end] %s" % str(request.values))
    print("[Processing parameters]")
    # [Start time]
    params["start_time"] = request.form.get("startTime", type=str)
    if is_timestr(params['start_time']) is False:
        params['start_time'] = get_time()
    print("\t[Start time] %s" % params["start_time"])
    # [End time]
    params["end_time"] = request.form.get("endTime", type=str)
    if is_timestr(params['end_time']) is False:
        params['end_time'] = "not set"  # never ends
    print("\t[End time] %s" % params["end_time"])
    # [Time interval]
    params["interval"] = request.form.get("interval", type=int)
    params['step'] = params['interval'] / 5  # step size
    print("\t[Time interval] %d" % params["interval"])
    print("\t[Image saving step] %d" % params["step"])
    # [Request points]
    points = {
        'center_point': request.form.get("centerPoint", type=str),
        'north_west_point': request.form.get("northWestPoint", type=str),
        'north_east_point': request.form.get("northEastPoint", type=str),
        'south_east_point': request.form.get("southEastPoint", type=str),
        'south_west_point': request.form.get("southWestPoint", type=str)
    }
    print('\t[Points] %s' % str(points))
    for point_name, value in points.items():
        params[point_name] = get_latlng(value)
    # [Save folder]
    params["save_file_dir"] = request.form.get("saveFileDir", type=str)
    print("\t[Save folder] %s" % params["save_file_dir"])
    save_dir = params["save_file_dir"]
    if not os.path.exists(save_dir):
        # the selected folder does not exist
        print("\t[WARNING] The selected folder does not exist, redirecting to the start page")
        print("[params] " + str(params))
        return redirect_index("The folder path does not exist, please enter it again!")
    # [Output folder]
    out_dir = os.path.join(save_dir, params['start_time'])
    params['out_dir'] = out_dir
    print("\t[Image output folder] %s" % params["out_dir"])
    if os.path.exists(out_dir):
        # the output folder already exists
        print("[params] " + str(params))
        return redirect_index("The output folder already exists: %s<Br/>Please enter another folder!" % out_dir)
    os.makedirs(out_dir)
    # project remark
    params["remark"] = request.form.get("remark")
    print("\t[Project remark] %s" % params["remark"])
    img_len, request_points = init_crawler(params)  # compute the centre point of each request
    print(request_points)
    if img_len == 0:
        return "Sorry, the task failed!<br/>The selected area has no radar precipitation images."
    else:
        # initialize the folders
        init_dir(params, request_points)
        # save the crawl parameters
        print("[params] " + str(params))
        save_json(out_dir, u"0 param - crawl parameters", params)  # save as json
        file_str = save_params_file(params)  # human-readable copy for the user
        html_str = file_str.replace('\n', '<br/>')  # HTML string for the response
        # start the crawl in the background; submit the callable and its arguments
        executor.submit(start, params, request_points)
        return ('The task is now running in the background!<br/>'
                '{} images need to be crawled each time!<br/>{}').format(img_len, html_str)
def download_api_timeline(user: str, time_sleep: float = 1.1, since: str = '0'):
    """Goes to the twitter API, gets the timeline of a user_id and saves it
    into a json file (in the "json" dir); if Elasticsearch is configured, the
    statuses are sent there too

    Arguments:
        user {str} -- Twitter Screen Name
        elasticsearch_url {str} -- Base url of ElasticSearch

    Keyword Arguments:
        time_sleep {float} -- Time between requests (default: {1.1})
        since {str} -- Status ID to start twitter extraction (default: {'0'})
    """
    global first_status_id
    settings = Settings()
    # Create a connection with Elastic
    if settings.ELASTICSEARCH_URL is not None:
        es = Elasticsearch(settings.ELASTICSEARCH_URL)
        logger.info(es.info())
    else:
        es = None
    # Check that time_sleep is at least 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum)")
        raise err
    api = twitter.Api(consumer_key=settings.CONSUMER_KEY,
                      consumer_secret=settings.CONSUMER_SECRET,
                      access_token_key=settings.ACCESS_TOKEN_KEY,
                      access_token_secret=settings.ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')
    # Go to Twitter API and get statuses by id
    logger.info("Downloading TimeLine Statuses from Twitter API")
    all_statuses_data = []
    logger.info("Starting at STATUS_ID: %s" % since)
    since_id = int(since)
    if since_id == 0:
        try:
            s = Search(using=es, index=STATUSES_INDEX, doc_type='status')
            s = s.query('match', user__screen_name=user)
            s = s.sort("-id", "-_id")
            since_id = s.execute()[0]['id']
            logger.info("Starting 🐦 timeline for [%s] from: %d" % (user, since_id))
        except:
            logger.warning(
                "Error retrieving last status from ES for [%s], using 0" % user)
    statuses = api.GetUserTimeline(screen_name=user,
                                   count=200,
                                   include_rts=True,
                                   exclude_replies=False,
                                   since_id=since_id)
    if len(statuses) == 0:
        print("%s" % since)
        logger.warning("There aren't new results for this Timeline")
        return since
    all_statuses_data += statuses
    last_status_id = statuses[-1].id
    first_status_id = statuses[0].id
    # If the first status equals the last one of the previous 'query',
    # all tweets have been read
    while statuses and statuses[0].id != last_status_id:
        statuses = api.GetUserTimeline(screen_name=user,
                                       count=200,
                                       include_rts=True,
                                       exclude_replies=False,
                                       max_id=last_status_id,
                                       since_id=since_id)
        logger.info("Read: %d twts | Total: %d" %
                    (len(statuses), len(all_statuses_data)))
        all_statuses_data += statuses
        last_status_id = statuses[-1].id
        sleep(time_sleep)
    all_statuses_data = set(all_statuses_data)
    # Save all jsons to file and load into Elastic
    logger.info("Processing TimeLine Statuses from 🐦 API to save jsons")
    for c_status_data in tqdm(all_statuses_data):
        cur_dict = Cut(c_status_data.AsDict())
        cur_id_str = cur_dict['id_str']
        # To ES: improved data
        cur_json = _prepare_json_status(c_status_data)
        # To FS: original data backup
        cur_json_backup = json.dumps(cur_dict.data, indent=4)
        save_json(cur_json_backup, "./json/" + cur_id_str + ".json")
        if es is not None:
            es.index(
                index=settings.ELASTICSEARCH_STATUS_INDEX,
                # ignore=400,
                doc_type='status',
                id=cur_id_str,
                body=cur_json)
    # STDOut and STDErr
    print("%d" % first_status_id)
    return first_status_id
args = parser.parse_args()

output_data_path = args.output_path
video_metadata_path = args.video_metadata_path

video_metadata = tools.load_json(video_metadata_path)

authors_distribution = Counter(
    chain.from_iterable(
        map(lambda x: x["author_names"],
            chain.from_iterable(video_metadata.values())))).most_common()
duration_distribution = Counter(
    map(lambda x: x["duration"],
        chain.from_iterable(video_metadata.values()))).most_common()
youtube_id_distribution = Counter(
    map(lambda x: x["youtube_id"],
        chain.from_iterable(video_metadata.values()))).most_common()
keywords_distribution = Counter(
    chain.from_iterable(
        map(lambda x: x["keywords"].split(','),
            chain.from_iterable(video_metadata.values())))).most_common()

tools.save_json(os.path.join(output_data_path, 'authors_distribution.json'),
                authors_distribution)
tools.save_yaml(os.path.join(output_data_path, 'authors_distribution.yaml'),
                authors_distribution)
tools.save_json(os.path.join(output_data_path, 'duration_distribution.json'),
                duration_distribution)
tools.save_yaml(os.path.join(output_data_path, 'duration_distribution.yaml'),
                duration_distribution)
tools.save_json(os.path.join(output_data_path, 'youtube_id_distribution.json'),
                youtube_id_distribution)
tools.save_yaml(os.path.join(output_data_path, 'youtube_id_distribution.yaml'),
                youtube_id_distribution)
tools.save_json(os.path.join(output_data_path, 'keywords_distribution.json'),
                keywords_distribution)
tools.save_yaml(os.path.join(output_data_path, 'keywords_distribution.yaml'),
                keywords_distribution)
def save(self, out_path):
    tools.save_json(self.as_dict(), out_path)
def query_api_statuses(query: str,
                       elasticsearch_url: str,
                       elasticuser: str = None,
                       elasticpass: str = None,
                       elasticsearch_index: str = STATUSES_INDEX,
                       time_sleep: float = 1.1,
                       since: str = '0'):
    """Goes to the twitter API, gets status info and saves it into a json file
    (in the "json" dir); if Elasticsearch is configured, the statuses are sent
    there too

    Arguments:
        query {str} -- Proposed query to obtain statuses on Twitter
        elasticsearch_url {str} -- Base url of ElasticSearch

    Keyword Arguments:
        elasticuser {str} -- ElasticSearch user (default: {None})
        elasticpass {str} -- ElasticSearch password (default: {None})
        elasticsearch_index {str} -- ElasticSearch index (default: {STATUSES_INDEX})
        time_sleep {float} -- Time between requests (default: {1.1})
        since {str} -- Status ID to start twitter extraction (default: {'0'})
    """
    # Create a connection with Elastic
    if elasticsearch_url is not None:
        es = Elasticsearch(elasticsearch_url)
        logger.info(es.info())
    else:
        es = None
    # Check that time_sleep is at least 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum)")
        raise err
    api = twitter.Api(consumer_key=CONSUMER_KEY,
                      consumer_secret=CONSUMER_SECRET,
                      access_token_key=ACCESS_TOKEN_KEY,
                      access_token_secret=ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')
    since_id = int(since)
    logger.info("Scraping query on Twitter")
    df = scrape_twitter_by_date(query,
                                start_date=arrow.now().format('YYYY-MM-DD'),
                                end_date=arrow.now().format('YYYY-MM-DD'))
    if df is not None:
        lst_statuses_ids = df['STATUS_ID'].tolist()
        hydrataded_statuses = hydratate_status(api, lst_statuses_ids)
        # Save all jsons to file and load into Elastic
        logger.info("Processing Statuses from Twitter API to save jsons")
        for c_status_data in tqdm(hydrataded_statuses):
            cur_dict = Cut(c_status_data.AsDict())
            cur_id_str = cur_dict['id_str']
            # Fix twitter dates to a more standard date format
            list_all_keys_w_dots = dotter(cur_dict.data, '', [])
            try:
                for created_at_keys in list_all_keys_w_dots:
                    if 'created_at' in created_at_keys:
                        cur_dt = arrow.get(cur_dict[created_at_keys],
                                           TWITTER_DATETIME_PATTERN)
                        # arrow uses "mm"/"ss" for minutes/seconds ("MM" is the month)
                        cur_dict[created_at_keys] = cur_dt.format(
                            "YYYY-MM-DDTHH:mm:ss") + "Z"
            except:
                logger.error("Error parsing dates on %s" % cur_id_str)
            cur_json = json.dumps(cur_dict.data, indent=4)
            save_json(cur_json, "./json/" + cur_id_str + ".json")
            if es is not None:
                logger.debug("Indexing: %s " % cur_id_str)
                es.index(
                    index=elasticsearch_index,
                    # ignore=400,
                    doc_type='status',
                    id=cur_id_str,
                    body=cur_json)
                 'translated_youtube_lang', 'youtube_id']

topic_tree_leafs = tools.get_leafs(topic_tree)

logging.info('Gathering video metadata')
video_metadata = defaultdict(list)
for leaf in topic_tree_leafs:
    video_metadata[leaf['youtube_id']].append({key: leaf[key] for key in metadata_mask})
video_metadata = dict(video_metadata)

logging.info('Creating simplified map of the topic tree')
topic_tree_map = tools.make_map(topic_tree)

logging.info('Writing common metadata files')
tools.save_json(os.path.join(output_data_path, 'video_metadata.json'), video_metadata)
tools.save_yaml(os.path.join(output_data_path, 'video_metadata.yaml'), video_metadata)
tools.save_json(os.path.join(output_data_path, 'topic_tree_map.json'), topic_tree_map)
tools.save_yaml(os.path.join(output_data_path, 'topic_tree_map.yaml'), topic_tree_map)

logging.info('Writing metadata files for individual videos')
for youtube_id in video_metadata.keys():
    try:
        prefixed_youtube_id = tools.prefix(youtube_id)
        output_path = os.path.join(output_data_path, prefixed_youtube_id,
                                   prefixed_youtube_id + '_metadata')
        # prepare output audio dir
        try:
            os.mkdir(os.path.join(output_data_path, prefixed_youtube_id))
        except FileExistsError:
def main():
    # exit if already running, thanks to Slava V at https://stackoverflow.com/a/384493/4040598
    pid_file = os.path.join(tempfile.gettempdir(), 'sgtk-menu.pid')
    fp = open(pid_file, 'w')
    try:
        fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        sys.exit(0)

    global appendix_file
    parser = argparse.ArgumentParser(description="GTK menu for sway and i3")
    parser.add_argument("-b", "--bottom", action="store_true",
                        help="display menu at the bottom")
    favourites = parser.add_mutually_exclusive_group()
    favourites.add_argument("-f", "--favourites", action="store_true",
                            help="prepend 5 most used items")
    favourites.add_argument('-fn', type=int, help="prepend <FN> most used items")
    appenxid = parser.add_mutually_exclusive_group()
    appenxid.add_argument("-a", "--append", action="store_true",
                          help="append custom menu from {}".format(appendix_file))
    appenxid.add_argument("-af", type=str,
                          help="append custom menu from {}".format(
                              os.path.join(config_dir, '<AF>')))
    parser.add_argument("-l", type=str,
                        help="force language (e.g. \"de\" for German)")
    parser.add_argument("-s", type=int, default=20,
                        help="menu icon size (min: 16, max: 48, default: 20)")
    parser.add_argument("-w", type=int,
                        help="menu width in px (integer, default: screen width / 8)")
    parser.add_argument("-d", type=int, default=100,
                        help="menu delay in milliseconds (default: 100)")
    parser.add_argument("-o", type=float, default=0.3,
                        help="overlay opacity (min: 0.0, max: 1.0, default: 0.3)")
    parser.add_argument("-t", type=int, default=30,
                        help="sway submenu lines limit (default: 30)")
    global args
    args = parser.parse_args()

    if args.s < 16:
        args.s = 16
    elif args.s > 48:
        args.s = 48

    # Create default appendix file if not found
    if not os.path.isfile(appendix_file):
        save_default_appendix(appendix_file)

    # Replace appendix file name with custom - if any
    if args.af:
        appendix_file = os.path.join(config_dirs()[0], args.af)

    # cache stores number of clicks on each item
    global cache
    cache = load_json(cache_file)
    if not cache:
        save_json(cache, cache_file)
    global sorted_cache
    sorted_cache = sorted(cache.items(), reverse=True, key=lambda x: x[1])

    global locale
    locale = get_locale_string(args.l)
    category_names_dictionary = localized_category_names(locale)

    # replace additional category names with main ones
    for name in category_names:
        main_category_name = additional_to_main(name)
        try:
            localized_names_dictionary[main_category_name] = \
                category_names_dictionary[main_category_name]
        except:
            pass

    screen = Gdk.Screen.get_default()
    provider = Gtk.CssProvider()
    style_context = Gtk.StyleContext()
    style_context.add_provider_for_screen(
        screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)

    # find all .desktop entries, create DesktopEntry class instances;
    # DesktopEntry adds itself to the proper List in the class constructor
    list_entries()

    # Overlay window
    global win
    win = MainWindow()
    w, h = display_dimensions()
    win.resize(w, h)

    win.menu = build_menu()
    global menu_items_list
    menu_items_list = win.menu.get_children()

    win.menu.propagate_key_event = False
    win.menu.connect("key-release-event", win.search_items)
    # Let's reserve some width for long entries found with the search box
    if args.w:
        win.menu.set_property("width_request", args.w)
    else:
        win.menu.set_property("width_request", int(win.screen_dimensions[0] / 8))

    win.show_all()

    GLib.timeout_add(args.d, open_menu)
    Gtk.main()
def main():
    # exit if already running, thanks to Slava V at https://stackoverflow.com/a/384493/4040598
    pid_file = os.path.join(tempfile.gettempdir(), 'sgtk-menu.pid')
    fp = open(pid_file, 'w')
    try:
        fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        subprocess.run("pkill -f sgtk-menu", shell=True)
        sys.exit(2)

    global build_from_file
    parser = argparse.ArgumentParser(
        description="GTK menu for sway, i3 and some floating WMs")
    placement = parser.add_mutually_exclusive_group()
    placement.add_argument("-b", "--bottom", action="store_true",
                           help="display menu at the bottom (sway & i3 only)")
    placement.add_argument("-c", "--center", action="store_true",
                           help="center menu on the screen (sway & i3 only)")
    favourites = parser.add_mutually_exclusive_group()
    favourites.add_argument("-f", "--favourites", action="store_true",
                            help="prepend 5 most used items")
    favourites.add_argument('-fn', type=int, help="prepend <FN> most used items")
    appendix = parser.add_mutually_exclusive_group()
    appendix.add_argument("-a", "--append", action="store_true",
                          help="append custom menu from {}".format(build_from_file))
    appendix.add_argument("-af", type=str,
                          help="append custom menu from {}".format(
                              os.path.join(config_dir, '<AF>')))
    parser.add_argument("-n", "--no-menu", action="store_true",
                        help="skip menu, display appendix only")
    parser.add_argument("-l", type=str,
                        help="force language (e.g. \"de\" for German)")
    parser.add_argument("-s", type=int, default=20,
                        help="menu icon size (min: 16, max: 48, default: 20)")
    parser.add_argument("-w", type=int,
                        help="menu width in px (integer, default: screen width / 8)")
    parser.add_argument("-d", type=int, default=100,
                        help="menu delay in milliseconds (default: 100; sway & i3 only)")
    parser.add_argument("-o", type=float, default=0.3,
                        help="overlay opacity (min: 0.0, max: 1.0, default: 0.3; "
                             "sway & i3 only)")
    parser.add_argument("-t", type=int, default=30,
                        help="sway submenu lines limit (default: 30)")
    parser.add_argument("-y", type=int, default=0,
                        help="y offset from edge to display menu at (sway & i3 only)")
    parser.add_argument("-css", type=str, default="style.css",
                        help="use alternative {} style sheet instead of style.css".format(
                            os.path.join(config_dir, '<CSS>')))
    global args
    args = parser.parse_args()

    css_file = os.path.join(config_dirs()[0], args.css) if os.path.exists(
        os.path.join(config_dirs()[0], 'style.css')) else None

    if args.s < 16:
        args.s = 16
    elif args.s > 48:
        args.s = 48

    # We do not need any delay in other WMs
    if other_wm:
        args.d = 0

    # Create default config files if not found
    create_default_configs(config_dir)

    # Replace appendix file name with custom - if any
    if args.af:
        build_from_file = os.path.join(config_dirs()[0], args.af)

    if css_file:
        screen = Gdk.Screen.get_default()
        provider = Gtk.CssProvider()
        try:
            provider.load_from_path(css_file)
            Gtk.StyleContext.add_provider_for_screen(
                screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)
        except Exception as e:
            print(e)

    # cache stores number of clicks on each item
    global cache
    cache = load_json(cache_file)
    if not cache:
        save_json(cache, cache_file)
    global sorted_cache
    sorted_cache = sorted(cache.items(), reverse=True, key=lambda x: x[1])

    global locale
    locale = get_locale_string(args.l)
    category_names_dictionary = localized_category_names(locale)

    # replace additional category names with main ones
    for name in category_names:
        main_category_name = additional_to_main(name)
        try:
            localized_names_dictionary[main_category_name] = \
                category_names_dictionary[main_category_name]
        except:
            pass

    screen = Gdk.Screen.get_default()
    provider = Gtk.CssProvider()
    style_context = Gtk.StyleContext()
    style_context.add_provider_for_screen(
        screen, provider, Gtk.STYLE_PROVIDER_PRIORITY_APPLICATION)

    # find all .desktop entries, create DesktopEntry class instances;
    # DesktopEntry adds itself to the proper List in the class constructor
    list_entries()

    # Overlay window
    global win
    win = MainWindow()
    if other_wm:
        # We need this to obtain the screen geometry when i3ipc module unavailable
        win.resize(1, 1)
        win.show_all()

    global geometry
    # If we're not on sway nor i3, this won't return values until the window actually shows up.
    # Let's try as many times as needed. The retries int protects from an infinite loop.
    retries = 0
    while geometry[0] == 0 and geometry[1] == 0 and geometry[2] == 0 and geometry[3] == 0:
        geometry = display_geometry()
        retries += 1
        if retries > 500:
            print("\nFailed to get the current screen geometry, exiting...\n")
            sys.exit(2)
    x, y, w, h = geometry

    if not other_wm:
        win.resize(w, h)
    else:
        win.resize(1, 1)
        win.set_gravity(Gdk.Gravity.CENTER)
        if pynput:
            x, y = mouse_pointer.position
            win.move(x, y)
        else:
            win.move(0, 0)
            print("\nYou need the python-pynput package!\n")
        win.set_skip_taskbar_hint(True)

    win.menu = build_menu()
    win.menu.set_property("name", "menu")
    global menu_items_list
    menu_items_list = win.menu.get_children()

    win.menu.propagate_key_event = False
    win.menu.connect("key-release-event", win.search_items)
    # Let's reserve some width for long entries found with the search box
    if args.w:
        win.menu.set_property("width_request", args.w)
    else:
        win.menu.set_property("width_request", int(win.screen_dimensions[0] / 8))

    win.show_all()

    GLib.timeout_add(args.d, open_menu)
    Gtk.main()
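# Hypothetical sketch of the load_json/save_json cache helpers assumed by
# launch() and the two main() functions above. The data-first argument order of
# save_json(cache, cache_file) is taken from the call sites; the error handling
# and indentation are guesses, not the project's actual implementation.
import json

def load_json(path):
    # return the cached click counters, or an empty dict if the file is missing or corrupt
    try:
        with open(path, 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        return {}

def save_json(data, path):
    # persist the click counters
    with open(path, 'w') as f:
        json.dump(data, f, indent=2)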
def download_api_statuses(input: str, config_file: str = None, time_sleep: float = 1.1):
    """Goes to the twitter API, gets status info (hydratated) and saves it into
    a json file (in the "json" dir); if Elasticsearch is configured, the
    statuses are sent there too

    Arguments:
        input {str} -- Path to a MSGPACK or CSV file with the status ids

    Keyword Arguments:
        config_file {str} -- Config yml file (default: {None})
        time_sleep {float} -- Time between requests (default: {1.1})
    """
    settings = Settings()._load_config(config_file)
    # Create a connection with Elastic
    if settings.elasticsearch_url is not None:
        es = Elasticsearch(settings.elasticsearch_url)
        logger.info(es.info())
    else:
        es = None
    # Check that time_sleep is at least 1.1 secs
    try:
        assert time_sleep >= 1.1
    except AssertionError as err:
        logger.error("Time Sleep less than 1.1 secs (minimum)")
        raise err
    # Read the MSGPACK or CSV file with the status ids
    if ".msg" in input.lower():
        df = pd.read_msgpack(input)
    elif ".csv" in input.lower():
        df = pd.read_csv(input)
    api = twitter.Api(consumer_key=CONSUMER_KEY,
                      consumer_secret=CONSUMER_SECRET,
                      access_token_key=ACCESS_TOKEN_KEY,
                      access_token_secret=ACCESS_TOKEN_SECRET,
                      tweet_mode='extended')
    all_statuses_id = df['STATUS_ID'].tolist()
    # Go to Twitter API and get statuses by id
    logger.info("Downloading Statuses from 🐦 API")
    all_statuses_data = hydratate_status(api, all_statuses_id, time_sleep)
    # Save all jsons to file and load into Elastic
    logger.info("Processing Statuses from 🐦 API to save jsons")
    for c_status_data in tqdm(all_statuses_data):
        cur_dict = Cut(c_status_data.AsDict())
        cur_id_str = cur_dict['id_str']
        # Fix twitter dates to a more standard date format
        list_all_keys_w_dots = dotter(cur_dict.data, '', [])
        try:
            for created_at_keys in list_all_keys_w_dots:
                if 'created_at' in created_at_keys:
                    cur_dt = arrow.get(cur_dict[created_at_keys],
                                       TWITTER_DATETIME_PATTERN)
                    # arrow uses "mm"/"ss" for minutes/seconds ("MM" is the month)
                    cur_dict[created_at_keys] = cur_dt.format(
                        "YYYY-MM-DDTHH:mm:ss") + "Z"
        except:
            logger.error("Error parsing dates on %s" % cur_id_str)
        # To ES: improved data
        cur_json = _prepare_json_status(c_status_data)
        # To FS: original data backup
        cur_json_backup = json.dumps(c_status_data.AsDict(), indent=4)
        save_json(cur_json_backup,
                  settings.status_json_backup + cur_id_str + ".json")
        if es is not None:
            es.index(index=elasticsearch_index,
                     doc_type='status',
                     id=cur_id_str,
                     body=cur_json)
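# The Twitter snippets above call save_json with an already-serialized JSON
# string first and the destination path second; under that assumption a
# minimal sketch is just a file write (the real helper may differ).
def save_json(json_str, path):
    with open(path, 'w', encoding='utf-8') as f:
        f.write(json_str)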
def init_crawler(params):
    """
    Initialize the crawl
    :param params:
    :return:
    """
    # centre points of the map sheets
    request_points = OrderedDict()
    # the four corner coordinates
    nepoint = params["north_east_point"]
    # sepoint = params["south_east_point"]
    swpoint = params["south_west_point"]
    # nwpoint = params["north_west_point"]
    # boundaries
    s_boundary, w_boundary = swpoint
    n_boundary, e_boundary = nepoint
    s_boundary = int(s_boundary)
    w_boundary = int(w_boundary)
    n_boundary = int(n_boundary + 0.5)
    e_boundary = int(e_boundary + 0.5)
    print(s_boundary)
    print(w_boundary)
    print(n_boundary)
    print(e_boundary)
    if abs(n_boundary - s_boundary) <= 1 and abs(e_boundary - w_boundary) <= 1:
        # small extent
        center_point = params["center_point"]  # take the centre point
        value = {}
        value["req_pnt"] = center_point  # request point
        imgs = do_get(*center_point)  # request with the centre point to check it works
        # if the map sheet is valid and has an extent
        if imgs is not None and len(imgs) > 0 and len(imgs[0]) >= 3:
            value["extent"] = imgs[0][2]
            request_points["0,0"] = value
        else:
            # invalid; len(request_points) stays 0
            pass
    else:
        # large extent: split into map sheets of 1 degree each
        row = 0
        for x in range(s_boundary, n_boundary, 1):
            col = 0
            for y in range(w_boundary, e_boundary, 1):
                sheet_num = str(row) + ',' + str(col)
                value = {}
                value["req_pnt"] = (x, y)
                imgs = do_get(x, y)
                # if the map sheet is valid and has an extent
                if imgs is not None and len(imgs) > 0 and len(imgs[0]) >= 3:
                    value["extent"] = imgs[0][2]
                else:
                    value["extent"] = None
                request_points[sheet_num] = value
                col += 1
            row += 1
    save_path = save_json(params["out_dir"],
                          u"1 request_points - centerpoint and extent at 1 degree steps",
                          request_points)
    print(u"[FILE] centerpoint and extent at 1 degree steps: {}".format(save_path))
    # deduplicate the request points by extent
    prior_extent = None
    for key, value in list(request_points.items()):
        if prior_extent is None:
            prior_extent = value["extent"]
            continue
        else:
            now_extent = value["extent"]
            if prior_extent == now_extent:
                request_points.pop(key)
            else:
                prior_extent = now_extent
    # count the request points; only those with a non-None extent are valid
    img_len = 0
    for key, value in request_points.items():
        if value["extent"] is not None:
            img_len += 1
        else:
            continue
    save_path = save_json(params["out_dir"],
                          u"2 request_points - centre point of each crawl request",
                          request_points)
    print(u"[FILE] centre point of each crawl request: {}".format(save_path))
    print("[Progress] {} images need to be crawled each time".format(img_len))
    return img_len, request_points
def log_url_info(self, url_info):
    url_info_filename = os.path.join(self.log_folder, 'url_info.json')
    tools.save_json(url_info_filename, url_info)