def topics_from_metadata(self, parent_topic: Topic) -> List[Topic]:
    """Create one child of ``parent_topic`` per distinct document category.

    Categories are taken from the ``'categories'`` entry of every record
    read from ``self.file_docs``. The distinct values are sorted before the
    children are added.

    :param parent_topic: topic that receives the new children through
        ``add_child``.
    :return: the values returned by ``parent_topic.add_child``, in sorted
        category order.
    """
    records = util.json_read(self.file_docs)
    # note: some papers do not have categories (especially very old ones),
    # so records with an empty 'categories' entry are skipped
    category_lists = (
        record['categories'] for record in records if record['categories']
    )
    unique_ids = set(util.flatten(category_lists, generator=True))

    topics = []
    for topic_id in sorted(unique_ids):
        topics.append(parent_topic.add_child(topic_id))
    return topics
def topic_stats(topic_file: str):
    """Print how often each topic occurs across a stored document collection.

    The collection is loaded from ``topic_file``; the topics of all documents
    are flattened and counted, and one line per topic is printed, most
    frequent first.

    :param topic_file: path handed to ``util.json_read``.
    """
    print("gathering stats for topics in", topic_file)
    collection = DocumentCollection.from_dict(util.json_read(topic_file))

    # documents without topics contribute an empty list
    per_document = (doc.topics or [] for doc in collection.documents.values())
    frequencies = Counter(util.flatten(per_document, generator=True))

    for topic, count in frequencies.most_common():
        line = "{}: {} ({})".format(topic.topic_id, count, topic.tokens)
        print(line)
def index():
    """Render ``index.html`` filled with the currently stored settings."""
    settings = util.json_read()
    # look the values up in the same order as the template keywords
    current = {key: settings[key] for key in ('wait', 'release', 'interval')}
    return render_template('index.html', **current)
def start():
    """Issue ``'start'`` via ``util.systemd_control`` and render the index page.

    The page is rendered with the ``wait``, ``release`` and ``interval``
    values read from the stored settings.
    """
    util.systemd_control('start')
    settings = util.json_read()
    return render_template(
        'index.html',
        wait=settings['wait'],
        release=settings['release'],
        interval=settings['interval'],
    )
def docs_from_metadata(self, topics: List[Topic]) -> Dict[str, Document]:
    """Restore the documents in ``self.file_docs`` and attach category topics.

    :param topics: topics to make available to the restored documents; they
        are indexed by ``topic_id``.
    :return: the mapping produced by ``Document.restore_documents``. When
        ``self.category_layer`` is truthy, each document additionally gets
        one topic per category with score ``1.0``.
    """
    topics_by_id = {topic.topic_id: topic for topic in topics}

    # restore documents
    restored = Document.restore_documents(
        util.json_read(self.file_docs), topics_by_id)

    if not self.category_layer:
        return restored

    # add topics to documents (one for each category)
    for document in restored.values():
        if not document.categories:
            logger.warning("Document {} has no categories!".format(
                document.doc_id))
            continue
        for category in document.categories:
            document.add_topic(topics_by_id[category], 1.0)
    return restored
def setting():
    """Store the submitted settings, restart the service and render the page.

    Reads ``wait``, ``release`` and ``interval`` from the submitted form,
    writes them into the stored settings, then runs the stop / GPIO init /
    start sequence from ``util`` and renders ``index.html`` with the new
    values.
    """
    form = request.form
    # read all form fields first, before the stored settings are touched
    submitted = {
        'wait': form['wait'],
        'release': form['release'],
        'interval': form['interval'],
    }

    settings = util.json_read()
    for key, value in submitted.items():
        settings[key] = value
    util.json_write(settings)

    util.systemd_control('stop')
    util.GPIO_init()
    util.systemd_control('start')
    return render_template('index.html', **submitted)
def maintenance():
    """Switch to maintenance settings, restart the service and render the page.

    Mode for keeping the output fully open for a long time, e.g. during
    initial setup: ``wait`` and ``interval`` are set to ``"0"`` and
    ``release`` to ``"3600"``.
    """
    overrides = (('wait', "0"), ('release', "3600"), ('interval', "0"))

    settings = util.json_read()
    for key, value in overrides:
        settings[key] = value
    util.json_write(settings)

    util.systemd_control('stop')
    util.GPIO_init()
    util.systemd_control('start')
    return render_template('index.html', **dict(overrides))
def main():
    """Toggle BCM pin 4 in an endless cycle driven by the stored settings.

    After an initial delay of ``wait`` seconds the loop repeats forever:
    sleep ``interval`` seconds, drive the pin high, sleep ``release``
    seconds, drive the pin low. All three values are read once at start-up
    and converted with ``float``.

    The function does not return normally. If it is left through an
    exception (including ``KeyboardInterrupt``), the pin is driven low and
    the GPIO channels are released before the exception propagates.
    """
    pin = 4

    # GPIO initialisation
    GPIO.cleanup()
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(pin, GPIO.OUT)

    try:
        # Load settings
        setting_json = util.json_read()
        wait = float(setting_json['wait'])
        release = float(setting_json['release'])
        interval = float(setting_json['interval'])

        # Wait for the configured number of seconds
        time.sleep(wait)

        # Main loop
        while True:
            time.sleep(interval)
            GPIO.output(pin, True)
            time.sleep(release)
            GPIO.output(pin, False)
    finally:
        # Do not leave the output driven high or the channel claimed when the
        # loop is interrupted. This runs for exceptions and Ctrl-C; a SIGTERM
        # without an installed handler ends the process without reaching it.
        GPIO.output(pin, False)
        GPIO.cleanup()
def test_document_topics(self):
    """Print, for each topic, up to ten top-scoring documents grouped under it.

    Documents are grouped by the first entry of their ``topics`` list and
    printed as arXiv URLs with their score, highest score first.
    """
    # get best matching documents + URLs per topic
    topic_model = DocumentCollection.from_dict(
        util.json_read(self.file_topics))

    # group documents by first topic
    grouped = defaultdict(list)
    for key, document in topic_model.documents.items():
        if not document.topics:
            logger.warning("document {} has no topics".format(document.doc_id))
            continue
        first_topic, first_score = document.topics[0]
        grouped[first_topic].append((key, first_score))

    # sort by score descending
    for entries in grouped.values():
        entries.sort(key=lambda entry: entry[1], reverse=True)

    # print highest scoring documents for each topic
    # NOTE(review): groups are keyed by the first element of doc.topics[0]
    # but looked up by topic.topic_id -- confirm both are the same kind of key.
    for topic in topic_model.topics.values():
        print("Topic {}: {}".format(topic.topic_id, topic.tokens))
        top_entries = grouped[topic.topic_id][:10]
        for doc_id, score in top_entries:
            url_id = doc_id.replace('-', '/')
            print("paper https://arxiv.org/abs/{} with score {}".format(
                url_id, score))
def test_json():
    """Check that data passed through json_write and json_read is unchanged."""
    original = {'key': [1, 2, 3]}
    # feed the result of json_write straight back into json_read
    restored = json_read(json_write(original))
    assert_equal(original, restored)
def docs_from_ids(self) -> Dict[str, Document]:
    """Build a new ``Document`` for every id read from ``self.file_ids``.

    :return: mapping from each id to the ``Document`` created from it.
    """
    documents = {}
    for doc_id in util.json_read(self.file_ids):
        documents[doc_id] = Document(doc_id)
    return documents