def populate(self):
    """Populate the agenda from the configured org files.

    Rebuilds ``self.events``: a dict keyed by ``(year, month, day)``
    tuples, each value a list of ``Event`` objects sorted within the day.
    Deadlines become "special" events; active timestamps (points and
    ranges) become regular ones.
    """
    # Fix: raw string (the old "\[...\<..." literal relied on invalid
    # escapes like "\<", a SyntaxWarning on Python 3.12+), compiled once
    # instead of re-parsed for every node.  In a regex "\<" == "<".
    cleanup_re = re.compile(r"\[.*\]|<.*>|NEXT|TODO")
    self.events = {}
    for filename in self.filenames:
        root = orgparse.load(filename)
        for node in root.env.nodes[1:]:
            # Strip org timestamps and TODO keywords from the heading.
            heading = cleanup_re.sub("", node.heading).strip()
            if node.deadline:
                d = node.deadline.start
                key = (d.year, d.month, d.day)
                self.events.setdefault(key, []).append(
                    Event(heading, node.deadline.start, special=True))
            for date in node.get_timestamps(active=True, point=True,
                                            range=True):
                d = date.start
                key = (d.year, d.month, d.day)
                self.events.setdefault(key, []).append(
                    Event(heading, date.start, date.end))
    # Sort events inside a day
    for key in self.events:
        self.events[key].sort()
def org_path2df(path: str) -> pd.DataFrame:
    """Load the org file at *path* and return it as a pandas DataFrame."""
    return org2df(load(path))
def uploadfile(self):
    """Parse the keywords org file and load it into a fresh SQLite DB.

    Recreates ``dict/bagofwords.db`` with a ``words`` table and inserts
    one row per org heading: (md5-of-heading, heading, md5-of-parent).
    Top-level headings get an empty parent id.
    """
    if os.path.exists("dict/bagofwords.db"):
        os.remove("dict/bagofwords.db")
    client = sqlite3.connect("dict/bagofwords.db")
    try:
        db = client.cursor()
        db.execute("""CREATE TABLE words (id text, keyword text, parent text)""")
        data = org.load("dict/bagofwords.org")
        rows = []
        for node in data[1:]:
            keyword = node.heading.lower()
            # md5 of the lowercased heading is the stable row id
            # (was bound to `id`, shadowing the builtin).
            node_id = hashlib.md5(keyword.encode("utf-8")).hexdigest()
            if node.parent is data:
                parent_id = ""
            else:
                parent_heading = node.parent.heading.lower()
                parent_id = hashlib.md5(
                    parent_heading.encode("utf-8")).hexdigest()
            rows.append((node_id, keyword, parent_id))
        # Single batched insert instead of one execute per row.
        db.executemany("INSERT INTO words VALUES (?,?,?)", rows)
        client.commit()
    finally:
        # Fix: previously the connection leaked if parsing/insert raised.
        client.close()
def glucose_ketones_data() -> Iterable[Result]:
    """Yield glucose/ketone entries parsed from the blood-log org table;
    rows that fail to parse are yielded as RuntimeError with the cause set."""
    root = orgparse.load(config.blood_log)
    [node] = [x for x in root if x.heading == 'glucose/ketones']
    table = one_table(node)
    # todo some sort of sql-like interface for org tables might be ideal?
    for row in table.as_dicts:
        # Cell access and datetime parsing stay outside the try on purpose:
        # failures there propagate, matching the original behaviour.
        kets = row['ket']
        glus = row['glu']
        extra = row['notes']
        dt = parse_org_datetime(row['datetime'])
        try:
            assert isinstance(dt, datetime)
            ket = try_float(kets)
            glu = try_float(glus)
        except Exception as e:
            err = RuntimeError(f'While parsing {row}')
            err.__cause__ = e
            yield err
        else:
            yield Entry(
                dt=dt,
                ketones=ket,
                glucose=glu,
                extra=extra,
            )
def get_events(self):
    """Collect calendar events from every configured org file.

    Returns a dict keyed by org event id.  Scheduled timestamps produce
    events with an 'S-' id prefix; deadlines get 'D-' plus a title prefix.

    Raises:
        ValueError: if any configured org file fails to parse.
    """
    from orgparse import load
    import os.path
    events = {}
    for sync_info in self.__sync_list:
        try:
            root = load(os.path.expanduser(sync_info['org_file']))
        except ValueError as e:
            # Fix: typo "Faled" -> "Failed"; chain the original error.
            raise ValueError('Failed to parse {}: {} '.format(
                sync_info['org_file'], e)) from e
        # All scheduled entries first, then all deadlines (original order).
        for node in root[1:]:
            sched_event = self.__to_event(sync_info['calendar_id'], node,
                                          node.scheduled, id_prefix='S-')
            if sched_event is None:
                continue
            events[sched_event.get_org_id()] = sched_event
        for node in root[1:]:
            deadline_event = self.__to_event(sync_info['calendar_id'], node,
                                             node.deadline, id_prefix='D-',
                                             title_prefix='締切 : ')
            if deadline_event is None:
                continue
            events[deadline_event.get_org_id()] = deadline_event
    return events
def iter_sleep_table() -> Iterator[Result]:
    """Yield sleep-log entries (or errors) parsed from the org sleep table.

    Each row supplies dreams/mental scores and a wakeup note.  Rows that
    fail to parse are yielded as RuntimeError with the cause attached and,
    when the timestamp was parseable, the row's datetime recorded on it.
    """
    # TODO yield error if time is weird?
    # TODO make sure time is unique

    def parse_row(row):
        # Raw cell values.
        dreamss = row['dreams']
        mentals = row['mental']
        wakeup = row['wakeup']
        # 'dreams' cell: punctuation-stripped tokens, each expected in {0,1,2};
        # the token *set* indexes into _dream_score_map.
        dreams = set(rpunct(dreamss).split())
        extra = dreams.difference({'0', '1', '2'})
        assert len(extra) == 0, extra
        rdreams = _dream_score_map.get(frozenset({int(x) for x in dreams}), None)
        assert rdreams is not None
        # 'mental' cell: exactly one numeric token 0/1/2; extra words allowed.
        vals = {'0', '1', '2'}
        toks = set(rpunct(mentals).split())
        nums = list(toks.intersection(vals))
        assert len(nums) == 1, nums
        rmental = float(nums[0])
        extra = toks.difference(vals)
        # "sleepy" downgrades a perfect mental score by half a point.
        if rmental == 1.0 and 'sleepy' in extra:
            rmental -= 0.5  # meh
        return (rdreams, rmental, wakeup)

    import orgparse
    o = orgparse.load(user_config.sleep_log)
    table = one_table(o)
    # TODO use TypedTable, similar to cross_trainder df?
    for row in table.as_dicts:
        # Pre-built error object, reused by both failure paths below.
        ex = RuntimeError(f'While parsing {row}')
        # first try to determine the timestamp (for better exception message)
        try:
            dt = parse_org_datetime(row['date'])
            # todo hmm. not sure if should localize here... maybe make a version of localize that falls back on utc?
            dt = TZ.localize(dt)
        except Exception as e:
            ex.__cause__ = e
            yield ex
            continue
        # Timestamp known from here on: attach it to the shared error object
        # so a parse_row failure still reports when it happened.
        set_error_datetime(ex, dt)
        try:
            (dreams, mental, wakeup) = parse_row(row)
        except Exception as e:
            ex.__cause__ = e
            yield ex
            continue
        yield Entry(
            dt=dt,
            dreams=dreams,
            mental=mental,
            wakeup=wakeup,
        )
def org_to_roam(argv: Optional[Sequence[str]] = None) -> int:
    """Convert an org file to a Roam markdown page and print it.

    Args:
        argv: command-line arguments; ``None`` falls back to ``sys.argv[1:]``.

    Returns:
        0 on success.
    """
    parser = argparse.ArgumentParser(
        description="Convert org FILE to Roam markdown.",
    )
    parser.add_argument("file")
    # Fix: honor the argv parameter — it was accepted but silently ignored,
    # so callers could never pass their own arguments.
    args = parser.parse_args(argv)
    org_root = orgparse.load(args.file)
    roam_page = org_root_to_roam_page(org_root)
    print(roam_page)
    return 0
def getAgenda():
    """Gather all scheduled tasks from the agenda files, sorted by date."""
    tasks = []
    for agendaFile in settings["org_agenda_files"]:
        # Parse org file for any scheduled tasks
        orgRoot = orgparse.load(agendaFile)
        scheduled = getAllOrgScheduledTasks_Recursive(orgRoot)
        for pair in scheduled.values():
            # pair[0] is the task, pair[1] the schedule whose start we key on.
            tasks.append((convertDateDateTime(pair[1].start), pair[0]))
    return sorted(tasks, key=lambda entry: entry[0])
def find_replace_note(self, message):
    """Replace the heading of the note matching *message*'s id in the org file.

    Locates the node whose ID property equals the note id, substitutes the
    message text for its heading, and rewrites the file in place.
    """
    filepath = self.get_org_file()
    root = load(filepath)
    for node in root[1:]:
        if node.get_property('ID') == self.get_note_id(message):
            old_item = "{}".format(node)
            new_item = old_item.replace(node.heading, message.text)
            with open(filepath, 'r+') as f:
                file_source = f.read()
                replace_string = file_source.replace(old_item, new_item)
                # Fix: after read() the cursor sits at EOF, so the old code
                # *appended* the replaced text instead of overwriting.
                f.seek(0)
                f.write(replace_string)
                # Drop any leftover tail if the new content is shorter.
                f.truncate()
def ganttify(filename, output='gantt.png'):
    """Render the org file *filename* as a Gantt chart image at *output*.

    Relies on the module-level ``plan`` list, which is filled/ordered here
    and handed to plotly's figure factory.
    """
    root = load(filename)
    set_dates_in_parent_tasks(root[1:], plan)
    plan.reverse()
    palette = {'summary': 'rgb(0, 0, 0)', 'detail': 'rgb(50, 70, 238)'}
    fig = ff.create_gantt(plan, colors=palette, index_col='Type')
    # fig.show()
    fig.layout.title = "Build the dream home... or the Home build dream"
    fig.layout.yaxis['tickfont'] = {'family': 'Courier New', 'size': 8}
    fig.layout.xaxis['rangeselector'] = {}
    for axis in (fig.layout.yaxis, fig.layout.xaxis):
        axis['showgrid'] = True
    fig.write_image(output)
def overrides() -> Dict[str, str]:
    '''
    Manual overrides for some entries with typos etc, to simplify further automated parsing
    '''
    # to dump the initial table:
    # sqlite3 taplog.db 'SELECT printf("| %6d | %s |", _id, lower(note)) FROM log WHERE cat1="ex" ORDER BY lower(note)'
    import orgparse
    wlog = orgparse.load(user_config.workout_log)

    def _override_tables(n):
        # Only the 'Taplog overrides' heading contributes its tables.
        if n.heading != 'Taplog overrides':
            return []
        return [x for x in n.body_rich if isinstance(x, Table)]

    [table] = collect(wlog, _override_tables)
    return {row['id']: row['note'] for row in table.as_dicts}
def from_file(Self: "Org", fd: TextIO):
    """Build an Org object from an open org-file handle.

    The filename is expected to start with a ``YYYYmmddHHMMSS`` timestamp
    followed by ``-``.  The root node supplies the ID property and meta
    (title/category); headline nodes are counted per TODO state, with
    aggregate ALL_TODO / ALL_DONE tallies.
    """
    build = Builder(Klass=Self, immutable=True)
    parsed = orgparse.load(fd)

    # read timestamp from filename
    created = Path(fd.name).name.split("-", maxsplit=1)[0]
    created = datetime.strptime(created, "%Y%m%d%H%M%S")
    build.add(created=created)

    # parsing meta information from root node
    root = parsed[0]
    build.add(id=root.get_property("ID"))
    meta = _parse_meta(root.body)
    build.add(
        breadcrumbs=tuple(meta["title"].split(".")),
        category=meta.get("category", None),
    )

    nodes = 0
    states = Counter()
    for node in parsed[1:]:
        nodes += 1
        # skip aggregator nodes
        if node.heading in AGGREGATORS:
            continue
        if node.todo is not None:
            state = State(node.todo)
            states[state] += 1
            if state in TODO:
                states[State.ALL_TODO] += 1
            elif state in DONE:
                states[State.ALL_DONE] += 1
            else:
                # Fix: was `assert False, state`, which is silently stripped
                # under `python -O`; raise the same exception explicitly.
                raise AssertionError(state)
    build.add(nodes=nodes)
    build.add(states=dict(states))
    return build()
def metadataGetOrgProperties(metadata):
    """Read node properties from the org file backing *metadata* into
    ``metadata.properties``.

    PUBLISHED values are parsed as ``%Y-%m-%d`` dates; all other
    properties are copied verbatim.  Missing content files are reported
    and skipped.
    """
    if not os.path.exists(metadata.contentFile):
        print('ERROR: Could not find associated org file "{}" for content file "{}"\n'
              '\tThe file will be missing necessary metadata'.format(metadata.contentFile, metadata.renderedFilePath))
        return
    orgRoot = orgparse.load(metadata.contentFile)
    node = None
    for node in orgRoot[1:]:
        for property, value in node.properties.items():
            if property == "PUBLISHED":
                metadata.properties[property] = datetime.datetime.strptime(value, '%Y-%m-%d')
            else:
                metadata.properties[property] = value
    # Set TITLE from a node heading if it wasn't a property.
    # NOTE(review): `node` is the *last* node after the loop, although the
    # original comment said "first node" — preserved as-is; confirm intent.
    # Fix: guard against files with no headline nodes (NameError on `node`)
    # and against a TITLE key that was never set (KeyError on ["TITLE"]).
    if node is not None and not metadata.properties.get("TITLE"):
        metadata.properties["TITLE"] = node.heading
def process_file(self, filename: str):
    """Process an org file and save into database"""
    # Scan text before first heading to find tag hierarchies.
    # Tag hierarchies are preserved across all files to keep
    # the program simple.
    org_file = load(filename)
    # Fix: the pattern is constant, so compile it once instead of
    # recompiling on every line of the file.
    tag_re = regex.compile(
        r"^#\+tags:\s+[\[{]\s+([a-z_@]+)\s+:\s+(?:([a-z_@]+)\s+)+[\]}]",
        regex.I)
    for line in str(org_file).split('\n'):
        m = tag_re.match(line)
        if m:
            parent_tag = m.groups()[0]
            # .captures(2) collects every repetition of the child-tag group
            # (a feature of the third-party `regex` module).
            self.tag_hierarchy[parent_tag] = m.captures(2)
    self.__build_tag_ancestors()
    for node in org_file.children:
        self.__walk_org_node(filename, node)
    self.save()
def get_checklist_items(self):
    """Fetch the flight-planning checklist over WebDAV and return the
    headings of its items."""
    remote = '/checklist_flight_planning.org'
    local = '/tmp/checklist_flight_planning.org'
    self.webdav = webdav3.client.Client({
        'webdav_hostname': config['REPOSITORY_WEBDAV_URL'],
        'webdav_login': config['REPOSITORY_WEBDAV_USER'],
        'webdav_password': config['REPOSITORY_WEBDAV_PASSWORD'],
    })
    self.webdav.check(remote)
    self.webdav.download_sync(remote_path=remote, local_path=local)
    self.checklist = orgparse.load(local)
    # Items live under the seventh top-level node; [1:] skips that node's
    # own heading.  NOTE(review): index 6 is hard-coded — confirm the
    # checklist document's structure is stable.
    nodes = self.checklist.children[6][1:]
    return [node.heading for node in nodes]
def cross_trainer_data():
    """Yield cross-training entries from the 'Cross training' org table.

    Each row is converted to a dict via per-column mappers; rows that fail
    to convert yield ``{'error': ...}`` instead.
    """
    # FIXME some manual entries in python
    # I guess just convert them to org
    import orgparse

    # todo should use all org notes and just query from them?
    wlog = orgparse.load(config.workout_log)
    [table] = collect(
        wlog,
        lambda n: [] if n.heading != 'Cross training' else [x for x in n.body_rich if isinstance(x, Table)]
    )
    cross_table = TypedTable(table)

    def maybe(f):
        """Wrap parser *f* so that empty cells map to None."""
        def parse(s):
            if len(s) == 0:
                return None
            return f(s)
        return parse

    def parse_mm_ss(x: str) -> timedelta:
        """Parse a 'MM:SS' cell into a timedelta."""
        # Renamed from hs/ms: the two fields are minutes and seconds.
        mins, secs = x.split(':')
        return timedelta(seconds=int(mins) * 60 + int(secs))

    # todo eh. not sure if there is a way of getting around writing code...
    # I guess would be nice to have a means of specifying type in the column? maybe multirow column names??
    # need to look up org-mode standard..
    mappers = {
        'duration': parse_mm_ss,  # was a redundant `lambda s: parse_mm_ss(s)`
        'date'    : lambda s: tzify(parse_org_datetime(s)),
        'comment' : str,
    }
    for row in cross_table.as_dicts:
        # todo make more defensive, fallback on nan for individual fields??
        try:
            d = {}
            for k, v in row.items():
                # todo have something smarter... e.g. allow pandas to infer the type??
                mapper = mappers.get(k, maybe(float))
                d[k] = mapper(v)  # type: ignore[operator]
            yield d
        except Exception as e:
            # todo add parsing context
            yield {'error': str(e)}
def extract_from_file(fname: PathIsh) -> Results:
    """
    Note that org-mode doesn't keep timezone, so we don't really have choice but make it tz-agnostic
    """
    path = Path(fname)
    o = orgparse.load(str(path))
    root = o.root

    # Nodes without their own timestamp fall back to the file's mtime.
    fallback_dt = file_mtime(path)

    for wr in walk_node(node=root, dt=fallback_dt):
        # walk_node yields either an error or a (parsed, node) pair;
        # errors are passed straight through to the caller.
        if isinstance(wr, Exception):
            yield wr
            continue

        (parsed, node) = wr
        dt = parsed.dt
        assert dt is not None  # shouldn't be because of fallback
        for r in iter_org_urls(node):
            # TODO get body recursively? not sure
            try:
                # maybe use a similar technique as in exercise parser? e.g. descent until we mee a candidate that worth a separate context?
                tagss = '' if len(node.tags) == 0 else f' :{":".join(sorted(node.tags))}:'
                # TODO not sure... perhaps keep the whole heading intact? unclear how to handle file tags though
                ctx = parsed.heading + tagss + '\n' + get_body_compat(node)
            except Exception as e:
                # Context building is best-effort: report the error but keep
                # going with a placeholder context for this URL.
                yield e
                ctx = 'ERROR'  # TODO more context?

            if isinstance(r, Url):
                yield Visit(
                    url=r,
                    dt=dt,
                    locator=Loc.file(
                        fname,
                        line=getattr(node, 'linenumber', None),  # make it defensive so it works against older orgparse (pre 0.2)
                    ),
                    context=ctx,
                )
            else:  # error
                yield r
def dedupe_notes(orgdata):
    """Remove duplicate entries in an org-mode file."""
    orgtree = orgparse.load(orgdata)
    # Flatten the tree one level so level-2 nodes can be inspected directly.
    nodes_lvl1 = [child
                  for parent in orgtree.children
                  for child in parent.children]
    this_node = nodes_lvl1[0]
    ids_lvl1 = [n.properties['Custom_ID']
                for n in nodes_lvl1
                if 'Custom_ID' in n.properties]
    # Classify every repeated Custom_ID as an exact or inexact duplicate.
    exact_duplicates = []
    inexact_duplicates = []
    for nodeid in find_duplicates(ids_lvl1):
        matching_nodes = [n for n in nodes_lvl1
                          if n.properties.get('Custom_ID') == nodeid]
        is_exact_match, attrs = nodes_are_equal(*matching_nodes)
        if is_exact_match:
            exact_duplicates.append(nodeid)
        else:
            inexact_duplicates.append("{} ({})".format(nodeid, attrs))
    return exact_duplicates, inexact_duplicates
def _get_outlines(f: Path) -> Iterable[Res[OrgNode]]:
    """Depth-first search for nodes tagged with _TAG; yields a parse error
    instead when a tagged heading doesn't match exactly one kind."""
    def walk(node: OrgNode) -> Iterable[Res[OrgNode]]:
        if _TAG in node.tags:
            kinds = parser.kinds(node.heading)
            if len(kinds) == 1:
                # A clean match terminates this branch — children of a
                # matched node are not descended into.
                yield node
                return
            yield parse_error(
                RuntimeError(f'expected single match, got {kinds}'),
                org=node,
            )
        for child in node.children:
            yield from walk(child)

    yield from walk(orgparse.load(f))
def main():
    """Convert assets/test.org into assets/test.md (frontmatter, headings
    and source blocks)."""
    # Fix: both file handles were opened without ever being closed; use
    # context managers so they're released even on error.
    with open('assets/test.org') as raw_file:
        frontmatter = parse_frontmatter(raw_file)
    frontmatter = convert_frontmatter(frontmatter)
    root = load('assets/test.org')
    with open('assets/test.md', 'w') as output:
        for item in frontmatter:
            output.write(item)
        for n in root[1:]:
            output.write(make_heading(n))
            source = parse_source_block(n)
            if source:
                for item in source:
                    output.write(item)
def get_title(filename):
    """Best-effort extraction of an org file's title property.

    Falls back to looking in the root directory when the file is missing,
    handles titles stored as a property list, and ultimately returns the
    filename itself.
    """
    logging.info("Getting title of %s", filename)
    try:
        org = orgparse.load(filename)
    except FileNotFoundError as e:
        logging.info("FileNotFoundError: %s", e)
        logging.info("looking in root dir")
        base = os.path.basename(filename)
        if base == filename:
            # Fix: the bare name is also missing — recursing again would
            # loop forever on the same argument.
            return filename
        return get_title(base)
    except Exception as e:
        # Fix: was `except e:`, which references an undefined name and
        # would itself raise NameError; also fell through with `org`
        # unbound — return the fallback instead.
        logging.info("Error: %s", e)
        return filename
    try:
        x = org.get_file_property("title")
        if x:
            return x
    except RuntimeError as e:
        logging.info("Trying to see if title was list")
        x = org.get_file_property_list("title")
        if x:
            return ": ".join(x)
    except Exception:
        # Fix: was `except _:` — another undefined name.
        logging.info("Unrecognized error getting title, falling back")
    return filename  # fallback
def blood_tests_data() -> Iterable[Result]:
    """Yield blood-test entries parsed from the blood-tests org table;
    rows that fail to parse are yielded as RuntimeError with cause set."""
    root = orgparse.load(config.blood_tests_log)
    tbl = one_table(root)
    for row in tbl.as_dicts:
        try:
            dt = parse_org_datetime(row['datetime'])
            assert isinstance(dt, datetime), dt

            def fnum(col):
                # Numeric columns: empty/invalid cells handled by try_float.
                return try_float(row[col])

            yield Entry(
                dt=dt,
                vitamin_d=fnum('VD nm/L'),
                vitamin_b12=fnum('B12 pm/L'),
                hdl=fnum('HDL mm/L'),
                ldl=fnum('LDL mm/L'),
                triglycerides=fnum('Trig mm/L'),
                source=row['source'],
                extra=row['notes'],
            )
        except Exception as e:
            err = RuntimeError(f'While parsing {row}')
            err.__cause__ = e
            yield err
def _iterate(self, f: Path) -> Iterable[OrgNote]:
    """Yield an OrgNote for every node (root included) of the org file *f*."""
    for node in orgparse.load(f):
        yield to_note(node)
def collect_all(self, collector) -> Iterable[orgparse.OrgNode]:
    """Run *collector* over every configured org file, chaining the results."""
    for path in self.files:
        tree = orgparse.load(path)
        yield from collect(tree, collector)
tx.run('UNWIND $nodes as row ' 'MATCH (atom:Atom {id: row.id}) ' 'SET atom += row.properties ', nodes=nodes) def document_to_neo4j(root, nodes, driver): with driver.session() as session: session.write_transaction(add_atoms, nodes) session.write_transaction(add_root, root) def parse_arguments(): parser = argparse.ArgumentParser(description='Upload directory of org files to neo4j.') parser.add_argument('--org_directory', help='Path to directory with org files in it') parser.add_argument('--neo4j_db_uri', help='URI for neo4j DB') parser.add_argument('--neo4j_db_username', help='username for neo4j DB') parser.add_argument('--neo4j_db_password', help='password for neo4j DB') return parser.parse_args() if __name__ == '__main__': args = parse_arguments() org_directory = args.org_directory or input('Specify org directory here: ') neo4j_db_uri = args.neo4j_db_uri or os.getenv('NEO4J_DB_URI') or input('Specify Neo4j DB URI here: ') neo4j_db_username = args.neo4j_db_username or os.getenv('NEO4J_DB_USER') or input('Specify Neo4j DB username here: ') neo4j_db_password = args.neo4j_db_password or os.getenv('NEO4J_DB_PASS') or getpass('Specify Neo4j DB password here: ') driver = GraphDatabase.driver(neo4j_db_uri, auth=(neo4j_db_username, neo4j_db_password)) for org_file in tqdm(glob(os.path.join(os.path.expanduser(org_directory), '*.org'))): orgparse_tree = orgparse.load(org_file) filename = os.path.basename(org_file) root, nodes = extract_nodes(orgparse_tree, filename) document_to_neo4j(root, nodes, driver) driver.close()
def list_todos(file) -> List[Text]:
    """Return the headings of every node in *file* whose keyword is TODO."""
    org = load(file)
    return [node.heading for node in org[1:] if node.todo == "TODO"]
def __init__(self, org_file="final.org", anki_dir="tmp"):
    """Load *org_file* and initialise parsing state.

    Args:
        org_file: path of the org file to parse.
        anki_dir: target directory name.
    """
    # NOTE(review): anki_dir is not used in this constructor — presumably
    # consumed elsewhere in the class; confirm.
    self.tree = orgparse.load(org_file)
    self.max_depth = self.get_max_depth()
    self.mode = Parser.INCLUDE_MODE
    # Cards accumulated during parsing.
    self.basic_cards = {}
    self.cloze_cards = []
def get_orgnodes(filename):
    """Cached version of `orgparse.load`, keyed on the file's mtime."""
    def load_nodes():
        # Skip the root node (index 0); materialize the rest as a list.
        return list(orgparse.load(filename)[1:])
    return get_cache("org:{0}".format(filename), load_nodes,
                     os.path.getmtime(filename))
def parse(self):
    """Parse the org file into a list of Slide objects.

    Each top-level node becomes one slide.  fg/bg/effect node properties
    are emitted as HTML-comment directives, headings become Heading
    elements, and bodies are split into plain text and (possibly nested)
    list elements.
    """
    root = orgparse.load(self.filename)
    slides = []
    buffer = []
    # iterates over the root *, aka the slides
    for i in root.children:
        level = 0
        for j in i[:]:
            # Slide-level display properties are re-read for every
            # sub-node and emitted as presentation directives.
            fg = i.get_property("fg")
            if fg is not None:
                buffer.append(
                    BlockHtml(obj={
                        "type": "block_html",
                        "text": "<!-- fg=" + fg + " -->"
                    }))
            bg = i.get_property("bg")
            if bg is not None:
                buffer.append(
                    BlockHtml(obj={
                        "type": "block_html",
                        "text": "<!-- bg=" + bg + " -->"
                    }))
            effect = i.get_property("effect")
            if effect is not None:
                buffer.append(
                    BlockHtml(
                        obj={
                            "type": "block_html",
                            "text": "<!-- effect=" + effect + " -->",
                        }))
            if bool(j.heading):
                # Heading depth grows with each headed sub-node of the slide.
                level = level + 1
                buffer.append(
                    Heading(obj={
                        "children": [{
                            "text": j.heading
                        }],
                        "level": level
                    }))
            if bool(j.body):
                t = str(j.body)
                # if true that means we have unordered lists, and we gotta parse them ;-;
                if "\n- " in t:
                    lines = t.split("\n")
                    text = []
                    # iterates over the lines
                    for line in lines:
                        # checks if it's part of a list
                        if line.strip().startswith("- "):
                            # gets how many spaces there are, so that the level can be calculated
                            spaces = line.split("-")[0]
                            level = 1
                            # calculates level (two leading spaces per level)
                            # NOTE(review): this loop rebinds `i` — the slide
                            # node — to an int; a later i.get_property call in
                            # this slide would then fail. Confirm intended.
                            for i in range(len(spaces)):
                                level = level + 0.5
                            level = int(level)
                            # checks if we have to make a new list element, because the last one is not a list
                            if (len(text) != 0 and type(text[-1]) is not str
                                    and text[-1].get("type") == "list"):
                                # if it's a item on another level, we have to append it differently, the markdown parsing lib is weird
                                #print(level, text)
                                if level >= 2:
                                    # Descend to the list children at the
                                    # right nesting depth before appending.
                                    child = text[-1]["children"][-1][
                                        'children']
                                    for i in range(level - 1):
                                        child = child[-1][
                                            'children']  #[-1]['children']
                                    #print()
                                    child.append({
                                        "type": "list",
                                        "children":
                                        [self.__list__(line, level)],
                                        'ordered': False,
                                        'level': level
                                    })
                                else:
                                    text[-1].get("children").append(
                                        self.__list__(line, level))
                            else:
                                text.append({
                                    "type": "list",
                                    "children": [self.__list__(line, 1)],
                                })
                        else:
                            # Non-list line: extend the previous plain-text
                            # element or start a new one.
                            if (len(text) != 0 and text[-1] != 0
                                    and type(text[-1]) is not dict):
                                text[-1] = text[-1] + ("\n" + line)
                            else:
                                text.append(line)
                    # NOTE(review): debug print left in — consider removing.
                    print(text)
                    for element in text:
                        if type(element) is str:
                            buffer.append(self.__text__(element))
                        elif (type(element) is dict
                              and element.get("type") == "list"):
                            buffer.append(List(obj=element))
                else:
                    buffer.append(self.__text__(t))
        # One finished slide per top-level node; reset buffer for the next.
        slides.append(Slide(elements=buffer))
        buffer = []
    return slides
def preprocess_string(self, file_source: str) -> str:
    """Expand OL/OI/OIS include commands found in *file_source*.

    Each command line names a node title, optionally followed by
    ``@<path>`` pointing at another org file (an empty path falls back to
    the originating file).  The referenced node's body — and, for OL/OI,
    its children — is spliced into the output.  Non-command lines pass
    through unchanged.  Returns "" after aborting on an unresolvable
    title.
    """
    res = ""
    for line_num, line in enumerate(file_source.splitlines(keepends=True), start=1):
        m = self.REGEXP_COMMAND.search(line)
        if m:
            # Command depth is given by the number of leading stars.
            level = len(m.group("stars"))
            command = m.group("command")
            parts = m.group("args").split("@")
            if len(parts) == 1:
                include_title = parts[0]
                include_path = self.origin_file
            else:
                include_title = parts[0]
                include_path = parts[1] or self.origin_file
            include_org_file = orgparse.load(str(include_path))
            # Track which file is being included (used by helpers).
            self.current_file = include_path
            if command == "OL":
                # OL: keep the command line itself, then splice the node's
                # body and its children one level deeper.
                res += line
                node = self._find_node_in_tree(include_title, include_org_file)
                if node is None:
                    self._abort_preprocessing(include_title, line_num)
                    return ""
                res += self._process_body(node._lines[1:])
                for child in node.children:
                    res += self._include_node(level + 1, child)
            elif command == "OI":
                # OI: inline body and children at the current level,
                # dropping the command line.
                node = self._find_node_in_tree(include_title, include_org_file)
                if node is None:
                    self._abort_preprocessing(include_title, line_num)
                    return ""
                res += self._process_body(node._lines[1:])
                for child in node.children:
                    res += self._include_node(level, child)
            elif command == "OIS":
                # OIS: inline only the node's body, no children.
                node = self._find_node_in_tree(include_title, include_org_file)
                if node is None:
                    self._abort_preprocessing(include_title, line_num)
                    return ""
                res += self._process_body(node._lines[1:])
            else:
                # This branch is here in case any of the titles start with [TAG] prefix
                res += line
        else:
            res += line
    return res
def get_orgnodes(filename):
    """Cached version of `orgparse.load` (cache invalidated by file mtime)."""
    cache_key = 'org:{0}'.format(filename)
    mtime = os.path.getmtime(filename)
    # The loader drops the root node and materializes the rest as a list.
    return get_cache(cache_key,
                     lambda: list(orgparse.load(filename)[1:]),
                     mtime)