Exemplo n.º 1
0
def set_titles_from_spreadsheet(dir_path, dry_run=False):
    """Fix adhyaaya titles from a spreadsheet worksheet, then devanaagarify them.

    Args:
        dir_path: Passed to ``mahaabhaarata.get_adhyaaya_md_files`` to obtain
            the markdown files to process.
        dry_run: Forwarded unchanged to both ``MdFile`` calls.
    """
    # Keyword spelling ("spreadhsheet_id") is the one this call site has always
    # used; it is part of the callee's interface and is kept as-is.
    title_source = {
        "spreadhsheet_id": "1sNH1AWhhoa5VATqMdLbF652s7srTG0Raa6K-sCwDR-8",
        "worksheet_name": "कुम्भकोणाध्यायाः",
        "id_column": "क्रमाङ्कम्",
        "title_column": "अन्तिमशीर्षिका",
    }
    MdFile.fix_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path),
        md_file_to_id=mahaabhaarata.get_adhyaaya_id,
        dry_run=dry_run,
        **title_source)
    # The file list is deliberately fetched a second time rather than reused.
    MdFile.devanaagarify_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path),
        dry_run=dry_run)
def dump_text(base_dir):
    """Scrape one page per unit of the vAjasaneyi samhitA into markdown files.

    For every index returned by ``text_data.get_subunit_list`` the matching
    page is loaded in ``browser`` and the text of the element with id
    ``videotext`` is written to ``<base_dir>/NN.md``. Existing files are
    skipped; pages without that element are logged and skipped.

    Args:
        base_dir: Directory in which the ``NN.md`` files are written.
    """
    data_root = os.path.dirname(text_data.__file__)
    unit_info_file = os.path.join(data_root, "vedaH/vAjasaneyi/samhitA.json")
    chapter_indices = text_data.get_subunit_list(json_file=unit_info_file,
                                                 unit_path_list=[])

    for chapter_index in chapter_indices:
        logging.info("adhyAya %d", chapter_index)

        outfile_path = os.path.join(base_dir, "%02d.md" % chapter_index)
        if os.path.exists(outfile_path):
            logging.info("Skipping " + outfile_path)
            continue

        url = "http://vedicheritage.gov.in/samhitas/yajurveda/shukla-yajurveda/vajasaneyi-kanva-samhita-chapter-%02d/" % chapter_index
        logging.info("url %s to %s", url, outfile_path)
        browser.get(url=url)
        try:
            page_text = browser.find_element_by_id("videotext").text
            # Two trailing spaces before the newline: a markdown hard break.
            page_text = page_text.replace("\n", "  \n")
            title = sanscript.transliterate("%02d" % chapter_index,
                                            sanscript.HK,
                                            sanscript.DEVANAGARI)
            MdFile(file_path=outfile_path).dump_to_file(
                metadata={"title": title}, md=page_text, dry_run=False)
        except NoSuchElementException:
            logging.warning("Page missing! %s ", url)
Exemplo n.º 3
0
def dump_devanaagarii(source_html, dest_file):
    """Convert an IAST HTML file into a Devanagari markdown file.

    The page text is split at the first line equal to the gretil.htm URL:
    lines before it become the "Intro" section verbatim, lines after it are
    transliterated from IAST to Devanagari. Nothing is done when
    ``dest_file`` already exists.

    Args:
        source_html: Path of the UTF-8 HTML file to read.
        dest_file: Path of the markdown file to write (TOML front matter).
    """
    if os.path.exists(dest_file):
        logging.warning("Skipping %s as it exists", dest_file)
        return
    logging.info("Processing %s to %s", source_html, dest_file)

    marker_line = "http://gretil.sub.uni-goettingen.de/gretil.htm"

    def is_before_marker(line):
        return line.strip() != marker_line

    with codecs.open(source_html, "r", 'utf-8') as file_in:
        contents = file_in.read()

    soup = BeautifulSoup(contents, 'lxml')
    metadata = {"title": soup.title.text.strip()}
    lines = soup.text.split("\n")

    intro_body = "  \n".join(itertools.takewhile(is_before_marker, lines))
    intro = "\n\n## Intro\n%s" % intro_body

    # dropwhile yields the marker line itself first; the [1:] slice drops it.
    from_marker = list(itertools.dropwhile(is_before_marker, lines))
    text = "  \n".join(from_marker[1:])
    # Collapse runs of three or more hard line breaks into a paragraph break.
    text = regex.sub("(  \n){3,}", "\n\n", text)
    text = sanscript.transliterate(data=text,
                                   _from=sanscript.IAST,
                                   _to=sanscript.DEVANAGARI)
    text = "%s\n\n## पाठः\n%s" % (intro, text)

    MdFile(file_path=dest_file, frontmatter_type="toml").dump_to_file(
        metadata=metadata, md=text, dry_run=False)
Exemplo n.º 4
0
def dump_text(base_dir):
    """Scrape every kaanDa/subunit page of the shaunaka samhitA into markdown.

    Unit indices come from ``text_data.get_subunit_list``. Each page is loaded
    in ``browser`` and the text of the element with id ``videotext`` is written
    to ``<base_dir>/KK/SSS.md``. Files that already exist are skipped.

    Args:
        base_dir: Root directory under which the ``KK/SSS.md`` files are written.
    """
    unit_info_file = os.path.join(os.path.dirname(text_data.__file__),
                                  "vedaH/shaunaka/samhitA.json")
    kaanda_indices = text_data.get_subunit_list(json_file=unit_info_file,
                                                unit_path_list=[])

    for kaanda_index in kaanda_indices:
        suukta_indices = text_data.get_subunit_list(
            json_file=unit_info_file, unit_path_list=[kaanda_index])
        for suukta_index in suukta_indices:
            logging.info("kaanDa %d adhyaaya %d", kaanda_index, suukta_index)

            kaanda_dir = os.path.join(base_dir, "%02d" % (kaanda_index))
            outfile_path = os.path.join(kaanda_dir, "%03d.md" % suukta_index)
            if os.path.exists(outfile_path):
                logging.info("Skipping " + outfile_path)
                continue

            url = "http://vedicheritage.gov.in/samhitas/atharvaveda-samhitas/shaunaka-samhita/kanda-%02d-sukta-%03d/" % (
                kaanda_index, suukta_index)
            logging.info("url %s to %s", url, outfile_path)
            browser.get(url=url)

            page_text = browser.find_element_by_id("videotext").text
            page_text = page_text.replace("\n", "  \n")

            # Use the first <strong> inside #videotext, when present, as the
            # textual part of the title.
            heading_tags = browser.find_elements_by_css_selector(
                "#videotext  strong")
            if heading_tags:
                title = "%03d %s" % (suukta_index, heading_tags[0].text)
            else:
                title = "%03d" % suukta_index
            title = sanscript.transliterate(title, sanscript.HK,
                                            sanscript.DEVANAGARI)

            MdFile(file_path=outfile_path).dump_to_file(
                metadata={"title": title}, md=page_text, dry_run=False)
Exemplo n.º 5
0
def dump_markdown(src_file, dest_file):
    """Write ``src_file``'s text and metadata to ``dest_file`` as markdown.

    The ``title`` metadata entry is set to the Devanagari transliteration of
    the ``itxtitle`` entry (read as OPTITRANS).

    Args:
        src_file: Source file handed to ``get_metadata`` and ``get_text``.
        dest_file: Path of the markdown file to write (TOML front matter).
    """
    logging.info("Processing %s to %s", src_file, dest_file)
    metadata = get_metadata(src_file=src_file)
    body = get_text(src_file=src_file)
    devanagari_title = sanscript.transliterate(data=metadata["itxtitle"],
                                               _from=sanscript.OPTITRANS,
                                               _to=sanscript.DEVANAGARI)
    metadata["title"] = devanagari_title
    MdFile(file_path=dest_file, frontmatter_type=MdFile.TOML).dump_to_file(
        metadata=metadata, md=body, dry_run=False)
Exemplo n.º 6
0
def fix_paths(dir_path, dry_run=False):
    """Move ``2*.md`` files under ``dir_path`` into ``<year>/<month>/`` folders.

    Year and month are the first two ``-``-separated parts of each file name.
    ``MdFile.fix_index_files`` is run on ``dir_path`` afterwards.

    Args:
        dir_path: Root directory that is searched recursively.
        dry_run: When true, moves are only logged; the flag is also forwarded
            to ``MdFile.fix_index_files``.
    """
    pattern = os.path.join(dir_path, '**/2*.md')
    for file_path in glob.glob(pattern, recursive=True):
        base_name = os.path.basename(file_path)
        name_parts = base_name.split("-")
        year_str = name_parts[0]
        month_str = name_parts[1]
        dest_path = os.path.join(dir_path, year_str, month_str, base_name)
        logging.info("Move %s to %s", file_path, dest_path)
        if dry_run:
            continue
        os.makedirs(name=os.path.dirname(dest_path), exist_ok=True)
        shutil.move(src=file_path, dst=dest_path)
    MdFile.fix_index_files(dir_path=dir_path, dry_run=dry_run)
Exemplo n.º 7
0
def dump_all_texts(dest_dir, overwrite=False):
    """Dump every text linked from the adishila index page into ``dest_dir``.

    Each link is fetched with ``get_text`` to obtain ``(title, text)``; the
    title determines the output file name.

    Args:
        dest_dir: Directory receiving one ``<title>.md`` file per link.
        overwrite: When false, files that already exist are left untouched.
    """
    soup = scraping.get_soup(url="https://adishila.com/unicodetxt-htm/")
    for link in soup.select("div.wp-block-group a"):
        # The text has to be fetched before the existence check because the
        # file name is derived from the fetched title.
        title, text = get_text(link["href"])
        filename = file_helper.clean_file_path("%s.md" % title)
        dest_path = os.path.join(dest_dir, filename)
        if overwrite or not os.path.exists(dest_path):
            logging.info("Getting %s", link["href"])
            MdFile(file_path=dest_path,
                   frontmatter_type=MdFile.TOML).dump_to_file(
                       metadata={"title": title}, md=text, dry_run=False)
        else:
            logging.warning("Skipping %s since it exists", dest_path)
Exemplo n.º 8
0
def get_adhyaaya_md_files(md_file_path):
    """Return the markdown files under ``md_file_path`` whose name has 3 digits.

    Args:
        md_file_path: Directory searched with the ``**/*.md`` pattern.

    Returns:
        Whatever ``MdFile.get_md_files_from_path`` returns for files whose
        base name contains three consecutive digits.
    """

    def has_three_digit_run(x):
        matches = regex.findall("\\d\\d\\d", os.path.basename(x))
        return len(matches) > 0

    return MdFile.get_md_files_from_path(dir_path=md_file_path,
                                         file_pattern="**/*.md",
                                         file_name_filter=has_three_digit_run)
Exemplo n.º 9
0
def scrape_post_markdown(url, dir_path, dry_run):
    """Import one blog post into a markdown file named after its URL path.

    A path such as ``/2020/06/08/some-slug/`` becomes the file name
    ``2020-06-08_some-slug.md`` inside ``dir_path``. Nothing is fetched when
    that file already exists.

    Args:
        url: Address of the post.
        dir_path: Directory in which the markdown file is created.
        dry_run: Forwarded to ``MdFile.import_content_with_pandoc``.
    """
    # Derive the file name from the URL path, e.g.
    # https://manasataramgini.wordpress.com/2020/06/08/pandemic-days-the-fizz-is-out-of-the-bottle/
    url_path = urlsplit(url=url).path.strip()
    file_name = regex.sub("/(....)/(..)/(..)/(.+)/", r"\1-\2-\3_\4.md", url_path)
    joined_path = os.path.join(dir_path, file_name)
    file_path = file_helper.clean_file_path(file_path=joined_path)

    if os.path.exists(file_path):
        logging.warning("Skipping %s : exists", file_name)
        return

    title, post_html = get_post_html(url=url)
    logging.info("Dumping %s to %s with title %s.", url, file_path, title)

    target = MdFile(file_path=file_path, frontmatter_type=MdFile.TOML)
    target.import_content_with_pandoc(metadata={"title": title},
                                      content=post_html,
                                      source_format="html",
                                      dry_run=dry_run)
Exemplo n.º 10
0
def dump_text(start_url,
              out_path,
              base_url="https://sa.wikisource.org/",
              transliteration_source=None,
              dry_run=False):
    """Dump a chain of linked pages, starting at ``start_url``, into ``out_path``.

    Pages are followed through the ``div.gen_header_forelink a`` link. After
    the dump, file names are derived from titles and index files are fixed.

    Args:
        start_url: First page of the series.
        out_path: Directory receiving the dumped files.
        base_url: Base against which the next-page link is resolved.
        transliteration_source: Passed to ``set_filenames_from_titles`` as
            ``transliteration_source`` and to ``fix_index_files`` as
            ``transliteration_target``.
        dry_run: Forwarded to every helper that accepts it.
    """

    def next_url_getter(soup):
        return souper.next_url_from_soup_css(soup=soup,
                                             css="div.gen_header_forelink a",
                                             base_url=base_url)

    def html_fixer(soup):
        # (css selector, replacement tag) pairs, applied in this order.
        tag_swaps = (
            ("big", "h2"),
            ("table", "div"),
            ("tbody", "div"),
            ("span[style*=\"font-weight:bold;\"]", "b"),
        )
        for css_selector, tag_name in tag_swaps:
            souper.tag_replacer(soup=soup,
                                css_selector=css_selector,
                                tag_name=tag_name)
        souper.tag_remover(soup=soup, css_selector=".noprint")

    def title_maker(soup, title_prefix):
        raw_title = souper.title_from_element(soup=soup,
                                              title_css_selector="h1",
                                              title_prefix=title_prefix)
        # Replace the span from a space up to the last "/" with one space.
        return regex.sub(" .+/", " ", raw_title).strip()

    def dumper(url, outfile_path, title_prefix, dry_run):
        return souper.dump_text_from_element(
            url=url,
            outfile_path=outfile_path,
            text_css_selector="div.mw-parser-output",
            title_maker=title_maker,
            title_prefix=title_prefix,
            html_fixer=html_fixer,
            dry_run=dry_run)

    souper.dump_series(start_url=start_url,
                       out_path=out_path,
                       dumper=dumper,
                       next_url_getter=next_url_getter,
                       dry_run=dry_run)
    MdFile.set_filenames_from_titles(
        dir_path=out_path,
        dry_run=dry_run,
        transliteration_source=transliteration_source)
    MdFile.fix_index_files(dir_path=out_path,
                           dry_run=dry_run,
                           transliteration_target=transliteration_source)
Exemplo n.º 11
0
def process_catalog_page_selenium(url, out_dir):
    """Dump the text linked from a catalog page into a markdown file.

    The page is loaded in ``browser``. If it offers a "View in Unicode
    transliteration" link, front matter is extracted from the
    ``.catalog_record_body`` element and the linked text is written to the
    path computed by ``get_file_path``.

    The function returns without writing when the page has no such link or
    when the destination file already exists.

    Args:
        url: Address of the catalog page.
        out_dir: Output directory handed to ``get_file_path``.
    """
    logging.info("Processing catalog %s", url)
    browser.get(url=url)
    text_links = browser.find_elements_by_link_text("View in Unicode transliteration")
    if not text_links:
        logging.warning("%s does not have text", url)
        return

    catalog_body = browser.find_element_by_css_selector(".catalog_record_body")
    metadata = get_front_matter(catalog_body.get_attribute('innerHTML'))
    logging.info(metadata)

    dest_file_path = get_file_path(out_dir=out_dir, title_iast=metadata["title_iast"], author_iast=metadata.get("author_iast", ""), catalog_number=metadata.get("Catalog number", ""))
    if os.path.exists(dest_file_path):
        logging.warning("Skipping %s - already exists.", dest_file_path)
        # Actually skip: previously execution fell through after this log
        # message, re-downloading the text and overwriting the existing file.
        return

    text_url = text_links[0].get_attribute("href")
    file = MdFile(file_path=dest_file_path, frontmatter_type="toml")
    text = get_text(url=text_url)
    # Two trailing spaces before each newline: markdown hard line breaks.
    text = text.replace("\n", "  \n")
    file.dump_to_file(metadata=metadata, md=text, dry_run=False)
Exemplo n.º 12
0
def transform():
    """Merge per-Rk JSON files into one markdown file per sUkta.

    Every JSON file under the hard-coded sAyaNabhAShyam directory contributes
    one section (attributes, saMhitA lines, padapATha lines, bhAShya) to the
    markdown of its ``mandala/sukta``. The collected markdown is then written
    to ``<dest_dir>/MM/SSS.md``, where ``dest_dir`` is a name defined outside
    this function.
    """

    def as_line_list(lines):
        # A lone string stands for a single line.
        return [lines] if isinstance(lines, str) else lines

    json_paths = glob.glob(
        "/home/vvasuki/sanskrit/raw_etexts/veda/Rg/shakala/saMhitA/sAyaNabhAShyam/*/*/*.json",
        recursive=True)
    suukta_id_to_md = {}
    for json_path in sorted(json_paths):
        with codecs.open(json_path, "r") as fp:
            rk = json.load(fp)

        classification = rk["classification"]
        suukta_id = "%02d/%03d" % (int(classification["mandala"]),
                                   int(classification["sukta"]))
        # Strip anything that looks like a markup tag from the commentary.
        bhaashya = regex.sub("<.+?>", "", rk["sayanaBhashya"])
        rk_number = sanscript.transliterate(
            "%02d" % int(classification["rik"]), sanscript.IAST,
            sanscript.DEVANAGARI)
        attributes = rk["attribute"]
        attribute_str = "%s। %s। %s।" % (attributes["devata"],
                                         attributes["rishi"],
                                         attributes["chandas"])
        padapaatha_lines = as_line_list(rk["padapaatha"]["lines"])
        samhita_lines = as_line_list(rk["samhitaAux"]["lines"])
        rk_md = "%s\n\n%s %s॥\n\n%s\n\n%s" % (
            attribute_str, "  \n".join(samhita_lines), rk_number,
            "  \n".join(padapaatha_lines), bhaashya)
        if bhaashya == "":
            logging.warning("No bhAShya for %s", rk["id"])
        previous_md = suukta_id_to_md.get(suukta_id, "")
        suukta_id_to_md[suukta_id] = previous_md + "\n\n%s" % rk_md

    for suukta_id, suukta_md in suukta_id_to_md.items():
        dest_path = os.path.join(dest_dir, suukta_id + ".md")
        md_file = MdFile(file_path=dest_path)
        # Only the sUkta number (the part after the slash) forms the title.
        title = sanscript.transliterate(
            suukta_id.split("/")[-1], sanscript.IAST, sanscript.DEVANAGARI)
        md_file.dump_to_file(metadata={"title": title},
                             md=suukta_md,
                             dry_run=False)
Exemplo n.º 13
0
def test_panchanga_chennai_2019():
    """Check the generated Chennai 2019 markdown against the stored copy.

    The markdown is always written to the ``.local`` file. If the stored
    reference file is missing it is regenerated first, so the comparison then
    passes by construction.
    """
    panchaanga_2019 = Panchaanga.read_from_file(
        filename=os.path.join(TEST_DATA_PATH, 'Chennai-2019.json'))
    # We dump to md.txt rather than md to avoid slow checks on intellij ide.
    orig_md_file = os.path.join(TEST_DATA_PATH,
                                'Chennai-2019-devanagari.md.txt')
    current_md_output = os.path.join(TEST_DATA_PATH,
                                     'Chennai-2019-devanagari.md.txt.local')

    def write_md(file_path):
        target = MdFile(file_path=file_path)
        target.dump_to_file(metadata={"title": str(2019)},
                            md=md.make_md(panchaanga=panchaanga_2019),
                            dry_run=False)

    write_md(current_md_output)
    if not os.path.exists(orig_md_file):
        logging.warning(
            "%s not present. Assuming that it was deliberately deleted to update test files.",
            orig_md_file)
        write_md(orig_md_file)

    with open(orig_md_file) as orig_tex, open(current_md_output) as current_tex:
        assert current_tex.read() == orig_tex.read()
Exemplo n.º 14
0
def dump_summary(year, city, script=xsanscript.DEVANAGARI, computation_system=ComputationSystem.MULTI_NEW_MOON_SIDEREAL_MONTH_ADHIKA__CHITRA_180):
  """Write a TOML table and a summary markdown page for one Gregorian year.

  The panchaanga for ``city`` and ``year`` is obtained (precomputed data is
  allowed), converted with ``to_table_dict`` and dumped to
  ``<canonical path>.toml``. Index files under ``output_dir`` (a name defined
  outside this function) are fixed, and ``<canonical path>_summary.md`` is
  written with the computation parameters and a reference to the TOML table.

  Args:
    year: Year to summarise; formatted with ``%d``.
    city: City object; its ``name`` attribute is used for logging.
    script: Script handed to ``get_computation_parameters_md``.
    computation_system: Computation system used to obtain the panchaanga.
  """
  year_type = era.ERA_GREGORIAN
  logging.info("Generating summary panchaanga for %s year %d (%s), with computation system %s ", city.name, year, year_type, str(computation_system))
  panchaanga = annual.get_panchaanga_for_year(
    city=city,
    year=year,
    computation_system=computation_system,
    year_type=year_type,
    allow_precomputed=True)
  year_table = to_table_dict(panchaanga=panchaanga)
  out_path = get_canonical_path(
    city=panchaanga.city.name,
    computation_system_str=str(panchaanga.computation_system),
    year=year,
    year_type=year_type)
  os.makedirs(os.path.dirname(out_path), exist_ok=True)
  toml_path = out_path + ".toml"
  with codecs.open(toml_path, "w") as fp:
    toml.dump(year_table, fp)
  MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)

  computation_params = get_computation_parameters_md(panchaanga=panchaanga, scripts=[script])
  # The second line of the template keeps its two leading spaces.
  md_template = (
    "## Intro\n%s\n\n## Table\n"
    '  <div class="spreadsheet" src="../%s.toml" fullHeightWithRowsPerScreen=4> </div>')
  md_text = md_template % (computation_params, str(year))
  summary_file = MdFile(file_path=out_path + "_summary.md")
  summary_file.dump_to_file(metadata={"title": "%d Summary" % (year)}, md=md_text, dry_run=False)
Exemplo n.º 15
0
from doc_curation.md_helper import MdFile

# Detach every handler currently on the root logger; logging.basicConfig is a
# no-op while the root logger still has handlers.
for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
logging.basicConfig(
    format="%(levelname)s:%(asctime)s:%(module)s:%(lineno)d %(message)s",
    level=logging.DEBUG)


def set_titles_from_spreadsheet(dir_path, dry_run=False):
    """Fix adhyaaya titles from a spreadsheet worksheet, then devanaagarify them.

    Args:
        dir_path: Passed to ``mahaabhaarata.get_adhyaaya_md_files`` to obtain
            the markdown files to process.
        dry_run: Forwarded unchanged to both ``MdFile`` calls.
    """
    # Keyword spelling ("spreadhsheet_id") is the one this call site has always
    # used; it is part of the callee's interface and is kept as-is.
    title_source = {
        "spreadhsheet_id": "1sNH1AWhhoa5VATqMdLbF652s7srTG0Raa6K-sCwDR-8",
        "worksheet_name": "कुम्भकोणाध्यायाः",
        "id_column": "क्रमाङ्कम्",
        "title_column": "अन्तिमशीर्षिका",
    }
    MdFile.fix_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path),
        md_file_to_id=mahaabhaarata.get_adhyaaya_id,
        dry_run=dry_run,
        **title_source)
    # The file list is deliberately fetched a second time rather than reused.
    MdFile.devanaagarify_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path),
        dry_run=dry_run)


def get_upaakhyaana_and_titles_from_path(dir_path, file_pattern="**/*.md"):
    """Print one ``upaakhyaana<TAB>title`` line per markdown file.

    Args:
        dir_path: Directory handed to ``MdFile.get_md_files_from_path``.
        file_pattern: Pattern selecting the files inside ``dir_path``.
    """
    md_files = MdFile.get_md_files_from_path(dir_path=dir_path,
                                             file_pattern=file_pattern)
    # All titles are collected before any upaakhyaana, as separate passes.
    titles = [md_file.get_title() for md_file in md_files]
    upaakhyaanas = [md_file.get_upaakhyaana() for md_file in md_files]
    for upaakhyaana, title in zip(upaakhyaanas, titles):
        print("\t".join((str(upaakhyaana), str(title))))

# Hard-coded directory that the call(s) below operate on.
dir_path = "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/purANam/mahAbhAratam/03-vana-parva/"
# set_titles_from_filenames(dir_path=dir_path, dry_run=True)
# get_upaakhyaana_and_titles_from_path(dir_path=dir_path)
# Only this call is active; it runs at import time with dry_run=False. The
# neighbouring calls are commented out.
MdFile.fix_index_files(dir_path=dir_path, dry_run=False)
# set_titles_from_spreadsheet(dir_path=dir_path, dry_run=False)
Exemplo n.º 16
0
# MdFile.fix_index_files(dir_path="/home/vvasuki/vvasuki-git/jyotiSham/content/history/equinoctial_records.md", dry_run=False, transliteration_target=None)
#
# MdFile.apply_function(dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva/paippalAda/", fn=MdFile.set_title_from_filename, transliteration_target=sanscript.DEVANAGARI, dry_run=False)

# MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/pALi/content", fn=MdFile.ensure_ordinal_in_title, transliteration_target=sanscript.DEVANAGARI, dry_run=False)

# MdFile.set_titles_from_filenames(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/history_of_the_indian_people", transliteration_target=None, dry_run=False)

# MdFile.apply_function(fn=MdFile.prepend_file_index_to_title, dir_path="/home/vvasuki/hindutva/hindutva-hugo/content/main/books/vivekAnanda", dry_run=False)

# doc_curation.clear_bad_chars(file_path="/home/vvasuki/sanskrit/raw_etexts/mImAMsA/mImAMsA-naya-manjarI.md", dry_run=False)

# The only active statement in this script section: run MdFile.split_to_bits
# through MdFile.apply_function with dry_run=False. Note that dir_path here
# names a single .md file rather than a directory.
MdFile.apply_function(
    fn=MdFile.split_to_bits,
    dir_path=
    "/home/vvasuki/vvasuki-git/notes-hugo/content/skills/clothing/extremities.md",
    frontmatter_type=MdFile.TOML,
    dry_run=False,
    source_script=sanscript.DEVANAGARI,
    indexed_title_pattern=None)

# MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva/paippalAda", dry_run=False, source_script=sanscript.DEVANAGARI)
# MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="", dry_run=False, source_script=None, indexed_title_pattern=None)

# MdFile(file_path="",frontmatter_type=MdFile.TOML).split_to_bits(dry_run=False, source_script=None, indexed_title_pattern=None)
#  , indexed_title_pattern=None

# md_helper.import_md_recursive(source_dir="/home/vvasuki/Downloads/peterFreund", file_extension="txt")
# file_helper.copy_file_tree(source_dir="/home/vvasuki/Downloads/peterFreund", dest_dir="/home/vvasuki/sanskrit/raw_etexts/mixed/peterFreund", file_pattern="**/*.md")

# MdFile(file_path="/home/vvasuki/sanskrit/raw_etexts/vedAntam/dvaitam/mAdhvam/TikA-tippaNi/chidgagana_tika.md",frontmatter_type=MdFile.TOML).fix_lazy_anusvaara(dry_run=False, ignore_padaanta=True, omit_yrl=True)
# MdFile.apply_function(fn=MdFile.fix_lazy_anusvaara, dir_path="/home/vvasuki/sanskrit/raw_etexts", file_name_filter=None, start_file="/home/vvasuki/sanskrit/raw_etexts/vyAkaraNam/aShTAdhyAyI_central-repo/vAsu/pada-1.1/1.1.1.md", dry_run=False, ignore_padaanta=True, omit_yrl=True)
Exemplo n.º 17
0
import logging

from curation_projects import raamaayana
from doc_curation.md_helper import MdFile

# Detach every handler currently on the root logger; logging.basicConfig is a
# no-op while the root logger still has handlers.
for handler in list(logging.root.handlers):
    logging.root.removeHandler(handler)
logging.basicConfig(
    format="%(levelname)s:%(asctime)s:%(module)s:%(lineno)d %(message)s",
    level=logging.DEBUG)

# Hard-coded directory whose adhyaaya files are processed below.
md_file_path = "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/purANam/rAmAyaNam/AndhrapAThaH"
# MdFile.fix_index_files(dir_path=md_file_path, dry_run=False)
# MdFile.fix_titles(
#     md_files=raamaayana.get_adhyaaya_md_files(md_file_path),
#     spreadhsheet_id="1xqVBhDwRzcEL7HlCJhxmnG1aOFFk6B8gGZ4GuBZynf8",
#     worksheet_name="शीर्षिकाः", id_column="id", title_column="अन्तिमशीर्षिका", md_file_to_id=raamaayana.get_adhyaaya_id, dry_run=False
# )
# Only this call is active; it runs at import time with dry_run=False on the
# files returned by raamaayana.get_adhyaaya_md_files.
MdFile.devanaagarify_titles(
    md_files=raamaayana.get_adhyaaya_md_files(md_file_path), dry_run=False)
Exemplo n.º 18
0
def get_upaakhyaana_and_titles_from_path(dir_path, file_pattern="**/*.md"):
    """Print one ``upaakhyaana<TAB>title`` line per markdown file.

    Args:
        dir_path: Directory handed to ``MdFile.get_md_files_from_path``.
        file_pattern: Pattern selecting the files inside ``dir_path``.
    """
    md_files = MdFile.get_md_files_from_path(dir_path=dir_path,
                                             file_pattern=file_pattern)
    # All titles are collected before any upaakhyaana, as separate passes.
    titles = [md_file.get_title() for md_file in md_files]
    upaakhyaanas = [md_file.get_upaakhyaana() for md_file in md_files]
    for upaakhyaana, title in zip(upaakhyaanas, titles):
        print("\t".join((str(upaakhyaana), str(title))))
Exemplo n.º 19
0
                                heading_class="subhead")


def makedirs():
    """Create the directory tree described in the tipiTikA ``dirs.txt`` file.

    The tree is loaded with ``dir_helper.tree_from_file``; its root gets
    ordinals set and keys regularized (the root is logged after each step)
    before the directories are made under the hard-coded base directory.
    """
    dirs_file = "/home/vvasuki/sanskrit-coders/doc_curation/curation_projects/tipiTikA/dirs.txt"
    root = dir_helper.tree_from_file(dirs_file).root
    for prepare in (root.set_ordinals, root.regularize_keys):
        prepare()
        logging.info(root)
    root.make_dirs(base_dir="/home/vvasuki/paali-bhaasaa/raw_etexts/",
                   dry_run=False)


def dump_files():
    """Call ``get_file`` for every line of the tipiTikA ``links.txt`` file.

    Each line is stripped of surrounding whitespace and passed as ``url``;
    all files go to the hard-coded tipiTaka output directory.
    """
    link_file_path = "/home/vvasuki/sanskrit-coders/doc_curation/curation_projects/tipiTikA/links.txt"
    with open(link_file_path) as linkfile:
        raw_lines = linkfile.readlines()
    for raw_line in raw_lines:
        get_file(outdir="/home/vvasuki/paali-bhaasaa/raw_etexts/tipiTaka",
                 url=raw_line.strip())


# MdFile.fix_index_files(dir_path="/home/vvasuki/paali-bhaasaa/raw_etexts/", dry_run=False)
# MdFile.fix_title_numbering_in_path(dir_path="/home/vvasuki/paali-bhaasaa/raw_etexts/", dry_run=False)
# MdFile.fix_title_numbering_in_path(dir_path="/home/vvasuki/vvasuki-git/pALi/content/01_tipiTaka", dry_run=False)
# Only this call is active in the script section; it runs at import time with
# dry_run=False on the hard-coded anguttaranikAyo directory.
MdFile.set_filenames_from_titles(
    dir_path=
    "/home/vvasuki/vvasuki-git/tipiTaka/content/01_mUlam/02_suttapiTaka/04_anguttaranikAyo",
    transliteration_source=sanscript.DEVANAGARI,
    dry_run=False)
Exemplo n.º 20
0
def dump_ics_md_pair(panchaanga, period_str):
  """Write the ICS calendar, its markdown twin and a monthly split for a year.

  Args:
    panchaanga: Object handed to ``ics.compute_calendar`` and ``md.make_md``;
      its ``city.name`` and ``computation_system`` select the output path.
    period_str: ``"<year_type>/<year>"``; the year part must parse as an int.
  """
  ics_calendar = ics.compute_calendar(panchaanga)
  year_type, year_str = period_str.split("/")
  year = int(year_str)
  out_path = get_canonical_path(
    city=panchaanga.city.name,
    computation_system_str=str(panchaanga.computation_system),
    year=year,
    year_type=year_type)
  output_file_ics = os.path.join(out_path + ".ics")
  ics.write_to_file(ics_calendar, output_file_ics)

  md_path = output_file_ics.replace(".ics", ".md")
  md_file = MdFile(file_path=md_path, frontmatter_type=MdFile.YAML)
  ics_name = str(os.path.basename(output_file_ics))
  intro = "## 00 Intro\n### Related files\n- [ics](../%s)\n" % ics_name
  md_content = "%s\n%s" % (intro, md.make_md(panchaanga=panchaanga))
  md_file.dump_to_file(metadata={"title": year}, md=md_content, dry_run=False)

  # A copy of the markdown file is split into a directory named after it;
  # any earlier version of that directory is removed first.
  monthly_file_path = md_file.file_path.replace(".md", "_monthly.md")
  monthly_dir = monthly_file_path.replace(".md", "/")
  shutil.rmtree(path=monthly_dir, ignore_errors=True)
  logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))
  logging.info("Copying to %s", monthly_file_path)
  shutil.copy(md_file.file_path, monthly_file_path)

  monthly_md_file = MdFile(file_path=monthly_file_path)
  monthly_md_file.set_title_from_filename(dry_run=False, transliteration_target=None)
  monthly_md_file.split_to_bits(source_script=None, dry_run=False, indexed_title_pattern=None)
  # Second pass: split each file produced by the first split.
  MdFile.apply_function(
    fn=MdFile.split_to_bits,
    dir_path=monthly_dir,
    frontmatter_type=MdFile.TOML,
    source_script=None,
    dry_run=False,
    indexed_title_pattern=None)
  logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))

  MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)
Exemplo n.º 21
0
from doc_curation.md_helper import MdFile

# MdFile.fix_index_files(dir_path="/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/kathAsaritsAgaraH/", dry_run=False)

# Only this call is active in the script; it runs at import time with
# dry_run=False on the hard-coded kathAsaritsAgaraH/12/ directory.
MdFile.set_titles_from_filenames(
    dir_path=
    "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/kathAsaritsAgaraH/12/",
    dry_run=False)
Exemplo n.º 22
0
# wordpress.fix_paths(dir_path="/home/vvasuki/sanskrit/raw_etexts_english/blogs/manasataramgini", dry_run=False)
# wordpress.fix_paths(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/paganology/Aryan/indo-iranian/indo-aryan/persons/brAhma/sage-bloodlines/bhRguH/dvitIyajanmani_bhRguH/chyavanaH/ApnavAna/aurvaH/jamadagniH/MT_charitram/", dry_run=False)

# MdFile.apply_function(fn=MdFile.transliterate_content, dir_path="/home/vvasuki/sanskrit/raw_etexts/mixed/sarit-markdown", source_scheme=sanscript.IAST)

#
# MdFile.fix_index_files(dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva", dry_run=False, transliteration_target=None)
#
# MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/vedAH/content/sAma/kauthumam", fn=MdFile.set_title_from_filename, transliteration_target=sanscript.DEVANAGARI, dry_run=False)

# MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/pALi/content", fn=MdFile.ensure_ordinal_in_title, transliteration_target=sanscript.DEVANAGARI, dry_run=False)

# MdFile.set_titles_from_filenames(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/history_of_the_indian_people", transliteration_target=None, dry_run=False)

# Only this call is active in the script section; it runs at import time with
# dry_run=False on the hard-coded whitney directory.
MdFile.set_filenames_from_titles(
    dir_path="/home/vvasuki/vvasuki-git/sanskrit/content/vyAkaraNam/whitney",
    transliteration_source=sanscript.DEVANAGARI,
    dry_run=False)

# MdFile.apply_function(fn=MdFile.prepend_file_index_to_title, dir_path="/home/vvasuki/hindutva/hindutva-hugo/content/main/books/vivekAnanda", dry_run=False)

# doc_curation.clear_bad_chars(file_path="/home/vvasuki/sanskrit/raw_etexts/mImAMsA/mImAMsA-naya-manjarI.md", dry_run=False)

# MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/vvasuki-git/saMskAra/content/kalpe_svamatam/social-cultivation/violence/animal-protection.md", frontmatter_type=MdFile.TOML, dry_run=False, source_script=sanscript.DEVANAGARI)

# MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/sAma/brAhmaNam/chandogya_brahmana", dry_run=False)
# MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/vvasuki-git/saMskAra/content/kalpe_svamatam/social-cultivation/violence/animal-sacrifice.md", dry_run=False, source_script=None, indexed_title_pattern=None)

# MdFile(file_path="",frontmatter_type=MdFile.TOML).split_to_bits(dry_run=False, source_script=None, indexed_title_pattern=None)
#  , indexed_title_pattern=None

# md_helper.import_md_recursive(source_dir="/home/vvasuki/Downloads/peterFreund", file_extension="txt")