Exemplo n.º 1
0
def main():
    """Write the viewer's init.json for a map and rebuild the vector map index.

    Reads the catalog at ``catalog_path`` and stores its 'maps' and 'base'
    sections in the module-level ``map_catalog`` and ``base_catalog``.
    Exits with status 1 when the requested map id is in neither section.
    For ids found in the 'maps' section, the initial region, zoom and centre
    coordinates are written to ``<viewer_path>/init.json``.  Finally the
    installed tiles are listed and passed to ``write_vector_map_idx_v2``.
    """
    global map_catalog
    global base_catalog

    args = parse_args()
    map_id = args.map_id

    catalog = adm.read_json(catalog_path)
    map_catalog = catalog['maps']
    base_catalog = catalog['base']

    is_map = map_id in map_catalog
    is_base = map_id in base_catalog

    if not (is_base or is_map):
        print('Download URL not found in Map Catalog: %s' % map_id)
        sys.exit(1)

    # init.json sets the viewer's initial coordinates and zoom; only entries
    # from the 'maps' section are used for it.
    if is_map:
        entry = map_catalog[map_id]
        init = {
            key: entry[key]
            for key in ('region', 'zoom', 'center_lon', 'center_lat')
        }
        adm.write_json_file(init, viewer_path + '/init.json')

    installed_maps = get_installed_tiles()
    print('installed_maps')
    print(repr(installed_maps))
    write_vector_map_idx_v2(installed_maps)
Exemplo n.º 2
0
def do_content(this_preset_dir, noscan):
    """Assemble the global ``content`` dict and write it to content.json.

    Args:
        this_preset_dir: preset directory path; 'content.json' is appended
            directly, so it is expected to end with a path separator.
        noscan: when true, content is taken via ``content_from_menu``;
            otherwise via ``content_from_files``.

    Any 'kalite' section in an existing content.json is carried over, and
    its language and topics are refreshed when the kalite role is active.
    """
    global content
    if role_stats['kalite']['active']:
        content["kalite"] = {'lang_code': 'en', 'topics': []} # defaults

    content_file = this_preset_dir + 'content.json'
    old_content = adm.read_json(content_file) if os.path.exists(content_file) else {}

    if not noscan:
        content_from_files()
    else:
        content_from_menu(this_preset_dir)

    # every entry of the map directory is recorded as an installed map
    if os.path.exists(map_path):
        for map_name in os.listdir(map_path):
            content["maps"].append(map_name)

    # preserve any kalite for now
    content["kalite"] = old_content.get("kalite", {})
    if role_stats['kalite']['active']:
        lang = get_kalite_lang()
        content["kalite"]["lang_code"] = lang
        # presumably fills the module-level kalite_topics read on the next line
        get_kalite_complete('khan/', lang)
        content["kalite"]["topics"] = kalite_topics

    adm.write_json_file(content, content_file)
Exemplo n.º 3
0
def _remove_quietly(path):
    """Delete ``path``, ignoring the error raised when it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def _split_zim_name(zim_fname):
    """Split a ZIM file name into ``(name, flavour)``.

    The name is cut on '_'.  The last part is always dropped (presumably the
    date stamp plus extension).  If the part before it is one of 'mini',
    'nopic' or 'maxi', it becomes the flavour (with a leading '_') and is
    dropped from the name as well.  A file name with no '_' at all yields the
    name minus a trailing '.zim' and an empty flavour, instead of raising
    IndexError.
    """
    parts = zim_fname.split('_')
    if len(parts) < 2:
        suffix = '.zim'
        stem = zim_fname[:-len(suffix)] if zim_fname.endswith(suffix) else zim_fname
        return stem, ''
    if parts[-2] in ('mini', 'nopic', 'maxi'):
        return '_'.join(parts[:-2]), '_' + parts[-2]
    return '_'.join(parts[:-1]), ''


def main():
    """Write a JSON description of one local ZIM file.

    The ZIM named by ``args.zim_file`` is looked up in ``zim_path`` (which
    ``args.path`` can override).  It is registered in a temporary kiwix
    library.xml, read back as a dict, and annotated with its download url
    (``download_source`` + file name, overridable with ``args.source``),
    name and flavour.  The result is written to ``<zim file name>`` with
    '.zim' replaced by '.json', relative to the current directory.

    Exits with status 1 when the ZIM file does not exist or no library entry
    could be read for it; exits with status 0 on success.
    """
    global download_source
    global zim_path

    args = parse_args()
    zim_fname = args.zim_file

    if args.path:  # allow override of path
        zim_path = args.path

    if args.source:  # allow override of download source
        download_source = args.source

    zim_full_path = zim_path + '/' + zim_fname

    if not os.path.isfile(zim_full_path):
        print('Zim file ' + zim_full_path + ' not found.')
        sys.exit(1)

    # create temp library.xml and read back json; start from a clean file and
    # always remove it again, even when the library helpers raise
    kiwix_library_xml = "/tmp/library.xml"
    _remove_quietly(kiwix_library_xml)
    try:
        iiab.add_libr_xml(kiwix_library_xml, zim_path, zim_fname, '')
        zims_installed, _ = iiab.read_library_xml(kiwix_library_xml,
                                                  kiwix_exclude_attr=[''])
    finally:
        _remove_quietly(kiwix_library_xml)

    if not zims_installed:
        print('No library entry could be read for ' + zim_full_path)
        sys.exit(1)

    # only one zim was added, so the first key is the one we want
    zim_id = next(iter(zims_installed))

    name, flavour = _split_zim_name(zim_fname)

    zims_installed[zim_id]['url'] = download_source + zim_fname
    zims_installed[zim_id]['name'] = name
    zims_installed[zim_id]['flavour'] = flavour

    zim_json = zim_fname.replace('.zim', '.json')
    adm.write_json_file(zims_installed, zim_json)

    sys.exit()
Exemplo n.º 4
0
def do_preset(this_preset_dir):
    """Create a placeholder preset.json in ``this_preset_dir`` if none exists.

    'preset.json' is appended directly to ``this_preset_dir``, so the
    directory is expected to end with a path separator.  An existing file is
    never overwritten.
    """
    preset_file = this_preset_dir + 'preset.json'
    # template values the user is expected to edit by hand afterwards
    preset = {
        "name": "Put a name or title here",
        "description": "Put a longer description here",
        "default_lang": "en or another code",
        "location": "Optional location this was installed",
        "size_in_gb": 115,
        "last_modified": str(date.today()),
    }
    if os.path.exists(preset_file):
        return
    adm.write_json_file(preset, preset_file)
Exemplo n.º 5
0
def write_vector_map_idx_v2(installed_maps):
    """Write vector-map-idx.json describing the installed maps.

    Args:
        installed_maps: iterable of installed map file names; each name is
            also used as the catalog key.

    Each name is looked up in the combined catalog (module-level
    ``map_catalog`` overlaid with ``base_catalog``).  Names missing from the
    catalog are skipped.  The index is keyed by the entry's 'perma_ref' and
    written to ``<vector_map_idx_dir>/vector-map-idx.json``.
    """
    # modified from adm_lib for new maps
    # Merge into a fresh dict: updating map_catalog in place would leak the
    # base entries into the shared global catalog.
    catalog = dict(map_catalog)
    catalog.update(base_catalog)

    idx_dict = {}
    for fname in installed_maps:
        map_dict = catalog.get(fname)
        if not map_dict:
            continue

        # Create the idx entry in the format required by the js-menu system
        item = map_dict['perma_ref']
        idx_dict[item] = {
            'file_name': fname,
            'menu_item': item,
            'size': map_dict['size'],
            'date': map_dict['date'],
            'region': map_dict['region'],
            'language': 'en',  # fixed; not derived from perma_ref
        }

    adm.write_json_file(idx_dict, vector_map_idx_dir + '/vector-map-idx.json')
Exemplo n.º 6
0
# Walk every topic and its list of modules, building one catalog entry per
# module keyed by its moddir, then write the whole catalog to disk.
for topic in topics:
    category = topic.li.a.text
    print(category)
    items = topic.ul.find_all('li')
    for item in items:
        anchor = item.a
        title = anchor.text
        print(title)
        zip_url = anchor['href']

        # moddir is the zip file name without its '.zip' part; a leading
        # 'tr_' is stripped, anything else gets an 'en-' prefix
        moddir = zip_url.split('/')[-1].split('.zip')[0]
        moddir = moddir[3:] if moddir[0:3] == 'tr_' else 'en-' + moddir

        item_info = deepcopy(item_base)
        item_info.update({
            'module_id': str(uuid.uuid4()),
            'moddir': moddir,
            'category': category,
            'title': title,
            'lang': moddir[0:2],  # may not work for Chinese
            'zip_http_url': zip_url,
        })

        gcf_catalog[moddir] = item_info

adm.write_json_file(gcf_catalog, 'gcf-catalog.json')
Exemplo n.º 7
0
# Collect the per-letter index pages from the alphabetical block, then visit
# each one and record every listed disease as url -> link text.
#article_parts = page.find_all("div", {"class":"card"})
alpha_block = page.find('ul', {'class': 'alpha-block-list'})
items = alpha_block.find_all(['li'])

for item in items:
    link = item.find('a')
    alpha_pages.append(link['href'])

# OR string.ascii_uppercase (but is string, not array) and 0-9

for sub_menu in alpha_pages:
    # a timeout keeps one stalled connection from hanging the whole run
    response = requests.get('https://rarediseases.info.nih.gov' + sub_menu,
                            timeout=60)
    response.encoding = 'utf-8'  # to be safe
    html = response.text
    soup = BeautifulSoup(html, "html.parser")
    list_block = soup.find('ul', {'class': 'listing-diseases'})
    items = list_block.find_all(['li'])
    for item in items:
        print(item)
        link = item.find('a')
        # skip list items without a usable link; checked explicitly so that
        # unrelated errors (and Ctrl-C) are no longer silently swallowed
        if link is None or not link.has_attr('href'):
            continue
        url = link['href']
        disease_catalog[url] = link.text

# div id = what's new for new or changed articles


adm.write_json_file(disease_catalog, 'disease-catalog.json')
Exemplo n.º 8
0
def main():
    """Merge the IIAB and OER2GO module catalogs and write the combined file.

    Steps:
      1. Parse arguments; ``--verbose`` and ``--no_download`` set the
         module-level ``verbose`` / ``download_flag``.
      2. With ``--menu``, require the js-menu directory to exist (exit 99).
      3. Download the IIAB module catalog; any failure is fatal (exit 2).
      4. Fetch the OER2GO catalog with ``get_oer2go_cat()``; on error, or when
         downloads are disabled, fall back to the local catalog file.
      5. Seed ``iiab_oer2go_catalog`` from the IIAB catalog, then add every
         OER2GO module of type 'html' whose id is not in ``dup_list``,
         optionally generating menu definitions.
      6. Write the dated catalog, print ``err_str`` and exit with ``err_num``.
    """
    global verbose
    global download_flag

    oer2go_catalog = {}
    err_num = 0
    err_str = "SUCCESS"

    args = parse_args()
    if args.verbose:
        verbose = True
    if args.no_download:
        download_flag = False

    # make sure we have menu js_menu_dir if args.menu true
    if args.menu:
        if not os.path.isdir(adm.CONST.js_menu_dir):
            sys.stdout.write("GET-OER2GO-CAT ERROR - iiab-menu not installed and --menu option given\n")
            sys.stdout.flush()
            sys.exit(99)

    # always get our catalog
    # failure is fatal
    try:
        # the context manager closes the handle even when read() raises
        with urllib.request.urlopen(adm.CONST.iiab_module_cat_url) as url_handle:
            iiab_catalog_json = url_handle.read()
        iiab_catalog = json.loads(iiab_catalog_json)
    except urllib.error.URLError as exc:
        sys.stdout.write("GET-OER2GO-CAT ERROR - " + str(exc.reason) + '\n')
        sys.stdout.flush()
        sys.exit(2)
    except ValueError as exc:
        # an unparsable catalog is as fatal as an unreachable one
        sys.stdout.write("GET-OER2GO-CAT ERROR - " + str(exc) + '\n')
        sys.stdout.flush()
        sys.exit(2)

    # for now we will assume that old modules are still in the current catalog
    # get new oer2go catalog unless told not to

    if download_flag:
        err_num, err_str, oer2go_catalog = get_oer2go_cat()
        if err_num != 0:
            download_flag = False
    if not download_flag: # get local copy
        local_oer2go_catalog = adm.read_json(adm.CONST.oer2go_catalog_file)
        oer2go_catalog = local_oer2go_catalog['modules']

    # start with iiab_catalog.json
    for item in iiab_catalog:
        iiab_oer2go_catalog[item['moddir']] = item

    working_dir = adm.CONST.rachel_working_dir + str(uuid.uuid4()) + "/"
    os.mkdir(working_dir)
    #os.mkdir(iiab_menu_download_dir)

    try:
        for item in oer2go_catalog: # structure of local and remote catalogs is different
            if not download_flag: # local: dict keyed by moddir
                moddir = item
                module = oer2go_catalog[moddir]
                module_id = module['module_id']
            else: # remote: list of module dicts
                moddir = item['moddir']
                module_id = item['module_id']
                module = item

            if moddir is None: # skip items with no moddir
                continue

            menu_item_name = moddir

            if str(module_id) in dup_list:
                msg = "Skipping module not needed by Internet in a Box"
                if verbose:
                    print("%s %s %s" % (msg, module_id, moddir))
                continue
            if module.get('type') != 'html':
                continue

            is_downloaded, has_menu_def = adm.get_module_status(module)
            #if args.menu and is_downloaded:
            if args.menu:
                if not has_menu_def:
                    menu_item_name = adm.create_module_menu_def(module, working_dir, incl_extra_html = False)
                    msg = "Generating menu files"
                    if verbose:
                        print("%s %s %s" % (msg, module_id, moddir))
                if is_downloaded:
                    adm.update_menu_json(menu_item_name) # only adds if not already in menu

            iiab_oer2go_catalog[moddir] = module
    finally:
        # the scratch directory is only needed while generating menu defs;
        # remove it even when a module fails part-way through
        shutil.rmtree(working_dir)

    # write catalog even if nothing was downloaded, as ours could have changed
    dated_oer2go_cat = {}
    dated_oer2go_cat['download_date'] = time.strftime("%Y-%m-%d.%H:%M:%S")
    dated_oer2go_cat['modules'] = iiab_oer2go_catalog

    adm.write_json_file(dated_oer2go_cat, adm.CONST.oer2go_catalog_file)

    sys.stdout.write(err_str)
    sys.stdout.flush()
    sys.exit(err_num)
# derived from https://www.ncbi.nlm.nih.gov/books/NBK430685/

import os
import requests
from bs4 import BeautifulSoup
import json
import iiab.adm_lib as adm

# Build a catalog of StatPearls articles (article id -> url and title) from a
# saved copy of the table-of-contents page and write it out as JSON.
stat_pearl_catalog = {}

with open('stat-pearl-catalog.html', 'r') as f:
    cat_html = f.read()

page = BeautifulSoup(cat_html, "html.parser")

items = page.find_all(['li'])

for item in items:
    # each <li> has a class of the form 'toc_itm_NBK430685_', but NBK430685 is
    # the number of the catalog, not of the article, so the article id is
    # taken from the link url instead
    for link in item.find_all(['a', 'link']):
        if not link.has_attr('href'):
            continue
        url = link['href']
        if 'article-' not in url:
            continue
        # the id is the text between 'article-' and the next '/'
        article_id = url.split('article-')[1].split('/')[0]
        stat_pearl_catalog[article_id] = {'url': url, 'title': link.text}

adm.write_json_file(stat_pearl_catalog, 'stat-pearl-catalog.json')