Exemplo n.º 1
0
def process_obs_to_html(dataset_file):
    """
    Print an HTML table summarising the directory info found for each dataset path.

    Each line of ``dataset_file`` (as returned by ``util.read_file_into_list``)
    is expected to look like ``<key>=<path>``.  For every path,
    ``fbs_api.get_dir_info`` is queried and the ``"formats"`` and
    ``"sample_names"`` entries of its result are collected, together with the
    original line, into one record.  The records are passed to
    ``create_html_table`` and the result is printed to stdout.

    Lines without an ``=`` and paths whose lookup fails are skipped (with a
    message) so that one bad entry does not abort the whole run.

    :param dataset_file: path of the file listing the datasets to look up.
    :return: None; the table is printed, not returned.
    """
    summary_info = []

    for line in util.read_file_into_list(dataset_file):
        fields = line.split("=")
        if len(fields) < 2:
            # Blank or malformed line: there is no path to look up.  Indexing
            # fields[1] here would raise IndexError and stop the run.
            if line.strip():
                print("skipping malformed line {!r}".format(line))
            continue
        path = fields[1].rstrip()

        try:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("searching path {}".format(path))
            results = fbs_api.get_dir_info(path)
        except Exception as err:
            # Best-effort: skip paths that cannot be looked up, but say so, and
            # do not swallow KeyboardInterrupt/SystemExit as a bare except would.
            print("skipping path {}: {!r}".format(path, err))
            continue

        # The record keeps the raw input line, not just the extracted path.
        summary_info.append((line, results["formats"], results["sample_names"]))

    print(create_html_table(summary_info))
Exemplo n.º 2
0
def process_obs_to_html(paths_page="http://catalogue.ceda.ac.uk/export/paths/"):
    """
    Build paginated HTML review pages from the catalogue's exported path listing.

    The listing at ``paths_page`` is downloaded and sorted; each line is
    expected to hold two whitespace-separated fields, ``<data_path> <ob_url>``.
    For every line, ``fbs_api.get_dir_info(data_path)`` is queried and the
    result is rendered with ``render_results``.  Rendered records are grouped
    ``SPLIT`` (100) input lines per page and each page is written to
    ``OUT_DIR/extracted_phenomena_NN.html`` using ``PAGE_TEMPLATE``.

    Malformed lines and paths whose lookup fails are skipped (with a message)
    so that one bad entry does not abort the whole run.

    :param paths_page: URL of the listing to download.
    :return: None; pages are written to disk and progress is printed.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import contextlib

    # closing() guarantees the HTTP response is released even if reading fails.
    with contextlib.closing(urllib.urlopen(paths_page)) as response:
        lines = response.readlines()
    lines.sort()
    n = len(lines)

    SPLIT = 100
    page_number = 1
    TEMPL = "extracted_phenomena_%02d.html"

    while lines:
        lines_to_process = lines[:SPLIT]
        lines = lines[SPLIT:]
        # 1-based position of this page's first line within the whole listing,
        # so the record number handed to render_results keeps counting across
        # pages instead of restarting at 1 on every page (n is the grand total).
        first_record = (page_number - 1) * SPLIT + 1
        rendered = []

        for i, line in enumerate(lines_to_process):
            fields = line.split()
            if len(fields) != 2:
                # Blank or malformed line: unpacking would raise ValueError.
                if fields:
                    print("Skipping malformed line: %r" % (line,))
                continue
            data_path, ob_url = fields
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Working on: %s" % data_path)

            try:
                results = fbs_api.get_dir_info(data_path)
                if len(results["formats"]) > 0:
                    print(results["formats"])
            except Exception as err:
                # Best-effort: skip this record but report why, and do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would.
                print("Skipping %s: %r" % (data_path, err))
                continue

            html = render_results(ob_url, data_path, results, first_record + i, n)
            rendered.append("\n" + html)

        content = "".join(rendered)

        title = "File-based search review of MOLES records: %d" % page_number
        # NOTE(review): on the first page previous_link names page 00, and on
        # the last page next_link names a page that is never written -- confirm
        # how PAGE_TEMPLATE is meant to present those links.
        previous_link = TEMPL % (page_number - 1)
        next_link = TEMPL % (page_number + 1)
        # PAGE_TEMPLATE is filled from this function's local variables, so the
        # local names above must stay stable; which of them the template reads
        # is not visible from here.
        page = PAGE_TEMPLATE % vars()

        fpath = "%s/%s" % (OUT_DIR, TEMPL % page_number)
        with open(fpath, "w") as html_writer:
            html_writer.write(page)

        print("Wrote: %s" % fpath)
        page_number += 1