Example #1
    def tab_mix_table(self, mix_data, _verbose=False, brainz=False, format=""):
        mix_data_key_bpm = self.replace_key_bpm(mix_data)
        mix_data_nl = self.trim_table_fields(mix_data_key_bpm)

        # for row in mix_data_nl:  # DEBUG
        #    log.debug(str(row))
        # log.debug("")

        if _verbose:
            self.p(
                tab(mix_data_nl,
                    tablefmt='pipe' if not format else format,
                    headers=self.cols_mixtracks.headers_dict(short=True)))
        elif brainz:
            mix_data_brainz = self.replace_brainz(mix_data_key_bpm)
            mix_data_brainz_nl = self.trim_table_fields(mix_data_brainz,
                                                        exclude=['methods'])
            self.p(
                tab(mix_data_brainz_nl,
                    tablefmt='grid' if not format else format,
                    headers=self.cols_mixtracks_brainz.headers_dict()))
        else:
            self.p(
                tab(mix_data_nl,
                    tablefmt='pipe' if not format else format,
                    headers=self.cols_mixtracks_basic.headers_dict()))
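Note: every snippet in this collection calls tabulate through a short alias. Example #23 imports it explicitly; the other files presumably do the same at module level:

from tabulate import tabulate as tab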
Example #2
 def tab_mix_info_header(self, mix_info):
     self.print_help(
         tab([mix_info],
             tablefmt="plain",
             headers=[
                 "Mix", "Name", "Created", "Updated", "Played", "Venue"
             ]))
Example #3
 def tab_online_search_results(self, _result_list):
     self.print_help(
         tab(_result_list,
             tablefmt="simple",
             headers=[
                 "ID", "Artist", "Release", "Label", "C", "Year", "Format"
             ]))
Example #4
def main():
    __mkpro()

    athletes = pd.DataFrame([])
    rides = pd.DataFrame([])

    zips = [f for f in os.listdir(zip_dir) if f.endswith('.zip')]
    for z in zips:
        print('Processing file: {}'.format(z))
        z_name = os.path.splitext(z)[0]
        z_path = os.path.join(zip_dir, z)
        z_out = os.path.join(pro_dir, z_name)

        x = StravaExport(z_path, z_out)
        x.extract_zip()
        df1 = x.athlete_pd()
        df2 = x.rides_pd()

        # DataFrame.append() was removed in pandas 2.0; pd.concat() is the replacement
        athletes = pd.concat([athletes, df1], sort=True)
        rides = pd.concat([rides, df2], sort=True)

    rides = rides[rides.ftp > 0]
    rc = rides.groupby('id').size().rename('num_rides')
    athletes = athletes.merge(rc.to_frame(), left_on='id', right_on='id')

    print(tab(athletes, headers='keys', tablefmt='psql'))
    print('Saving output to: {}'.format(pro_dir))

    athletes.to_csv(os.path.join(pro_dir, 'athletes.csv'), index=False)
    rides.to_csv(os.path.join(pro_dir, 'rides.csv'), index=False)
Example #5
 def tab_mix_table(self, _mix_data, _verbose=False):
     if _verbose:
         self.print_help(
             tab(_mix_data,
                 tablefmt="pipe",
                 headers=[
                     "#", "Release", "Track\nArtist", "Track\nName",
                     "Track\nPos", "Key", "BPM", "Key\nNotes",
                     "Trans.\nRating", "Trans.\nR. Notes", "Track\nNotes"
                 ]))
     else:
         self.print_help(
             tab(_mix_data,
                 tablefmt="pipe",
                 headers=[
                     "#", "Release", "Tr\nPos", "Trns\nRat", "Key", "BPM"
                 ]))
Example #6
 def tab_mixes_list(self, mixes_data):
     mixes_short_timestamps = self.shorten_mixes_timestamps(mixes_data)
     tabulated = tab(
         self.trim_table_fields(mixes_short_timestamps),
         tablefmt="simple",
         headers=self.cols_mixes.headers_dict()  # data is dict, headers too
     )
     self.p(tabulated)
Example #7
File: mcts.py Project: mustitz/pa-mcts
    def __call__(self, nodes):
        sum_games = 0
        qnodes = len(nodes)
        assert qnodes > 0

        scores = np.zeros((qnodes))
        qgames = np.zeros((qnodes))
        zero = []

        for i, node in enumerate(nodes):
            if node.qgames > 0:
                n = self.weights[0] * node.qgames
                scores[i] = node.score / node.qgames
                qgames[i] = n
                sum_games += n
            elif node.estimation.weight > 0:
                n = self.weights[1] * node.estimation.weight
                scores[i] = node.estimation.value
                qgames[i] = n
                sum_games += n
            else:
                zero.append(node)

        if zero:
            return random.choice(zero)

        log_sum_games = np.log(1.0 + sum_games)
        ucb = scores + self.C * np.sqrt(log_sum_games / qgames)
        index = np.random.choice(np.flatnonzero(ucb == ucb.max()))

        if self.log:
            print('UcbMoveSelection:')
            print('=================')
            print()

            rows = []
            # enumerate() avoids shadowing the ucb array with the loop variable
            for i, (node, ucb_value) in enumerate(zip(nodes, ucb)):
                rows.append([i, ucb_value, '*' if i == index else ' '] +
                            node.dump_row())
            if TABULATE:
                columns = [
                    '#', 'UCB', '!', 'view', 'locked', 'score', 'eval',
                    'qgames', 'qmoves', 'es.value', 'es.weight'
                ] + ['r' + str(i) for i in range(1, 10)]
                print(tab(rows, tablefmt='plain', headers=columns))
            else:
                print('\n'.join('\t'.join(str(value) for value in row)
                                for row in rows))

            print('SELECTED:', index, '-', nodes[index].view)
            print()

        return nodes[index]
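The selection rule above is the usual UCB1 exploration/exploitation trade-off. Reading the code directly, with \bar{s}_i the node's mean score and n_i its weighted game count, the quantity maximised is

$$\mathrm{UCB}_i = \bar{s}_i + C \sqrt{\frac{\ln\!\left(1 + \sum_j n_j\right)}{n_i}}$$

and ties are broken uniformly at random via np.random.choice over the arg-max set.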
Example #8
File: 2nd_exp.py Project: tscott8/cs306
def display_analysis(functions, arr, text_file):
    """
    """
    rounder = 1000
    print(tab([['Data Type', str(type(arr[1][0][0]))],
               ['List Size', str(len(arr[1][0])) + ' elements']],
              headers=['Stat', 'Value'],
              tablefmt="orgtbl"),
          file=text_file)
    table = []
    sort_proof = []
    for i in range(len(functions)):
        analysis = empirical_analysis(functions[i][0], arr)
        time_collections = [] + [analysis['ordered']['time']] + [
            analysis['random']['time']
        ] + [analysis['reversed']['time']]
        time_collections.sort()
        table += [[
            functions[i][1],
            str(time_collections[0] * rounder)[:6] +
            ' \N{GREEK SMALL LETTER MU}s' +
            get_instance(analysis, time_collections[0]),
            str(time_collections[1] * rounder)[:6] +
            ' \N{GREEK SMALL LETTER MU}s' +
            get_instance(analysis, time_collections[1]),
            str(time_collections[2] * rounder)[:6] +
            ' \N{GREEK SMALL LETTER MU}s' +
            get_instance(analysis, time_collections[2]),
            str(analysis['ordered']['comparisons'])
        ]]
        if i == 0:
            sort_proof += [[
                'Input', analysis['random']['unsorted_arr'][:5] + ['...']
            ], ['Output', analysis['random']['sorted_arr'][:5] + ['...']]]
    if len(sort_proof) > 0:
        print(tab(sort_proof, tablefmt="orgtbl"), file=text_file)
    print('', file=text_file)
    print(tab(table,
              headers=['Algorithm', 'Best ', 'Average', 'Worst', 'Compares'],
              tablefmt='orgtbl'),
          file=text_file)
    return table
Example #9
File: mcts.py Project: mustitz/pa-mcts
def dump_nodes(nodes):
    if TABULATE:
        rows = (node.dump_row() for node in nodes)
        columns = [
            'view', 'locked', 'score', 'qgames', 'eval', 'qmoves', 'es.value',
            'es.weight'
        ] + ['r' + str(i) for i in range(1, 10)]
        return tab(rows, tablefmt='plain', headers=columns)
    else:
        return '\n'.join('\t'.join(str(value) for value in node.dump_row())
                         for node in nodes)
Example #10
 def tab_online_search_results(self, _result_list):
     self.p(
         tab(_result_list,
             tablefmt="simple",
             headers={
                 'id': 'ID',
                 'artist': 'Artist',
                 'title': 'Release',
                 'label': 'Label',
                 'country': 'C',
                 'year': 'Year',
                 'format': 'Format'
             }))
Example #11
def main_logic():

    host, user, password, instance = set_db_connection_data()
    cursor_connect = connect_to_db(host, user, password, instance)
    cursor_execute = execute_query(cursor_connect)
    inventory_files_list = fetch_query_results(cursor_execute)
    write_s3_search_file(inventory_files_list)
    profile, proxy = set_s3_connection_data()
    session = connect_to_s3(profile, proxy)
    raw_path_list, sliced_paths = create_path_list()
    s3_found_files = s3_file_search(sliced_paths, session)
    datasets = create_dataset_list()
    report = create_df_report(datasets)
    df_s3_count = write_file_counts(s3_found_files, datasets, 'S3 Count')
    report = pd.concat([report, df_s3_count], axis=1)
    df_inventory_count = write_file_counts(raw_path_list, datasets,
                                           'Inventory Count')
    report = pd.concat([report, df_inventory_count], axis=1)
    df_s3_to_inventory_ratio = calculate_ratios(report, 'S3 Count',
                                                'Inventory Count')
    report = pd.concat([report, df_s3_to_inventory_ratio], axis=1)
    df_inventory_to_s3_ratio = calculate_ratios(report, 'Inventory Count',
                                                'S3 Count')
    report = pd.concat([report, df_inventory_to_s3_ratio], axis=1)
    df_s3_missing_files = calculate_missing_files(report, 'S3 Count',
                                                  'Inventory Count')
    report = pd.concat([report, df_s3_missing_files], axis=1)
    df_inventory_missing_files = calculate_missing_files(
        report, 'Inventory Count', 'S3 Count')
    report = pd.concat([report, df_inventory_missing_files], axis=1)
    report.to_csv('search_report.csv')

    print('\n')
    print(
        '\033[4mSEARCH RESULTS BELOW. CHECK search_report.csv IN YOUR WORKSPACE:\033[0m'
    )
    print('\n')
    print(tab(report, headers=report.columns))
    print('\n')

    answer = input('SAVE THE INVENTORY TABLE FULL SEARCH LOG TO DISK? Y/N: ')
    write_search_results(s3_found_files, inventory_files_list,
                         'INVENTORY FILE', 'INVENTORY', 'S3',
                         'inventory_log.csv', answer)
    print('\n')

    answer = input('SAVE THE S3 FULL SEARCH LOG TO DISK? Y/N: ')
    write_search_results(inventory_files_list, s3_found_files, 'S3 FILE', 'S3',
                         'INVENTORY', 's3_log.csv', answer)
Example #12
def seats(train, src, dest, date):
    data = {
        'train_no': F"{train}.",
        'stn_from': src,
        'stn_to': dest,
        'journey_date': date
    }
    response = requests.post(BR_API_SEATS, json=data)
    data = []
    headers = [
        'Class', 'Fare (Adult)', 'Fare (Child)', 'Counter Seat', 'Mobile Seat'
    ]
    for seat in response.json()['DATA']:
        data.append([
            seat['CLASS'], seat['FARE'], seat['FARE_C'], seat['COUNTER_SEAT'],
            seat['MOBILE_SEAT']
        ])
    print(tab(data, headers=headers))
Example #13
 def tab_all_releases(self, releases_data):
     table = [dict(row) for row in releases_data]
     for i, row in enumerate(table):
         links_str = self.join_links_to_str(row)
         row['artist_title_links'] = '{} - {}\n{}\n '.format(
             row['d_artist'], row['discogs_title'], links_str)
         del (table[i]['m_rel_id_override'])
         del (table[i]['m_rel_id'])
         del (table[i]['discogs_id'])
         del (table[i]['d_artist'])
         del (table[i]['discogs_title'])
     table = self.trim_table_fields(table, 40)
     print(
         tab(table,
             tablefmt="grid",
             headers={
                 'd_catno': 'CatNo',
                 'artist_title_links': 'Release: Artist - Title - Links'
             }))
Example #14
 def tab_stats(self, releases_total, releases_matched, tracks_total,
               tracks_matched, releases_collection_flag,
               releases_collection_online, mixtracks_total,
               mixtracks_unique, tracks_key_brainz, tracks_key_manual,
               tracks_bpm_brainz, tracks_bpm_manual):
     stats = [
         ['Releases in DiscoBASE', releases_total],
         ['Releases in Collection (DB flag)', releases_collection_flag],
         ['Releases in Collection (Discogs)', releases_collection_online],
         ['Releases matched with *Brainz', releases_matched],
         ['Tracks in DiscoBASE', tracks_total],
         ['Tracks matched with *Brainz', tracks_matched],
         ['Tracks with *Brainz key', tracks_key_brainz],
         ['Tracks with *Brainz BPM', tracks_bpm_brainz],
         ['Tracks with user-provided key', tracks_key_manual],
         ['Tracks with user-provided BPM', tracks_bpm_manual],
         ['Tracks in mixes', mixtracks_total],
         ['Unique tracks in mixes', mixtracks_unique],
     ]
     self.p(tab(stats, tablefmt='plain'), lead_nl=True)
Example #15
def search(src, dest, date, adult, child, class_):
    params = (
        ('journey_date', date),
        ('from_station', src),
        ('to_station', dest),
        ('class', class_),
        ('adult', adult),
        ('child', child),
    )
    response = requests.get(BR_API_TRAINS, params=params)
    data = []
    headers = [
        'Train #', 'Train Name', 'Departure Time', 'Duration', 'Train Left'
    ]
    for train in response.json():
        data.append([
            train['trn_no'], train['trn_name'], train['dpt_time'],
            train['duration'], train['isTrainLeft']
        ])
    print(tab(data, headers=headers))
Example #16
File: 2nd_exp.py Project: tscott8/cs306
def build_graph_tables(tables, text_file):
    new_table = []
    bubble = ['Bubble']
    select = ['Selection']
    insert = ['Insertion']
    shell = ['Shell']
    merge = ['Merge']
    heap = ['Heap']
    for table in tables:
        bubble += [float(table[1][0][2][:6])]
        select += [float(table[1][1][2][:6])]
        insert += [float(table[1][2][2][:6])]
        shell += [float(table[1][3][2][:6])]
        merge += [float(table[1][4][2][:6])]
        heap += [float(table[1][5][2][:6])]
    new_table = [bubble] + [select] + [insert] + [shell] + [merge] + [heap]
    print(tab(new_table,
              tablefmt='orgtbl',
              numalign="right",
              headers=['Sort', 'Int', 'Float', 'String', 'Card']),
          file=text_file)
    return new_table
Example #17
 def tab_mixes_list(self, mixes_data):
     # make list of dicts out of the sqlite tuples list
     mixes = [dict(row) for row in mixes_data]
     for i, mix in enumerate(
             mixes):  # shorten/format timestamps in this view
         mixes[i]['created'] = self.shorten_timestamp(mix['created'],
                                                      text=True)
         mixes[i]['played'] = self.format_date_month(mix['played'],
                                                     text=True)
         mixes[i]['updated'] = self.shorten_timestamp(mix['updated'],
                                                      text=True)
     tabulated = tab(
         self.trim_table_fields(mixes),
         tablefmt="simple",  # headers has to be dict too!
         headers={
             'mix_id': '#',
             'name': 'Name',
             'played': 'Played',
             'venue': 'Venue',
             'created': 'Created',
             'updated': 'Updated'
         })
     self.p(tabulated)
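Examples #10, #13 and #17 all rely on tabulate accepting dict rows together with a dict of headers that maps row keys to display names (the inline comments above point this out). A minimal sketch of the pattern, with made-up rows:

from tabulate import tabulate as tab

rows = [
    {'mix_id': 1, 'name': 'Warmup set', 'venue': 'Home'},
    {'mix_id': 2, 'name': 'Peak time', 'venue': 'Club'},
]
print(tab(rows, tablefmt='simple',
          headers={'mix_id': '#', 'name': 'Name', 'venue': 'Venue'}))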
Example #18
File: genome.py Project: quanrd/VCF-kit
def main(debug=None):
    args = docopt(__doc__,
                  version='VCF-Toolbox v0.1',
                  argv=debug,
                  options_first=False)
    # Setup Genomes Directory
    if args["location"] and args["<path>"]:
        if args["<path>"] == "-":
            genome_directory = get_genome_directory_file()
            os.remove(genome_directory)
            return get_genome_directory_file()
        else:
            with open(get_genome_directory_file(), "w") as f:
                genome_directory = os.path.realpath(args["<path>"])
                with indent(2):
                    puts(
                        colored.blue("\nSet genome location to: " +
                                     genome_directory + "/\n"))
                f.write(genome_directory)
                # create directory if not exists
                if not os.path.exists(genome_directory):
                    os.makedirs(genome_directory)
                return genome_directory

    if args["--directory"]:
        genome_directory = os.path.realpath(args["--ref"])
    else:
        genome_directory = get_genome_directory()

    with indent(2):
        puts(genome_directory)

    if args["location"] and not args["<path>"]:
        return genome_directory

    genome_db = get_genome_directory() + "/genomes.db"

    ################
    # List Genomes #
    ################
    if args["list"]:
        output_genome_list()

    ##################
    # Search Genomes #
    ##################
    elif args["--search"]:
        # Download and cache a list of genomes from NCBI for searching
        download_genomes(genome_db)

        # Cache result
        header = [
            "assembly_accession",  # 0
            "bioproject",  # 1
            "organism_name",  # 7
            "asm_name",  # 15
            "ftp_path"
        ]  # 19

        with indent(2):
            puts(colored.blue('\nSearching...\n'))

        with open(genome_db, "r") as f:
            results = []
            for line in f:
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    line = [
                        x for k, x in enumerate(line)
                        if k in [0, 1, 7, 15, 19]
                    ]
                    if args["--search"].lower() in line[2].lower(
                    ) and line[4] != "na":
                        results.append(line)
        with indent(4):
            puts(tab(results, headers=header))
        with indent(2):
            puts(colored.blue('\nTo download a genome and setup for use:'))
        with indent(4):
            puts(colored.green("\nvk genome ncbi --ref=<asm_name>\n"))
        return results
    elif args["--ref"]:
        # reference name.
        reference_name = args["--ref"]

        # Ensure genome db is available
        download_genomes(genome_db)

        # reference directory
        if not args["--directory"]:
            reference_directory = genome_directory + "/" + reference_name + "/"
        else:
            reference_directory = genome_directory + "/"
        if not os.path.exists(reference_directory):
            os.makedirs(reference_directory)

        # base reference filename.
        ref_filename = reference_directory + reference_name + ".tmp.fa.gz"

        if args["wormbase"]:
            asm_name = args["--ref"]
            asm_url = f"ftp://ftp.wormbase.org/pub/wormbase/releases/{asm_name}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{asm_name}.genomic.fa.gz"
            comm = f"curl {asm_url} > {ref_filename}"
            err = run_command(comm)
            if err != 0:
                raise Exception(
                    colored.red(f"Wormbase genome {args['--ref']} not found."))
            # Unzip wormbase genome
            run_command(f"gzip -df {ref_filename}", shell=True)
        else:
            # NCBI
            with open(genome_db, "r") as f:
                results = []
                for line in f:
                    if not line.startswith("#"):
                        line = line.strip().split("\t")
                        line = [
                            x for k, x in enumerate(line)
                            if k in [0, 1, 7, 15, 19]
                        ]
                        if args["--ref"] == line[3]:
                            results.append(line)
                            reference_download = results[0]
                            url = reference_download[4].replace(
                                "ftp://", "http://"
                            ) + "/" + os.path.split(
                                reference_download[4])[1] + "_genomic.fna.gz"
            if len(results) == 0:
                with indent(2):
                    puts(
                        colored.red('\nError: Genome ' + args["--ref"] +
                                    ' not found\n'))

            with indent(2):
                puts(
                    colored.green('\nDownloading: ' + reference_name + "; " +
                                  url + '\n'))

            # stack overflow: 15644964;
            r = requests.get(url, stream=True)

            with open(ref_filename, 'wb') as f:
                total_length = int(r.headers.get('content-length'))
                for chunk in progress.bar(r.iter_content(chunk_size=1024),
                                          expected_size=(total_length / 1024) +
                                          1):
                    if chunk:
                        f.write(chunk)
                        f.flush()

        # Fix chromosome names
        if not args["--accession-chrom-names"] and not args['wormbase']:
            with indent(2):
                puts(colored.green('\nFixing chromosome names\n'))

            with open(ref_filename.replace(".fa.gz", ".fa"), 'w') as outfa:
                with gzip.open(ref_filename, 'rb') as f:
                    for line in f:
                        line = line.decode("utf-8")
                        outline = line
                        if line.startswith(">"):
                            acc = line.split(" ")[0].strip(">")
                            chrom_name = fetch_chrom_name(acc)
                            if chrom_name is not None:
                                outline = ">" + chrom_name + "\n"
                            elif line.lower().find("mitochon") > 0:
                                outline = ">MtDNA\n"
                            puts(
                                colored.blue(line.strip("\n>")) + " --> " +
                                colored.blue(outline.strip("\n>")))
                        outfa.write(outline)

        if which("bgzip"):
            with indent(2):
                puts(colored.green('\nSwitching from gzip to bgzip\n'))
            # Convert to bgzip
            if args["--accession-chrom-names"]:
                run_command(f"gzip -df {ref_filename}", shell=True)
            comm_bgzip = "bgzip -fc {ref_filename} > {ref_out}"
            comm_bgzip = comm_bgzip.format(
                ref_filename=ref_filename.replace(".fa.gz", ".fa"),
                ref_out=ref_filename.replace(".tmp", ""))
            run_command(comm_bgzip, shell=True)
            ref_filename = ref_filename.replace(".tmp", "")
        else:
            with indent(2):
                puts_err(colored.red("Please install bgzip."))
            exit()

        if which("bwa"):
            with indent(2):
                puts(colored.green("\nCreating bwa index\n"))
            comm = f"bwa index {ref_filename}"
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue("\nSkipping bwa index; bwa not installed\n"))

        if which("samtools"):
            with indent(2):
                puts(colored.green("\nCreating samtools index\n"))
            comm = f"samtools faidx {ref_filename}"
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(
                    colored.blue(
                        "\nSkipping samtools index; Samtools not installed\n"))

        if which("makeblastdb") and which("gzip"):
            with indent(2):
                puts(colored.green("\nCreating blast database\n"))
            comm = "gzip -dc {ref} | makeblastdb -in - -dbtype=nucl -title={ref} -out={ref}".format(
                ref=ref_filename)
            run_command(comm, shell=True)
        else:
            with indent(2):
                puts(
                    colored.blue(
                        "\nSkipping creation of blast database; blast is not installed\n"
                    ))

        # Remove temp files
        if args["--accession-chrom-names"]:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))

        # Remove temporary files
        try:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa"))
        except OSError:
            pass

        # Add error checking here...

        with indent(2):
            puts(colored.green("\nComplete!\n"))
Example #19
#print "Lat:", xp
#print "Lon:", yp

print "Master Global Coord:", final_coord
print "Slave Global Coord:", final_coord_d
#print "Master KPs Coord", pixel_coord
#print "Slave KPs Coord:", dst_pts
#print "Image 1 Size:", img1.shape
#print "Image 2 Size:", img2.shape
#print "RMSE per GCP Northing:", rmse_per_gcp_x
#print "RMSE per GCP Easting:", rmse_per_gcp_y
#print "RMSE per GCP:", rmse_per_gcp
#cv2.imshow("Matched", view)
#plt.imshow(view)
#print tab(final_coord, headers,numalign="right")
print tab(d)
#print tab(final_coord_d, headers,numalign="right")
#print tab(rpg)
print C, A, B, F, D2, E
#print C, A,B,F, D2,E
#plt.plot (gcp, rmse_per_gcp_x, "bs")
#plt.plot (gcp, rmse_per_gcp_y, "ro")
#cv2.imshow("Matched", view)
#cv2.waitKey(1000)
#plt.imshow(view)
plt.show(10000)

#print s
#print d
print final_coord
print final_coord_d
Example #20
# Reshape to (N,1)
xp.shape =(N,1)
yp.shape = (N,1)

# Global coordinate of src_pts of master image
final_coord = np.hstack((xp,yp))

headers = ["Lat", "Lon"]
headers2 = ["Master", "Slave"]
headers3 = ["X", "Y"]
headers4 =  ["RMSE per GCP"]


# print global coordinates of the source points (src_pts)
print tab(final_coord, headers,numalign="right")

# Converting slave image in jpg to tif
img = Image.open('images/Butuan.Slave.02.jpg')  # input
img.save('images/temp/Butuan.Slave.02.tiff')         # output

# slave image is not yet georeferenced
# its raw coordinates maybe checked using the GDAL affine coefficients

img2_tif = 'images/temp/Butuan.Slave.02.tiff'
slave_tif = gdal.Open(img2_tif)
c2, a2 , b2, f2, d2, e2 = slave_tif.GetGeoTransform()

print c2, a2, b2, f2, d2, e2
# will show 0.0 1.0 0.0 0.0 0.0 1.0, still in pixel coord (offset)
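The six numbers printed above are GDAL's affine geotransform; a minimal sketch of applying it to a pixel position (pixel_to_world is only an illustrative helper, using the same coefficient names as above):

# GDAL geotransform: (c, a, b, f, d, e) = (origin_x, pixel_width, row_rotation, origin_y, column_rotation, pixel_height)
def pixel_to_world(col, row, c, a, b, f, d, e):
    x = c + col * a + row * b
    y = f + col * d + row * e
    return x, y

# For the not-yet-georeferenced slave image (0.0 1.0 0.0 0.0 0.0 1.0) this returns the pixel offsets unchanged
print(pixel_to_world(10, 20, c2, a2, b2, f2, d2, e2))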
Example #21
def create_shape_datacard(df_obs, df_rate, df_nuis, params, filename, name):
    # IDX
    dc = tab([
        ["imax * number of channels"],
        ["jmax * number of backgrounds"],
        ["kmax * number of nuisance parameters"],
    ], [],
             tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # SHAPES
    df_obs = df_obs.reset_index()
    dc += tab([[
        "shapes", "*", "*", "Zinv_METnoX-ShapeTemplates_{}.root".format(name),
        "$CHANNEL/$PROCESS", "$CHANNEL/$SYSTEMATIC/$PROCESS"
    ]], [],
              tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # OBS
    dc += tab([
        ["bin"] + list(df_obs["region"]),
        ["observation"] + [-1] * df_obs.shape[0],
    ], [],
              tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # RATE
    df_rate = df_rate.reset_index()
    dc += tab([
        ["bin"] + list(df_rate["region"]),
        ["process"] + list(df_rate["process"]),
        ["process"] + map(int, list(df_rate["proc"])),
        ["rate"] + [-1] * df_rate.shape[0],
    ], [],
              tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # NUISANCES
    nuisances = []
    for c in df_nuis.columns:
        syst = c[:-2] if c.endswith("Up") else c[:-4] if c.endswith(
            "Down") else c
        if syst not in nuisances and "nominal" not in syst:
            nuisances.append(syst)

    nuisance_block = []
    for nuis in nuisances:
        if nuis in ["lumi"]:
            nuisance_subblock = [nuis, "lnN"]
        else:
            nuisance_subblock = [nuis, "shape"]
        for up, down in zip(df_nuis[nuis + "Up"], df_nuis[nuis + "Down"]):
            if nuis in ["lumi"]:
                value = str(np.sqrt(up / down))
            else:
                value = 1

            if np.isnan([up, down]).any():
                # non-number
                value = "-"
            else:
                # number
                if np.abs(up * down - 1) < 0.005:
                    # symmetric
                    mean = np.sqrt(up / down)
                    if np.abs(mean - 1) < 1e-5:
                        # zero
                        value = "-"

            nuisance_subblock.append(value)
        nuisance_block.append(nuisance_subblock)
    dc += tab(nuisance_block, [], tablefmt="plain") + "\n" + "-" * 80 + "\n"

    # PARAMS
    if params is not None:
        dc += tab(params, [], tablefmt="plain")

    with open(filename, 'w') as f:
        f.write(dc)
    logger.info("Created {}".format(filename))
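Each datacard block above is rendered with tablefmt="plain", which simply pads the cells so the columns line up, after which a dashed separator is appended. A small self-contained sketch of what the observation block ends up looking like, with made-up region names:

from tabulate import tabulate as tab

block = [
    ["bin", "monojet", "singlemu"],
    ["observation", -1, -1],
]
print(tab(block, [], tablefmt="plain") + "\n" + "-" * 80)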
Example #22
def print_ip_table(avail, na_avail):
    t = {'Reachable': avail, 'Unreachable': na_avail}
    print(tab(t, headers='keys'))
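Example #22 leans on tabulate's support for a dict of columns: each key becomes a header (with headers='keys') and its list becomes that column. A minimal sketch with placeholder addresses:

from tabulate import tabulate as tab

t = {'Reachable': ['10.0.0.1', '10.0.0.2'], 'Unreachable': ['10.0.0.8', '10.0.0.9']}
print(tab(t, headers='keys'))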
Example #23
def pred_counts(model_or_info,
                X,
                y,
                n_top=3,
                results='s',
                n_obs=None,
                n_find=None,
                mask_zero=True,
                stateful=False,
                coder=None):
    '''
        Returns a summary of whether the top predictions were found in the actual subsequent timesteps
        - Rows represent which prediction it was (0=prediction with highest probability)
        - Columns show the percentage where this prediction occurred:
            - 0 = not at all
            - 1 = as the next item
            - 2 = as the 2nd item...etc
        - E.g. 36.60% of the top predictions were found in the next position

          n_Top         0          1          2           3
        -------  --------  ---------  ---------  ----------
              0  0.612539  0.366063   0.0134987  0.00789921
              1  0.916608  0.0556944  0.0150985  0.0125987
              2  0.981702  0.0133987  0.0029997  0.00189981

    :param model_or_info:
    :param X:
    :param y:
    :param n_top:
    :param results: Set to 's' for Summary, 'c' for Counts, 'p' for Prefixes, 'd' for Detail, or combinations e.g. 'scd'
    :param n_obs:
    :param n_find:
    :param mask_zero:
    :param stateful:
    :param coder:
        model_info = model1_info
        model = model_info['model']
        y=Y
    :return:
    '''
    import collections
    from tabulate import tabulate as tab

    if (type(model_or_info) is dict):
        model = model_or_info['model']
    else:
        model = model_or_info

    print("Evaluating predictions for model {}".format(model.name))

    # model=model5
    details = 'd' in results
    summary = 's' in results
    prefixes = 'p' in results
    counts = 'c' in results

    if n_obs is None: n_obs = X.shape[0]
    n_time = X.shape[1]
    n_cats = y.shape[2]
    if n_find is None: n_find = n_time

    print("- based on {} obs with {} categories and upto {} timesteps ".format(
        n_obs, n_cats, n_time))

    # Counts summarise for each (prefix size, prediction rank, find position)
    countsD = collections.defaultdict(lambda: 0)
    detail = []

    # i=91
    #Xi = X[i,...]
    #yi = y[i,...]
    def update_for_obs(Xi, yi, prob_i, n_time_i):
        # Get actual values for all but the last timestep (which predicts a padded value)
        y_cats = np.argmax(yi[0:n_time_i - 1], axis=1)
        if coder: y_cats = coder.inverse_transform(y_cats - 1)
        # For each position 'j' in the time steps where a prediction is made
        # j=0
        for j in range(n_time_i):
            # Get predicted categories for this position in descending order: probSr = most likely category first in series
            if coder:
                probSr = pa.Series(
                    data=prob_i[j, 1:],
                    index=coder.classes_).sort_values(ascending=False)
            else:
                probSr = pa.Series(data=prob_i[j]).sort_values(ascending=False)

            # Get all the remaining actual categories
            y_rest = y_cats[j:]
            # Loop through the top predictions
            # t=2
            for t in range(n_top):
                next_top_pred = probSr.index[
                    t]  # Predicted code (or desc if coder provided)
                next_top_prob = probSr.values[t]  # Probability of this code

                # See if the predicted code occurs in the actual values
                find_preds = list(np.where(y_rest == next_top_pred)[0])
                # Check if it has been found, and how far ahead
                if (len(find_preds) > 0):
                    # Use the first occurrence of the predicted value (index 0)
                    # f=1 means found as the expected value for this time-step (i.e. it was the next item)
                    f = find_preds[0] + 1
                    # Truncate if index where prediction is found is beyond the range
                    if (f > n_find): f = n_find + 1
                else:
                    f = 0  # Not found

                # Always record count, so f=0 is count of when prediction was not found
                countsD[(j, t, f)] += 1

                if details:
                    # Get first part of X upto 'j' where the prediction is being made
                    if coder:
                        # Subtract 1 to allow for padding
                        x_pfx = coder.inverse_transform(Xi[0:j + 1] - 1)
                    else:
                        x_pfx = Xi[0:j + 1]

                    detail.append({
                        'Pfx': x_pfx,
                        'Pred': next_top_pred,
                        'Prob': next_top_prob,
                        'n_Pfx': j,
                        'n_Top': t,
                        'n_Find': f
                    })

    for i in range(n_obs):
        # Find number of non-zero items for this obs, or use fixed n_time if not masking zero
        n_time_i = np.count_nonzero(X[i]) if mask_zero else n_time
        # Predict probability of each category for every timestep for this obs
        prob_i = predict_obs(model,
                             X,
                             i,
                             n_time,
                             n_cats,
                             stateful=stateful,
                             mask_zero=mask_zero)
        # Increment count matrix for these probabilities based on the number of non-zero inputs
        update_for_obs(X[i, ...], y[i, ...], prob_i, n_time_i)

    countsDf = pa.Series(countsD).reset_index()
    countsDf.columns = (['n_Pfx', 'n_Top', 'n_Find', 'Count'])
    summaryDf_tf = pa.pivot_table(countsDf,
                                  index='n_Top',
                                  columns='n_Find',
                                  values='Count',
                                  aggfunc=np.sum)
    summaryDf_t = pa.pivot_table(countsDf,
                                 index='n_Top',
                                 values='Count',
                                 aggfunc=np.sum)
    # summaryDf_t gives count of predictions for each value of n_Top.
    # - All will be the same: the number of input values that were not-padded and so had a prediction
    n_pred = summaryDf_t.iloc[0, 0]
    summaryDf = summaryDf_tf / n_pred

    if (type(model_or_info) is dict):
        model_or_info.update({'summary': summaryDf})

    results = []
    if summary:
        results.append(summaryDf)
        print(tab(summaryDf, headers='keys'))
        print()

    if counts: results.append(countsDf)

    if prefixes:
        summaryDf2 = pa.pivot_table(countsDf,
                                    index=['n_Pfx', 'n_Top'],
                                    columns='n_Find',
                                    values='Count',
                                    aggfunc=np.sum)
        pfxDf = pa.pivot_table(countsDf,
                               index=['n_Pfx'],
                               values='Count',
                               aggfunc=np.sum)
        pfxDf = summaryDf2.join(pfxDf)
        for f in range(n_find):
            pfxDf["Pct{}".format(f)] = pfxDf[f] / pfxDf['Count']
        results.append(pfxDf.reset_index())

    if details:
        detailDf = pa.DataFrame.from_dict(detail)[[
            'Pfx', 'Pred', 'Prob', 'n_Pfx', 'n_Top', 'n_Find'
        ]]
        results.append(detailDf)

    if (len(results) == 1):
        return results[0]
    else:
        return tuple(results)
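The summary table shown in the docstring is just the (n_Top, n_Find) count pivot divided by the total number of predictions and printed with headers='keys'. A self-contained sketch of that final step, using invented counts and the same pandas-as-pa alias as the function above:

import numpy as np
import pandas as pa
from tabulate import tabulate as tab

countsDf = pa.DataFrame({
    'n_Top':  [0, 0, 0, 1, 1, 1],
    'n_Find': [0, 1, 2, 0, 1, 2],
    'Count':  [61, 37, 2, 92, 6, 2],
})
summaryDf = pa.pivot_table(countsDf, index='n_Top', columns='n_Find',
                           values='Count', aggfunc=np.sum)
n_pred = summaryDf.loc[0].sum()  # every n_Top row sums to the same prediction count
print(tab(summaryDf / n_pred, headers='keys'))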
Example #24
 def __str__(self):
     return tab(self.data, tablefmt="grid")
Example #25
# ADD VALUE ON THE TOP OF VERTICAL BARS
def autolabel(rects):
    # iterate over the container that is passed in, not the global bar_plot
    for idx, rect in enumerate(rects):
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width() / 2,
                 height,
                 chartValue[idx],
                 ha='center',
                 va='bottom',
                 rotation=0)


autolabel(bar_plot)

plt.title('Registrations by month', pad=20, fontsize=15)

fig5.savefig(workDirectory + 'myplot5.png', dpi=100)
plt.show()
plt.clf()

im = Image.open(workDirectory + 'myplot5.png')
bordered = ImageOps.expand(im, border=1, fill=(0, 0, 0))
bordered.save(workDirectory + 'myplot5.png')

os.remove(workDirectory + 'myplot5.png')

print(
    tab(df_Created_count.head(30),
        headers='keys',
        tablefmt='psql',
        showindex=False))
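On Matplotlib 3.4 and newer the manual autolabel loop above can usually be replaced by the built-in Axes.bar_label. A hedged, self-contained sketch of the equivalent call (toy data, not the DataFrame used above):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
bars = ax.bar(['Jan', 'Feb', 'Mar'], [4, 7, 2])
ax.bar_label(bars, labels=['4', '7', '2'])  # same effect as the autolabel() loop
plt.show()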
Example #26
def pprint(array):
    print(tab(array[::-1], headers='keys', tablefmt='fancy_grid'))
Example #27
 def tab_all_releases(self, releases_data):
     self.print_help(
         tab(releases_data,
             tablefmt="plain",
             headers=["ID", "Release name", "Last import"]))
Example #28
def main(debug=None):
    args = docopt(__doc__,
                  version='VCF-Toolbox v0.1',
                  argv=debug,
                  options_first=False)
    # Setup Genomes Directory
    if args["location"] and args["<path>"]:
        if args["<path>"] == "-":
            genome_directory = get_genome_directory_file()
            os.remove(genome_directory)
            return get_genome_directory_file()
        else:
            with open(get_genome_directory_file(), "w") as f:
                genome_directory = os.path.realpath(args["<path>"])
                with indent(2):
                    puts(colored.blue("\nSet genome location to: " + genome_directory + "/\n"))
                f.write(genome_directory)
                # create directory if not exists
                if not os.path.exists(genome_directory):
                    os.makedirs(genome_directory)
                return genome_directory

    if args["--directory"]:
        genome_directory = os.path.realpath(args["--ref"])
    else:
        genome_directory = get_genome_directory()

    with indent(2):
        puts(genome_directory)

    if args["location"] and not args["<path>"]:
        return genome_directory

    genome_db = get_genome_directory() + "/genomes.db"

    ################
    # List Genomes #
    ################
    if args["list"]:
        output_genome_list()

    ##################
    # Search Genomes #
    ##################
    elif args["--search"]:
        # Download and cache a list of genomes from NCBI for searching
        download_genomes(genome_db)

        # Cache result
        header = ["assembly_accession",  # 0
                  "bioproject",  # 1
                  "organism_name",  # 7
                  "asm_name",  # 15
                  "ftp_path"]  # 19

        with indent(2):
            puts(colored.blue('\nSearching...\n'))

        with open(genome_db, "r") as f:
            results = []
            for line in f:
                if not line.startswith("#"):
                    line = line.strip().split("\t")
                    line = [x for k, x in enumerate(line) if k in [0, 1, 7, 15, 19]]
                    if args["--search"].lower() in line[2].lower() and line[4] != "na":
                        results.append(line)
        with indent(4):
            puts(tab(results, headers=header))
        with indent(2):
            puts(colored.blue('\nTo download a genome and setup for use:'))
        with indent(4):
            puts(colored.green("\nvk genome ncbi --ref=<asm_name>\n"))
        return results
    elif args["--ref"]:
        # reference name.
        reference_name = args["--ref"]

        # Ensure genome db is available
        download_genomes(genome_db)

        # reference directory
        if not args["--directory"]:
            reference_directory = genome_directory + "/" + reference_name + "/"
        else:
            reference_directory = genome_directory + "/"
        if not os.path.exists(reference_directory):
            os.makedirs(reference_directory)

        # base reference filename.
        ref_filename = reference_directory + reference_name + ".tmp.fa.gz"

        if args["wormbase"]:
            asm_url = "ftp://ftp.wormbase.org/pub/wormbase/releases/{asm_name}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{asm_name}.genomic.fa.gz"
            reference_download = asm_url.format(asm_name=args["--ref"])
            comm = "curl {reference_download} > {ref_filename}".format(**locals())
            print(comm)
            call(comm, shell = True)
            # Unzip wormbase genome
            call(["gunzip", "-f", ref_filename])
        else:
            # NCBI
            with open(genome_db, "r") as f:
                results = []
                for line in f:
                    if not line.startswith("#"):
                        line = line.strip().split("\t")
                        line = [x for k, x in enumerate(line) if k in [0, 1, 7, 15, 19]]
                        if args["--ref"] == line[3]:
                            results.append(line)
                            reference_download = results[0]
                            url = reference_download[4].replace("ftp://", "http://") + "/" + os.path.split(reference_download[4])[1] + "_genomic.fna.gz"
            if len(results) == 0:
                with indent(2):
                    puts(colored.red('\nError: Genome ' + args["--ref"] + ' not found\n'))

            with indent(2):
                puts(colored.green('\nDownloading: ' + reference_name + "; " + url + '\n'))

            # stack overflow: 15644964;
            r = requests.get(url, stream=True)

            with open(ref_filename, 'wb') as f:
                total_length = int(r.headers.get('content-length'))
                for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1):
                    if chunk:
                        f.write(chunk)
                        f.flush()

        # Fix chromosome names
        if not args["--accession-chrom-names"] and not args['wormbase']:
            with indent(2):
                puts(colored.green('\nFixing chromosome names\n'))

            with open(ref_filename.replace(".fa.gz", ".fa"), 'w') as outfa:
                with gzip.open(ref_filename, 'rb') as f:
                    for line in f:
                        outline = line
                        if line.startswith(">"):
                            acc = line.split(" ")[0].strip(">")
                            chrom_name = fetch_chrom_name(acc)
                            if chrom_name is not None:
                                outline = ">" + chrom_name + "\n"
                            elif line.lower().find("mitochon") > 0:
                                outline = ">MtDNA\n"
                            puts(colored.blue(line.strip("\n>")) + " --> " + colored.blue(outline.strip("\n>")))
                        outfa.write(outline)

        if which("bgzip"):
            with indent(2):
                puts(colored.green('\nSwitching from gzip to bgzip\n'))
            # Convert to bgzip
            if args["--accession-chrom-names"]:
                call(["gunzip", "-f", ref_filename])
            comm_bgzip = "bgzip -fc {ref_filename} > {ref_out}"
            comm_bgzip = comm_bgzip.format(ref_filename=ref_filename.replace(".fa.gz", ".fa"),
                                           ref_out=ref_filename.replace(".tmp", ""))
            print(comm_bgzip)
            call(comm_bgzip, shell=True)
            ref_filename = ref_filename.replace(".tmp", "")
        else:
            with indent(2):
                puts_err(colored.red("Please install bgzip."))
            exit()

        if which("bwa"):
            with indent(2):
                puts(colored.green("\nCreating bwa index\n"))
            call(["bwa", "index", ref_filename])
        else:
            with indent(2):
                puts(colored.blue("\nSkipping bwa index; bwa not installed\n"))

        if which("samtools"):
            with indent(2):
                puts(colored.green("\nCreating samtools index\n"))
            call(["samtools", "faidx", ref_filename])
        else:
            with indent(2):
                puts(colored.blue("\nSkipping samtools index; Samtools not installed\n"))

        if which("makeblastdb"):
            with indent(2):
                puts(colored.green("\nCreating blast database\n"))
            comm = "gunzip -c {ref} | makeblastdb -in - -dbtype=nucl -title={ref} -out={ref}".format(ref=ref_filename)
            call(comm, shell=True)
        else:
            with indent(2):
                puts(colored.blue("\nSkipping creation of blast database; blast is not installed\n"))

        # Remove temp files
        if args["--accession-chrom-names"]:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))

        # Remove temporary files
        try:
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa.gz"))
            os.remove(ref_filename.replace(".fa.gz", ".tmp.fa"))
        except OSError:
            pass

        # Add error checking here...

        with indent(2):
            puts(colored.green("\nComplete!\n"))
Example #29
def from_stations():
    response = requests.get(BR_API_FROM_STATIONS)
    stations = [[i['stn_code'], i['stn_name']] for i in response.json()]
    print(tab(stations, headers=['Code', 'Station Name']))
Example #30
 def tab_mixes_list(self, mixes_data):
     tabulated = tab(
         mixes_data,
         tablefmt="simple",
         headers=["Mix #", "Name", "Played", "Venue", "Created", "Updated"])
     self.print_help(tabulated)
Example #31
(duplicate of Example #19; omitted)
Example #32
def to_stations(dest):
    response = requests.get(F"{BR_API_TO_STATIONS}/{dest}")
    stations = [[i['stn_code'], i['dest']] for i in response.json()]
    print(tab(stations, headers=['Code', 'Destination Name']))
Example #33
im = Image.open(workDirectory+'myplot10.png')
bordered = ImageOps.expand(im, border=1, fill=(0, 0, 0))
bordered.save(workDirectory+'myplot10.png')

# INSERT IN EXCEL
img = openpyxl.drawing.image.Image(workDirectory+'myplot10.png')
img.anchor = 'E4'

workbook['Degrees'].add_image(img)
workbook.save(outputExcelFile)


# REMOVE PICTURES
os.remove(workDirectory+'myplot1.png')
os.remove(workDirectory+'myplot2.png')
os.remove(workDirectory+'myplot3.png')
# os.remove(workDirectory+'myplot4.png')
os.remove(workDirectory+'myplot5.png')
os.remove(workDirectory+'myplot6.png')
os.remove(workDirectory+'myplot7.png')
os.remove(workDirectory+'myplot8.png')
os.remove(workDirectory+'myplot10.png')
os.remove(workDirectory+'mymap1.png')
os.remove(workDirectory+'mymap3.png')


# TERMINAL OUTPUTS AND TESTS
print(tab(df_Degrees_count, headers='keys', tablefmt='psql', showindex=False))
print(today)
print("OK, export done!")