示例#1
0
def run():
    """Run every pipeline stage in order, announcing each one on stdout.

    A banner with the current time is printed first; then, for each stage,
    its message is printed and the stage module's ``run()`` is called.
    ``Done!`` is printed once all stages have returned.
    """
    print("Running @", datetime.now())

    # (message, module) pairs, executed strictly top to bottom.
    stages = (
        ("Downloading data..", collection),
        ("Aggregating data..", aggregation),
        ("Analyzing data..", analysis),
        ("Creating plots..", visualization),
    )
    for banner, stage in stages:
        print(banner)
        stage.run()

    print("Done!")
示例#2
0
def run(i, j):
    """Time a single ``analysis.run`` call and return what it produced.

    When both ``i`` and ``j`` equal 9 the analysis is called with the four
    positional placeholders only.  Otherwise it also receives the ``[i:j]``
    slices of ``han.POS_TAGGED`` and ``han.RESULTS`` as the ``pos_tagged``
    and ``results`` keyword arguments.

    The elapsed wall-clock time is printed with ``%d``, i.e. truncated to
    whole seconds.
    """
    started = time.time()

    extra = {}
    if not (i == 9 and j == 9):
        extra["pos_tagged"] = han.POS_TAGGED[i:j]
        extra["results"] = han.RESULTS[i:j]
    setcurrent = analysis.run([], [], [], None, **extra)

    elapsed = time.time() - started
    print("Time: %d" % elapsed)
    return setcurrent
示例#3
0
def run_commandline_program(sys_argv, current_version, latest_version):
    """Command-line front end: analyse one competition url or print usage.

    Args:
        sys_argv: argv-style list.  Exactly two elements are expected; the
            second is either ``--help`` or the url to analyse.  Any other
            length prints the usage text.
        current_version: version string of the running program.
        latest_version: newest known version string, optionally prefixed
            with ``v``.  A falsy value skips the version notice.

    Returns:
        None.  All feedback is written to stdout.
    """

    def print_help():
        print(
            'There are two options for running PySoar from the commandline:\n'
            '1. `python main_python` for GUI\n'
            '2. `python main_pysoar [url]` - where [url] is the daily competition url'
        )

    def status_handle(message):
        print(message)

    def download_handle(new, total=None):
        if total is None:
            print('Downloaded: %s' % new)
        else:
            print('Downloaded: %s/%s' % (new, total))

    def analysis_handle(new, total=None):
        if total is None:
            print('Analyzed: %s' % new)
        else:
            print('Analyzed: %s/%s' % (new, total))

    def on_success():
        print('Analysis complete')

    def on_failure(msg):
        print('Error: %s' % msg)

    # Version notice: the leading "v" of the latest tag is ignored when comparing.
    if latest_version and latest_version.lstrip('v') != current_version:
        print('Latest version is %s! Current: %s' %
              (latest_version, current_version))

    # Anything other than exactly one argument, or an explicit --help,
    # only shows the usage text.
    if len(sys_argv) != 2 or sys_argv[1] == '--help':
        print_help()
        return

    url = sys_argv[1]
    if not url_format_correct(url, status_handle):
        return

    run(url,
        get_url_source(url),
        download_progress=download_handle,
        analysis_progress=analysis_handle,
        on_success=on_success,
        on_failure=on_failure)
示例#4
0
def hello_world(text):
    """Run the analysis on the ``text`` field of a JSON-encoded payload.

    Args:
        text: JSON document (as a string) that decodes to a mapping with a
            ``'text'`` key.

    Returns:
        Whatever ``analysis.run`` returns for ``param['text']``.

    Raises:
        ValueError: if ``text`` is not valid JSON (raised by ``json.loads``).
        KeyError: if the decoded mapping has no ``'text'`` key.
    """
    param = json.loads(text)
    # Single-argument call form: prints the same under Python 2 and is
    # valid syntax under Python 3 (the bare print statement is not).
    print(param)
    return analysis.run(param['text'])
示例#5
0
def hello_world(text):
    """Decode a JSON payload and hand its ``text`` entry to the analysis.

    Args:
        text: JSON string decoding to a mapping that contains ``'text'``.

    Returns:
        The value returned by ``analysis.run(param['text'])``.

    Raises:
        ValueError: if ``text`` cannot be parsed as JSON.
        KeyError: if the parsed mapping lacks the ``'text'`` key.
    """
    param = json.loads(text)
    # print(...) with one argument behaves identically on Python 2 and 3;
    # the former print statement was a SyntaxError on Python 3.
    print(param)
    fc = analysis.run(param['text'])
    return fc
示例#6
0
def compare_feature_selectors(p):
    """Compare single and ensemble feature selectors on four data sets.

    Three base selectors are created and wrapped by three ensemble methods.
    The base selectors, a Lasso selector, a random selector and the
    ensembles are then passed together to ``analysis.run``.

    Args:
        p: forwarded as ``percentage_features_to_select`` to ``SVM_RFE`` and
            as ``jaccard_percentage`` to ``analysis.run``.
    """
    base_selectors = [
        SymmetricalUncertainty(),
        Relief(),
        SVM_RFE(percentage_features_to_select=p),
    ]

    # Every ensemble shares the same list of base selectors.
    ensembles = [
        ensemble_methods.Mean(data_set_feature_selectors=base_selectors),
        ensemble_methods.MeanNormalizedSum(
            data_set_feature_selectors=base_selectors),
        ensemble_methods.MeanWithClassifier(
            data_set_feature_selectors=base_selectors,
            classifiers=analysis.default_classifiers),
    ]

    extra_selectors = [LassoFeatureSelector(), Random()]
    all_selectors = base_selectors + extra_selectors + ensembles

    analysis.run(
        ["colon", "arcene", "dexter", "gisette"],
        all_selectors,
        jaccard_percentage=p,
    )
示例#7
0
def do_analysis():
    """Run the analysis for the current request and store the outcome as JSON.

    Form fields read from the request: ``datadir``, ``analysis``, ``id``,
    ``url``, ``domain`` and ``outfile``.  The files matching ``datadir*``
    are listed on stdout, ``analysis.run(datadir)`` is timed (wall clock and
    CPU), and the result together with the request metadata is written to
    ``outfile``.

    Returns:
        A confirmation string that contains the request id.
    """
    form = flask.request.form
    stamp_format = "%Y-%m-%d %H:%M:%S"

    # Report which data files are available.
    data_dir = form['datadir']
    available = glob.glob(data_dir + "*")
    print("found those datafiles:")
    for path in available:
        print('- ' + path)

    print('perform analysis {} ...'.format(form['analysis']))

    result = {
        "id": form['id'],
        "time_start_wall": datetime.datetime.now().strftime(stamp_format),
        "URL": form['url'],
        "domain": form['domain'],
        "analysis": form['analysis'],
        "info": "Done",
    }

    # Time the analysis itself; both clocks are sampled around the call.
    wall_start = time.time()
    cpu_start = time.process_time()
    result['result'] = analysis.run(data_dir)
    wall_end = time.time()
    cpu_end = time.process_time()

    result['time_duration_wall'] = wall_end - wall_start
    result['time_duration_cpu'] = cpu_end - cpu_start
    result['time_end_wall'] = datetime.datetime.now().strftime(stamp_format)

    # Persist everything collected above.
    outfilepath = form['outfile']
    print('Storing results in ' + outfilepath)
    with open(outfilepath, 'w') as outfile:
        json.dump(result, outfile)

    return 'Finished analysis: ' + form['id']
示例#8
0
def analyze(args):
    """Validate the request arguments and run the analysis for one company.

    Args:
        args: mapping of request parameters; ``'company'`` is required.

    Returns:
        A response dict.  ``respCode`` is ``'0000'`` on success, with the
        analysis output under ``data['data']``.  It is ``'9999'`` when
        ``get_lost_keys`` reports missing keys or when reading ``company``
        raises.
    """
    required = ['company']
    missing = get_lost_keys(args, required)
    if missing:
        return {
            'respCode': '9999',
            'respMsg': '缺少参数: %s' % ' '.join(missing)
        }

    try:
        company = args['company']
    except Exception as e:
        # The backend passes every input parameter as a string, so the
        # data types need to be checked; sample_args shows the expected shape.
        return {
            'respCode': '9999',
            'respMsg': '数据类型错误: %s' % str(e),
            'sample_args': {
                'company': 'aitai',
            }
        }

    payload = analysis.run(company)
    return {
        'respCode': '0000',
        'respMsg': 'success',
        'data': {'data': payload},
    }
示例#9
0
def fig_Pcolor(outfile='fig_Pcolor.png'):
    """Joint plot of host u-r colour against log10 stellar mass.

    An FRB contributes a point when its highest candidate ``P_Ox`` exceeds
    ``associate_defs.POx_secure``, its host has a ``z_spec`` redshift entry
    and its host has a derived ``u-r`` value.

    Args:
        outfile: path of the image to write.  When the name contains
            ``png`` the figure is saved at 700 dpi.

    Returns:
        None.  The figure is written to ``outfile`` and then closed.
    """
    set_mplrc()

    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # Bayesian analysis; its return values are not needed here.
    # NOTE(review): kept because the loop below reads candidates.P_Ox from
    # frb_pre, which this call presumably fills in -- confirm.
    analysis.run(prior, frb_pre=frb_pre)

    # High confidence
    high_conf_Mstar, high_conf_ur = [], []
    for frbA in frb_pre.frbA:
        # "not (a > b)" rather than "a <= b" so NaN maxima are skipped.
        if not (np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure):
            continue
        hg = frbA.frb.grab_host()
        if 'z_spec' not in hg.redshift.keys():
            continue
        if 'u-r' not in hg.derived.keys():
            continue
        high_conf_Mstar.append(np.log10(hg.derived['Mstar']))
        high_conf_ur.append(hg.derived['u-r'])
    high_conf_Mstar = np.array(high_conf_Mstar)
    high_conf_ur = np.array(high_conf_ur)

    # Plot u-r vs. log10 Mstar.  jointplot creates its own figure, so no
    # plt.figure() call precedes it: a figure opened beforehand would never
    # be drawn on and would stay open after the plt.close() below.
    df = pandas.DataFrame(dict(Mstar=high_conf_Mstar, ur=high_conf_ur))
    jg = sns.jointplot(data=df, x='Mstar', y='ur')
    jg.ax_marg_x.set_xlim(8, 10.5)
    jg.ax_marg_y.set_ylim(0.5, 2.0)
    jg.ax_joint.set_xlabel(r'$\log_{10} \, (M*/M_\odot)$')
    jg.ax_joint.set_ylabel(r'$u-r$')
    jg.ax_joint.minorticks_on()

    jg.ax_joint.yaxis.set_major_locator(plt.MultipleLocator(0.5))
    jg.ax_joint.xaxis.set_major_locator(plt.MultipleLocator(1.0))

    # Font size
    set_fontsize(jg.ax_joint, 15.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()
示例#10
0
def fig_PL(outfile='fig_PL.png'):
    """Two-panel figure: magnitude vs. redshift and the log10(L/L*) histogram.

    Only FRBs whose highest candidate ``P_Ox`` exceeds
    ``associate_defs.POx_secure`` and whose host has a ``z_spec`` redshift
    entry are used.  The top panel scatters the best candidate's magnitude
    against the FRB redshift and overplots ``f_mL(z)``.  The bottom panel
    is a histogram of ``(f_mL(z) - m) / 2.5``, weighted so that the bars
    sum to one.  Per-FRB values, the median L/L* and the standard deviation
    of log10(L/L*) are printed.

    Args:
        outfile: path of the image to write.  When the name contains
            ``png`` the figure is saved at 700 dpi.

    Returns:
        None.
    """
    set_mplrc()

    sns.set_theme()
    sns.set_style('whitegrid')
    sns.set_context('paper')

    # f_mL is called with a redshift below and its value is used as m_r(L*).
    f_mL = analysis.load_f_mL()
    frb_pre = analysis.get_candidates()
    prior = associate_defs.adopted.copy()

    plt.figure(figsize=(4, 8))
    panels = gridspec.GridSpec(2, 1)

    # Bayesian analysis; the four returned items are not used in this figure.
    _, _, _, _ = analysis.run(prior, frb_pre=frb_pre)

    bins_L = np.linspace(-2., 1, 15)

    # One (log10 L/L*, z, m) record per secure association.
    secure = []
    for frbA in frb_pre.frbA:
        if not (np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure):
            continue
        hg = frbA.frb.grab_host()
        if 'z_spec' not in hg.redshift.keys():
            continue
        best = np.argmax(frbA.candidates.P_Ox)
        # Magnitude of the best candidate in this FRB's filter
        m = frbA.candidates.iloc[best][frbA.filter]
        # m_r(L*) at the FRB redshift
        m_r_Lstar = float(f_mL(frbA.frb.z))
        # Magnitude difference -> log10 luminosity ratio
        log10_Lstar = (m_r_Lstar - m) / 2.5
        print('FRB, z, m, m_L*, log10_L: ', frbA.frb.frb_name, frbA.frb.z,
              m, m_r_Lstar, log10_Lstar)
        secure.append((log10_Lstar, frbA.frb.z, m))

    high_conf_L = np.array([rec[0] for rec in secure])
    high_conf_z = np.array([rec[1] for rec in secure])
    high_conf_mr = np.array([rec[2] for rec in secure])

    fsz = 15.

    # Top panel: m_r vs. z with the f_mL curve
    ax = plt.subplot(panels[0])
    sns.scatterplot(x=high_conf_z, y=high_conf_mr, ax=ax)
    ax.set_xlabel(r'$z$')
    ax.set_ylabel(r'$m_r$')

    zs = np.linspace(0.02, 0.5, 100)
    ax.plot(zs, f_mL(zs), 'k--')
    set_fontsize(ax, fsz)

    # Bottom panel: histogram of log10(L/L*)
    ax = plt.subplot(panels[1])
    weights = np.ones_like(high_conf_L) / high_conf_L.size
    lbl = r'$P(O_i) > ' + '{}'.format(associate_defs.POx_secure) + '$ FRBs'
    ax.hist(high_conf_L,
            bins=bins_L,
            weights=weights,
            color='b',
            label=lbl,
            histtype='stepfilled')

    # Stats
    print("Median L/L* = {}".format(np.median(10**high_conf_L)))
    print("RMS log(L/L*) = {}".format(np.std(high_conf_L)))

    ax.set_xlabel(r'$\log_{10} \, (L/L*)$')
    ax.set_ylabel('PDF')
    ax.xaxis.set_major_locator(plt.MultipleLocator(1.))
    ax.set_ylim(0., 0.3)

    ax.legend(loc='upper right',
              scatterpoints=1,
              borderpad=0.2,
              handletextpad=handletextpad,
              fontsize=13.)
    set_fontsize(ax, fsz)

    # Write the file
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    save_kwargs = {'dpi': 700} if 'png' in outfile else {}
    plt.savefig(outfile, **save_kwargs)
    plt.close()
示例#11
0
def fig_mag_vs_DM(outfile='fig_mag_vs_DM.png'):
    """Plot candidate magnitudes against DM_FRB - DM_ISM - 100 (mini Macquart relation).

    For every FRB in the table returned by ``analysis.run`` the candidates
    with ``P_Ox`` above 0.01 are scattered at that FRB's DM value, with
    marker area proportional to ``P_Ox``.  FRBs are drawn in order of
    increasing DM value.  A fiducial curve for L = 0.27 L* is overplotted.

    Args:
        outfile: path of the image to write.  When the name contains
            ``png`` the figure is saved at 700 dpi.

    Returns:
        None.
    """
    set_mplrc()

    f_mL = analysis.load_f_mL()
    prior = associate_defs.adopted.copy()

    plt.figure(figsize=(8, 5))
    gs = gridspec.GridSpec(1, 1)
    cm = plt.get_cmap('jet')

    # Bayesian analysis; only the per-FRB table is used here.
    frbA_tbl, _, _, _ = analysis.run(prior)

    # One colour per FRB (plus one spare) taken from the colormap.
    N = len(frbA_tbl) + 1
    plt.rcParams["axes.prop_cycle"] = plt.cycler("color",
                                                 cm(np.linspace(0, 1, N)))

    # The axes must be created after the colour cycle is set.
    ax = plt.subplot(gs[0])

    Pmin = 0.01
    unit_size = 150.

    def dm_excess(assoc):
        # DM_FRB - DM_ISM - 100, the x-axis quantity
        return assoc.frb.DM.value - assoc.frb.DMISM.value - 100

    dm_per_frb = [dm_excess(row.frbA) for _, row in frbA_tbl.iterrows()]
    for idx in np.argsort(dm_per_frb):
        frb_row = frbA_tbl.iloc[idx]
        assoc = frb_row.frbA
        # Restrict to candidates above the minimum probability
        keep = assoc.candidates.P_Ox > Pmin
        n_keep = np.sum(keep)
        ax.scatter(
            [dm_excess(assoc)] * n_keep,
            assoc.candidates[keep][assoc.filter],
            label=frb_row.frb,
            edgecolor='darkslategrey',
            marker='o',
            s=unit_size * assoc.candidates[keep].P_Ox,
        )

    # Fiducial relation
    L_Lstar = 0.270  # Grabbed from PL
    numL = int(np.round(1. / L_Lstar))
    log10_L_Lstar = np.log10(L_Lstar)
    std_log10_L_Lstar = 0.47  # Grabbed from fig_PL

    # Cumulative DM as a function of redshift, interpolated.
    dm_cosmic, z = igm.average_DM(1., cumul=True)
    f_DMz = interp1d(z, dm_cosmic.value)

    zval = np.linspace(0.02, 1., 100)
    DMs = f_DMz(zval)
    m_FRB = f_mL(zval) - 2.5 * log10_L_Lstar

    ax.plot(DMs, m_FRB, 'k-', label=r'$L=L*/${}'.format(numL))

    ax.set_xlabel(
        r'DM$_{\rm cosmic} \equiv$ DM$_{\rm FRB}$ - DM$_{\rm ISM}$ - 100')
    ax.set_ylabel(r'$m_r$')
    ax.set_xlim(0., 800.)

    ax.legend(loc='lower right',
              scatterpoints=1,
              borderpad=0.0,
              handletextpad=handletextpad,
              fontsize=10.)

    set_fontsize(ax, 15.)

    # Write the file
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    save_kwargs = {'dpi': 700} if 'png' in outfile else {}
    plt.savefig(outfile, **save_kwargs)
    plt.close()
def execfile():
    """Run the analysis for the channel id held by ``e1`` and call ``rest()`` on it.

    The value is read from ``e1`` before ``analysis`` is imported, so a
    failing read is reported before any import error.
    NOTE(review): ``e1`` looks like a GUI entry widget defined elsewhere --
    confirm.
    """
    channel_id = e1.get()
    # Imported lazily, at call time.
    from analysis import run as start_analysis
    start_analysis(channel_id).rest()
示例#13
0
def fig_Phalflight(outfile='fig_Phalflight.png'):
    """Joint plot of the best candidate's ``half_light`` value against FRB redshift.

    An FRB contributes a point when its highest candidate ``P_Ox`` exceeds
    ``associate_defs.POx_secure``.  The y value is the ``half_light`` column
    of the candidate with the highest ``P_Ox``.

    Args:
        outfile: path of the image to write.  When the name contains
            ``png`` the figure is saved at 700 dpi.

    Returns:
        None.  The figure is written to ``outfile`` and then closed.
    """
    set_mplrc()

    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # Bayesian analysis; its return values are not needed here.
    # NOTE(review): kept because the loop below reads candidates.P_Ox from
    # frb_pre, which this call presumably fills in -- confirm.
    analysis.run(prior, frb_pre=frb_pre)

    # High confidence
    high_conf_z, high_conf_hl = [], []
    for frbA in frb_pre.frbA:
        # "not (a > b)" rather than "a <= b" so NaN maxima are skipped.
        if not (np.max(frbA.candidates.P_Ox) > associate_defs.POx_secure):
            continue
        imax = np.argmax(frbA.candidates.P_Ox)
        high_conf_z.append(frbA.frb.z)
        high_conf_hl.append(frbA.candidates.iloc[imax].half_light)
    high_conf_z = np.array(high_conf_z)
    high_conf_hl = np.array(high_conf_hl)

    # Plot half-light vs. z.  jointplot creates its own figure, so no
    # plt.figure() call precedes it: a figure opened beforehand would never
    # be drawn on and would stay open after the plt.close() below.
    df = pandas.DataFrame(dict(z=high_conf_z, hl=high_conf_hl))
    jg = sns.jointplot(data=df, x='z', y='hl')
    jg.ax_marg_x.set_xlim(0, 0.6)
    jg.ax_joint.set_xlabel(r'$z$')
    jg.ax_joint.set_ylabel(chalf + ' (arcsec)')

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()
示例#14
0
def _ensemble_suite(selectors):
    """Return one instance of each of the six ensemble methods over ``selectors``.

    All six instances receive the same ``selectors`` list object.
    """
    classifiers = analysis.default_classifiers
    return [
        ensemble_methods.Mean(data_set_feature_selectors=selectors),
        ensemble_methods.Influence(data_set_feature_selectors=selectors),
        ensemble_methods.MeanNormalizedSum(
            data_set_feature_selectors=selectors),
        ensemble_methods.MeanWithClassifier(
            data_set_feature_selectors=selectors,
            classifiers=classifiers
        ),
        ensemble_methods.InfluenceWithClassifier(
            data_set_feature_selectors=selectors,
            classifiers=classifiers
        ),
        ensemble_methods.MeanNormWithClassifier(
            data_set_feature_selectors=selectors,
            classifiers=classifiers
        ),
    ]


def combinations():
    """Evaluate the ensemble methods over every subset of the base selectors.

    The six ensemble methods are built over all four base selectors, then
    over every 3-element subset, then over every 2-element subset (in
    ``itertools.combinations`` order).  The base selectors followed by all
    ensembles are passed to ``analysis.run`` with the ``"combinations"``
    prefix.
    """
    fs = [
        SymmetricalUncertainty(),
        Relief(),
        SVM_RFE(),
        LassoFeatureSelector(),
    ]

    e_methods = _ensemble_suite(fs)

    # Subsets are drawn from fs itself, so the subset sizes no longer depend
    # on a separately hard-coded selector count.
    for subset_size in (3, 2):
        for subset in itertools.combinations(fs, subset_size):
            e_methods.extend(_ensemble_suite(list(subset)))

    data_sets = ["artificial", "colon", "arcene", "dexter", "gisette"]

    analysis.run(data_sets, fs + e_methods, prefix="combinations")
示例#15
0
#
# START
#
# Each stage below runs when its keyword appears in sys.argv.  The
# interactive prompts are disabled: ``answer`` is fixed to "n", so the
# command-line keyword is the only way to enable a stage.  Stage modules
# are imported only when their stage actually runs.

answer = "n"  # input("Start Generate Data? (y/n)")
if 'generate' in sys.argv or answer.lower() == "y":
    import generate
    # "test" on the command line selects the test variant of the generator.
    (generate.test if 'test' in sys.argv else generate.run)()

answer = "n"  # input("Start Prepare Data? (y/n)")
if 'prepare' in sys.argv or answer.lower() == "y":
    import prepare
    # "analyze" on the command line selects prepare.analyze over prepare.run.
    (prepare.analyze if 'analyze' in sys.argv else prepare.run)()

answer = "n"  # input("Start Training? (y/n)")
if 'train' in sys.argv or answer.lower() == "y":
    import train
    train.run()

answer = "n"  #input("Start Visualize? (y/n)")
if 'visualize' in sys.argv or answer.lower() == "y":
    import analysis
    analysis.run()
示例#16
0
def _collect_run_list(command_variables, pdb_dir, erfile):
    """Decide which PDB files to process.

    With fewer than two command variables every entry of ``pdb_dir`` except
    ``dsspout`` is used.  Otherwise the first variable is dropped and each
    remaining one whose name contains ``.ent`` or ``.pdb`` is used.  In that
    case ``pdb_dir`` becomes the empty string and ``dssp_dir`` is taken from
    the last accepted file.

    Returns:
        ``(run_list, pdb_dir, dssp_dir)``; ``dssp_dir`` is None when it was
        not determined here.
    """
    dssp_dir = None
    if len(command_variables) < 2:
        run_list = os.listdir(pdb_dir)
        # Tolerate a PDB directory that has no dsspout sub-directory yet.
        if "dsspout" in run_list:
            run_list.remove("dsspout")
        return run_list, pdb_dir, dssp_dir

    command_variables.pop(0)
    run_list = []
    for name in command_variables:
        if "\\" not in name:
            name = ".\\" + name
        lowered = name.lower()
        if '.ent' in lowered or '.pdb' in lowered:
            if not os.path.isfile(name):
                erfile.write("The file " + name + " was not found.")
                sys.exit(1)
            run_list.append(name)
            pdb_dir = ""
            dssp_dir = os.path.dirname(name) + "\\dsspout"
    return run_list, pdb_dir, dssp_dir


def _ensure_dssp_files(run_list, pdb_dir, dssp_dir, erfile):
    """Run create_dssp.py for every file in ``run_list`` with no DSSP output yet."""
    print("Checking DSSP files... ")
    if dssp_dir is None:
        # An empty run list must not crash here; fall back to pdb_dir itself.
        first_dir = os.path.dirname(run_list[0]) if run_list else ""
        dssp_dir = pdb_dir + first_dir + "\\dsspout"
    dssp_names = os.listdir(dssp_dir) if os.path.isdir(dssp_dir) else []
    # Existing outputs are keyed by the text before the first "." and "_".
    known = set(n.split(".")[0].split("_")[0] for n in dssp_names)
    print(len(known))
    print(len(run_list))
    for path in run_list:
        stem = path[:-3].split("\\")[-1].rstrip(".")
        print(stem)
        if stem in known:
            continue
        file_dir = pdb_dir + os.path.dirname(path)
        out_dir = file_dir + "\\dsspout"
        if not os.path.isdir(out_dir):
            os.mkdir(out_dir)
        outpath = out_dir + "\\"
        pdb_path = file_dir + "\\" + os.path.basename(path)
        erfile.write("python create_dssp.py " + pdb_path + " " + outpath)
        # NOTE(review): the command is assembled by string concatenation;
        # paths containing spaces or shell metacharacters are not quoted.
        os.system("python D:\\xampp\\htdocs\\python\\SSP\\create_dssp.py " +
                  pdb_path + " " + outpath)
        print("CREATE ")
    print("Done")


def _activate_first_populated_sheet(workbook, sheet_count=7):
    """Reset every sheet's first visible row and activate the first used sheet.

    A sheet counts as used when its row 0 reports a non-zero string count.
    The last sheet is the fallback and is never inspected.
    """
    first_active = sheet_count - 1
    for index in range(sheet_count):
        sheet = workbook.get_sheet(index)
        if index < sheet_count - 1:
            has_strings = sheet.row(0).get_str_count() > 0
            if has_strings and index < first_active:
                first_active = index
        sheet.set_first_visible_row(0)
    workbook.set_active_sheet(first_active)


def main(analysis_type, command_variables, threshold):
    """Run ``analysis.run`` over a set of PDB files and save one workbook.

    Args:
        analysis_type: one of ``"lys_arg"``, ``"his"``, ``"asp_glu"``,
            ``"trp"``, ``"phe"``, ``"tyr"``.  The matching keyword flag of
            ``analysis.run`` is True and the others False.  Also used in
            the error-log file name.
        command_variables: argv-style list, modified in place.  With
            exactly two elements and ``-A`` as the second, all files of the
            PDB directory are processed, a fixed ``disulfide_analysis_*``
            output name is used and the result is zipped.  Otherwise the
            elements after the first are treated as PDB file paths.
        threshold: forwarded to ``analysis.run`` as ``dist``.

    Returns:
        Path of the saved ``.xls`` workbook.

    All directories are hard-coded Windows paths (marked ``## EDIT``).
    """
    # NOTE(review): fopen is a project helper; it is indexed like a list of
    # lines here -- confirm.
    pdb_dir = fopen(
        "D:\\xampp\\htdocs\\python\\SSP\\absolute_path_of_pdb_files")[0].strip(
        )  ## EDIT

    x = Workbook()
    for title in ("X-RAY", "X-RAY 2", "X-RAY 3", "NMR averaged model",
                  "NMR multiple models 1", "NMR multiple models 2", "Other"):
        x.add_sheet(title)

    date_ = time.strftime("%d%b%Y_%H%M%S")
    write_path = "D:\\xampp\\htdocs\\python\\Analyses\\" + str(
        int(random.random() * 100000000)) + "_" + date_ + ".xls"
    inA = False
    if len(command_variables) == 2 and command_variables[1] == '-A':
        date_ = time.strftime("%d_%b_%Y")
        write_path = ("D:\\xampp\\htdocs\\python\\Analyses\\disulfide_analysis_"
                      + date_ + ".xls")
        command_variables.pop(1)
        inA = True

    erfile = open("D:\\xampp\\htdocs\\python\\Errors\\error_" + analysis_type +
                  "_" + date_ + ".log", "w")  ## EDIT
    try:
        run_list, pdb_dir, dssp_dir = _collect_run_list(
            command_variables, pdb_dir, erfile)
        _ensure_dssp_files(run_list, pdb_dir, dssp_dir, erfile)

        # Histogram threaded through successive analysis.run calls.
        hist_ = {}
        n = len(run_list)
        typedict = {}
        for type_ in ("lys_arg", "his", "asp_glu", "trp", "phe", "tyr"):
            typedict[type_] = (analysis_type == type_)

        timestart = time.ctime()
        for i, pdb_file in enumerate(run_list):
            print(pdb_dir + pdb_file)
            hist_ = analysis.run(pdb_dir + pdb_file,
                                 x,
                                 erfile,
                                 dist=threshold,
                                 histogram=hist_,
                                 **typedict)
            # Percentage done, truncated to two decimals.
            m = int((i + 1) / float(n) * 10000) / 100.0
            print(str(m) + "% complete")

        timefinish1 = time.ctime()
        print("\nResults file is being saved. Please do not close the program window.\n")

        _activate_first_populated_sheet(x)
        x.save(write_path)
        if inA:
            os.system(
                "7z a -tzip -mx=9 D:\\xampp\\htdocs\\python\\Analyses\\disulfide_analysis_"
                + date_ + ".zip " + write_path)
        print("\nAnalysis complete and results saved.\n")
        timefinish2 = time.ctime()
    finally:
        # Close the error log on every exit path, including sys.exit().
        erfile.close()

    print("start    = " + timestart)
    print("prcessed = " + timefinish1)
    print("saved    = " + timefinish2)

    return write_path
示例#17
0
# Choose what identifies one row of the analysis.  Barcode level is used
# when requested and also when neither flag is given, so that ``label`` and
# ``counts['unique_id']`` are always defined for the code below.
if args.barcode_level or not args.feature_level:
    counts['unique_id'] = list(counts.index)
    label = "barcode_level"
else:
    # Feature level: the id is built from four columns joined with "_".
    counts['unique_id'] = [
        "_".join(str(part) for part in parts)
        for parts in zip(counts['sseqid'], counts['guide_status'],
                         counts['donor_status'], counts['library'])
    ]
    label = "feature_level"

# Keep the sample columns plus annotations; drop rows with no sample value.
counts = counts[samples + [
    "unique_id", "sseqid", "library", "subpool", "guide_status",
    "donor_status", "bsp_status"
]].dropna(subset=samples, how='all')

# Keep only rows where fewer than 25% of the samples are missing.
counts['percentNA'] = counts[samples].isnull().sum(axis=1) / float(
    len(samples))
counts = counts[counts['percentNA'] < 0.25]

counts.to_csv(outfile + "_" + label + "_counts.csv")
results, log = analysis.run(counts, model, log=True)
results.to_csv(outfile + "_" + label + "_analysis.csv")
log.to_csv(outfile + "_" + label + "_log.csv")
示例#18
0
def fig_Psizesep(outfile='fig_Psizesep.png'):
    """Plot galaxy size against physical separation for secure FRB hosts.

    For every entry of ``frb_pre.frbA`` whose largest candidate ``P_Ox``
    exceeds ``associate_defs.POx_secure``, the separation and half-light size
    of the top candidate (both multiplied by ``units.arcsec``) are converted
    to kpc at the FRB redshift. The two quantities are shown in a seaborn
    joint plot together with a one-to-one reference line, and the figure is
    written to ``outfile``.

    Args:
        outfile (str): Path of the image to write. When the name contains
            ``'png'`` the figure is saved with ``dpi=700``.

    Returns:
        None
    """
    set_mplrc()

    sns.set_theme()
    sns.set_style('whitegrid')
    sns.set_context('paper')

    # NOTE(review): the value returned here was never used by this figure; the
    # call is kept in case loading has side effects -- confirm and drop if not.
    analysis.load_f_mL()

    frb_pre = analysis.get_candidates()

    # Init prior
    prior = associate_defs.adopted.copy()

    # Bayesian run. Its return values are not needed for this figure.
    # NOTE(review): presumably this is what fills in the P_Ox values read
    # below -- confirm against analysis.run.
    analysis.run(prior, frb_pre=frb_pre)

    # No plt.figure() here: sns.jointplot creates its own figure, so a figure
    # opened beforehand would be left orphaned and never closed.

    # High confidence associations
    high_conf_size, high_conf_sep = [], []
    max_sep = 0.
    max_FRB = None  # stays None when no FRB passes the secure threshold
    for frbA in frb_pre.frbA:
        candidates = frbA.candidates
        if not (np.max(candidates.P_Ox) > associate_defs.POx_secure):
            continue
        # Most probable candidate
        best = candidates.iloc[np.argmax(candidates.P_Ox)]
        # Angular -> physical scale at the FRB redshift
        ang_phys = associate_defs.cosmo.kpc_proper_per_arcmin(frbA.frb.z)
        high_conf_sep.append(
            (best.separation * units.arcsec * ang_phys).to('kpc').value)
        high_conf_size.append(
            ((best.half_light * units.arcsec) * ang_phys).to('kpc').value)
        # Track the FRB with the largest physical separation
        if high_conf_sep[-1] > max_sep:
            max_sep = high_conf_sep[-1]
            max_FRB = frbA.frb
    high_conf_size = np.array(high_conf_size)
    high_conf_sep = np.array(high_conf_sep)
    print("Max sep: {}".format(max_FRB))

    # Joint plot of galaxy size vs. physical separation
    df = pandas.DataFrame(dict(size=high_conf_size, sep=high_conf_sep))
    jg = sns.jointplot(data=df, x='sep', y='size')
    jg.ax_marg_y.set_ylim(0., 7)
    jg.ax_joint.set_xlabel('Physical Separation (kpc)')
    jg.ax_joint.set_ylabel('Galaxy size (kpc)')

    # One-to-one line
    jg.ax_joint.plot([0., 11.], [0., 11.], 'k--')

    # Font size
    set_fontsize(jg.ax_joint, 15.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()
示例#19
0
def fig_prior_vs_posterior(outfile='fig_prior_vs_posterior.png',
                           POx_secure=associate_defs.POx_secure,
                           prior_mode='conservative'):
    """Compare offset priors with the offsets of high-confidence associations.

    One panel is drawn per theta prior. In each panel the prior profile is
    plotted unconvolved and after averaging its convolution with a Gaussian
    kernel built from each secure FRB's ``sig_a``/``sig_b``. The normalized
    offsets (separation / half_light) of the secure FRBs and ``model_theta``
    returned by ``analysis.run`` are overplotted as histograms, and the KS
    p-value of the secure offsets against the convolved profile is shown in
    the legend.

    Args:
        outfile: Path of the image to write. When the name contains ``'png'``
            the figure is saved with ``dpi=700``.
        POx_secure: Threshold on the maximum candidate ``P_Ox``; an FRB counts
            as a secure association when its maximum exceeds this value.
            The default is read from ``associate_defs`` once, when the
            function is defined.
        prior_mode: Name of the attribute of ``associate_defs`` that is copied
            to form the starting prior.

    Returns:
        None
    """
    set_mplrc()

    frb_pre = analysis.get_candidates()

    # Init prior
    prior = getattr(associate_defs, prior_mode).copy()

    # Grid of normalized offsets on which the prior profiles are evaluated
    thetas = np.linspace(0., associate_defs.theta_max, 1000)
    dtheta = thetas[1] - thetas[0]

    # Histogram bins shared by every panel
    bins_theta = np.linspace(0., 6., 20)

    # Plot
    plt.figure(figsize=(6, 5))
    gs = gridspec.GridSpec(2, 2)

    # Loop on theta priors
    # NOTE(review): zip() stops at its shortest input, np.arange(3), so only
    # three panels are drawn (ss = 0, 1, 2 with theta_u, theta_c, theta_e).
    # The fourth colour and the second theta_e entry are never reached, which
    # makes every `ss < 3` test below true and the `else` branch dead code.
    # Confirm whether the fourth panel was disabled on purpose.
    for ss, clr, tprior in zip(np.arange(3), ['k', 'b', 'g', 'gray'], [
            associate_defs.theta_u,
            associate_defs.theta_c,
            associate_defs.theta_e,
            associate_defs.theta_e,
    ]):
        # Plot
        ax = plt.subplot(gs[ss])
        # Prior
        # The associate_defs object itself is stored here, not a copy.
        prior['theta'] = tprior

        if ss < 3:
            # Bayesian + parse
            _, model_mags, model_theta, max_PMix = analysis.run(
                prior, frb_pre=frb_pre)

            # Distribution
            ptheta = bayesian.pw_Oi(thetas, 1., prior['theta'])
            # Integral of the profile over the grid, used to normalize it
            scl = np.sum(ptheta * dtheta)
            print('scl:', scl)
            convolved_ptheta = None

            # High confidence
            high_conf_theta = []
            nsecure = 0
            for frbA in frb_pre.frbA:
                if np.max(frbA.candidates.P_Ox) > POx_secure:
                    imax = np.argmax(frbA.candidates.P_Ox)
                    # Offset of the top candidate in units of its half_light
                    high_conf_theta.append(
                        frbA.candidates.iloc[imax].separation /
                        frbA.candidates.iloc[imax].half_light)
                    # Convolve with prior
                    # Kernel width: geometric mean of sig_a and sig_b, in
                    # units of the candidate's half_light
                    sigR_phi = np.sqrt(
                        frbA.frb.sig_a *
                        frbA.frb.sig_b) / frbA.candidates.iloc[imax].half_light
                    # The kernel is evaluated on `thetas`, i.e. only for
                    # non-negative offsets.
                    conv_theta = convolve(ptheta,
                                          np.exp(-thetas**2 / 2 / sigR_phi**2),
                                          mode='full')
                    # Normalize each convolved profile before accumulating
                    scl_c = np.sum(conv_theta * dtheta)
                    if convolved_ptheta is None:
                        convolved_ptheta = conv_theta / scl_c
                    else:
                        convolved_ptheta += conv_theta / scl_c
                    # Increment
                    nsecure += 1
            # Normalize
            high_conf_theta = np.array(high_conf_theta)
            # NOTE(review): if no FRB passes POx_secure, convolved_ptheta is
            # still None here and this line raises TypeError.
            convolved_ptheta /= nsecure

            # Save
            # Only read by the (currently unreachable) else branch below
            if tprior == associate_defs.theta_e:
                save_high = high_conf_theta.copy()

            # KS test
            #cumsum = np.cumsum(ptheta*dtheta) / scl
            cumsum = np.cumsum(convolved_ptheta * dtheta)
            # The convolved profile has its own length, so its abscissa is
            # rebuilt from that length with the same spacing.
            more_thetas = np.arange(convolved_ptheta.size) * dtheta
            #f_CDF = interp1d(thetas, cumsum)
            f_CDF = interp1d(more_thetas, cumsum)
            res = kstest(high_conf_theta, f_CDF)
            pvalue = res.pvalue

        else:
            # NOTE(review): unreachable while the loop is limited to ss < 3.
            # If re-enabled, this branch reuses convolved_ptheta from the
            # previous iteration for the plot below, and on entry
            # high_conf_theta is also the previous iteration's array.
            # Fit secure with a new exponential
            scale_lengths = np.linspace(0.25, 2., 100)
            P_KS = []
            for scale_length in scale_lengths:
                ptheta = bayesian.pw_Oi(thetas,
                                        1.,
                                        associate_defs.theta_e,
                                        scale_half=scale_length)
                scl = np.sum(ptheta * dtheta)

                # KS test
                cumsum = np.cumsum(ptheta * dtheta) / scl
                f_CDF = interp1d(thetas, cumsum)
                res = kstest(high_conf_theta, f_CDF)
                # Save
                P_KS.append(res.pvalue)

            # Scale length with the highest KS p-value
            imax = np.argmax(P_KS)
            best_sl = scale_lengths[imax]
            print("Best scale length = {}".format(best_sl))
            pvalue = P_KS[imax]
            # Once more
            ptheta = bayesian.pw_Oi(thetas,
                                    1.,
                                    associate_defs.theta_e,
                                    scale_half=best_sl)
            scl = np.sum(ptheta * dtheta)
            # NOTE(review): prior['theta'] is the associate_defs object stored
            # at the top of the loop, so this assignment would modify that
            # shared object -- confirm before re-enabling this branch.
            prior['theta']['method'] = 'exp ' + '{:0.1f}'.format(
                best_sl) + r'$ \, ' + cmhalf + '$'
            #
            high_conf_theta = save_high

        # Plot profiles
        ax.plot(thetas,
                ptheta / scl,
                label=prior['theta']['method'] + ' unconvolved',
                color=clr,
                alpha=0.3)
        more_thetas = np.arange(convolved_ptheta.size) * dtheta
        ax.plot(more_thetas,
                convolved_ptheta,
                label=prior['theta']['method'] + r' $P_{\rm KS} =' +
                '{:0.2f}'.format(pvalue) + '$',
                color=clr)

        # Plot secure
        # Equal weights that sum to one, so bar heights are fractions
        weights3 = np.ones_like(high_conf_theta) / high_conf_theta.size
        # Only the first panel carries a legend entry for the histogram
        lbl = r'$P(O_i) > ' + '{}'.format(
            POx_secure) + '$ FRBs' if ss == 0 else None
        ax.hist(high_conf_theta,
                bins=bins_theta,
                weights=weights3,
                color='darkred',
                label=lbl,
                histtype='stepfilled')

        # Plot all
        if ss < 3:
            weights2 = np.ones_like(model_theta) / model_theta.size
            lbl = 'All FRB candidates' if ss == 0 else None
            ax.hist(model_theta,
                    weights=weights2,
                    bins=bins_theta,
                    color='darkgrey',
                    label=lbl,
                    histtype='step')

        # Label me
        # `cmhalf` and `handletextpad` are not defined in this function; they
        # are presumably module-level names -- verify.
        ax.set_xlabel(r'$\theta/' + cmhalf + '$')
        ax.set_ylabel('PDF')
        ax.xaxis.set_major_locator(plt.MultipleLocator(1.))
        #ax.xaxis.set_major_formatter(FormatStrFormatter(r'$%.3f$'))
        ax.set_xlim(0., 8)
        ax.set_ylim(0., 0.5)

        # Legend
        legend = ax.legend(loc='upper right',
                           scatterpoints=1,
                           borderpad=0.2,
                           handletextpad=handletextpad,
                           fontsize=11.)

        # Font size
        set_fontsize(ax, 13.)

    # End
    plt.tight_layout(pad=0.2, h_pad=0., w_pad=0.1)
    print('Writing {:s}'.format(outfile))
    kwargs = {}
    if 'png' in outfile:
        kwargs['dpi'] = 700
    plt.savefig(outfile, **kwargs)
    plt.close()
示例#20
0
def _select_run_list(command_variables, pdb_dir):
    """Return the PDB file names to analyse, chosen from the command line.

    * No extra argument: every entry of ``pdb_dir`` except ``"dsspout"``.
    * An argument containing ``".ent"``: that single file name.
    * Anything else: the stripped lines of the listing
      ``"new_files_downloaded_on_" + argument``; when that listing cannot be
      read, the available listings in the current directory are printed and
      the program exits with status 1.
    """
    if len(command_variables) <= 1:
        # Filtering (rather than list.remove) does not fail when the
        # "dsspout" entry is absent.
        run_list = [name for name in os.listdir(pdb_dir) if name != "dsspout"]
        print("1")
        return run_list

    selector = command_variables[1]
    if ".ent" in selector:
        print("2")
        return [selector]

    print("3")
    tag = "new_files_downloaded_on_"
    try:
        return [z.strip() for z in fopen(tag + selector)]
    except IOError:
        print("The file \"" + tag + selector +
              "\" does not exist. Possible options are:")
        for candidate in os.listdir("."):
            if candidate.startswith(tag):
                print(candidate)
        sys.exit(1)


def _write_histogram(workbook, hist_):
    """Write ``hist_`` to the text file "histogram_" and a "Histogram" sheet.

    Keys are written in descending order of their count. Each key is split
    into its first character, its second character and the remainder for the
    three descriptive columns.
    """
    ordered_keys = sorted(hist_, key=hist_.get, reverse=True)

    workbook.add_sheet("Histogram")
    # Index 4 because main() creates exactly four sheets before this one.
    # NOTE(review): assumes analysis.run adds no sheets of its own -- confirm.
    histosheet = workbook.get_sheet(4)

    headers = ("Secondary structure 1", "Secondary structure 2",
               "Disulfide Bond type", "Count")
    for col, title in enumerate(headers):
        histosheet.write(0, col, title, style0)

    with open("histogram_", "w") as listing:
        for row, h in enumerate(ordered_keys, 1):
            listing.write(
                str(h) + " " * (21 - len(h)) + str(hist_[h]) + "\n")
            histosheet.write(row, 0, h[0])
            histosheet.write(row, 1, h[1])
            histosheet.write(row, 2, h[2:])
            histosheet.write(row, 3, hist_[h])


def main(analysis_type, command_variables, threshold):
    """Run one analysis type over a set of PDB files and save the results.

    A workbook with four result sheets is created and saved up front, each
    selected file is passed to ``analysis.run``, the accumulated histogram is
    written to a text file and a fifth sheet, and the workbook is saved again.
    Progress and start/finish times are printed.

    Args:
        analysis_type: Name used in the output file names. It is also compared
            with "lys_arg", "his", "asp_glu", "trp", "phe" and "tyr" to set
            the matching keyword flag of ``analysis.run`` to True.
        command_variables: argv-style sequence; element 1, when present,
            selects the input files (see ``_select_run_list``).
        threshold: Passed to ``analysis.run`` as ``dist``.
    """
    pdb_dir = "../pdb_files"
    date_ = time.strftime("%d_%b_%Y")
    # Built once so the initial and the final save target the same file on
    # every platform.
    workbook_path = os.path.join("..", "Analyses",
                                 analysis_type + "_" + date_ + ".xls")

    x = Workbook()
    for sheet_name in ("NMR averaged model", "NMR multiple models", "X-RAY",
                       "Other"):
        x.add_sheet(sheet_name)
    x.save(workbook_path)

    erfile = open("error_" + analysis_type + "_" + date_ + ".log", "w")
    try:
        run_list = _select_run_list(command_variables, pdb_dir)
        n = len(run_list)

        # Exactly one flag is True when analysis_type names a known type.
        flags = dict((type_, analysis_type == type_)
                     for type_ in ("lys_arg", "his", "asp_glu", "trp", "phe",
                                   "tyr"))

        # Histogram relating secondary structures and bond types; threaded
        # through every analysis.run call.
        hist_ = {}
        timestart = time.ctime()
        for i, name in enumerate(run_list):
            # os.path.join supplies the separator that plain concatenation
            # with pdb_dir was missing.
            hist_ = analysis.run(os.path.join(pdb_dir, name),
                                 x,
                                 erfile,
                                 dist=threshold,
                                 histogram=hist_,
                                 **flags)
            # Percentage complete, truncated to two decimals
            m = int((i + 1) / float(n) * 10000) / 100.0
            print(str(m) + "% complete")

        timefinish1 = time.ctime()
        print("\nResults file is being saved. "
              "Please do not close the program window.\n")

        _write_histogram(x, hist_)
        x.save(workbook_path)
        print("\nAnalysis complete and results saved.\n")
        timefinish2 = time.ctime()
    finally:
        # Also runs on sys.exit() and on errors, so the log is always closed.
        erfile.close()

    print("start    = " + timestart)
    print("prcessed = " + timefinish1)
    print("saved    = " + timefinish2)