Python read_as_pandas примеры, plumed_pandas.read_as_pandas Python примеры использования

Пример #1

0

Показать файл

def main():
    """Plot columns of a colvar file against ``time`` (output of torcv tries).

    Command-line options:
        -f      colvar file to read (mandatory).
        -col    list of column names to plot; takes precedence over
                -start/-end when present.
        -start  start position of the columns to plot.
        -end    end position + 1 of the columns to plot.
        -o      output image name / path; ``.png`` is appended.

    The figure is saved as ``<out>.png`` and a confirmation is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='give colvar_file', action='store', dest='f', type=str)
    parser.add_argument('-start', help='give start position of column', action='store',
                        dest='start', type=int)
    parser.add_argument('-end', help='give end position +1 of column', action='store',
                        dest='end', type=int)
    parser.add_argument('-col', help='list of column names to plot', nargs='*', dest='col')
    parser.add_argument('-o', help='output image name / path', action='store', dest='out',
                        type=str)
    args = parser.parse_args()

    # Report unusable argument combinations as a usage error before the
    # file is read, instead of failing later with a TypeError.
    if args.f is None:
        parser.error('a colvar file must be given with -f')
    if args.col is None and (args.start is None or args.end is None):
        parser.error('give either -col or both -start and -end')

    # NOTE(review): skiprows is forwarded to read_as_pandas; assuming it has
    # pandas.read_csv semantics (rows for which the callable is truthy are
    # skipped), only rows whose index is a multiple of 8000 are kept.
    dataf = plumed_pandas.read_as_pandas(args.f, skiprows=lambda x: x % 8000)

    if args.col is not None:
        ax = dataf.plot(x='time', y=args.col)
        ax.legend([args.f])
    else:
        # Pass a concrete list of positions rather than a lazy range object.
        ax = dataf.plot(x='time', y=list(range(args.start, args.end)))
    plt.savefig(f'{args.out}.png', format='png')
    plt.close()
    print(f'{args.out}.png has been created')

Пример #2

0

Показать файл

Файл: conv.py Проект: jerem2401/scripts

def main():
    """Plot the selected colvar columns against ``time``, one image per file.

    Command-line options:
        -f  one or more colvar files (mandatory).
        -v  names of the columns to plot (mandatory).

    For every input file the frame is reduced to the requested columns plus
    ``time``, every 1000th row is plotted, and the figure is saved in the
    working directory as ``<basename without .txt>_conv.jpeg``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        help='give colvar_file, for plotting f(var)=t',
                        action='store',
                        nargs='*',
                        dest='f',
                        required=True)
    parser.add_argument('-v',
                        help='give colvar_file, for building weithed histo',
                        action='store',
                        nargs='*',
                        dest='var',
                        required=True)
    args = parser.parse_args()

    # Work on copies so the parsed namespace is left untouched.
    variables = list(args.var)
    wanted = set(variables) | {'time'}

    for path in args.f:
        df = plumed_pandas.read_as_pandas(path)
        df = df.drop(columns=[col for col in df.columns if col not in wanted])

        fig, ax = plt.subplots()
        time = df['time'][::1000]
        for name in variables:
            ax.plot(time, df[name][::1000], label=name)
        if variables:
            ax.legend()

        out_name = os.path.basename(path).split('.txt')[0] + '_conv.jpeg'
        # The ``quality`` keyword of savefig was removed from Matplotlib;
        # JPEG quality is passed to Pillow through ``pil_kwargs`` instead.
        fig.savefig(out_name, pil_kwargs={'quality': 30})
        # Close the figure so a long file list does not accumulate figures.
        plt.close(fig)
        print(out_name + ' done')

Пример #3

0

Показать файл

Файл: block.py Проект: jerem2401/scripts

def rew_chunk(f, c, k):
    """Chunk each colvar file and write one reweighted histogram per chunk.

    For every path in *f*, the position tag is the text between the first
    ``colvar`` and the last ``_`` of the path.  Files for which a
    ``./histo_<pos>_c*.dat`` already exists are skipped.  Otherwise the file
    is read, cut into consecutive chunks of ``round(len / c)`` rows, and for
    each chunk a histogram of the ``nCV`` column over ``[-1.25, 1.25)`` in
    0.01 steps is built, weighted by ``exp(bias / 2.49434)`` of the
    ``guide_restraint.bias`` column, normalised to sum to 1 and saved as
    ``./histo_<pos>_c<n>.dat`` in the working directory.

    Args:
        f: iterable of colvar file paths.
        c: divisor used to derive the chunk length from the row count.
        k: value written in the last header line of every output file.

    Returns:
        ``(dflen, chlen)`` of the last file that was actually processed, or
        ``(None, None)`` when no file was processed (empty *f* or every
        histogram already present).
    """
    # Defined up front so the final return cannot hit an unbound name when
    # the loop body never reaches the processing branch.
    dflen = None
    chlen = None

    # Loop-invariant: the bin edges are identical for every chunk and file.
    bins = np.around(np.arange(-1.25, 1.25, 0.01), decimals=3)

    for path in f:
        pos = re.search('(?<=colvar).*(?=_)', path).group()
        if glob.glob('./histo_' + str(pos) + '_c*.dat'):
            print('./histo_' + str(pos) + '_c*.dat  exists !')
            continue

        print('reading ' + path)
        df = plumed_pandas.read_as_pandas(path)
        dflen = len(df.index)
        print('dflen=' + str(dflen))
        # At least one row per chunk: a zero step would make range() raise.
        chlen = max(1, round(dflen / c))
        print('chlen=' + str(chlen))

        for n, start in enumerate(range(0, dflen, chlen)):
            chunk = df[['nCV', 'guide_restraint.bias']][start:start + chlen]

            print('head of chunk ' + str(n) + ' from ' + str(start) + ' to ' +
                  str(start + chlen) + ' :',
                  chunk.head(),
                  chunk.tail(),
                  sep='\n')

            # Put next lines as a function in rew_histo, to make it versatile
            # NOTE(review): 2.49434 is presumably kT in kJ/mol near 300 K -
            # confirm against the simulation settings.
            weights = np.exp(np.asarray(chunk['guide_restraint.bias']) / 2.49434)
            hist2, bin_edges = np.histogram(chunk['nCV'],
                                            bins=bins,
                                            weights=weights,
                                            density=True)
            hist2 = hist2 / hist2.sum()
            table = np.column_stack((bin_edges[0:-1], hist2))

            # /!\ saved in working directory
            out_name = './histo_' + str(pos) + '_c' + str(n) + '.dat'
            np.savetxt(out_name,
                       table,
                       header="col1=z col2=hist\n#1 #2 " + str(pos) +
                       "\n#1 #2 " + str(k),
                       fmt='%.6f')
            print(out_name + ' is done')
    return (dflen, chlen)

Пример #4

0

Показать файл

def main():
    """Return the ``time`` of the colvar frame closest to a target CV value.

    Command-line options:
        -f   colvar file (mandatory).
        -v   target CV value (mandatory).
        -wm  min THEN max value of the CV (optional).

    When the target equals one of the first two ``-wm`` values, the frame is
    searched in the ``nCV`` column; otherwise it is searched in the
    ``restraint.nCV_cntr`` column.

    Returns:
        The ``time`` entry of the row whose searched column is closest to
        the target value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        help='give colvar_files',
                        action='store',
                        dest='f',
                        type=str,
                        required=True)
    parser.add_argument('-v',
                        help='tnCV',
                        action='store',
                        dest='v',
                        type=float,
                        required=True)
    parser.add_argument('-wm',
                        help="""give min THEN max value of cv,
                        in case of adding windows this will change a little bit the behavior
                        but this should not be problematic""",
                        nargs='*',
                        action='store',
                        dest='w',
                        type=float)
    args = parser.parse_args()

    # read COLVAR.txt
    df = plumed_pandas.read_as_pandas(args.f)

    # Without -wm there is no boundary to match, so the restraint-centre
    # column is used instead of failing on a missing list.
    bounds = args.w[:2] if args.w else []
    column = 'nCV' if args.v in bounds else 'restraint.nCV_cntr'

    # idxmin() yields an index label, so the row is fetched with .loc
    # (label based) rather than .iloc (position based).
    closest = (df[column] - args.v).abs().idxmin()
    tvalue = df.loc[closest]['time']
    return tvalue

Пример #5

0

Показать файл

def main(colvf, ksi, minmax):
    """Find the colvar frame whose ``p1.sss`` value is closest to *ksi*.

    Args:
        colvf: colvar file handed to ``plumed_pandas.read_as_pandas``.
        ksi: target value; converted with ``float`` before the comparison.
        minmax: kept for interface compatibility.  The original code
            branched on ``ksi == minmax[0] or ksi == minmax[1]`` but both
            branches ran the same lookup, so it never affected the result.

    Returns:
        ``(found, tvalue)``: the ``p1.sss`` value and the ``time`` value of
        the closest row.
    """
    # read COLVAR.txt
    df = plumed_pandas.read_as_pandas(colvf)

    # NOTE(review): the original else-branch carried a disabled variant that
    # searched 'path.p1.sss_cntr' instead; re-introduce the minmax test here
    # if that variant is ever enabled again.
    # idxmin() yields an index label, hence the label-based .loc lookup.
    closest = (df['p1.sss'] - float(ksi)).abs().idxmin()
    row = df.loc[closest]
    return row['p1.sss'], row['time']

Пример #6

0

Показать файл

Файл: rew_hist_final.py Проект: jerem2401/scripts

def _strip_txt(name):
    """Return *name* without one trailing ``.txt`` (a real suffix removal)."""
    # str.strip('.txt') removes any leading/trailing '.', 't' or 'x'
    # characters, which corrupts values such as '.5.txt'.
    return name[:-len('.txt')] if name.endswith('.txt') else name


def _write_histogram(args):
    """Build the (optionally reweighted) histogram and save it as text."""
    pos = args.pos
    #pos=re.search('(-|[0-9]|\.)+(?=_)', args.f).group()
    default_out = './histo_' + str(pos) + '_.txt'

    # Never overwrite an existing result.
    if os.path.exists(default_out):
        print(default_out + ' exists !')
        sys.exit()
    if args.o is not None and os.path.exists(args.o):
        print(args.o + ' exists !')
        sys.exit()

    df = plumed_pandas.read_as_pandas(args.f)

    bins = np.around(np.arange(args.min, args.max, args.s), decimals=3)
    weights = None
    if args.rew:
        # NOTE(review): 2.49434 is presumably kT in kJ/mol near 300 K -
        # confirm against the simulation settings.
        weights = np.exp(np.asarray(df['guide_restraint.bias']) / 2.49434)
    # -col is honoured with and without -rew; its default is 'nCV', the
    # column the reweighted branch previously hard-coded.
    hist2, bin_edges = np.histogram(df[args.col],
                                    bins=bins,
                                    weights=weights,
                                    density=True)
    hist2 = hist2 / hist2.sum()
    table = np.column_stack((bin_edges[0:-1], hist2))

    # /!\ saved in working directory
    if args.o is None:
        np.savetxt(default_out,
                   table,
                   header="col1=z col2=hist\n#1 #2 " + str(pos) +
                   "\n#1 #2 " + str(args.k),
                   fmt='%.6f')
        print(default_out + ' is done')
    else:
        np.savetxt(args.o, table, fmt='%.6f')
        print(args.o + ' is done')


def _plot_all2():
    """Plot the histo*.txt files in two figures, half of the files in each."""
    import matplotlib
    import matplotlib.pyplot as plt

    matplotlib.rcParams.update({'font.size': 23})

    histo_files = os.popen(
        'ls histo*.txt | sort -t _ -k 2 -n').read().split()
    half = len(histo_files) // 2

    for idx, group in enumerate((histo_files[:half], histo_files[half:])):
        fig = plt.figure(figsize=(15, 8))
        ax = fig.add_subplot(1, 1, 1)

        z = None
        for name in group:
            z, thishist = np.loadtxt(name, unpack=True)
            #ax.bar(z, thishist, width = z[0]-z[1])
            ax.plot(z, thishist)

        # The x range follows the last file of the group; skipped when the
        # group is empty so an empty half does not raise.
        if z is not None:
            ax.set_xlim(min(z), max(z))
        fig.savefig('hist_' + str(idx) + '.jpeg')
        plt.close(fig)


def _plot_all1():
    """Plot every histo*.txt file in a single numbered, annotated figure."""
    import matplotlib
    import matplotlib.pyplot as plt

    matplotlib.rcParams.update({'font.size': 15})
    histo_files = os.popen(
        'ls histo*.txt | sort -t _ -k 2 -n').read().split()
    fig, axs = plt.subplots(figsize=(15, 8))

    allz = []
    for n, name in enumerate(histo_files, start=1):
        label = str(n)
        xs = name.split('_')[1]
        x = float(xs)
        # Trailing newline of the grep output is dropped so it does not end
        # up inside the legend entry.
        k = os.popen(
            f"grep -oP '(?<=KAPPA=)[0-9]*$' ../E_{xs}/plumed_{xs}.dat"
        ).read().strip()
        z, thishist = np.loadtxt(name, unpack=True)
        axs.plot(z, thishist, label=label + ":" + str(x) + ':' + k)
        # The annotation text is passed positionally: the ``s`` keyword was
        # renamed to ``text`` and later removed from Matplotlib.
        axs.annotate(label, xy=(x, max(thishist)), fontsize=8)
        allz = np.concatenate((allz, z), axis=None)

    axs.legend(loc='upper left', ncol=7, fontsize=7, handlelength=0.4)
    if len(allz):
        axs.set_xlim(np.amin(allz), np.amax(allz))
    fig.savefig('hist_all1.jpeg')
    plt.close(fig)


def _plot_sep():
    """Plot every histo* file in its own figure with a marker line."""
    import matplotlib.pyplot as plt

    histo_files = os.popen('ls histo* | sort -t _ -k 2 -n').read().split()

    for name in histo_files:
        fig, ax = plt.subplots(figsize=(15, 8))

        z, thishist = np.loadtxt(name, unpack=True)
        # Third '_'-separated field of the file name, without '.txt'.
        Eg = _strip_txt(name.split('_')[2])

        ax.plot(z, thishist)
        ax.plot(np.full((2, ), float(Eg)),
                np.asarray([min(thishist), max(thishist)]))

        stem = _strip_txt(name)
        ax.set_title(stem)
        fig.savefig(stem + '.jpeg')
        # One figure per file: close it so figures do not pile up.
        plt.close(fig)


def main():
    """Build a histogram from a colvar file, or plot existing histograms.

    Without ``--plot`` the column selected by ``-col`` is histogrammed
    between ``-min`` and ``-max`` with step ``-s`` (weighted by the
    ``guide_restraint.bias`` column when ``-rew`` is given), normalised to
    sum to 1 and written to ``--o`` or to ``./histo_<pos>_.txt``.  The
    program exits without writing when the target file already exists.

    With ``--plot all1``, ``all2`` or ``sep`` the histogram files found in
    the working directory are plotted together, in two halves, or one figure
    per file respectively.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f',
                        help='give colvar_file, for building weithed histo',
                        action='store',
                        dest='f',
                        required=True)
    parser.add_argument(
        '--plot',
        help=
        'plot histograms together: all1 (all2 for separating plots in the middle of the CV in case of cyclic CV), or separately: sep',
        action='store',
        dest='plot')
    parser.add_argument('-k',
                        help='force constant used for each windows',
                        action='store',
                        dest='k',
                        required=True)
    parser.add_argument('-s',
                        help='binning step, deflt=0.01 for ext -1.25 and 1.25',
                        action='store',
                        dest='s',
                        default=0.01,
                        type=float)
    parser.add_argument('-min',
                        help='min boundary for hist, deflt= %(default)s',
                        action='store',
                        dest='min',
                        default=-1.25,
                        type=float)
    parser.add_argument('-max',
                        help='max boundary for hist, deflt= %(default)s',
                        action='store',
                        dest='max',
                        default=1.25,
                        type=float)
    parser.add_argument(
        '-rew',
        help=
        'if present, allows to reweight histo by the value of the guide_restraint.bias',
        action='store_true',
        dest='rew')
    parser.add_argument(
        '-col',
        help=
        'from which column of the colvar file do you want to do an histogram, deflt= %(default)s',
        default='nCV',
        action='store',
        dest='col',
        type=str)
    parser.add_argument('--o',
                        help='name of hist file',
                        action='store',
                        dest='o',
                        type=str)
    parser.add_argument('-pos',
                        help='center of haromic potential',
                        action='store',
                        dest='pos')
    args = parser.parse_args()

    print(
        'careful: put the -rew option added in the last maj for reweighting !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
    )
    print(args.f, args.o)

    # The modes are mutually exclusive values of --plot.
    if args.plot is None:
        _write_histogram(args)
    elif args.plot == 'all2':
        _plot_all2()
    elif args.plot == 'all1':
        _plot_all1()
    elif args.plot == 'sep':
        _plot_sep()

Пример #7

0

Показать файл

Файл: ncvplot.py Проект: jerem2401/scripts

def _read_restraint_params(plumed_path):
    """Return the restraint snippets that follow the two marker comments.

    The line after a ``# nCV restraint`` marker and the line after a
    ``# guideCV`` marker are searched for ``AT...KAPPA...``.  A missing
    marker or a non-matching line yields an empty string for that entry.
    """
    found = {'ncv': None, 'guide': None}
    with open(plumed_path, "r") as plume:
        take_ncv = False
        take_guide = False
        for line in plume:
            if take_ncv:
                found['ncv'] = re.search(r"AT.*KAPPA[^ ]*", line)
                take_ncv = False
            if take_guide:
                found['guide'] = re.search(r"AT.*KAPPA[^ ]*", line)
                take_guide = False
            if "# nCV restraint" in line:
                take_ncv = True
            if "# guideCV" in line:
                take_guide = True
    ncv_text = found['ncv'].group() if found['ncv'] else ''
    guide_text = found['guide'].group() if found['guide'] else ''
    return ncv_text, guide_text


def _plot_colvar_files(args, nCV, workdir):
    """Plot RMSD and phi/psi scatter panels for the files given with -f."""
    files = args.f

    # Special values: hard-coded (X, Y) pairs whose nCV is added to the
    # contour levels.
    Xeq = 0.001376
    Yeq = 0.098296
    Xax = 0.098361
    Yax = 0.001384
    eqCVval = nCV(Xeq, Yeq)
    axCVval = nCV(Xax, Yax)
    levels = sorted([-2, -1, -0.5, -0.25, -0.1, -0.05, 0, 0.05, 0.1, 0.25, 0.5, 1, 2]
                    + [axCVval, eqCVval])

    fig = plt.figure(1, figsize=(10, 6 * len(files)))

    for row, path in enumerate(files):
        df = plumed_pandas.read_as_pandas(path)
        RMSDeq = df[args.rmsdx][::args.s]
        RMSDax = df[args.rmsdy][::args.s]
        phi = df['phi'][::args.s]
        psi = df['psi'][::args.s]
        # Points are coloured by their order in the (thinned) series.
        order = np.arange(1, len(RMSDax) + 1)

        xy = np.arange(0.001, max(float(RMSDeq.max()), float(RMSDax.max())), 0.005)
        X, Y = np.meshgrid(xy, xy)
        Z = nCV(X, Y)

        # Two panels per file: odd subplot index on the left, even on the right.
        plt.subplot(len(files), 2, 2 * row + 1)
        CS = plt.contour(X, Y, Z, levels=levels)
        plt.clabel(CS, inline=1, fontsize=10)
        plt.xlabel('RMSDeq')
        plt.ylabel('RMSDax')
        # The colormap is given by name: plt.cm.get_cmap was removed from
        # recent Matplotlib releases.
        plt.scatter(RMSDeq, RMSDax, c=order, cmap='rainbow', s=10)
        plt.title('RMSDs of trajectory relative to state ax and eq \n & projection of the nCV')
        plt.gca().set_aspect('equal', adjustable='box')

        plt.subplot(len(files), 2, 2 * row + 2)
        plt.title('phi & psi dihedral angles')
        plt.xlabel('phi')
        plt.ylabel('psi')
        plt.scatter(phi, psi, c=order, cmap='rainbow', s=10)
        plt.gca().set_aspect('equal', adjustable='box')

    plt.tight_layout(pad=3)
    fig.suptitle(workdir)
    if args.o is None:
        plt.savefig(workdir + ".jpeg")
    else:
        plt.savefig(args.o + '/plot.jpeg')
    plt.close()


def _plot_umbrella_windows(args, nCV):
    """Plot every E*/colvar* window, 20 windows per output image."""
    print("umb plot mode turned on")
    # NOTE(review): this message mentions pdf files and 15 windows per file,
    # while the code below writes jpeg images of 20 windows each.
    print(
    '''
            You've specified -umb: umb plot mode turned on

            Careful: this script generate n pdf files, with n=nbr of umb windows/15 (+1 if left over)
            Then if you have a number of windows >> 75, you should think of rearranging this script"
            '''
    )

    # list of colvar files, sorted by the shell on the second '_' field
    colvar_list = os.popen('ls E*/colvar* | sort -t _ -k 2 -n').read().split()
    if not colvar_list:
        print("no E*/colvar* files found")
        return

    # ksi value (as text) of every colvar file, taken from its E_<ksi>/ directory
    cv_by_file = {path: re.search('(?<=E_).*(?=/)', path).group()
                  for path in colvar_list}

    # cut colvar_list in several lists of 20 elem to lighten the plot
    # generation and avoid crash (for std desktop ressources)
    clcut = [colvar_list[i:i + 20] for i in range(0, len(colvar_list), 20)]

    dfxmax = plumed_pandas.read_as_pandas(colvar_list[0])
    dfymax = plumed_pandas.read_as_pandas(colvar_list[-1])

    # Bounds of the grid for the Z function (and of the axes): the first
    # value of each RMSD plus 10 %, instead of loading every file to take
    # the true maximum.
    x0 = dfxmax[args.rmsdx].iloc[0]
    y0 = dfymax[args.rmsdy].iloc[0]
    RMSDxmax = x0 + (0.10 * x0)
    # The 10 % margin is taken from the rmsdy column itself; the previous
    # code added 10 % of the rmsdx column here.
    RMSDymax = y0 + (0.10 * y0)
    maxRMSD = max(RMSDymax, RMSDxmax)

    phixm = dfxmax['phi'].iloc[0] + (1.5 * dfxmax['phi'].iloc[0])
    psixm = dfxmax['psi'].iloc[0] + (1.5 * dfxmax['psi'].iloc[0])
    phiym = dfymax['phi'].iloc[0] + (1.5 * dfymax['phi'].iloc[0])
    psiym = dfymax['psi'].iloc[0] + (1.5 * dfymax['psi'].iloc[0])
    angmin = min(phixm, psixm, phiym, psiym)
    angmax = max(phixm, psixm, phiym, psiym)

    xy = np.arange(0.001, maxRMSD, 0.005)
    X, Y = np.meshgrid(xy, xy)
    Z = nCV(X, Y)

    base_levels = [-1.0, -0.5, -0.2, -0.1, 0, 0.1, 0.2, 0.5, 1.0]

    for nbl, k in enumerate(clcut):
        plt.figure(figsize=(12, 5 * len(k)))
        for row, path in enumerate(k):
            df = plumed_pandas.read_as_pandas(path)
            RMSDeq = df[args.rmsdx][::args.s]
            RMSDax = df[args.rmsdy][::args.s]
            phi = df['phi'][::args.s]
            psi = df['psi'][::args.s]
            order = np.arange(1, len(RMSDax) + 1)

            cv_text = cv_by_file[path]
            cv_value = float(cv_text)

            # restraint parameters come from the plumed input of the window
            plumed_path = (re.search(r".*E_([0-9]|-|\.)*", path).group()
                           + "/plumed_" + cv_text + ".dat")
            param_text, param2_text = _read_restraint_params(plumed_path)

            plt.subplot(len(k), 2, 2 * row + 1)
            # The window's own level is drawn separately in dashed red, so
            # it is left out of the regular levels.
            levels = [lvl for lvl in base_levels if lvl != cv_value]
            CS = plt.contour(X, Y, Z, levels=levels)
            CS2 = plt.contour(CS, levels=[cv_value], colors='red', linestyles='dashed')
            plt.clabel(CS, inline=1, fontsize=10)
            plt.clabel(CS2, inline=1)
            plt.xlabel('RMSDeq')
            plt.ylabel('RMSDax')
            plt.scatter(RMSDeq, RMSDax, c=order, cmap='rainbow', s=10)
            plt.xlim(0, maxRMSD)
            plt.ylim(0, maxRMSD)
            plt.title(param_text + '\n' + param2_text)

            plt.subplot(len(k), 2, 2 * row + 2)
            plt.xlim(angmin, angmax)
            plt.ylim(angmin, angmax)
            plt.title('phi & psi dihedral angles')
            plt.xlabel('phi')
            plt.ylabel('psi')
            plt.scatter(phi, psi, c=order, cmap='rainbow', s=10)

        plt.tight_layout()
        # JPEG quality goes through pil_kwargs: the ``quality`` keyword of
        # savefig was removed from Matplotlib.
        if args.o is None:
            plt.savefig('plot' + str(nbl) + '.jpeg', pil_kwargs={'quality': 40})
        else:
            plt.savefig(args.o + '/plot' + str(nbl) + '.jpeg', pil_kwargs={'quality': 30})
        plt.close()


def main():
    """Plot trajectories in RMSD space with nCV contours, plus phi/psi panels.

    Two modes are available:
        -f files   one figure with two panels per given colvar file;
        -umb       one image per group of 20 ``E*/colvar*`` windows found
                   from the working directory.

    The exponent alpha of the nCV function comes from ``--a`` or, when that
    is absent, from the ``_a<alpha>_deqx`` part of the working directory
    name.  When neither ``-f`` nor ``-umb`` is given a hint is printed and
    nothing is plotted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', help='give colvar_files with spaces inbetween, should not be used with -umb', nargs='+', action='store', dest='f')
    parser.add_argument('-umb', help='specify that umb plot mode should be turn on', action='store_true', dest='umb')
    parser.add_argument('--a', help='provide alpha if not in directory_name under certain format', action='store', dest='a', type=float)
    parser.add_argument('-o', help='give path for plot output', action='store', dest='o')
    parser.add_argument('-s', help='number of frames to skip for plotting', action='store', dest='s', default=2, type=int)
    parser.add_argument('-deqx',  help='rmsd fluctuation of first state', action='store', dest='deqx', default=0.05, type=float)
    parser.add_argument('-deqy',  help='rmsd fluctuation of second state', action='store', dest='deqy', default=0.05, type=float)
    parser.add_argument('-rmsdx', help='provide name of rmsdx in colvar file', action='store', dest='rmsdx', type=str)
    parser.add_argument('-rmsdy', help='provide name of rmsdy in colvar file', action='store', dest='rmsdy', type=str)
    args = parser.parse_args()

    # Nothing to plot: say so instead of failing later on a missing list.
    if not args.umb and not args.f:
        print("you should specify the colvar_files with -f or turn umb plot mode on with -umb")
        return

    workdir = os.getcwd().split('/')[-1]

    if args.a is not None:
        a = args.a
    else:
        guessed = re.search(r"(?<=_a).*(?=_deqx)", workdir)
        if guessed is None:
            parser.error('alpha was not given with --a and could not be '
                         'read from the working directory name')
        a = float(guessed.group())
        print("careful alpha guessed from working directory name:"+str(a))

    def nCV(X, Y):
        """Return (deqx/X)**a - (deqy/Y)**a for scalars or arrays."""
        return ((args.deqx / X) ** a) - ((args.deqy / Y) ** a)

    if args.umb:
        _plot_umbrella_windows(args, nCV)
    else:
        _plot_colvar_files(args, nCV, workdir)

Python read_as_pandas примеры использования