Example No. 1
def create_archive(export_file, service=None, resume=None):
    """update or create index.html and download archive of all links"""

    print('[*] [{}] Starting archive from {} export file.'.format(
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        export_file,
    ))

    with open(export_file, 'r', encoding='utf-8') as f:
        links, service = parse_export(f, service=service)

    if resume:
        try:
            links = [
                link for link in links
                if float(link['timestamp']) >= float(resume)
            ]
        except TypeError:
            print(
                'Resume value and all timestamp values must be valid numbers.')

    if not links or not service:
        print('[X] No links found in {}, is it a {} export file?'.format(
            export_file, service))
        raise SystemExit(1)

    if not os.path.exists(service):
        os.makedirs(service)

    if not os.path.exists(os.path.join(service, 'archive')):
        os.makedirs(os.path.join(service, 'archive'))

    dump_index(links, service)
    check_dependencies()
    try:
        for link in links:
            dump_website(link, service)
    except (KeyboardInterrupt, SystemExit, Exception) as e:
        print('{red}[X] Archive creation stopped.{reset}'.format(**ANSI))
        print('    Continue where you left off by running:')
        print('       ./archive.py {} {} {}'.format(
            export_file,
            service,
            link['timestamp'],
        ))
        if not isinstance(e, KeyboardInterrupt):
            raise e
        raise SystemExit(1)

    print('{}[√] [{}] Archive update complete.{}'.format(
        ANSI['green'],
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ANSI['reset']))
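
check_dependencies() is called here with no arguments before any downloading starts; its implementation is not shown in this example, but a minimal sketch of such a guard might look like the following (the tool names and message format are assumptions for illustration, not the project's actual code):

import shutil

def check_dependencies(tools=('wget', 'curl')):
    """Illustrative sketch only: abort early if required external tools are missing from PATH."""
    missing = [tool for tool in tools if shutil.which(tool) is None]
    if missing:
        print('[X] Missing dependencies: {}'.format(', '.join(missing)))
        raise SystemExit(1)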
Example No. 2
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args):
    """
    Distributes a function over a given number of processes

    Parameters:
        traj_file (str): The name of the trajectory file to analyze.
        top_file (str): The name of the topology file associated with the trajectory.
        function (function): The analysis function to be parallelized.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of processes to launch.
        *args: The arguments for the provided function.

    Returns:
        results (list): The results from each individual processor's run.

    Note: How to concatenate the results is function-specific, so it should be handled in the calling module.
    """

    from config import check_dependencies
    check_dependencies(["pathos"])
    confs_per_processor = int(np.floor(num_confs / n_cpus))

    reader_pool = []
    processor_pool = pp.Pool(n_cpus)

    #split_starts and split_ends are kept for backwards compatibility with the old parallelize algorithm
    reader_pool, tmpfiles = split_trajectory(traj_file, top_file, num_confs,
                                             n_cpus, confs_per_processor)
    split_starts = [0 for r in reader_pool]
    split_ends = [confs_per_processor for r in reader_pool]
    rem = num_confs % n_cpus
    for i in range(rem):
        split_ends[i] += 1

    #Staple everything together, send it out to the workers, and collect the results as a list
    #Functions passed to this parallelizer must have the argument order defined by the lst variable (reader, <unique args>, number of configurations total, starting conf id, number of confs for this processor)
    #Unpacking *args inside a tuple like this requires Python 3.5+ (PEP 448); older interpreters will raise a SyntaxError here
    results = []
    lst = [(r, *args, num_confs, s, e)
           for r, s, e in zip(reader_pool, split_starts, split_ends)]

    #starmap allows you to have arguments that themselves are iterables
    #async because we don't actually care what order stuff finishes in.
    results = processor_pool.starmap_async(function, lst).get()
    processor_pool.close()
    for f in tmpfiles:
        f.close()
        remove(f.name)

    return results
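
As the comments above spell out, any function handed to this parallelizer must take its arguments in the order (reader, <unique args>, total number of confs, starting conf id, number of confs for this chunk). A hedged usage sketch, with a placeholder worker (count_chunk and its body are made up for illustration; what the reader object exposes depends on split_trajectory), might be:

def count_chunk(reader, scale, num_confs, start, stop):
    # Placeholder per-chunk worker following the argument contract above;
    # a real worker would pull configurations from `reader` instead.
    return [scale * (start + i) for i in range(stop)]

results = fire_multiprocess(traj_file, top_file, count_chunk, num_confs, n_cpus, 2.0)
per_conf = [value for chunk in results for value in chunk]  # concatenation is left to the caller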
Example No. 3
                        nargs=1,
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument('-p',
                        metavar='num_cpus',
                        nargs=1,
                        type=int,
                        dest='parallel',
                        help="(optional) How many cores to use")
    args = parser.parse_args()

    from config import check_dependencies
    check_dependencies(["python", "numpy", "matplotlib"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    parallel = args.parallel
    if parallel:
        n_cpus = args.parallel[0]

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    num_confs = cal_confs(traj_file)
    import UTILS.base  #this needs to be imported after the model type is set
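
The snippet cuts off after the deferred UTILS.base import; scripts structured like this usually go on to dispatch the analysis either through the fire_multiprocess parallelizer from Example No. 2 or serially. A hedged sketch of that dispatch (my_analysis and get_reader are placeholder names, not functions from the original script):

if parallel:
    # chunk the trajectory across n_cpus workers, as in Example No. 2
    results = fire_multiprocess(traj_file, top_file, my_analysis, num_confs, n_cpus)
else:
    # single-process fall-back over the whole trajectory
    results = my_analysis(get_reader(traj_file, top_file), num_confs, 0, num_confs)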
Example No. 4
                        help="The inputfile used to run the simulation")
    parser.add_argument('trajectory',
                        type=str,
                        nargs=1,
                        help='the trajectory file you wish to analyze')
    parser.add_argument(
        '-v',
        type=str,
        nargs=1,
        dest='outfile',
        help='if you want instead average per-particle energy as a viewer JSON'
    )
    args = parser.parse_args()

    from config import check_dependencies
    check_dependencies(["python", "numpy"])

    traj_file = args.trajectory[0]
    inputfile = args.inputfile[0]
    if args.outfile:
        outfile = args.outfile[0]
        visualize = True
    else:
        visualize = False

    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"
    import UTILS.base  #this needs to be imported after the model type is set
Example No. 5
msg += ['* Architecture: ' + plat]
plat = plat + '-' + sys.version[0:3]
gpawso = 'build/lib.%s/' % plat + '_gpaw.so'
gpawbin = 'build/bin.%s/' % plat + 'gpaw-python'
if 'clean' in sys.argv:
    if os.path.isfile(gpawso):
        print 'removing ', gpawso
        os.remove(gpawso)
    if os.path.isfile(gpawbin):
        print 'removing ', gpawbin
        os.remove(gpawbin)

sources = glob('c/*.c') + ['c/bmgs/bmgs.c']
sources = sources + glob('c/xc/*.c')

check_dependencies(sources)

extension = Extension('_gpaw',
                      sources,
                      libraries=libraries,
                      library_dirs=library_dirs,
                      include_dirs=include_dirs,
                      define_macros=define_macros,
                      undef_macros=undef_macros,
                      extra_link_args=extra_link_args,
                      extra_compile_args=extra_compile_args,
                      runtime_library_dirs=runtime_library_dirs,
                      extra_objects=extra_objects)

extensions = [extension]
Example No. 6
def perform_DBSCAN(points, num_confs, traj_file, inputfile, metric_name):
    """
    Runs the DBSCAN algorithm using the provided analysis as positions and splits the trajectory into clusters.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        num_confs (int): The number of configurations in the trajectory.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        metric_name (str): The type of data the points represent (usually either "euclidean" or "precomputed").
    
    Returns:
        labels (numpy.array): The clusterID of each configuration in the trajectory.
    """

    #run system checks
    from config import check_dependencies
    check_dependencies(["python", "sklearn", "matplotlib"])
    
    print("INFO: Running DBSCAN...", file=stderr)
    EPS=12
    MIN_SAMPLES=8

    #dump the input as a json file so you can iterate on EPS and MIN_SAMPLES
    dump_file = "cluster_data.json"
    print("INFO: Serializing input data to {}".format(dump_file), file=stderr)
    print("INFO: Run just clustering.py with the serialized data to adjust clustering parameters", file=stderr)
    out = [points.tolist(), num_confs, traj_file, inputfile, metric_name]
    dump(out, codecs.open(dump_file, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)

    #prepping to show the plot later
    #this only shows the first three dimensions because we assume that this is either PCA data or only a few dimensions anyway

    #components = perform_pca(points, 3)
    dimensions = []
    x = []
    dimensions.append(x)

    if points.shape[1] > 1:
        y = []
        dimensions.append(y)

    if points.shape[1] > 2:
        z = []
        dimensions.append(z)
    
    for i in points:
        for j, dim in enumerate(dimensions):
            dim.append(i[j])

    #DBSCAN parameters:
    #eps: the pairwise distance below which configurations are considered neighbors
    #min_samples: the smallest number of neighboring configurations required to start a cluster
    #metric: if the matrix fed in holds points in n-dimensional space, the metric needs to be "euclidean".
    #If the matrix is already a square distance matrix, the metric needs to be "precomputed".
    #the eps and min_samples need to be determined for each structure
    #If you're making your own multidimensional data, you probably want to normalize your data first.
    print("INFO: Adjust clustering parameters by modifying the 'EPS' and 'MIN_SAMPLES' values in the script.", file=stderr)
    print("INFO: Current values: eps={}, min_samples={}".format(EPS, MIN_SAMPLES), file=stderr)
    db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES, metric=metric_name).fit(points) 
    labels = db.labels_
    
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print("Number of clusters:", n_clusters_)

    
    print("INFO: Making cluster plot...", file=stderr)
    if len(dimensions) == 3:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
    else:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)

    plt.xlabel("OP0")
    plt.ylabel("OP1")

    if len(dimensions) == 3:
        ax.set_zlabel("OP2")
        #to show the plot immediately and interactively
        '''a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', 7))
        b = fig.colorbar(a, ax=ax)
        plt.show()'''
        
        #to make a video showing a rotating plot
        plot_file = "animated.mp4"
        def init():
            a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
            fig.colorbar(a, ax=ax)
            return [fig]

        def animate(i):
            ax.view_init(elev=10., azim=i)
            return [fig]

        anim = animation.FuncAnimation(fig, animate, init_func=init, frames=range(360), interval=20, blit=True)
        
        anim.save(plot_file, fps=30, extra_args=['-vcodec', 'libx264'])

    else:
        plot_file = "plot.png"
        if len(dimensions) == 1:
            dimensions.append(np.arange(len(dimensions[0])))
            a = ax.scatter(dimensions[1], dimensions[0], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
        else:
            a = ax.scatter(dimensions[0], dimensions[1], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1))
        b = fig.colorbar(a, ax=ax)
        plt.savefig(plot_file)
    print("INFO: Saved cluster plot to {}".format(plot_file), file=stderr)

    if metric_name == "precomputed":
        get_centroid(points, metric_name, num_confs, labels, traj_file, inputfile)

    split_trajectory(traj_file, inputfile, labels, n_clusters_)

    return labels
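
The comment block above distinguishes the "euclidean" and "precomputed" metrics; a small standalone check (toy data and parameters chosen only for illustration, not values used by this script) shows that passing raw points with metric="euclidean" and passing their pairwise distance matrix with metric="precomputed" yields the same labels:

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.metrics import pairwise_distances

# two well-separated blobs plus one outlier
toy_points = np.array([[0.0, 0.0], [0.1, 0.2], [0.2, 0.1],
                       [5.0, 5.0], [5.1, 5.2], [5.2, 5.1],
                       [20.0, 20.0]])

labels_euclidean = DBSCAN(eps=1.0, min_samples=2, metric="euclidean").fit(toy_points).labels_
labels_precomputed = DBSCAN(eps=1.0, min_samples=2, metric="precomputed").fit(
    pairwise_distances(toy_points)).labels_

# identical clusters either way; the isolated point is labeled -1 (noise)
assert (labels_euclidean == labels_precomputed).all()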
Example No. 7
msg += ['* Architecture: ' + plat]
plat = plat + '-' + sys.version[0:3]
gpawso = 'build/lib.%s/' % plat + '_gpaw.so'
gpawbin = 'build/bin.%s/' % plat + 'gpaw-python'
if 'clean' in sys.argv:
    if os.path.isfile(gpawso):
        print('removing ', gpawso)
        os.remove(gpawso)
    if os.path.isfile(gpawbin):
        print('removing ', gpawbin)
        os.remove(gpawbin)

sources = glob('c/*.c') + ['c/bmgs/bmgs.c']
sources = sources + glob('c/xc/*.c')

check_dependencies(sources)

extension = Extension('_gpaw',
                      sources,
                      libraries=libraries,
                      library_dirs=library_dirs,
                      include_dirs=include_dirs,
                      define_macros=define_macros,
                      undef_macros=undef_macros,
                      extra_link_args=extra_link_args,
                      extra_compile_args=extra_compile_args,
                      runtime_library_dirs=runtime_library_dirs,
                      extra_objects=extra_objects)

extensions = [extension]
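
Both GPAW snippets stop right after building the extensions list; the rest of the file is not shown, but in a distutils-style setup.py this list is normally handed to setup() through ext_modules. A minimal sketch of that final step (the metadata values here are placeholders, not GPAW's real ones):

from distutils.core import setup

setup(name='gpaw',            # placeholder metadata for illustration
      version='0.0',
      ext_modules=extensions)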