def create_archive(export_file, service=None, resume=None):
    """update or create index.html and download archive of all links"""
    print('[*] [{}] Starting archive from {} export file.'.format(
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        export_file,
    ))

    with open(export_file, 'r', encoding='utf-8') as f:
        links, service = parse_export(f, service=service)

    if resume:
        try:
            links = [
                link
                for link in links
                if float(link['timestamp']) >= float(resume)
            ]
        except TypeError:
            print('Resume value and all timestamp values must be valid numbers.')

    if not links or not service:
        print('[X] No links found in {}, is it a {} export file?'.format(export_file, service))
        raise SystemExit(1)

    if not os.path.exists(service):
        os.makedirs(service)

    if not os.path.exists(os.path.join(service, 'archive')):
        os.makedirs(os.path.join(service, 'archive'))

    dump_index(links, service)
    check_dependencies()

    try:
        for link in links:
            dump_website(link, service)
    except (KeyboardInterrupt, SystemExit, Exception) as e:
        print('{red}[X] Archive creation stopped.{reset}'.format(**ANSI))
        print('    Continue where you left off by running:')
        print('       ./archive.py {} {} {}'.format(
            export_file,
            service,
            link['timestamp'],
        ))
        if not isinstance(e, KeyboardInterrupt):
            raise e
        raise SystemExit(1)

    print('{}[√] [{}] Archive update complete.{}'.format(
        ANSI['green'],
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        ANSI['reset'],
    ))
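# A minimal usage sketch for create_archive. The export file name, the
# 'pocket' service, and the resume timestamp below are hypothetical
# illustrations, not values from the source.
if __name__ == '__main__':
    create_archive('ril_export.html', service='pocket')
    # To continue an interrupted run, pass the timestamp printed on exit:
    # create_archive('ril_export.html', service='pocket', resume='1595437996')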
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args):
    """
    Distributes a function over a given number of processes

    Parameters:
        traj_file (str): The name of the trajectory file to analyze.
        top_file (str): The name of the topology file associated with the trajectory.
        function (function): The analysis function to be parallelized.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of processes to launch.
        *args: The arguments for the provided function.

    Returns:
        results (list): The results from each individual processor's run.

    Note:
        The manner in which to concatenate the results is function-specific,
        so it should be handled in the calling module.
    """
    from config import check_dependencies
    check_dependencies(["pathos"])

    confs_per_processor = int(np.floor(num_confs / n_cpus))

    reader_pool = []
    processor_pool = pp.Pool(n_cpus)

    # split_starts and split_ends are around for backwards compatibility with the old parallelize algorithm
    reader_pool, tmpfiles = split_trajectory(traj_file, top_file, num_confs, n_cpus, confs_per_processor)
    split_starts = [0 for r in reader_pool]
    split_ends = [confs_per_processor for r in reader_pool]
    rem = num_confs % n_cpus
    for i in range(rem):
        split_ends[i] += 1

    # Staple everything together, send it out to the workers, and collect the results as a list.
    # Functions passed to this parallelizer must have the argument order defined by the lst variable:
    # (reader, <unique args>, number of configurations total, starting conf id, number of confs for this processor)
    # This args unpacking method was added in Python 3.6, so if you have an older version of Python that's why this isn't working.
    results = []
    lst = [(r, *args, num_confs, s, e) for r, s, e in zip(reader_pool, split_starts, split_ends)]

    # starmap allows you to have arguments that themselves are iterables;
    # async because we don't actually care what order stuff finishes in.
    results = processor_pool.starmap_async(function, lst).get()
    processor_pool.close()

    for f in tmpfiles:
        f.close()
        remove(f.name)

    return results
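# A minimal usage sketch for fire_multiprocess. The worker below, the file
# names, and the trailing 1.0 argument are hypothetical illustrations, not
# from the source. Workers must accept arguments in the order
# (reader, <unique args>, num_confs, starting conf id, confs in this chunk).
def count_confs(reader, scale, num_confs, start, n):
    # Trivial hypothetical worker: report the (scaled) size of its chunk.
    return n * scale

# results = fire_multiprocess('trajectory.dat', 'sys.top', count_confs, 1000, 4, 1.0)
# total = sum(results)  # concatenating per-process results is the caller's job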
nargs=1, help="The inputfile used to run the simulation") parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze') parser.add_argument('-p', metavar='num_cpus', nargs=1, type=int, dest='parallel', help="(optional) How many cores to use") args = parser.parse_args() from config import check_dependencies check_dependencies(["python", "numpy", "matplotlib"]) traj_file = args.trajectory[0] inputfile = args.inputfile[0] parallel = args.parallel if parallel: n_cpus = args.parallel[0] top_file = get_input_parameter(inputfile, "topology") if "RNA" in get_input_parameter(inputfile, "interaction_type"): environ["OXRNA"] = "1" else: environ["OXRNA"] = "0" num_confs = cal_confs(traj_file) import UTILS.base #this needs to be imported after the model type is set
help="The inputfile used to run the simulation") parser.add_argument('trajectory', type=str, nargs=1, help='the trajectory file you wish to analyze') parser.add_argument( '-v', type=str, nargs=1, dest='outfile', help='if you want instead average per-particle energy as a viewer JSON' ) args = parser.parse_args() from config import check_dependencies check_dependencies(["python", "numpy"]) traj_file = args.trajectory[0] inputfile = args.inputfile[0] try: outfile = args.outfile[0] visualize = True except: visualize = False top_file = get_input_parameter(inputfile, "topology") if "RNA" in get_input_parameter(inputfile, "interaction_type"): environ["OXRNA"] = "1" else: environ["OXRNA"] = "0" import UTILS.base #this needs to be imported after the model type is set
msg += ['* Architecture: ' + plat]
plat = plat + '-' + sys.version[0:3]

gpawso = 'build/lib.%s/' % plat + '_gpaw.so'
gpawbin = 'build/bin.%s/' % plat + 'gpaw-python'

if 'clean' in sys.argv:
    if os.path.isfile(gpawso):
        print('removing ', gpawso)
        os.remove(gpawso)
    if os.path.isfile(gpawbin):
        print('removing ', gpawbin)
        os.remove(gpawbin)

sources = glob('c/*.c') + ['c/bmgs/bmgs.c']
sources = sources + glob('c/xc/*.c')

check_dependencies(sources)

extension = Extension('_gpaw',
                      sources,
                      libraries=libraries,
                      library_dirs=library_dirs,
                      include_dirs=include_dirs,
                      define_macros=define_macros,
                      undef_macros=undef_macros,
                      extra_link_args=extra_link_args,
                      extra_compile_args=extra_compile_args,
                      runtime_library_dirs=runtime_library_dirs,
                      extra_objects=extra_objects)

extensions = [extension]
def perform_DBSCAN(points, num_confs, traj_file, inputfile, metric_name):
    """
    Runs the DBSCAN algorithm using the provided analysis as positions and splits the trajectory into clusters.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        num_confs (int): The number of configurations in the trajectory.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        metric_name (str): The type of data the points represent (usually either "euclidean" or "precomputed").

    Returns:
        labels (numpy.array): The clusterID of each configuration in the trajectory.
    """
    # run system checks
    from config import check_dependencies
    check_dependencies(["python", "sklearn", "matplotlib"])

    print("INFO: Running DBSCAN...", file=stderr)

    EPS = 12
    MIN_SAMPLES = 8

    # dump the input as a json file so you can iterate on EPS and MIN_SAMPLES
    dump_file = "cluster_data.json"
    print("INFO: Serializing input data to {}".format(dump_file), file=stderr)
    print("INFO: Run just clustering.py with the serialized data to adjust clustering parameters", file=stderr)
    out = [points.tolist(), num_confs, traj_file, inputfile, metric_name]
    dump(out, codecs.open(dump_file, 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)

    # prepping to show the plot later
    # this only shows the first three dimensions because we assume that this is either PCA data or only a few dimensions anyway
    #components = perform_pca(points, 3)
    dimensions = []
    x = []
    dimensions.append(x)

    if points.shape[1] > 1:
        y = []
        dimensions.append(y)

    if points.shape[1] > 2:
        z = []
        dimensions.append(z)

    for i in points:
        for j, dim in enumerate(dimensions):
            dim.append(i[j])

    # DBSCAN parameters:
    # eps: the pairwise distance below which configurations are considered neighbors
    # min_samples: the smallest number of neighboring configurations required to start a cluster
    # metric: if the matrix fed in contains points in n-dimensional space, the metric needs to be "euclidean";
    # if the matrix is already a square distance matrix, the metric needs to be "precomputed".
    # eps and min_samples need to be determined for each structure.
    # If you're making your own multidimensional data, you probably want to normalize it first.
print("INFO: Adjust clustering parameters by modifying the 'EPS' and 'MIN_SAMPLES' values in the script.", file=stderr) print("INFO: Current values: eps={}, min_samples={}".format(EPS, MIN_SAMPLES)) db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES, metric=metric_name).fit(points) labels = db.labels_ n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) print ("Number of clusters:", n_clusters_) print("INFO: Making cluster plot...") if len(dimensions) == 3: fig = plt.figure() ax = fig.add_subplot(111, projection='3d') else: fig = plt.figure() ax = fig.add_subplot(1, 1, 1) plt.xlabel("OP0") plt.ylabel("OP1") if len(dimensions) == 3: ax.set_zlabel("OP2") #to show the plot immediatley and interactivley '''a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', 7)) b = fig.colorbar(a, ax=ax) plt.show()''' #to make a video showing a rotating plot plot_file = "animated.mp4" def init(): a = ax.scatter(x, y, z, s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1)) fig.colorbar(a, ax=ax) return [fig] def animate(i): ax.view_init(elev=10., azim=i) return [fig] anim = animation.FuncAnimation(fig, animate, init_func=init, frames=range(360), interval=20, blit=True) anim.save(plot_file, fps=30, extra_args=['-vcodec', 'libx264']) else: plot_file = "plot.png" if len(dimensions) == 1: dimensions.append(np.arange(len(dimensions[0]))) a = ax.scatter(dimensions[1], dimensions[0], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1)) else: a = ax.scatter(dimensions[0], dimensions[1], s=2, alpha=0.4, c=labels, cmap=plt.get_cmap('tab10', n_clusters_+1)) b = fig.colorbar(a, ax=ax) plt.savefig(plot_file) print("INFO: Saved cluster plot to {}".format(plot_file), file=stderr) if metric_name == "precomputed": get_centroid(points, metric_name, num_confs, labels, traj_file, inputfile) split_trajectory(traj_file, inputfile, labels, n_clusters_) return labels