def read_corpus_word2vec():
    """Read every document listed in ``../info.tsv`` and return its sentences.

    ``utils.read_info("../info.tsv")`` supplies one record per document; the
    first field of each record is opened as a file path. Each document is
    split into sentences with NLTK's pre-trained English Punkt tokenizer and
    every non-empty sentence is passed through ``sentence_process``.

    A percentage progress indicator is rewritten on stdout after each
    document.

    Returns:
        list: the ``sentence_process`` result for every non-empty sentence,
        in document order.
    """
    info = utils.read_info("../info.tsv")

    # The Punkt tokenizer divides a text into a list of sentences, using an
    # unsupervised algorithm to build a model for abbreviation words,
    # collocations, and words that start sentences. It must be trained on a
    # large collection of plaintext in the target language before it can be
    # used; the NLTK data package includes a pre-trained model for English.
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

    sentences = []
    num_docs = len(info)
    for ind_doc in range(num_docs):
        # Close each document as soon as it has been read, so a large
        # collection does not accumulate open file handles.
        with open(info[ind_doc][0]) as doc_file:
            text = doc_file.read()

        for raw_sentence in sent_detector.tokenize(text.strip()):
            if raw_sentence:
                sentences.append(sentence_process(raw_sentence))

        # Integer division keeps the printed value identical on Python 2 and 3.
        sys.stdout.write("\rReading collection: %d%%" % (ind_doc * 100 // num_docs))
        sys.stdout.flush()
    return sentences
def set_input(folders_matrix):
    """Build the optimiser input description and return it as JSON text.

    Args:
        folders_matrix: mapping whose keys are folder names (joined onto
            ``utils.folder_opt_matrix``) and whose values unpack as
            ``(weight, nbands)`` pairs.

    Returns:
        str: the configuration serialised with ``json.dumps(..., indent=4)``.
    """
    Ts = read_stru.get_T()
    stru_input = read_stru.get_input_dict()
    Ecut = float(stru_input["ecutwfc"])
    info = utils.read_info()

    # Iterating the mapping yields its keys, i.e. the folder names.
    matrix_files = []
    for folder_matrix in folders_matrix:
        matrix_files.append(f"{utils.folder_opt_matrix}/" + folder_matrix)

    nu_per_type = {T: info["Nu"] for T in Ts}

    band_counts = [nbands for _, nbands in folders_matrix.values()]
    weights = [weight for weight, _ in folders_matrix.values()]

    rcut = read_stru.get_Rcut()
    dr_per_type = {T: utils.dr for T in Ts}
    ecut_per_type = {T: Ecut for T in Ts}

    config = {
        "file_list": matrix_files,
        "info": {
            "Nt_all": Ts,
            "Nu": nu_per_type,
            "Nb_true": band_counts,
            "weight": weights,
            "Rcut": rcut,
            "dr": dr_per_type,
            "Ecut": ecut_per_type,
            "lr": utils.lr,
        },
        "C_init_info": {
            "init_from_file": False,
        },
        "V_info": {
            "init_from_file": True,
            "same_band": False,
        },
    }
    return json.dumps(config, indent=4)
def read_corpus_word2vec():
    """Collect the processed sentences of all documents named in ``../info.tsv``.

    The records come from ``utils.read_info("../info.tsv")``; element ``0`` of
    each record is used as the path of a text file. Every file is read,
    stripped, and split into sentences by NLTK's pre-trained English Punkt
    tokenizer. Non-empty sentences are transformed by ``sentence_process``
    and appended to the result.

    Progress is reported on stdout as a percentage that is overwritten in
    place after each document.

    Returns:
        list: ``sentence_process`` outputs, ordered by document and then by
        sentence position.
    """
    info = utils.read_info("../info.tsv")

    # Punkt is an unsupervised sentence splitter: it models abbreviations,
    # collocations, and sentence-starting words, and must be trained on a
    # large plaintext corpus in the target language. NLTK's data package
    # ships a pre-trained English model, which is what is loaded here.
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

    sentences = []
    num_docs = len(info)
    for ind_doc in range(num_docs):
        # The context manager guarantees the handle is released per document
        # instead of waiting for garbage collection.
        with open(info[ind_doc][0]) as doc_file:
            text = doc_file.read()

        raw_sentences = sent_detector.tokenize(text.strip())
        for raw_sentence in raw_sentences:
            if raw_sentence:
                sentences.append(sentence_process(raw_sentence))

        # Floor division gives the same whole-number percentage on both
        # Python 2 and Python 3.
        sys.stdout.write("\rReading collection: %d%%" % (ind_doc * 100 // num_docs))
        sys.stdout.flush()
    return sentences
def read_basic_info(self):
    """Fill in the basic metadata attributes for the file at ``self.path``.

    Sets ``original_sr``, ``nchannels``, ``sampwidth`` and ``length`` from
    the four values returned by ``utils.read_info``, then ``md5``,
    ``duration`` (``length / original_sr / timeexp``), ``filesize`` and
    ``sr`` (a copy of ``original_sr``). ``mask`` and ``signal`` are reset to
    ``None``.
    """
    sr, nchannels, sampwidth, length = utils.read_info(self.path)
    self.original_sr = sr
    self.nchannels = nchannels
    self.sampwidth = sampwidth
    self.length = length

    self.md5 = utils.binaryMD5(self.path)

    # Unscaled duration first, then corrected by the time-expansion factor.
    unscaled = float(self.length) / float(self.original_sr)
    self.duration = unscaled / self.timeexp

    self.filesize = utils.media_size(self.path)
    self.sr = self.original_sr

    # Nothing is loaded yet.
    self.mask = None
    self.signal = None
def get_dis_opt(dis):
    """Select the distances to keep for every ``(T1, T2)`` key of ``dis``.

    For each pair, ``read_stru.delete_zero(dis[T1, T2])`` yields a mapping
    whose keys are distances and whose values are used as counts
    (presumably occurrence counts - confirm against ``read_stru``). If the
    mapping has at most ``info["dimer_num"]`` entries, all of its keys are
    kept. Otherwise the keys are reduced to ``info["dimer_num"]`` values:

    * ``"kmeans"`` (the mode currently hard-coded): weighted k-means over the
      distances with weight ``count / distance**2``; the cluster centres are
      kept and the distance-to-label map is pretty-printed.
    * ``"linspace"``: evenly spaced values between the smallest and largest
      distance.

    When ``T1 == T2`` the value ``0.0`` is appended to the selection.

    Args:
        dis: mapping keyed by ``(T1, T2)`` tuples.

    Returns:
        dict: ``(T1, T2)`` -> list of selected distances.

    Raises:
        ValueError: if ``opt_mode`` is neither ``"linspace"`` nor ``"kmeans"``.
    """
    opt_mode = "kmeans"
    dis_opt = dict()
    info = utils.read_info()
    dimer_num = info["dimer_num"]
    for T1, T2 in dis:
        dis_tmp = read_stru.delete_zero(dis[T1, T2])
        if len(dis_tmp) <= dimer_num:
            dis_opt[T1, T2] = list(dis_tmp.keys())
        elif opt_mode == "linspace":
            # np.linspace returns an ndarray, which has no .append(); store a
            # list so the T1 == T2 step below works for this mode as well.
            dis_opt[T1, T2] = list(
                np.linspace(min(dis_tmp), max(dis_tmp), dimer_num))
        elif opt_mode == "kmeans":
            kmeans = KMeans(n_clusters=dimer_num)
            label = kmeans.fit_predict(
                np.array(list(dis_tmp.keys())).reshape(-1, 1),
                sample_weight=[num / i_dis**2 for i_dis, num in dis_tmp.items()])
            dis_opt[T1, T2] = list(kmeans.cluster_centers_.reshape(-1))
            pprint.pprint(dict(zip(dis_tmp.keys(), label)))
        else:
            # Fail loudly rather than leaving dis_opt[T1, T2] unset.
            raise ValueError(f"unknown opt_mode: {opt_mode!r}")
        if T1 == T2:
            dis_opt[T1, T2].append(0.0)
    return dis_opt
def cal(input):
    """Write the optimiser input and submit the job through ``utils.sub``.

    ``input`` is written verbatim to ``{utils.folder_opt}/input.json``. For
    the ``"qsub"`` and ``"tianhe2"`` schedulers a ``sub.sh`` script running
    ``info["opt_orb"]`` is generated in the same folder and submitted from
    inside it. Any other ``utils.sub`` value writes only ``input.json``.

    Args:
        input: text to store as ``input.json``.
    """
    info = utils.read_info()
    scheduler = utils.sub
    script_path = f"{utils.folder_opt}/sub.sh"

    with open(f"{utils.folder_opt}/input.json", "w") as json_file:
        json_file.write(input)

    # Generate the job script and remember how to submit it.
    submit_command = None
    if scheduler == "qsub":
        with open(script_path, "w") as script:
            script.write(
                textwrap.dedent(f"""\
                    #!/bin/bash
                    #PBS -q batch
                    #PBS -l nodes=1:ppn=1
                    #PBS -l walltime=2:00:00
                    #PBS -o job.log
                    #PBS -e job.err
                    ulimit -s unlimited
                    cd $PBS_O_WORKDIR
                    export OMP_NUM_THREADS=1
                    EXEC={info["opt_orb"]}
                    python3 -u $EXEC
                    """))
        submit_command = "qsub sub.sh"
    elif scheduler == "tianhe2":
        with open(script_path, "w") as script:
            script.write(
                textwrap.dedent(f"""\
                    #!/bin/bash
                    EXEC={info["opt_orb"]}
                    python3 -u $EXEC >Log.txt
                    """))
        submit_command = "yhbatch -N 1 sub.sh"

    # Submit from inside the working folder, then step back out.
    os.chdir(utils.folder_opt)
    if submit_command is not None:
        os.system(submit_command)
    # os.system(f'python3 -u {info["opt_orb"]}')
    os.chdir("../")
def cal_ABACUS(T1, T2, i_dis):
    """Create and launch one ABACUS run for the pair ``T1``/``T2`` at ``i_dis``.

    A fresh directory named by ``utils.folder_name(T1, T2, i_dis)`` is
    created and filled with ``INPUT``, ``STRU`` and ``KPT`` files. Depending
    on ``utils.sub`` a ``sub.sh`` script is written (``"qsub"``, ``"bsub"``)
    and submitted, or ABACUS is started directly with ``yhrun``
    (``"tianhe2"``). Any other ``utils.sub`` value only writes the files.

    Args:
        T1: first element label.
        T2: second element label; equal to ``T1`` for a single-species run.
        i_dis: x coordinate given to the second atom. An absolute value below
            ``1e-10`` is treated as zero, in which case a same-species run
            contains one atom only.

    Raises:
        FileExistsError: if the target directory already exists.
    """
    folder = pathlib.Path(utils.folder_name(T1, T2, i_dis)).resolve()
    # exist_ok=False: never write into an existing directory.
    folder.mkdir(parents=True, exist_ok=False)

    # Read once; used for both the INPUT file and the job script.
    info = utils.read_info()
    same_type = T1 == T2
    zero_dis = abs(i_dis) < 1E-10

    with open(folder / "INPUT", "w") as file:
        input_dict = read_stru.get_input_dict()
        input_dict["ntype"] = 1 if same_type else 2
        input_dict["exx_hybrid_type"] = 'opt_orb'
        input_dict["nbands"] = (read_stru.get_nw()[T1] if zero_dis else
                                read_stru.get_nw()[T1] + read_stru.get_nw()[T2])
        input_dict["nspin"] = 1
        input_dict["gamma_only"] = 1
        # Made absolute because the run executes from inside `folder`.
        input_dict["pseudo_dir"] = os.path.abspath(
            input_dict.get("pseudo_dir", r"./"))
        input_dict["exx_opt_orb_lmax"] = len(info["Nu"]) - 1
        read_stru.print_input(file, input_dict)

    with open(folder / "STRU", "w") as file:
        Ts = (T1, ) if same_type else (T1, T2)
        file.write("ATOMIC_SPECIES\n")
        pseudo_path = read_stru.get_pseudo_path()
        for T in Ts:
            file.write(f"{T}	1	{pseudo_path[T]}\n")
        file.write("\nNUMERICAL_ORBITAL\n")
        lcao_path = read_stru.get_lcao_path()
        for T in Ts:
            file.write(f"{lcao_path[T]}\n")
        file.write(
            textwrap.dedent(f"""
                LATTICE_CONSTANT
                1\n
                LATTICE_VECTORS
                30 0 0
                0 30 0
                0 0 30\n
                ATOMIC_POSITIONS
                Cartesian
                """))
        if same_type:
            if zero_dis:
                file.write(
                    textwrap.dedent(f"""
                        {T1}
                        0
                        1
                        0 0 0 0 0 0
                        """))
            else:
                file.write(
                    textwrap.dedent(f"""
                        {T1}
                        0
                        2
                        0 0 0 0 0 0
                        {i_dis} 0 0 0 0 0
                        """))
        else:
            file.write(
                textwrap.dedent(f"""
                    {T1}
                    0
                    1
                    0 0 0 0 0 0\n
                    {T2}
                    0
                    1
                    {i_dis} 0 0 0 0 0
                    """))

    with open(folder / "KPT", "w") as file:
        file.write(
            textwrap.dedent(f"""\
                K_POINTS
                0
                Gamma
                1 1 1 0 0 0
                """))

    if utils.sub == "qsub":
        with open(folder / "sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
                    #!/bin/bash
                    #PBS -q gold5120
                    #PBS -l nodes=1:ppn=1
                    #PBS -l walltime=1:00:00
                    #PBS -o job.log
                    #PBS -e job.err
                    ulimit -s unlimited
                    cd $PBS_O_WORKDIR
                    EXEC={info["ABACUS"]}
                    mpirun -n 1 -env OMP_NUM_THREADS=1 $EXEC
                    """))
    elif utils.sub == "bsub":
        with open(folder / "sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
                    #!/bin/sh
                    #BSUB -q renxg
                    #BSUB -o job.log -e job.err
                    #BSUB -n 1
                    EXEC={info["ABACUS"]}
                    mpirun -n 1 -env OMP_NUM_THREADS=1 $EXEC
                    """))

    os.chdir(folder)
    if utils.sub == "qsub":
        os.system("qsub sub.sh")
    elif utils.sub == "bsub":
        os.system("bsub < sub.sh")
    elif utils.sub in ("tianhe2", "tianh2"):
        # The other submit routines spell this scheduler "tianhe2"; the
        # historical "tianh2" spelling is still accepted so existing
        # configurations keep working.
        os.system(f'yhrun -n 1 -c 1 {info["ABACUS"]} >Log.txt')
    # NOTE(review): `folder` is an absolute path, so this lands in its parent,
    # which is the starting directory only if folder_name() is a single-level
    # relative name - confirm against utils.folder_name.
    os.chdir("../")
def cal():
    """Prepare the ``utils.folder_exx`` run directory and launch ABACUS in it.

    The directory is created (it must not exist yet) and receives:

    * ``INPUT`` - regenerated from ``read_stru.get_input_dict()`` with
      ``pseudo_dir`` made absolute;
    * ``KPT`` - copied from the current directory;
    * ``STRU`` - new species/orbital headers, an ``ABFS_ORBITAL`` section
      pointing at ``../{utils.folder_opt}/orb_{T}.dat``, followed by the part
      of the current ``STRU`` that comes after ``LATTICE_CONSTANT``;
    * ``sub.sh`` - for the ``"qsub"`` and ``"bsub"`` schedulers.

    The job is then submitted from inside the directory according to
    ``utils.sub`` (``"qsub"``, ``"bsub"`` or ``"tianhe2"``).

    Raises:
        FileExistsError: if ``utils.folder_exx`` already exists.
        IndexError: if the current ``STRU`` contains no ``LATTICE_CONSTANT``.
        ValueError: if ``utils.sub`` is not a supported scheduler. The files
            have already been written at that point.
    """
    pathlib.Path(utils.folder_exx).mkdir(parents=True, exist_ok=False)

    # The copied INPUT is overwritten right below; KPT is kept as copied.
    os.system(f"cp INPUT {utils.folder_exx}/")
    os.system(f"cp KPT {utils.folder_exx}/")

    with open(f"{utils.folder_exx}/INPUT", "w") as file:
        input_dict = read_stru.get_input_dict()
        # Made absolute because the run executes from inside folder_exx.
        input_dict["pseudo_dir"] = os.path.abspath(
            input_dict.get("pseudo_dir", r"./"))
        read_stru.print_input(file, input_dict, 1)

    with open("STRU", "r") as file:
        # strus[1] is the text following the first LATTICE_CONSTANT keyword.
        strus = re.compile("LATTICE_CONSTANT").split(file.read())
    with open(f"{utils.folder_exx}/STRU", "w") as file:
        Ts = read_stru.get_T()
        file.write("ATOMIC_SPECIES\n")
        pseudo_path = read_stru.get_pseudo_path()
        for T in Ts:
            file.write(f"{T}	12	{pseudo_path[T]}\n")
        file.write("\nNUMERICAL_ORBITAL\n")
        lcao_path = read_stru.get_lcao_path()
        for T in Ts:
            file.write(f"{lcao_path[T]}\n")
        file.write("\nABFS_ORBITAL\n")
        for T in Ts:
            file.write(f"../{utils.folder_opt}/orb_{T}.dat\n")
        file.write("\nLATTICE_CONSTANT")
        file.write(strus[1])

    info = utils.read_info()
    if utils.sub == "qsub":
        with open(f"{utils.folder_exx}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
                    #!/bin/bash
                    #PBS -q gold5120
                    #PBS -l nodes=2:ppn=28
                    #PBS -l walltime=99:99:99
                    #PBS -o job.log
                    #PBS -e job.err
                    ulimit -s unlimited
                    cd $PBS_O_WORKDIR
                    EXEC={info["ABACUS"]}
                    mpirun -n 2 -env OMP_NUM_THREADS=28 $EXEC
                    """))
    elif utils.sub == "bsub":
        with open(f"{utils.folder_exx}/sub.sh", "w") as file:
            file.write(
                textwrap.dedent(f"""\
                    #!/bin/sh
                    #BSUB -q renxg
                    #BSUB -o job.log -e job.err
                    #BSUB -n 6
                    mpirun -n 2 -env OMP_NUM_THREADS=28 {info['ABACUS']}
                    """))

    os.chdir(utils.folder_exx)
    try:
        if utils.sub == "qsub":
            os.system("qsub sub.sh")
        elif utils.sub == "bsub":
            os.system("bsub < sub.sh")
        elif utils.sub == "tianhe2":
            os.system(
                f'yhrun -N 1 -n 1 -c 24 -t 1440 {info["ABACUS"]} >Log.txt 2>&1 &')
        else:
            raise ValueError("utils.sub")
    finally:
        # Always step back out, including when the scheduler is rejected
        # above; otherwise the caller is left inside folder_exx.
        os.chdir("../")