def glue(self, nch=64, fs=100):
    """Concatenate the recordings listed in ``self.data`` and offer to save them.

    The entries of ``self.data`` are visited in sorted-key order and each
    entry's ``"filename"`` is loaded with ``get_raw_data``.  Between two
    consecutive files a constant filler is inserted: the last ``nch`` values
    of the data glued so far, repeated ``int(fs * self.buffer[ind])`` times.
    When there is only one file and it holds fewer than 360000 frames
    (``shape[0] / nch``), it is padded with the same kind of filler up to
    360000 frames.  Finally the user is asked, through Tk dialogs, whether
    and where to save the result with ``write_data``.

    Does nothing when ``self.data`` is empty.

    Args:
        nch: Number of values per frame.  Presumably the channel count of a
            flat, channel-interleaved array -- TODO confirm against
            ``get_raw_data``.
        fs: Multiplier turning ``self.buffer[ind]`` into a frame count.
            Presumably the sampling rate in Hz, with ``self.buffer`` holding
            gap lengths in seconds -- TODO confirm.

    Returns:
        None.
    """
    # Minimum length of a single-file result, in frames.
    # NOTE(review): looks like one hour at the default fs=100 -- confirm.
    min_frames = 360000

    sortkeys = sorted(self.data.keys())
    if not sortkeys:
        return

    files = [self.data[sortkey]["filename"] for sortkey in sortkeys]
    fulldata = get_raw_data(files[0])
    # One frame is `nch` values wide; this used to be a hard-coded 64, which
    # only matched the default value of `nch`.
    lastvalue = fulldata[-nch:]

    for ind, _file in enumerate(files[1:]):
        data = get_raw_data(_file)
        bufferdata = np.tile(lastvalue, int(fs * self.buffer[ind]))
        fulldata = np.concatenate((fulldata, bufferdata, data))
        # Refresh the filler frame so the next gap repeats the end of the
        # file just appended, not the end of the very first file.
        lastvalue = fulldata[-nch:]

    if len(sortkeys) == 1 and fulldata.shape[0] / nch < min_frames:
        print("Adding some constant data at the end")
        bufferdata = np.tile(lastvalue, min_frames - int(fulldata.shape[0] / nch))
        fulldata = np.concatenate((fulldata, bufferdata))

    # Let the user compare the expected length with the glued length.
    print(self.totallen, " == ", fulldata.shape[0] / nch, "?")

    asksave = messagebox.askquestion(
        "Saving glued data",
        "Do you want to save glued data into file?",
        icon='warning')
    if asksave != 'yes':
        return

    savefile = filedialog.asksaveasfilename(
        title="Save datafile as...",
        defaultextension="",
        initialdir=dirn(files[0]),
        initialfile="GLUED" + base(files[0]))
    # The dialog yields an empty value when it is cancelled; writing to an
    # empty path would fail, so skip the save in that case.
    if savefile:
        write_data(savefile, fulldata)
def process_geojson(self, input_path='./data/geolocation_sample.json', output_path='./data/geolocation_sample.geojson'):
    """Write the host locations as a GeoJSON ``FeatureCollection``.

    ``input_path`` is handed to ``self.process_json``; every mapping it
    returns becomes one ``Point`` feature named after the mapping's
    ``'host'``.  The finished collection is passed to ``write_data`` together
    with ``output_path``.

    Args:
        input_path: Path forwarded unchanged to ``self.process_json``.
        output_path: Destination handed to ``write_data``.

    Returns:
        None.
    """
    def as_feature(entry):
        # GeoJSON positions are [longitude, latitude], so the first two
        # comma-separated fields of the stored string are swapped.
        # Presumably the stored order is "latitude,longitude" -- verify.
        fields = entry['coordinates'].split(',')
        return {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(fields[1]), float(fields[0])],
            },
            "properties": {"name": entry['host']},
        }

    features = []
    for entry in self.process_json(input_path):
        features.append(as_feature(entry))

    collection = {"type": "FeatureCollection", "features": features}
    write_data(output_path, collection)
def main():
    """Run the FAcD mutation -> MD -> QM-cluster -> analysis workflow once.

    For each pass a ``PDB`` object is prepared, mutated, minimised, sampled
    with MD and evaluated with a QM cluster calculation; the field strength
    along the ligand C-F bond, the bond dipole and the ligand-to-mutation
    distance are then handed to ``write_data``.  A pass whose mutation flag
    targets residue '108' is skipped.  The elapsed wall time is printed at
    the end.
    """
    # Settings shared by every pass.
    md_engine = 'Amber_pmemd_gpu'
    qm_mask = ':108,298'                # atoms forming the QM cluster
    qm_keywords = '# hf/6-31G(d) pop=cm5'
    field_mask = ':1-107,109-297'       # atoms used for the field strength

    started = datetime.datetime.now()

    for run in range(1):
        # --- Preparation ---
        pdb_obj = PDB('./FAcD-FA-ASP.pdb', wk_dir='./FAcD_test%d' % run)
        pdb_obj.rm_wat()            # strip water and ions of the crystal file
        pdb_obj.get_protonation()   # add hydrogens

        # --- Mutation ---
        mutation_tag = pdb_obj.Add_MutaFlag('r')
        if pdb_obj.MutaFlags[0][2] == '108':
            # The key residue must stay untouched.
            continue
        pdb_obj.PDB2PDBwLeap()      # deploy the mutation

        # Hydrogens are rebuilt because the mutation changed the residues.
        pdb_obj.rm_allH()
        pdb_obj.get_protonation()

        # Relax the mutated structure by minimisation.
        pdb_obj.PDB2FF()
        pdb_obj.PDBMin(engine=md_engine)

        # --- Sampling with MD ---
        pdb_obj.rm_wat()                # drop the water left by minimisation
        pdb_obj.PDB2FF(ifsavepdb=1)     # also keep the exact structure used in MD
        pdb_obj.PDBMD(tag=mutation_tag, engine=md_engine, equi_cpu=1)
        pdb_obj.nc2mdcrd(point=100)     # sample frames from the trajectory

        # --- QM cluster ---
        pdb_obj.PDB2QMCluster(qm_mask, g_route=qm_keywords, ifchk=1)
        pdb_obj.get_fchk(keep_chk=0)

        # --- Analysis: the ligand C-F bond ---
        ligand = pdb_obj.stru.ligands[0]
        a1 = int(ligand.CH3)
        a2 = int(ligand.F)
        a1_qm = pdb_obj.qm_cluster_map[str(a1)]
        a2_qm = pdb_obj.qm_cluster_map[str(a2)]

        # Field strength (MM) at the bond centre.
        field_strengths = pdb_obj.get_field_strength(
            field_mask, a1=a1, a2=a2, bond_p1='center')

        # Bond dipole moment (QM).
        dipoles = PDB.get_bond_dipole(pdb_obj.qm_cluster_fchk, a1_qm, a2_qm)

        # Distance between the ligand and the mutated residue.  The flag
        # stores the chain as a letter ('A' -> index 0) and a 1-based
        # residue number.
        ligand_resi = pdb_obj.stru.ligands[0]
        flag = pdb_obj.MutaFlags[0]
        mutated_resi = pdb_obj.stru.chains[ord(flag[1]) - ord('A')][int(flag[2]) - 1]
        distance = pdb_obj.stru.get_resi_dist(ligand_resi, mutated_resi)

        # Record: mutation -> field strength, bond dipole, distance.
        results = {'E': field_strengths, 'Bond Dipole': dipoles, 'Distance': distance}
        write_data(pdb_obj.MutaFlags, results, data_output_path)

    print(datetime.datetime.now() - started)
def extract_benchmarkportfolio(db, output):
    """Export the benchmark weights of the Lonsec (Traditional) SAA Benchmark lists.

    Active top-level investment lists whose name matches the benchmark
    pattern are looked up first.  For each of them a recursive query walks
    the list and its descendants and returns one row per benchmark weight.
    Rows with a falsy ``Strategy`` are dropped, the first column of every
    remaining row is overwritten with the top-level list name, and all rows
    are written through ``helper.write_data`` together with a header.

    Args:
        db: Object whose ``get_data(sql)`` returns an iterable of rows.  Rows
            must support attribute access (``row.Strategy``) and item
            assignment (``row[0] = ...``).
        output: Destination handed to ``helper.write_data``.

    Returns:
        None.
    """
    # Active root lists (no parent) named like the benchmark.
    root_lists_sql = (
        " select InvestmentListID, InvestmentListName from tblInvestmentList"
        " where isActive=1 and parentID is null and"
        " (investmentListName like '%Lonsec (Traditional) SAA Benchmark%') "
    )

    # Recursive CTE: the root list plus every descendant, joined to the
    # benchmark weights whose DateTo equals '2079-06-06'.
    weights_sql_template = (
        " With T(InvestmentListID, InvestmentListName, RiskCategoryNo) as"
        " ( select InvestmentListID, InvestmentListName, RiskCategoryNo"
        " from tblInvestmentList where investmentListid={listid}"
        " union all"
        " select il.InvestmentListID, il.InvestmentListName, il.RiskCategoryNo"
        " from tblInvestmentList il inner join T on il.parentID = T.investmentListID )"
        " select T.InvestmentListName,"
        " 'Risk Profile '+ convert(varchar, T.RiskCategoryNo) as Strategy"
        " , ve.AlternativeCode as SecurityCode"
        " , 'CASH' as Exchange"
        " , ve.BenchmarkName"
        " , ilb.[Weight], ilb.DateFrom"
        " from T"
        " left join tblInvestmentListBenchmark ilb"
        " on ilb.investmentListID = T.InvestmentListID and ilb.DateTo = '2079-06-06'"
        " left join tblBenchmark ve on ilb.BenchmarkID = ve.BenchmarkID"
        " order by T.investmentListID, T.RiskCategoryNo "
    )

    header = ['Portfolio', 'Strategy', 'Security Code', 'Exchange',
              'Security Name', 'Weight', 'Effective Date']

    report_data = []
    for listid, listname in db.get_data(root_lists_sql):
        logger.info('Processing {} - {}'.format(listname, listid))
        weight_rows = db.get_data(weights_sql_template.format(listid=listid))
        for record in weight_rows:
            # Rows without a Strategy are skipped -- presumably the root
            # list's own row, which has no risk category; verify.
            if not record.Strategy:
                continue
            # Report every row under the top-level list's name.
            record[0] = listname
            report_data.append(record)

    helper.write_data(output, report_data, header)
def process_json(self, output_path):
    """Collect host/coordinate pairs from ``self.data`` and write them out.

    Every record under ``self.data['hits']['hits']`` contributes one mapping
    ``{'host': ..., 'coordinates': ...}`` taken from the record's
    ``'_source'`` entry (``'host'`` and ``'geolocation'``).  A record that
    lacks one of these fields, or is not subscriptable, is reported on
    stdout and skipped.

    Args:
        output_path: Destination handed to ``write_data``.

    Returns:
        The list of mappings that was written.
    """
    geo_mappings = []
    for record in self.data['hits']['hits']:
        try:
            source = record['_source']
            geolocation = source['geolocation']
            host = source['host']
        except (LookupError, TypeError) as err:
            # Best effort: a malformed record must not abort the export.
            # Only lookup/type failures are caught, so KeyboardInterrupt and
            # SystemExit are no longer swallowed as with a bare `except:`.
            print("Oops!", type(err), "occured.")
            continue
        geo_mappings.append({'host': host, 'coordinates': geolocation})

    write_data(output_path, geo_mappings)
    return geo_mappings
def main():
    """Analyse the C-I bond from existing MD and QM-cluster output files.

    A ``PDB`` object is rebuilt from files found in the working directory
    (``*_ff.pdb``, ``*prmtop``, ``./MD/*mdcrd``, ``./QMCluster/*fchk``).  The
    field strength along the bond and the bond dipole are then computed and
    handed to ``write_data``.
    """
    def trailing_number(file_name):
        # Take the second-to-last dot-separated piece of the name and read
        # the integer after its last underscore.
        return int(file_name.split('.')[-2].split('_')[-1])

    pdb_obj = PDB(glob('*_ff.pdb')[0])
    pdb_obj.Add_MutaFlag('XXX')
    pdb_obj.prmtop_path = glob('*prmtop')[0]
    pdb_obj.prepi_path = {
        'SAH': '../ligands/ligand_SAH.prepin',
        'MET': '../ligands/ligand_MET.prepin',
    }
    pdb_obj.mdcrd = glob('./MD/*mdcrd')[0]

    # --- QM cluster ---
    qm_mask = ':217,218'
    pdb_obj.get_stru()
    # Only the second element of the returned pair is kept.
    _unused_lines, pdb_obj.qm_cluster_map = pdb_obj.stru.get_sele_list(
        qm_mask, fix_end='H', prepi_path=pdb_obj.prepi_path)

    # Order the fchk files numerically by the number in their name.
    pdb_obj.qm_cluster_fchk = glob('./QMCluster/*fchk')
    pdb_obj.qm_cluster_fchk.sort(key=trailing_number)

    # --- Analysis: the C-I bond of the second ligand ---
    ligand = pdb_obj.stru.ligands[1]
    a1 = int(ligand.C1)
    a2 = int(ligand.I1)
    a1_qm = pdb_obj.qm_cluster_map[str(a1)]
    a2_qm = pdb_obj.qm_cluster_map[str(a2)]

    # Field strength (MM) at the bond centre.
    field_mask = ':1-216'
    field_strengths = pdb_obj.get_field_strength(
        field_mask, a1=a1, a2=a2, bond_p1='center')

    # Bond dipole moment (QM); the inputs are echoed first.
    print(a1_qm, a2_qm)
    print(pdb_obj.qm_cluster_fchk)
    dipoles = PDB.get_bond_dipole(pdb_obj.qm_cluster_fchk, a1_qm, a2_qm)

    results = {'E': field_strengths, 'Bond Dipole': dipoles}
    write_data(pdb_obj.MutaFlags, results, data_output_path)
tgt_results = [] pbar = ProgressBar() for i in pbar(range(len(orig_data))): en_point = i - batch_size de_point = i + batch_size if en_point < 0: en_point = 0 if de_point > len(simi_src_data) - 1: de_point = len(simi_src_data) - 1 max_bleu, bleu[0], bleu[1] = get_max_bleu( orig_data[i], simi_src_data[en_point:de_point]) tgt_results.append(' '.join( simi_tar_data[(en_point + bleu[0]):(en_point + bleu[1])])) print_result(orig_data, tgt_results) return tgt_results helper.log_w("Reading data. Please wait...") orig_data = helper.read_data(ORIG_FILE) simi_src_data = helper.read_data(SIMI_FILE_SRC_LANG) simi_tar_data = helper.read_data(SIMI_FILE_TAR_LANG) helper.log_w("Compute similarity...") out_tgt_data = similarize(orig_data, simi_src_data, simi_tar_data, 10) helper.log_w("Writing new data...") helper.write_data(DEST_FILE, out_tgt_data) helper.log_w("Done.")
bleu[0], bleu[1] = get_max_bleu(orig_data[i], simi_src_data[en_point:de_point]) count= count + 1 if(bleu[0] > 0.45) print(" ") print("origin en : ",orig_data[i]) print("translate en : ",simi_src_data[en_point + bleu[1]]) print("target vi: ",simi_tar_data[en_point + bleu[1]]) print("cosine point: ",bleu[0]) print(" ") tgt_results_vi.append(simi_tar_data[en_point + bleu[1]]) tgt_results_en.append(orig_data[i]) print("GOAL SETENCES",count) return tgt_results_vi,tgt_results_en helper.log_w("Reading data. Please wait...") orig_data = helper.read_data(ORIG_FILE) simi_src_data = helper.read_data(SIMI_FILE_SRC_LANG) simi_tar_data = helper.read_data(SIMI_FILE_TAR_LANG) helper.log_w("Compute similarity...") out_tgt_data_vi,out_tgt_data_vi_en = similarize(orig_data, simi_src_data, simi_tar_data, 1500) helper.log_w("Writing new data...") helper.write_data(DEST_FILE_VI, out_tgt_data_vi) helper.write_data(DEST_FILE_EN, out_tgt_data_vi_en) helper.log_w("Done.")
import maxSum as ms
import helper as hp

# Driver script: run the maxSum pipeline on ../data/test1.txt .. test3.txt
# and write one result file per input, printing progress after each stage.
#
# print(...) is called with a single argument throughout, so the script
# prints the same text on Python 2 and no longer fails to parse on Python 3.
for i in range(1, 4):
    print('test ' + str(i) + ':')

    matrix, m, n = hp.read_data('../data/test' + str(i) + '.txt')
    print('finish reading test data')

    T = ms.maxSum(matrix, m, n)
    print('finish computing T')

    a, b, score = ms.findMaxScore(T, m, n)
    print('finish finding max score')

    stack = ms.backTrack(T, matrix, a, b)
    print('finish backTrack')

    output = hp.write_data(stack, score, '../data/test' + str(i) + 'grp12.txt')
    print('finish writing output')
def extract_modelportfolio(db, output):
    """Export the investment weights of the UniSuper model portfolio lists.

    Active top-level investment lists whose name matches the portfolio
    pattern are looked up first.  For each of them a recursive query walks
    the list and its descendants and returns one row per investment weight.
    The first returned row is discarded, the first column of every remaining
    row is overwritten with the top-level list name, and all rows are written
    through ``helper.write_data`` together with a header.

    Args:
        db: Object whose ``get_data(sql)`` returns an iterable of
            ``(id, name)`` rows for the first query and a sliceable sequence
            of item-assignable rows for the second.
        output: Destination handed to ``helper.write_data``.

    Returns:
        None.
    """
    # Previously used name filter, kept for reference:
    #   investmentListName like '%Aon%Model Portfolios'
    #   or investmentListName like '%ASET%Model Portfolio%'
    #   or investmentListName like '%Aylesbury%Portfolio%'
    #   or investmentListName like '%BFP%Phase%'
    #   or investmentListName like '%Camerons%Portfolios%'
    #   or investmentListName like '%FSS%Portfolios%'
    #   or investmentListName like '%BT Panorama%Portfolios%'
    #   or investmentListName like '%DAC objective%Portfolio%'
    #   or investmentListName like '%Lonsec Retirement%Portfolios%'
    #   or investmentListName like '%LFG Model Portfolios%'
    #   or investmentListName like '%AssetChoice Ess%Portfolios%'

    # Active root lists (no parent) named like the portfolio family.
    root_lists_sql = (
        " select InvestmentListID, InvestmentListName from tblInvestmentList"
        " where isActive=1 and parentID is null and"
        " (investmentListName like 'UniSuper%Portfolio%' ) "
    )

    # Recursive CTE: the root list plus every descendant, joined to the
    # investments whose DateTo equals '2079-06-06' and to the tables that
    # supply the security code, exchange and name.
    weights_sql_template = (
        " With T(InvestmentListID, InvestmentListName, RiskCategoryNo) as"
        " ( select InvestmentListID, InvestmentListName, RiskCategoryNo"
        " from tblInvestmentList where investmentListid={listid}"
        " union all"
        " select il.InvestmentListID, il.InvestmentListName, il.RiskCategoryNo"
        " from tblInvestmentList il inner join T on il.parentID = T.investmentListID )"
        " select T.InvestmentListName, rc.RiskCategory as Strategy,"
        " coalesce(ve.stockCode, isff.ApirCode , ic.investmentCode) as SecurityCode"
        " , case when ve.Exchange is not null then ve.Exchange"
        " when isff.InstrumentID is not null then 'FND'"
        " else 'CASH' end"
        " , coalesce(ve.stockName, isff.InvestmentName, 'CASH') as SecurityName"
        " , ili.[Weight], ili.DateFrom, ili.InvestmentID"
        " from T"
        " left join tblRiskCategory rc on T.RiskCategoryNo = rc.RiskCategoryNo"
        " left join tblInvestmentListInvestment ili"
        " on ili.investmentListID = T.InvestmentListID and ili.DateTo = '2079-06-06'"
        " left join vewEquities ve on ili.InvestmentID = ve.StockID"
        " left join vewISF_Fund isff on isff.InvestmentID = ili.InvestmentID"
        " left join tblInvestmentCode ic"
        " on ic.InvestmentID = ili.InvestmentID and ic.IsUsedForGrowthSeries=1"
        " order by T.investmentListID, T.RiskCategoryNo "
    )

    header = ['Portfolio', 'Strategy', 'Security Code', 'Exchange',
              'Security Name', 'Weight', 'Effective Date', 'InvestmentID']

    report_data = []
    for listid, listname in db.get_data(root_lists_sql):
        logger.info('Processing {} - {}'.format(listname, listid))
        weight_rows = db.get_data(weights_sql_template.format(listid=listid))
        logger.info(weight_rows)
        # The first row is skipped -- presumably the root list's own row,
        # which sorts first; verify against the query's ordering.
        for record in weight_rows[1:]:
            # Report every row under the top-level list's name.
            record[0] = listname
            report_data.append(record)

    helper.write_data(output, report_data, header)