def expand_branches(node, branchrules, dirnamerules, urlrules, fnamerules,
                    debug=False, sort=False, limit=0):
    # to deal with the case of images, i think what we want to do is treat
    # branchrules as a dict of { content check : branchrule }, so like
    #   soup.text.contains('external site') : branchrules
    # or something like that
    # remember some way to preserve the file extension
    # ...urlrules might also have to be a dict. gross
    cwd = node.data['cwd']
    fname = node.data['fname']
    with open(cwd + '/' + fname) as f:
        content = f.read()
    logger.debug(' > expanding {} branches...'.format(fname))
    soup = bsp(content, 'html5lib')
    # this applies the branchrules lambda to the parsed soup
    branch_url_list = branchrules(soup)
    # TODO BS4 Tag has no compare operator for < and >
    #if sort:
    #    branch_url_list = sorted(branch_url_list)
    # this applies the dirnamerules lambda to cwd and fname
    cwd = dirnamerules(cwd, fname)
    pth('./{}'.format(cwd)).mkdir(parents=True, exist_ok=True)
    # now for the population of branch nodes
    branches = []
    urls = []
    for i, branch_url_suffix in enumerate(branch_url_list):
        if limit > 0 and i >= limit:
            break
        # TODO we need this on the last pass to just make the correct format
        # final image URL - template is listed in the last call to branchnodes
        branch_url = urlrules(branch_url_suffix)
        if branch_url in urls:
            continue
        urls.append(branch_url)
        # crazy this next line actually works
        branch_fname = fnamerules(branch_url)
        branches.append(
            Node(trunk=node, url=branch_url, fname=branch_fname, cwd=cwd))
        if debug:
            logger.debug('cwd {}; fname {}; url {}'.format(
                cwd, branch_fname, branch_url))
    return branches
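# A hypothetical usage sketch for expand_branches, not part of the source.
# The four rule callables are assumptions inferred only from how the function
# invokes them (branchrules: soup -> url suffixes; dirnamerules: (cwd, fname)
# -> new dir; urlrules: suffix -> full url; fnamerules: url -> filename), and
# the Node construction mirrors the call inside the loop above.
branchrules = lambda soup: [a['href'] for a in soup.find_all('a', href=True)]
dirnamerules = lambda cwd, fname: '{}/{}'.format(cwd, fname.rsplit('.', 1)[0])
urlrules = lambda suffix: 'https://example.com' + suffix
fnamerules = lambda url: url.rstrip('/').split('/')[-1] or 'index.html'

root = Node(trunk=None, url='https://example.com/index.html',
            fname='index.html', cwd='pages')
branches = expand_branches(root, branchrules, dirnamerules,
                           urlrules, fnamerules, limit=10)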
def read(cls, data_name: str):
    """This method reads the dataset specified.

    Args:
        data_name (str): Specifies the path of data to be read.

    Returns:
        pd.DataFrame: The specified dataset
    """
    file_path = pth(__file__).parent / pth("%s.csv" % data_name)
    if not file_path.exists():
        raise FileNotFoundError(
            'The specified dataset does not exist. '
            'Run "list_all()" to list the available datasets.'
        )
    X = pd.read_csv(file_path, index_col=0, header=0, na_values="NaN")
    X = X.fillna(0)
    return X
def download_pages(nodelist):
    logger.debug(' > downloading pages...')
    # WARNING: this makes a big assumption --
    # EVERYTHING IN NODELIST MUST HAVE THE SAME CWD
    cwd = nodelist[0].data['cwd']
    pth('./{}'.format(cwd)).mkdir(parents=True, exist_ok=True)
    htmlpages = []
    for _, _, fnames in walk(cwd):
        htmlpages.extend(fnames)
    for node in nodelist:
        fname = node.data['fname']
        if fname not in htmlpages:
            page = requests.get(node.data['url'])
            with open('./{}/{}'.format(cwd, fname), 'w') as f:
                logger.debug('writing {}/{}...'.format(cwd, fname))
                f.write(page.text)
        else:
            logger.debug('file {}/{} already exists'.format(cwd, fname))
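# A hypothetical sketch chaining download_pages and expand_branches into a
# small breadth-first crawl; `root` and the rule callables are the assumed
# bindings from the expand_branches sketch above, and the depth of 2 is
# arbitrary.
frontier = [root]
for depth in range(2):
    download_pages(frontier)
    next_frontier = []
    for node in frontier:
        next_frontier.extend(
            expand_branches(node, branchrules, dirnamerules,
                            urlrules, fnamerules, limit=10))
    frontier = next_frontier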
from lib.functions import *
from lib.NmfClass import *
from matplotlib import pyplot as plt
import nimfa as nf
import os
from pathlib import Path as pth

os.chdir(pth(__file__).parent)

x_ori = pd.read_csv('data/input_CCLE_drug_IC50_zero-one.csv',
                    index_col=0, header=0)
# .to_numpy() replaces the long-removed DataFrame.as_matrix()
x = clean_df(x_ori).to_numpy()
k_list = [30, 25, 10, 3, 40, 2, 5, 6, 40]
data_name = "CCLE_drug"
# x = nf.examples.medulloblastoma.read(normalize=True)
# data_name = "medulloblastoma"
# k_list = list(range(2, 10))
k_list.sort()
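# The script imports the author's NmfClass, whose interface isn't shown here.
# As an illustration only, this is what a sweep over k_list could look like
# using nimfa directly (imported above as nf); the seed and max_iter values
# are assumptions, not taken from the source.
for k in k_list:
    nmf = nf.Nmf(x, rank=k, max_iter=50, seed='random_vcol')
    fit = nmf()
    print('k={}: evar={:.3f}'.format(k, fit.fit.evar()))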
def list_all(cls):
    """Prints out all the datasets present as CSV in the datasets folder."""
    for x in pth(__file__).parent.glob("*.csv"):
        print(x.stem)
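# The two classmethods above presumably live on a datasets holder class that
# isn't shown; "Datasets" below is an assumed name used only to demonstrate
# the intended call pattern (pd and pth are assumed in scope, as the method
# bodies imply).
class Datasets:
    list_all = classmethod(list_all)
    read = classmethod(read)

Datasets.list_all()                # prints every *.csv next to this module
# X = Datasets.read("CCLE_drug")   # raises FileNotFoundError if absent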
def if_nexists_make_file(save_path, init_text='None'):
    if not pth(save_path).is_file():
        with open(save_path, 'w+') as f:
            f.write(init_text)
def if_nexists_make_dir(save_path):
    # assumes `from os import mkdir` (or equivalent) at module level
    if not pth(save_path).is_dir():
        mkdir(save_path)
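# Quick demo of the two helpers above, assuming `from pathlib import Path as
# pth` and `from os import mkdir` are in scope as the bodies imply; the paths
# are arbitrary examples.
if_nexists_make_dir('output')
if_nexists_make_file('output/log.txt', init_text='')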
import argparse as ap
import pandas as pd
from bignmf.models.jnmf.integrative import IntegrativeJnmf
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt
import os
import seaborn as sns
from lib.functions import clean_df
import numpy as np
from pathlib import Path as pth

main_dir = pth(os.getcwd()).resolve()
script_dir = pth(__file__).parent.absolute()
os.chdir(script_dir)

parser = ap.ArgumentParser()
parser.add_argument("data_name", type=str, help="which dataset to use",
                    choices=["Avana", "GeCKO", "RNAi_Ach", "RNAi_merged",
                             "RNAi_Nov_DEM", "filtered_avana", "filtered_nov_dem"])
parser.add_argument("-v", "--verbose", action="store_true",
                    help="increase output verbosity")
parser.add_argument("k", type=int,
                    help="rank up to which the cophenetic correlation coefficient must be plotted")
parser.add_argument("iter", type=int,
                    help="the maximum number of iterations that the nmf will run for")
parser.add_argument("trials", type=int,
                    help="number of trials against which the consensus data will be plotted")
args = parser.parse_args()
print(args.data_name)

a = pd.read_csv("data/%s.csv" % args.data_name, index_col=0)
print("Original shape: %s" % str(a.shape))
a = clean_df(a, axis=1)
a = (a - np.min(a.values)) / np.std(a.values)
data = {args.data_name: a}
print("Cleaned shape: %s" % str(data[args.data_name].shape))
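# Example invocation, derived from the argparse spec above (positional order
# is data_name, k, iter, trials); the script name and the numbers are
# placeholders:
#
#   python run_jnmf.py Avana 10 100 50 -v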
def __get_path_for_text_files(file_name):
    return pth(c.data_files_dir) / file_name
from urllib.request import urlopen as url
from pathlib import Path as pth
import re
from scipy.special import spherical_jn as jn, spherical_yn as yn
import numpy as np
import matplotlib.pyplot as plt

variant = 9
C = 300000000
PI = 3.1415926535

file = pth('./taskfile.txt')
if not file.exists():
    txt = url('https://jenyay.net/uploads/Student/Modelling/task_02.txt').read()
    with file.open('wb') as f0:
        f0.write(txt)

if file.exists():
    with file.open() as f1:
        lines = [x for x in f1]
    p = re.compile(r'[0-9\.\-e]+')
    m = p.findall(lines[variant - 1])
    print(m[1:])

D = float(m[1])
fmin = float(m[2])
fmax = float(m[3])
f = np.linspace(fmin, fmax, 400)
r = D / 2
from pathlib import Path as pth
import os
import sys

if len(sys.argv) < 2:
    # guard against a missing command-line argument
    print("Give an argument")
else:
    print("Creating script file")
    loc = sys.argv[1].split('.')[0]
    a = pth(os.getcwd()) / pth("run-%s.sh" % loc)
    a.touch()
    with open("run-%s.sh" % loc, "w") as x:
        x.write("#!/bin/sh\n")  # the shebang must be the very first line
        x.write("echo $SHELL\n")
        x.write("#$ -S /bin/sh # set shell in UGE\n")
        x.write("export LANG=en_US.UTF-8\n")
        x.write("module load python/3.6\n")
        x.write("python3 %s.py\n" % loc)
    print("Created script file run-%s.sh" % loc)
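# Example: `python make_runner.py analysis.py` (the generator's own filename
# is a placeholder) creates run-analysis.sh containing, in order: the shebang,
# an echo of $SHELL, the UGE shell directive, the locale export, the
# `module load python/3.6` line, and finally `python3 analysis.py`.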
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path as pth

p = pth('results')
res = p / 'task_01_307b_Zhuravlev_9.txt'
p.mkdir(exist_ok=True)

A = 0

def y(x):
    val = 0.5 + ((np.sin(x**2 - A**2))**2 - 0.5) / (1 + 0.001 * (x**2 + A**2))**2
    # np.round handles scalars and arrays alike; built-in round() fails on arrays
    return np.round(val, 2)

with res.open('w') as f:
    f.write("X Y")
    x = -10
    while x <= 10:
        f.write("\n" + str(x) + " " + str(y(x)))
        x = x + 0.5

x = np.arange(-10, 10)
fig, ax = plt.subplots()
ax.plot(x, y(x))
lgnd = ax.legend(['y'], loc='upper center', shadow=True)
lgnd.get_frame().set_facecolor('green')
plt.show()
from scipy.constants import c, pi
from scipy.special import spherical_jn as jn, spherical_yn as yn
import numpy as np
from pathlib import Path as pth
import matplotlib.pyplot as plt
from urllib.request import urlopen as uopn
import re
import pandas as pd

tsk_v = 2
pfile = pth('./taskfile.csv')
df = pd.read_csv(pfile, header=None, delimiter=';')
print(df.head())
data = df.to_numpy()

# get data for my variant
m = data[tsk_v - 1, :]
D = float(m[0])
fmin = float(m[1])
fmax = float(m[2])
f = np.linspace(fmin, fmax, 400)
r = D / 2

def hn(n, x):
    # spherical Hankel function of the first kind: h_n(x) = j_n(x) + i*y_n(x)
    return complex(jn(n, x), yn(n, x))

def bn(n, x):
    upr_n = x * jn(n - 1, x) - n * jn(n, x)
    # NOTE: the body below the numerator is reconstructed (the snippet is
    # truncated); it is the textbook conducting-sphere coefficient
    # b_n = [x j_{n-1}(x) - n j_n(x)] / [x h_{n-1}(x) - n h_n(x)]
    lwr_n = x * hn(n - 1, x) - n * hn(n, x)
    return upr_n / lwr_n
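# A sketch of how these pieces are typically combined for this task: the
# monostatic RCS of a perfectly conducting sphere,
#   sigma = (lambda^2 / pi) * |sum_n (-1)^n (n + 1/2) (b_n - a_n)|^2
# with a_n = j_n(kr) / h_n(kr) and k = 2*pi/lambda. The summation cutoff N
# and the plot labels are assumptions, not taken from the source.
def an(n, x):
    return jn(n, x) / hn(n, x)

lam = c / f                  # wavelength at each frequency sample
k = 2 * pi / lam             # wavenumber
N = 30                       # assumed-sufficient summation cutoff
sigma = np.zeros_like(f)
for i, ki in enumerate(k):
    s = sum((-1)**n * (n + 0.5) * (bn(n, ki * r) - an(n, ki * r))
            for n in range(1, N + 1))
    sigma[i] = (lam[i]**2 / pi) * abs(s)**2

plt.plot(f, sigma)
plt.xlabel('f, Hz')
plt.ylabel('RCS, m^2')
plt.show()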