示例#1
0
def expand_branches(node,
                    branchrules,
                    dirnamerules,
                    urlrules,
                    fnamerules,
                    debug=False,
                    sort=False,
                    limit=0):
    """Create one child ``Node`` per distinct branch URL found in ``node``'s page.

    The page is read from ``node.data['cwd'] + '/' + node.data['fname']`` and
    parsed with the ``html5lib`` parser before the rule callables are applied.

    Args:
        node: Parent node; its ``data`` mapping must provide ``'cwd'`` and
            ``'fname'``.
        branchrules: Callable applied to the parsed soup; returns an iterable
            of branch URL suffixes.
        dirnamerules: Callable ``(cwd, fname)`` returning the directory used
            for the child nodes. That directory is created if missing.
        urlrules: Callable turning one suffix into a full branch URL.
        fnamerules: Callable turning a branch URL into a file name.
        debug: When true, log cwd / fname / url for every node created.
        sort: Currently ignored (see the note in the body).
        limit: When greater than 0, only the first ``limit`` entries yielded
            by ``branchrules`` are examined; duplicates count toward it.

    Returns:
        list: The newly created child nodes, in discovery order.
    """
    # Design note carried over from the original author: to handle images,
    # branchrules may need to become a dict of {content check: branchrule},
    # e.g. keyed on something like "soup.text contains 'external site'".
    # The file extension would have to be preserved somehow, and urlrules
    # might need to become a dict as well.

    parent_dir = node.data['cwd']
    fname = node.data['fname']
    with open(parent_dir + '/' + fname) as page_file:
        markup = page_file.read()
    logger.debug('    > expanding {} branches...'.format(fname))

    # Apply the branchrules callable to the parsed soup.
    suffixes = branchrules(bsp(markup, 'html5lib'))
    # TODO: `sort` is not honoured yet -- BS4 Tag defines no < / > comparison,
    # so the suffix list cannot simply be passed through sorted().

    # Apply the dirnamerules callable to get the directory for the children.
    branch_dir = dirnamerules(parent_dir, fname)
    pth('./{}'.format(branch_dir)).mkdir(parents=True, exist_ok=True)

    # Populate the branch nodes, skipping URLs that were already produced.
    children = []
    seen_urls = []
    capped = limit > 0
    for position, suffix in enumerate(suffixes):
        if capped and position >= limit:
            break
        # TODO: on the last pass this only needs to build the final image URL
        # in the right format (template is given in the last branchnodes call).
        full_url = urlrules(suffix)
        if full_url not in seen_urls:
            seen_urls.append(full_url)
            child_fname = fnamerules(full_url)
            children.append(
                Node(trunk=node, url=full_url, fname=child_fname,
                     cwd=branch_dir))
            if debug:
                logger.debug('cwd {}; fname {}; url {}'.format(
                    branch_dir, child_fname, full_url))
    return children
示例#2
0
    def read(cls, data_name: str):
        """Load a CSV dataset stored next to this module.

        Args:
            data_name (str): Dataset name, i.e. the stem of the ``.csv`` file
                located in the same directory as this module.

        Returns:
            pd.DataFrame: The dataset, read with the first column as index and
                the first row as header; missing values are replaced by 0.

        Raises:
            FileNotFoundError: If ``<data_name>.csv`` does not exist there.
        """
        csv_path = pth(__file__).parent / pth("%s.csv" % data_name)
        if csv_path.exists():
            frame = pd.read_csv(csv_path, index_col=0, header=0, na_values="NaN")
            return frame.fillna(0)
        raise FileNotFoundError(
            'The specified dataset does not exist. Run "list_all()" to list the available datasets.'
        )
示例#3
0
def download_pages(nodelist):
    """Download each node's URL into the nodes' shared working directory.

    A page is fetched only when no file with the node's ``fname`` is already
    present directly inside that directory.

    Args:
        nodelist: Sequence of nodes whose ``data`` mapping provides ``'cwd'``,
            ``'fname'`` and ``'url'``. Every node is assumed to share the
            ``'cwd'`` of the first one; this is not verified.

    Returns:
        None. An empty ``nodelist`` is a no-op.
    """
    # Nothing to fetch; also avoids indexing into an empty sequence below.
    if not nodelist:
        return
    logger.debug('    > downloading pages...')
    # ASSUMPTION: everything in nodelist has the same cwd as the first node.
    cwd = nodelist[0].data['cwd']
    target_dir = pth('./{}'.format(cwd))
    target_dir.mkdir(parents=True, exist_ok=True)
    # Snapshot only the files directly inside the target directory. A
    # recursive walk would also report same-named files from subdirectories
    # as "already downloaded", although pages are written to the top level.
    existing = {entry.name for entry in target_dir.iterdir() if entry.is_file()}
    for node in nodelist:
        fname = node.data['fname']
        if fname in existing:
            logger.debug('file {}/{} already exists'.format(cwd, fname))
            continue
        page = requests.get(node.data['url'])
        with open('./{}/{}'.format(cwd, fname), 'w') as f:
            logger.debug('writing {}/{}...'.format(cwd, fname))
            f.write(page.text)
示例#4
0
from lib.functions import *
from lib.NmfClass import *
from matplotlib import pyplot as plt
import nimfa as nf
import os
from pathlib import Path as pth

# Work from this script's directory so the relative data path below resolves.
os.chdir(pth(__file__).parent)
x_ori = pd.read_csv('data/input_CCLE_drug_IC50_zero-one.csv',
                    index_col=0,
                    header=0)
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and is available in every pandas version.
x = clean_df(x_ori).values
# Values to run, sorted in place further down.
# NOTE(review): 40 is listed twice -- confirm whether that is intentional.
k_list = [
    30,
    25,
    10,
    3,
    40,
    2,
    5,
    6,
    40,
]
data_name = "CCLE_drug"

# Alternative input, kept for reference:
# x = nf.examples.medulloblastoma.read(normalize=True)
# data_name = "medulloblastoma"
# k_list = list(range(2, 10))

k_list.sort()
示例#5
0
 def list_all(cls):
     """Print the stem of every ``*.csv`` file located next to this module."""
     module_dir = pth(__file__).parent
     for csv_file in module_dir.glob("*.csv"):
         print(csv_file.stem)
示例#6
0
def if_nexists_make_file(save_path, init_text='None'):
    """Create ``save_path`` with ``init_text`` unless a regular file is there.

    Args:
        save_path: Path of the file to create.
        init_text: Text written into a newly created file. The default is the
            literal string ``'None'``, not the ``None`` object.

    Returns:
        None. An existing regular file is left untouched.
    """
    if pth(save_path).is_file():
        return
    # The context manager closes the handle even if the write fails.
    with open(save_path, 'w') as f:
        f.write(init_text)
示例#7
0
def if_nexists_make_dir(save_path):
    """Create the directory ``save_path`` unless it already is a directory.

    Args:
        save_path: Path of the directory to create. Parent directories are
            not created.

    Raises:
        FileExistsError: If ``save_path`` exists but is not a directory.
        FileNotFoundError: If the parent directory does not exist.
    """
    # One call instead of check-then-create, so a directory that appears
    # between the check and the creation no longer causes an error.
    pth(save_path).mkdir(exist_ok=True)
import argparse as ap
import pandas as pd
from bignmf.models.jnmf.integrative import IntegrativeJnmf 
import matplotlib as mpl
mpl.use('Agg')
from matplotlib import pyplot as plt
import os
import seaborn as sns
from lib.functions import clean_df
import numpy as np
from pathlib import Path as pth
# Remember the launch directory, then work from the script's own directory so
# the relative "data/" path below resolves.
main_dir = pth(os.getcwd()).resolve()
script_dir = pth(__file__).parent.absolute()
os.chdir(script_dir)

# Command-line interface: dataset name, rank, iteration cap, trial count.
parser = ap.ArgumentParser()
parser.add_argument(
    "data_name",
    type=str,
    help="Which dataset to use",
    choices=[
        "Avana",
        "GeCKO",
        "RNAi_Ach",
        "RNAi_merged",
        "RNAi_Nov_DEM",
        "filtered_avana",
        "filtered_nov_dem",
    ],
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="increase output verbosity",
)
parser.add_argument(
    "k",
    type=int,
    help="rank uptil which the copehenetic correlation code must be plotted",
)
parser.add_argument(
    "iter",
    type=int,
    help="the maximum number of iterations that the nmf will run for",
)
parser.add_argument(
    "trials",
    type=int,
    help="number of trails against which the consensus data will be plotted",
)
args = parser.parse_args()

print(args.data_name)
a = pd.read_csv("data/{}.csv".format(args.data_name), index_col=0)
print("Original shape: {}".format(a.shape))
a = clean_df(a, axis=1)
# Shift by the global minimum and scale by the global standard deviation
# (both taken over every cell of the frame, not per column).
a = (a - np.min(a.values)) / np.std(a.values)
data = {args.data_name: a}
print("Cleaned shape: {}".format(data[args.data_name].shape))
示例#9
0
 def __get_path_for_text_files(file_name):
     """Return the path of ``file_name`` inside ``c.data_files_dir``."""
     data_dir = pth(c.data_files_dir)
     return data_dir.joinpath(file_name)
示例#10
0
from urllib.request  import urlopen as url
from pathlib import Path as pth
import re
from scipy.special import spherical_jn as jn, spherical_yn as yn
import numpy as np
import matplotlib.pyplot as plt

# 1-based number of the line in the task file to use.
variant = 9
# NOTE(review): presumably the speed of light in m/s and pi -- confirm usage.
C = 300000000
PI = 3.1415926535

file = pth('./taskfile.txt')
# Download the task table only when no local copy exists. The write has to
# stay inside this branch: `txt` is defined only here, and reopening an
# existing file in 'wb' mode would truncate it.
if not file.exists():
    with url('https://jenyay.net/uploads/Student/Modelling/task_02.txt') as response:
        txt = response.read()
    with file.open('wb') as f0:
        f0.write(txt)

# Read every line; the handle is closed as soon as the block ends.
with file.open() as f1:
    lines = list(f1)
# Pull the number-like tokens out of the line for this variant.
p = re.compile(r'[0-9\.\-e]+')
m = p.findall(lines[variant-1])
print(m[1:])

# Tokens 1..3 are used as D, fmin and fmax (token 0 is skipped).
D = float(m[1])
fmin = float(m[2])
fmax = float(m[3])
# 400 evenly spaced samples between fmin and fmax; r is half of D.
f = np.linspace(fmin, fmax, 400)
r = D/2
示例#11
0
from pathlib import Path as pth
import os
import sys

# The target script name must be given as the first command-line argument.
# (Testing `sys.argv[1] in globals()` raised IndexError when it was missing
# and rejected valid names that happen to match a module-level name.)
if len(sys.argv) < 2:
    print("Give an argument")
else:
    print("Creating script file")
    # Strip the extension: "job.py" -> "job".
    loc = sys.argv[1].split('.')[0]
    a = pth(os.getcwd())
    a = a / pth("run-%s.sh" % loc)
    # NOTE(review): the generated file starts with an echo line, so the
    # "#!/bin/sh" line is not the first line and will not act as a shebang.
    # Content is left unchanged here; confirm whether that is intended.
    with a.open("w") as x:
        x.write("echo $SHELL\n")
        x.write("#!/bin/sh\n")
        x.write("echo $SHELL\n")
        x.write("#$ -S /bin/sh # set shell in UGE\n")
        x.write("export LANG=en_US.UTF-8\n")
        x.write("module load python/3.6\n")
        x.write("python3 %s.py\n" % loc)
    # The file written above is run-<loc>.sh, so report that name.
    print("Created script file run-%s.sh" % loc)
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path as pth

p = pth('results')
res = p / 'task_01_307b_Zhuravlev_9.txt'
# Make sure the output directory exists, then always write the table and
# plot (previously all work was skipped when the directory already existed).
p.mkdir(exist_ok=True)

# Constant used inside y(); must be defined before y() is first called.
A = 0


def y(x):
    """Evaluate the test function at ``x``, rounded to two decimals.

    ``x`` may be a scalar or a NumPy array; ``np.round`` handles both.
    """
    value = 0.5 + ((np.sin((x**2) - (A**2)))**2 - 0.5) / (1 + 0.001 * ((x**2) + (A**2)))**2
    return np.round(value, 2)


# Tabulate y(x) for x = -10, -9.5, ..., 10 into the results file.
with res.open('w') as f:
    f.write("X    Y")
    x = -10
    while x <= 10:
        f.write("\n" + str(x) + "    " + str(y(x)))
        x = x + 0.5

# Plot the function on the integer grid -10..9 (np.arange excludes the stop).
x = np.arange(-10, 10)
fig, ax = plt.subplots()
ax.plot(x, y(x))
lgnd = ax.legend(['y'], loc='upper center', shadow=True)
lgnd.get_frame().set_facecolor('green')
plt.show()

示例#13
0
文件: Task_2.py 项目: haicvp1998/hai
from scipy.constants import c, pi
from scipy.special import spherical_jn as jn, spherical_yn as yn
import numpy as np
from pathlib import Path as pth
import matplotlib.pyplot as plt
from urllib.request import urlopen as uopn
import re
import pandas as pd

# Variant number for this task.
tsk_v = 2

# Load the semicolon-separated task table (no header row) and preview it.
pfile = pth('./taskfile.csv')
df = pd.read_csv(pfile, delimiter=';', header=None)
print(df.head())
data = df.to_numpy()

# Parameters for my variant are taken from the second row of the table:
# columns 0, 1 and 2 hold D, fmin and fmax.
m = data[1]
D, fmin, fmax = float(m[0]), float(m[1]), float(m[2])

# 400 evenly spaced samples between fmin and fmax; r is half of D.
f = np.linspace(fmin, fmax, num=400)
r = D / 2


def hn(n, x):
    """Return ``jn(n, x) + 1j * yn(n, x)`` as a Python ``complex``.

    With ``jn`` / ``yn`` being the spherical Bessel functions of the first
    and second kind, this is the spherical Hankel function of the first kind.
    """
    real_part = jn(n, x)
    imag_part = yn(n, x)
    return complex(real_part, imag_part)


def bn(n, x):
    upr_n = x * jn(n - 1, x) - n * jn(n, x)