Exemplo n.º 1
0
def load_gff_file():
	"""Stage the reference GO file under ./data and load it into 'gotable'.

	Calls create_go_table(), copies ../data/<reference_go> to
	./data/<reference_go>, passes the copy to delete_first_line() and then
	hands it to load_table(). The file under ../data is only read here.
	"""
	create_go_table()
	staged_copy = os.path.abspath(os.path.join('data', reference_go))
	original = os.path.abspath(os.path.join(os.pardir, 'data', reference_go))
	shutil.copyfile(original, staged_copy)
	# All further processing happens on the staged copy.
	delete_first_line(staged_copy)
	load_table('gotable', staged_copy)
Exemplo n.º 2
0
 def load_promoters_to_mysql(self):
     """Create and load one promoter table per size label.

     For each label in ('10000', '5000', '2000', '1000', '500') this prints
     the file and table names, calls create_promoter_table() for table
     promoter_<label> and loads data/promoters.sql.<label>.txt into it.
     """
     for size in ('10000', '5000', '2000', '1000', '500'):
         source_file = 'data/promoters.sql.{0}.txt'.format(size)
         table = "promoter_{0}".format(size)
         print(source_file, table)
         create_promoter_table(table)
         load_table(table, source_file)
Exemplo n.º 3
0
def get_first_line():
    """Derive the ortholog column list from the reference file's header row.

    Reads the first line of ``../data/<reference_ortholog>``, keeps every
    tab-separated column whose name contains neither ``gene_id`` nor
    ``_desc``, writes the kept names as a PHP array to
    ``../bin/php/settings/ortholog.php`` and passes them to
    ``create_ortholog``.

    Raises:
        OSError: if the reference file cannot be read or the PHP settings
            file cannot be written.
        UnicodeDecodeError: if the header line is not valid UTF-8.
    """
    src = os.path.abspath(os.path.join(os.pardir, 'data', reference_ortholog))
    # Read the header in-process rather than spawning `head`: no external
    # tool is needed, and an unreadable file raises instead of silently
    # producing an empty column list. Binary mode keeps "\n" as the only
    # line terminator.
    with open(src, "rb") as infile:
        line = infile.readline().decode("utf-8")
    # rstrip() takes a set of characters, not a pattern, so only the line
    # terminators are listed (a "|" here would also eat trailing pipes).
    line = line.rstrip("\r\n")
    ortho = [
        col for col in line.split("\t")
        if 'gene_id' not in col and '_desc' not in col
    ]
    settings_path = os.path.abspath(
        os.path.join(os.pardir, 'bin', 'php', 'settings', 'ortholog.php'))
    php = "<?php\n$ortholog = ['{0}'];\n?>".format("','".join(ortho))
    with open(settings_path, "w") as outfile:
        outfile.write(php)
    create_ortholog(ortho)


if __name__ == '__main__':
    # get_first_line() writes the PHP column list and calls create_ortholog();
    # afterwards the same reference file is handed to load_table().
    get_first_line()
    ortholog_file = os.path.abspath(
        os.path.join(os.pardir, 'data', reference_ortholog))
    load_table('ortholog', ortholog_file)
Exemplo n.º 4
0
        print(filename)
        ## define original source
        src = os.path.join(rnaseq_dir, filename)
        ## define path of destination
        dest = os.path.abspath(os.path.join('data', filename))
        ## copy source to destination
        shutil.copyfile(src, dest)
        ## delete the first line
        delete_first_line(dest)
        ## remove inf and -inf and replace it with 100000
        parse_infinity(dest)
        ## Make a R file with only the experiments
        a = create_R_file(dest)
        ## Get the experiments list
        print(a)
        ## Collect the R file list
        outfiles.append(dest)

    create_rnaseq_table()
    for file in outfiles:
        load_table('rnaseq', file)

    expslist, hash = combine_rnaseq(outfiles)
    rnaseq_combine_file = os.path.abspath(
        os.path.join('data', 'rnaseq.combine'))
    rnaseqcombine(expslist, hash, rnaseq_combine_file)
    update_rnaseq(expslist, rnaseq_combine_file)
    delete_first_line(rnaseq_combine_file)
    create_rnaseq_expression_table(expslist)
    load_table('rnaseq_expression', rnaseq_combine_file)
Exemplo n.º 5
0
from mysql_tables import create_pathway_table, load_table

sys.path.append(os.path.abspath('../'))
from data import *

def genfile(boo):
	"""Write the PHP settings file holding the pathway-availability flag.

	Args:
		boo: Text inserted verbatim as the value of ``$available_pathway``
			(the callers in this file pass 'TRUE' or 'FALSE').

	The file is written to ``../bin/php/settings/pathway.php`` relative to
	the current working directory and overwrites any existing file.

	Raises:
		OSError: if the settings file cannot be opened for writing
			(for example when the settings directory does not exist).
	"""
	settings_path = os.path.abspath(os.path.join(os.pardir, 'bin', 'php', 'settings', 'pathway.php'))
	php = "<?php\n$available_pathway = {0};\n?>".format(boo)
	with open(settings_path, "w") as outfile:
		outfile.write(php)

def add_description():
	"""Print 'add pathway' and write TRUE to the pathway PHP flag via genfile()."""
	flag = 'TRUE'
	print('add pathway')
	genfile(flag)

def no_description():
	"""Print 'no pathway' and write FALSE to the pathway PHP flag via genfile()."""
	flag = 'FALSE'
	print('no pathway')
	genfile(flag)

if __name__ == '__main__':
	print(reference_pathway)
	if not reference_pathway:
		# No pathway file configured: only record the FALSE flag.
		no_description()
	else:
		# Create the table, load ../data/<reference_pathway>, then record
		# the TRUE flag.
		create_pathway_table()
		pathway_file = os.path.abspath(os.path.join(os.pardir, 'data', reference_pathway))
		load_table('pathway', pathway_file)
		add_description()
Exemplo n.º 6
0
  gene_id, gene_name = extract_gene_id_name(cols[8])
  return gene_id, chr, gene_start, gene_end, gene_length, gene_strand, gene_name

def parse_gff(filename, outfile):
  """Write one tab-separated line per gene record found in a GFF/GTF file.

  Lines starting with '#', lines with fewer than three tab-separated
  columns (including blank lines), records whose third column is not
  'gene' and records whose first column contains 'scaffold' are skipped.
  Every remaining record is passed to extract_gene_info() and the returned
  fields are joined with tabs.

  Args:
    filename: Path of the input file.
    outfile: Path of the output file; it is overwritten.

  Raises:
    OSError: if either file cannot be opened.
  """
  # `with` closes both handles even when a record fails to parse.
  with open(filename) as fh, open(outfile, 'w') as out:
    for rec in fh:
      if rec.startswith('#'):
        continue
      # rstrip() takes a set of characters, not a pattern, so only the
      # line terminators are listed (a "|" here would also eat pipes).
      cols = rec.rstrip("\r\n").split("\t")
      # Blank or truncated lines have no feature-type column to inspect.
      if len(cols) < 3:
        continue
      if cols[2] != 'gene':
        continue
      if 'scaffold' in cols[0]:
        continue
      out.write("\t".join(extract_gene_info(cols)) + "\n")
  
if __name__ == '__main__':
  # Parse ../data/<reference_gtf> into data/<reference_gtf>.parse, then
  # call create_gff_table() and load the parsed file into 'gfftest'.
  gtf_source = '../data/' + reference_gtf
  parsed_file = 'data/' + reference_gtf + '.parse'
  parse_gff(gtf_source, parsed_file)
  create_gff_table()
  load_table('gfftest', parsed_file)

Exemplo n.º 7
0
def genfile(boo):
    """Write the PHP settings file holding the description-availability flag.

    Args:
        boo: Text inserted verbatim as the value of
            ``$available_description`` (the callers in this file pass
            'TRUE' or 'FALSE').

    The file is written to ``../bin/php/settings/descriptions.php`` relative
    to the current working directory and overwrites any existing file.

    Raises:
        OSError: if the settings file cannot be opened for writing
            (for example when the settings directory does not exist).
    """
    settings_path = os.path.abspath(
        os.path.join(os.pardir, 'bin', 'php', 'settings', 'descriptions.php'))
    php = "<?php\n$available_description = {0};\n?>".format(boo)
    with open(settings_path, "w") as outfile:
        outfile.write(php)


def add_description():
    """Print 'add description' and write TRUE to the PHP flag via genfile()."""
    flag = 'TRUE'
    print('add description')
    genfile(flag)


def no_description():
    """Print 'no description' and write FALSE to the PHP flag via genfile()."""
    flag = 'FALSE'
    print('no description')
    genfile(flag)


if __name__ == '__main__':
    print(reference_description)
    if not reference_description:
        # No description file configured: only record the FALSE flag.
        no_description()
    else:
        # Create the table, load ../data/<reference_description>, then
        # record the TRUE flag.
        create_gene_descriptions()
        description_file = os.path.abspath(
            os.path.join(os.pardir, 'data', reference_description))
        load_table('gene_descriptions', description_file)
        add_description()
Exemplo n.º 8
0

def parse_exp(filename, modfilename):
    """Turn a comma-separated file into a tab-separated one and tag each row.

    Rewrites ``filename`` in place: every comma becomes a tab, and a tab
    followed by ``modfilename`` is appended to the end of every line. An
    empty file is left untouched and a missing final newline stays missing.

    Args:
        filename: Path of the file to rewrite.
        modfilename: Label appended as the last column of every line.

    Raises:
        OSError: if the file cannot be read or written.
    """
    # Done in Python instead of building a `sed -i` shell command: paths
    # containing spaces or shell metacharacters are safe, labels containing
    # "/", "&" or "\" are taken literally, and failures raise instead of
    # being ignored.
    suffix = b"\t" + modfilename.encode("utf-8")
    with open(filename, "rb") as infile:
        data = infile.read()
    if not data:
        return
    # Lines are split on "\n" only, so a "\r" stays part of its line.
    ends_with_newline = data.endswith(b"\n")
    rows = data.split(b"\n")
    if ends_with_newline:
        # The split leaves an empty trailing element, which is not a line.
        rows.pop()
    rewritten = b"\n".join(row.replace(b",", b"\t") + suffix for row in rows)
    if ends_with_newline:
        rewritten += b"\n"
    with open(filename, "wb") as outfile:
        outfile.write(rewritten)


if __name__ == '__main__':
    # For every CSV in ../data_chipseq: copy it under ./data, run
    # delete_first_line(), parse_comma() and parse_exp() on the copy, then
    # load the copy into the 'chipseq' table.
    chipseq_dir = os.path.abspath(os.path.join(os.pardir, 'data_chipseq'))
    # Test the real extension so names that merely contain ".csv"
    # (for example "x.csv.bak") are not picked up.
    files = [f for f in os.listdir(chipseq_dir) if f.endswith('.csv')]
    for filename in files:
        ## define original source
        src = os.path.join(chipseq_dir, filename)
        ## define path of destination
        dest = os.path.abspath(os.path.join('data', filename))
        print(filename, src, dest)
        ## copy source to destination
        shutil.copyfile(src, dest)
        ## label passed to parse_exp(): the file name without ".csv"
        modfilename = filename.replace('.csv', '')
        # str.replace() is not a regex: this removes a literal
        # backslash-dot pair. The backslash is escaped so the literal is a
        # valid string on every Python version.
        # NOTE(review): if the intent was to drop plain dots from the
        # label, this needs '.' instead - confirm before changing, since it
        # would alter the labels being loaded.
        modfilename = modfilename.replace('\\.', '')
        # NOTE(review): create_chipseq() runs once per file; if it recreates
        # the table, rows loaded from earlier files are lost - confirm.
        create_chipseq()
        ## delete the first line
        delete_first_line(dest)
        parse_comma(dest)
        parse_exp(dest, modfilename)
        load_table('chipseq', dest)

    #create_chipseq_table(filename)
    #get_peak_seqs()
Exemplo n.º 9
0
def load_cluster():
	"""Call create_cluster() and load ../data/cluster.txt into the 'cluster' table."""
	ref_dir = os.path.abspath(os.path.join(os.pardir, 'data'))
	cluster = os.path.join(ref_dir, 'cluster.txt')
	# A bare `create_cluster` only evaluates the name; the parentheses are
	# required for the function to actually run before the load.
	# NOTE(review): assumes create_cluster takes no arguments, like the other
	# create_* helpers called in this code base - confirm.
	create_cluster()
	load_table('cluster', cluster)