--invert			Invert selection (leave -n out)				[off]
-r	--remove-nonseq-chars		Remove non sequence characters (like *) from sequences	[off]

''')

# parser
# Command-line options are parsed once, at module level.
p = ap.ArgumentParser()
p.add_argument("-in", "--input-file")
p.add_argument("-out", "--output-file")
p.add_argument("--invert", action="store_true")
# args.names is opened unconditionally right after parsing, so a missing -n
# is reported by argparse instead of surfacing as a TypeError from open(None).
p.add_argument("-n", "--names", metavar="NAMES", required=True)
p.add_argument("-r", "--remove-nonseq-chars", action="store_true")
args = p.parse_args()

# read input file
# Collect the distinct names listed in args.names: the first
# whitespace-delimited token of each line, with ">" stripped from both ends.
sys.stderr.write("[{0}]\tImporting list of names\n".format(at()))
NAMES = set()
with open(args.names, "r") as FOFN:  # closed even if reading fails part-way
    for line in FOFN:
        fields = line.split()
        if not fields:
            # blank lines carry no name; skip them rather than fail on [0]
            continue
        NAMES.add(fields[0].strip(">"))
#
NAMES = list(NAMES)
sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), len(NAMES)))

# parsing input fasta and writing to output
# Report the start of the filtering pass on stderr; k starts at zero (its use
# lies outside this excerpt).
parse_msg = "[{0}]\tParsing input FASTA and writing to output lines matching name list\n"
sys.stderr.write(parse_msg.format(at()))
k = 0
Exemplo n.º 2
0
else:
    INPUT = sys.stdin

# Results go to the named output file when one was given, otherwise stdout.
OUTPUT = open(args.output_file, "w") if args.output_file else sys.stdout

# Diagnostics go to the named error file when one was given, otherwise stderr.
ERROR = open(args.error_file, "w") if args.error_file else sys.stderr

if args.verbose:
    start_msg = "[{0}] Reading input file, computing boundaries ... \n"
    ERROR.write(start_msg.format(at()))

dic = {}
for line in INPUT:
    lst = line.rstrip("\r\n\b").split("\t")
    scaffold = lst[0]
    position = int(lst[3])
    #
    try:
        dic[scaffold]
    except KeyError:
        dic[scaffold] = [[position, position]]
    #
    k = 0
    lastIndex = len(dic[scaffold]) - 1
    while ((position > dic[scaffold][k][1] + 1) and (k < lastIndex)):
Exemplo n.º 3
0
p.add_argument("-e", "--error-file")
args = p.parse_args()

# Results go to the named output file when one was given, otherwise stdout.
OUTPUT = open(args.output_file, "w") if args.output_file else sys.stdout

# Diagnostics go to the named error file when one was given, otherwise stderr.
ERROR = open(args.error_file, "w") if args.error_file else sys.stderr

# reading file A
if args.verbose:
    file_a_msg = "[{0}] Reading File A\n"
    ERROR.write(file_a_msg.format(at()))

FileA = {}
INPUT = open(args.gff_a, "r")
for line in INPUT:
    lst = line.rstrip("\b\r\n").split("\t")
    if lst[2] == args.feature:
        scaffold = str(lst[0])
        start = int(lst[3])
        end = int(lst[4])
        try:
            if len(FileA[scaffold]) > 0:
                for region in FileA[scaffold]:
                    if end == region[0] - 1:
                        idx = FileA[scaffold].index(region)
                        FileA[scaffold][idx] = [start, region[1]]
Exemplo n.º 4
0
    return y


# gtfdic function
def gtfdic(x):
    """Parse a GTF-style attribute string into a dict.

    The string is split on "; " and every piece of the form
    ``key "value"`` contributes one entry.  Pieces without a space followed
    by a double-quoted value (an empty string, an unquoted attribute) are
    skipped instead of failing with AttributeError on the missing match.

    Args:
        x: attribute text, e.g. 'gene_id "G1"; transcript_id "T1";'.

    Returns:
        dict mapping each attribute name to its unquoted value; when a name
        is repeated, the last occurrence wins.
    """
    y = {}
    for z in x.split("; "):
        # Both groups are greedy: the value runs up to the last double quote
        # of the piece and the key is everything before its opening quote.
        result = re.search(r'(.*) "(.*)"', z)
        if result is None:
            continue
        y[result.group(1)] = result.group(2)
    return y


### CREATE NAMES LIST

sys.stderr.write("[{0}]\tStoring names ...\n".format(at()))

# One entry per line of args.names with trailing whitespace removed; the
# context manager closes the file even if reading fails part-way.
with open(args.names, "r") as INPUT:
    NAMES = [line.rstrip() for line in INPUT]

# k holds the number of lines read (blank lines included).
k = len(NAMES)

sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), k))

### SELECT GENES
else:
	INPUT = sys.stdin

# Results go to the named output file when one was given, otherwise stdout.
OUTPUT = open(args.output_file, "w") if args.output_file else sys.stdout

# Diagnostics go to the named error file when one was given, otherwise stderr.
ERROR = open(args.error_file, "w") if args.error_file else sys.stderr


if args.verbose:
	start_msg = "[{0}] Reading input file, computing boundaries ... \n"
	ERROR.write(start_msg.format(at()))

dic = {}
for line in INPUT:
	lst = line.rstrip("\r\n\b").split("\t")
	scaffold = lst[0]
	position = int(lst[3])
	#
	try:
		dic[scaffold]
	except KeyError:
		dic[scaffold] = [[position,position]]
	#
	k=0
	lastIndex = len(dic[scaffold])-1
	while ((position > dic[scaffold][k][1]+1) and (k < lastIndex)):
Exemplo n.º 6
0
#while True:
# Runs five rounds. Each round takes a copy of teste[vezes] as the titles
# "found" and sorts indices into three lists: inalterados (unchanged),
# excluidos (removed) and novos (new), relative to the existing `titulos`.
for vezes in range(5):
    try:
        # Placeholder statement; it appears to stand in for the commented-out
        # HTTP request on the next line.
        qwertyuiop = 1+1
        #r = session.get('https://www.inscricao.marinha.mil.br/marinha/index_concursos.jsp?id_concurso=384')
    except Exception as e:
        # The printed message is Portuguese for "Connection refused/slow".
        print('Conexão Rejeitada/Demorada')
    else:
        # Counters are reset every round; they are not updated in this excerpt.
        n_titulos = 0
        n_titulos_novos = 0
        #titulos_encontrados = [c.text for c in r.html.find('u')]
        titulos_encontrados = teste[vezes].copy()
        novos = []
        inalterados = []
        excluidos = []
        # presumably a formatted local timestamp; at/lt are defined outside this excerpt
        hora_consulta = at(lt())

        # Pass 1: c indexes `titulos`. An index is appended to inalterados once
        # per equal entry in titulos_encontrados (so duplicates are possible);
        # indices with no equal entry go to excluidos.
        for c in range(len(titulos)):
            for b in range(len(titulos_encontrados)):
                if titulos[c] == titulos_encontrados[b]:
                    inalterados.append(c)
            if not c in inalterados:
                excluidos.append(c)

        # Pass 2: c now indexes `titulos_encontrados`. The inner loop has no
        # effect (its body is `pass`).
        # NOTE(review): inalterados/excluidos hold indices into `titulos`, yet
        # they are tested here against indices into `titulos_encontrados`, so
        # the two index spaces are mixed. This looks like a bug; confirm the
        # intended meaning of `novos` before relying on it.
        for c in range(len(titulos_encontrados)):
            for b in range(len(titulos)):
                if titulos[b] == titulos_encontrados[c]:
                    pass
            if not c in inalterados and not c in excluidos:
                novos.append(c)
Exemplo n.º 7
0
args = p.parse_args()

# Results go to the named output file when one was given, otherwise stdout.
OUTPUT = open(args.output_file, "w") if args.output_file else sys.stdout

# Diagnostics go to the named error file when one was given, otherwise stderr.
ERROR = open(args.error_file, "w") if args.error_file else sys.stderr


# reading file A
if args.verbose:
	file_a_msg = "[{0}] Reading File A\n"
	ERROR.write(file_a_msg.format(at()))

FileA = {}
INPUT = open(args.gff_a, "r")
for line in INPUT:
	lst = line.rstrip("\b\r\n").split("\t")
	if lst[2] == args.feature:
		scaffold = str(lst[0])
		start = int(lst[3])
		end = int(lst[4])
		try:
			if len(FileA[scaffold]) > 0:
				for region in FileA[scaffold]:
					if end == region[0]-1:
						idx = FileA[scaffold].index(region)
						FileA[scaffold][idx] = [start,region[1]]
			result = re.search('(.*)=(.*)', z)
		y[str(result.group(1))] = str(result.group(2))
	return y

# gtfdic function
def gtfdic(x):
	"""Parse a GTF-style attribute string into a dict.

	The string is split on "; " and every piece of the form
	``key "value"`` contributes one entry.  Pieces without a space followed
	by a double-quoted value (an empty string, an unquoted attribute) are
	skipped instead of failing with AttributeError on the missing match.

	Args:
		x: attribute text, e.g. 'gene_id "G1"; transcript_id "T1";'.

	Returns:
		dict mapping each attribute name to its unquoted value; when a name
		is repeated, the last occurrence wins.
	"""
	y = {}
	for z in x.split("; "):
		# Both groups are greedy: the value runs up to the last double quote
		# of the piece and the key is everything before its opening quote.
		result = re.search(r'(.*) "(.*)"', z)
		if result is None:
			continue
		y[result.group(1)] = result.group(2)
	return y

### CREATE NAMES LIST

sys.stderr.write("[{0}]\tStoring names ...\n".format(at()))

# One entry per line of args.names with trailing whitespace removed; the
# context manager closes the file even if reading fails part-way.
with open(args.names, "r") as INPUT:
	NAMES = [line.rstrip() for line in INPUT]

# k holds the number of lines read (blank lines included).
k = len(NAMES)

sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), k))


### SELECT GENES
-r	--remove-nonseq-chars		Remove non sequence characters (like *) from sequences	[off]

''')

# parser
# Command-line options are parsed once, at module level.
p = ap.ArgumentParser()
p.add_argument("-in", "--input-file")
p.add_argument("-out", "--output-file")
p.add_argument("--invert", action="store_true")
# args.names is opened unconditionally right after parsing, so a missing -n
# is reported by argparse instead of surfacing as a TypeError from open(None).
p.add_argument("-n", "--names", metavar="NAMES", required=True)
p.add_argument("-r", "--remove-nonseq-chars", action="store_true")
args = p.parse_args()


# read input file
# Collect the distinct names listed in args.names: the first
# whitespace-delimited token of each line, with ">" stripped from both ends.
sys.stderr.write("[{0}]\tImporting list of names\n".format(at()))
NAMES = set()
with open(args.names, "r") as FOFN:  # closed even if reading fails part-way
	for line in FOFN:
		fields = line.split()
		if not fields:
			# blank lines carry no name; skip them rather than fail on [0]
			continue
		NAMES.add(fields[0].strip(">"))
#
NAMES = list(NAMES)
sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), len(NAMES)))


# parsing input fasta and writing to output
# Report the start of the filtering pass on stderr, start k at zero, then open
# args.input_file for reading (the handle is not closed within this excerpt).
parse_msg = "[{0}]\tParsing input FASTA and writing to output lines matching name list\n"
sys.stderr.write(parse_msg.format(at()))
k = 0

INPUT = open(args.input_file, "r")
	fake_read = fake_read_name + "\n" + fake_read_seq + "\n"
	return fake_read

# Complement lookup used to build the reverse-complemented mate.  Covers the
# four bases, N and the IUPAC ambiguity codes, in upper and lower case (lower
# case occurs in soft-masked sequence).  Any other character raises KeyError.
revcomp = {
	"A": "T", "C": "G", "G": "C", "T": "A", "N": "N",
	"R": "Y", "Y": "R", "S": "S", "W": "W", "K": "M", "M": "K",
	"B": "V", "V": "B", "D": "H", "H": "D",
}
revcomp.update([(base.lower(), comp.lower()) for base, comp in list(revcomp.items())])


def paired_end(record, start_1, end_1, start_2, end_2, k):
	"""Build a FASTA-formatted read pair from two windows of a record.

	Args:
		record: object whose ``seq`` and ``id`` are used via ``str()``.
		start_1, end_1: slice bounds of the first mate, taken as-is.
		start_2, end_2: slice bounds of the second mate, which is emitted
			reverse-complemented.
		k: value appended to the record id to name the pair.

	Returns:
		A two-element list of strings, each ">id_k/N\\nSEQUENCE\\n" with
		N being 1 and 2 respectively.

	Raises:
		KeyError: if the second window holds a character missing from
			``revcomp``.
	"""
	sequence = str(record.seq)  # converted once, sliced twice
	mate_1 = sequence[start_1:end_1]
	mate_2 = "".join(revcomp[base] for base in reversed(sequence[start_2:end_2]))
	prefix = ">" + str(record.id) + "_" + str(k)
	return [
		prefix + "/1" + "\n" + mate_1 + "\n",
		prefix + "/2" + "\n" + mate_2 + "\n",
	]


### detect number of cpu and define pool ###

# All reporting below is written to ERROR and only happens in verbose mode.
if args.verbose:
	# Singular wording for exactly one thread, plural above one; any other
	# value is not reported.
	if args.threads == 1:
		ERROR.write("[{0}] Defined {1} thread\n".format(at(), args.threads))
	elif args.threads > 1:
		ERROR.write("[{0}] Defined {1} threads\n".format(at(), args.threads))

	# Describe the read layout for the selected type ("SE" or "PE").
	if args.type == "PE":
		mate_distance = args.insert_size-(2*args.read_length)
		ERROR.write("[{0}] Generating paired-end reads of insert size {1} and length {2} every {3} positions\n".format(at(), args.insert_size, args.read_length, args.step_size))
		ERROR.write("[{0}] Distance between mates: {1}\n".format(at(), mate_distance))
	elif args.type == "SE":
		ERROR.write("[{0}] Generating reads of {1} length every {2} positions\n".format(at(), args.read_length, args.step_size))


### run processes 
Exemplo n.º 11
0
    fake_pair_name = [
        ">" + str(record.id) + "_" + str(k) + "/1",
        ">" + str(record.id) + "_" + str(k) + "/2"
    ]
    fake_pair = [
        fake_pair_name[0] + "\n" + fake_pair_seq[0] + "\n",
        fake_pair_name[1] + "\n" + fake_pair_seq[1] + "\n"
    ]
    return fake_pair


### detect number of cpu and define pool ###

# All reporting below is written to ERROR and only happens in verbose mode.
if args.verbose:
    # Singular wording for exactly one thread, plural above one; any other
    # value is not reported.
    if args.threads == 1:
        ERROR.write("[{0}] Defined {1} thread\n".format(at(), args.threads))
    elif args.threads > 1:
        ERROR.write("[{0}] Defined {1} threads\n".format(at(), args.threads))

    # Describe the read layout for the selected type ("SE" or "PE").
    if args.type == "PE":
        mate_distance = args.insert_size - (2 * args.read_length)
        pe_msg = "[{0}] Generating paired-end reads of insert size {1} and length {2} every {3} positions\n"
        ERROR.write(pe_msg.format(
            at(), args.insert_size, args.read_length, args.step_size))
        ERROR.write("[{0}] Distance between mates: {1}\n".format(
            at(), mate_distance))
    elif args.type == "SE":
        se_msg = "[{0}] Generating reads of {1} length every {2} positions\n"
        ERROR.write(se_msg.format(at(), args.read_length, args.step_size))
Exemplo n.º 12
0
# gtfdic function
# import re
def gtfdic(x):
    """Parse a GTF-style attribute string into a dict.

    The string is split on "; " and every piece of the form
    ``key "value"`` contributes one entry.  Pieces without a space followed
    by a double-quoted value (an empty string, an unquoted attribute) are
    skipped instead of failing with AttributeError on the missing match.

    Args:
        x: attribute text, e.g. 'gene_id "G1"; transcript_id "T1";'.

    Returns:
        dict mapping each attribute name to its unquoted value; when a name
        is repeated, the last occurrence wins.
    """
    y = {}
    for z in x.split("; "):
        # Both groups are greedy: the value runs up to the last double quote
        # of the piece and the key is everything before its opening quote.
        result = re.search(r'(.*) "(.*)"', z)
        if result is None:
            continue
        y[result.group(1)] = result.group(2)
    return y


### reading scaffold lengths

if args.verbose:
    sys.stderr.write("[{0}]\tReading scaffold lengths\n".format(at()))

# Each line supplies two tab-separated columns: a scaffold name and its
# integer length.  A later line for the same name overwrites the earlier one.
Scaflen = {}
with open(args.scaffold_lengths, "r") as SCAFS:  # closed even on a parse error
    for line in SCAFS:
        lst = line.rstrip().split("\t")
        if lst == [""]:
            # blank line: nothing to store, and lst[1] would not exist
            continue
        Scaflen[lst[0]] = int(lst[1])

if args.verbose:
    sys.stderr.write("[{0}]\tStored information on {1} scaffolds\n".format(
        at(), len(Scaflen)))

### READ INPUT FILE