Example #1
 def handle(self, *args, **options):
     with hook_compressed(options['filename'], 'r') as fp:  # fileinput.hook_compressed requires an explicit mode
         all_data = json.load(fp)
     with atomic():
         for info in all_data:
             person = Persona(**info)
             person.save()
Example #2
    def _open(self, fp):
        if 'xml' in fp:
            return fileinput.hook_compressed(fp, 'r')

        if fp.endswith('.gz'):
            reader = codecs.getreader("utf-8")
            return reader(gzip.open(fp))
        return codecs.open(fp, encoding='utf-8', mode='r')
Example #3
 def test_gz_with_encoding_fake(self):
     original_open = gzip.open
     gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string')
     try:
         result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8")
     finally:
         gzip.open = original_open
     self.assertEqual(list(result), ['Ex-binary string'])
Example #4
 def do_test_use_builtin_open(self, filename, mode):
     original_open = self.replace_builtin_open(self.fake_open)
     try:
         result = fileinput.hook_compressed(filename, mode)
     finally:
         self.replace_builtin_open(original_open)
     self.assertEqual(self.fake_open.invocation_count, 1)
     self.assertEqual(self.fake_open.last_invocation,
                      ((filename, mode), {}))
Example #5
 def test_gz_ext_fake(self):
     original_open = gzip.open
     gzip.open = self.fake_open
     try:
         result = fileinput.hook_compressed('test.gz', 3)
     finally:
         gzip.open = original_open
     self.assertEqual(self.fake_open.invocation_count, 1)
     self.assertEqual(self.fake_open.last_invocation, (('test.gz', 3), {}))
Example #6
 def test_bz2_ext_fake(self):
     original_open = bz2.BZ2File
     bz2.BZ2File = self.fake_open
     try:
         result = fileinput.hook_compressed('test.bz2', 4)
     finally:
         bz2.BZ2File = original_open
     self.assertEqual(self.fake_open.invocation_count, 1)
     self.assertEqual(self.fake_open.last_invocation, (('test.bz2', 4), {}))
Example #10
def read_lines_old(file, myvalidator):
    Lines = []
    try:
        for line in fileinput.hook_compressed(file, "r"):
            line_obj = Line()
            if line_obj.parse_line(file, line, myvalidator):
                Lines.append(line_obj)
        return Lines
    except IOError:
        # 'fname' was undefined here; report the file name that was passed in
        raise Exception(file + ": File is not found. Ignoring this file ")
Example #11
def read_lines(file, myvalidator):
    Lines = []
    # compute the base file name up front so the except clause below can use it
    fname = os.path.split(file)[1]
    try:
        prevvalue = "%Y %b %d %H:%M:%S"
        prevdate = datetime.strptime("1970 Jan 01 00:00:00", prevvalue)
        for line in fileinput.hook_compressed(file, "r"):
            line_obj = Line()
            if line_obj.isDateFound(line):
                if line_obj.parse_line(file, line, myvalidator):
                    prevdate = line_obj.linedatetime
                    prevvalue = line_obj.datetimeformat
                    Lines.append(line_obj)
            else:
                # for handling the lines without a date, e.g. smf files
                #print prevdate, prevvalue, line, fname
                line_obj.set(fname, prevdate, prevvalue, line[:-1].lstrip(), fake=True)
                #Lines[-1].text=Lines[-1].text+"\n"+line
                Lines.append(line_obj)

        return Lines
    except IOError:
        raise Exception(fname + ": File is not found. Ignoring this file ")
Example #12
def supress_repeated(supress_lines):
    for line in fileinput.hook_compressed(supress_lines, 'r+'):
        if line.rstrip():
            print(line)
Example #13
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(input_file):
    print('input_file "' + input_file + '" does not exist')
    exit()

try:
    output_fid = open(output_file, 'w')
except OSError:
    print('Error opening output file ' + output_file)
    exit()

if input_file[-3:] == '.gz':
    input_fid = fileinput.hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

IDtoQUERY = {}
input_array = []
plates_to_remap = set()
max_plate_id = 0

for line in input_fid:
    line = line.split()
    input_array.append(line)
    query = line[0]
    plate = line[4]
    if int(plate) > max_plate_id:
        max_plate_id = int(plate)
Example #14
if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count(
        '--help') > 0:
    help()
    exit()

if sys.argv.count('-header') > 0:
    header()
    exit()

if len(sys.argv) < 2:
    print('too few arguments (try "-h" for help)')
    exit()

input_file = sys.argv[1]
if input_file[-3:] == '.gz':
    input_fid = hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

queries = set()
arrays = set()
counts = [0, 0, 0, 0]  # neg pos insig nan
labels = ['FG30', 'FG26', 'TS30', 'TS26', 'SCI']
exp_counts = {}
for i in range(len(labels)):
    exp_counts[labels[i]] = 0

input_fid.readline()  # toss header
for line in input_fid:
    line = line.strip().split('\t')
    queries.add(line[0])
Example #15
    help()
    exit()

if len(sys.argv) < 2:
    print('too few arguments (try "-h" for help)')
    exit()

sga_file = sys.argv[1]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(sga_file):
    print('sga_file "' + sga_file + '" does not exist')
    exit()

sga_fid = fileinput.hook_compressed(sga_file, 'r')

# default to "raw" input files
QUERY_COL = 0
ARRAY_COL = 1
PLATE_COL = 2
UNIQE_COL = 4
BATCH_COL = 5
if 'raw' in sga_file:
    QUERY_COL = 0
    ARRAY_COL = 1
    PLATE_COL = 2
    UNIQE_COL = 4
    BATCH_COL = 5
elif 'release' in sga_file:
    QUERY_COL = 0
Example #16
output_file = sys.argv[2]

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(combined_data_file):
    print('combined_data_file "' + combined_data_file + '" does not exist')
    exit()
try:
    output_fid = open(output_file, 'w')
except OSError:
    print('Error opening output file: ' + output_file)
    exit()

## Step 1: Split each line and add _SETID to the first field
if combined_data_file[-3:] == '.gz':
    combined_data_fid = fileinput.hook_compressed(combined_data_file, 'r')
else:
    combined_data_fid = open(combined_data_file, 'r')

SEEN_QUERIES = set()
for line in combined_data_fid:
    line = line.split()
    query = line[SGA_QUERY_COL]
    set_id = line[SGA_SET_COL]  # renamed from 'set' to avoid shadowing the built-in
    # Add _set to query name
    queryset = query + '_' + set_id
    line[SGA_QUERY_COL] = queryset
    output_fid.write('\t'.join(line))
    output_fid.write('\n')

combined_data_fid.close()
Example #17
#!/usr/bin/python
import fileinput
import sys
for file in sys.argv[1:]:
    fh = fileinput.hook_compressed(file, 'r')
    data = fh.read(32768)
    while data:
        sys.stdout.write(data)
        data = fh.read(32768)
Example #18
   return

################ MAIN FUNCTION

if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count('--help') > 0:
   help()
   sys.exit()

if len(sys.argv) != 3:
   print('Wrong number of arguments, try "-help"', file=sys.stderr)
   sys.exit()

SGAfile = sys.argv[1]
OtherData = sys.argv[2]

fid_1 = fileinput.hook_compressed(SGAfile, 'r')
fid_2 = fileinput.hook_compressed(OtherData, 'r')

max_plate = 0
max_batch = 0
for line in fid_1:
   if SGAfile[-3:] == '.gz':
      line = line.decode('utf-8').strip()
   else:
      line = line.strip()

   split_line = line.split('\t')
   plate = int(split_line[4])
   batch = int(split_line[5])
   if plate > max_plate:
      max_plate = plate
Example #19
 def update_event(self, inp=-1):
     self.set_output_val(
         0, fileinput.hook_compressed(self.input(0), self.input(1)))
Example #20
## Step 3: For each query, split the set ids into two groups
group1 = [int(x) - 1 for x in split_param.split(',')]
size1 = len(group1)
size2 = {}

setA = {}
for query in replicate_queries:
    setA[query] = [list(query_setids[query])[x] for x in group1]
    size2[query] = len(query_setids[query]) - size1

## Step 4: Iterate through the scorefile
# keep replicate queries, renaming them
# keep anything in keep_batches
# Result can be appended to a short set.

if big_data_file[-3:] == '.gz':
    big_data_fid = fileinput.hook_compressed(big_data_file, 'r')
else:
    big_data_fid = open(big_data_file, 'r')

for line in big_data_fid:
    line = line.strip().split('\t')
    if line[0] in replicate_queries:
        if line[3] in setA[line[0]]:
            line[0] = line[0] + '_A' + str(size1)
        else:
            line[0] = line[0] + '_B' + str(size2[line[0]])
        print('\t'.join(line))
    #elif line[5] in keep_batches:
    #print('\t'.join(line))
Example #21
    if 'signal' in filename:  # opening of this example reconstructed from the matching 'bg' branch below
        np.save(
            save_path + 'clean_signal-dijet' + "_" + feature_type +
            batch_number, data)
    elif 'bg' in filename:
        np.save(
            save_path + 'clean_bg-dijet' + "_" + feature_type + batch_number,
            data)
    else:
        assert 1 == 0  # Files were not generated. There is a problem with the source filename


#load_path = './'
load_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/"

#filename = "dijet-bg.txt.gz"
#filename = "all-signal.json"
#filename = 'ten_line_signal.json'
filename = 'one_line_signal.json'

filename = load_path + filename

#save_path = './saved_batches/'
save_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/high_mid_low_and_covariance/numpy_data/batches_5000/"

# Check if the file is compressed or not and open accordingly
if filename[-3:] == '.gz':
    fid = fileinput.hook_compressed(filename, 'r')
else:
    fid = open(filename)

clean_and_merge_lines(fid, 1, filename, save_path)
Example #22
if len(sys.argv) < 3:
    print('too few arguments (try "-h" for help)')
    exit()

SGA_file = sys.argv[1]
batch_ids = sys.argv[2:]
#BATCH_file = sys.argv[2]
#batch_ids = set()
#b_fid = open(BATCH_file,'r')
#for line in b_fid:
#    batch_ids.add(line.strip())

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(SGA_file):
    print('SGA_file "' + SGA_file + '" does not exist')
    exit()

if SGA_file[-3:] == '.gz':
    SGA_fid = fileinput.hook_compressed(SGA_file, 'r')
else:
    SGA_fid = open(SGA_file, 'r')

for line in SGA_fid:
    line = line.strip()
    parsed = line.split('\t')
    #if parsed[5] not in batch_ids:
    if parsed[5] in batch_ids:
        print(line)
Example #23
#! /usr/bin/env python3
import fileinput

with fileinput.input() as f:
    for line in f:
        print(line, end='')

# ls | ./file_input.py
# __pycache__
# file_input.py
# file_input_multi_files.py
'''
The following two opening hooks are provided by this module:

fileinput.hook_compressed(filename, mode)
    Transparently opens files compressed with gzip and bzip2 (recognized by the
    extensions '.gz' and '.bz2') using the gzip and bz2 modules. If the filename
    extension is not '.gz' or '.bz2', the file is opened normally (i.e., using
    open() without any decompression).

    Usage example: fi = fileinput.FileInput(openhook=fileinput.hook_compressed)

fileinput.hook_encoded(encoding, errors=None)
    Returns a hook which opens each file with open(), using the given encoding
    and errors to read the file.

    Usage example:
    fi = fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8", "surrogateescape"))

    Changed in version 3.6: Added the optional errors parameter.
'''
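Putting the quoted docs above into practice: the sketch below passes hook_compressed as the openhook of fileinput.input(), so plain and compressed inputs are read with the same loop. The file names 'access.log' and 'access.log.gz' are placeholders chosen for illustration, and note that before Python 3.10 hook_compressed returns bytes for '.gz'/'.bz2' files, which is why several examples above decode lines manually.

#!/usr/bin/env python3
# Sketch only: 'access.log' and 'access.log.gz' are placeholder file names.
import fileinput

files = ['access.log', 'access.log.gz']

with fileinput.input(files=files, openhook=fileinput.hook_compressed) as f:
    for line in f:
        # Compressed files come back as bytes on Python versions before 3.10,
        # so decode defensively before printing.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        print(line, end='')

# To force a specific text encoding for uncompressed files instead, use hook_encoded:
#   fi = fileinput.FileInput(files, openhook=fileinput.hook_encoded("utf-8", "surrogateescape"))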