def handle(self, *args, **options):
    # Management-command entry point: bulk-load Persona rows from a
    # (possibly compressed) JSON file named by the 'filename' option.
    # NOTE(review): hook_compressed is called with no mode argument;
    # stdlib fileinput.hook_compressed requires one, so this is
    # presumably a project-local wrapper -- confirm the import.
    with hook_compressed(options['filename']) as fp:
        all_data = json.load(fp)
    # One transaction for the whole batch: either every record is
    # saved or none are.
    with atomic():
        for info in all_data:
            person = Persona(**info)
            person.save()
def _open(self, fp): if 'xml' in fp: return fileinput.hook_compressed(fp, 'r') if fp.endswith('.gz'): reader = codecs.getreader("utf-8") return reader(gzip.open(fp)) return codecs.open(fp, encoding='utf-8', mode='r')
def test_gz_with_encoding_fake(self): original_open = gzip.open gzip.open = lambda filename, mode: io.BytesIO(b'Ex-binary string') try: result = fileinput.hook_compressed("test.gz", "3", encoding="utf-8") finally: gzip.open = original_open self.assertEqual(list(result), ['Ex-binary string'])
def do_test_use_builtin_open(self, filename, mode):
    """Verify hook_compressed falls back to builtins.open exactly once
    for a filename without a recognized compression extension."""
    expected_call = ((filename, mode), {})
    saved_open = self.replace_builtin_open(self.fake_open)
    try:
        result = fileinput.hook_compressed(filename, mode)
    finally:
        # Always restore the real builtin, even if the hook raised.
        self.replace_builtin_open(saved_open)
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, expected_call)
def test_gz_ext_fake(self):
    """A '.gz' filename must be routed to gzip.open with its mode untouched."""
    saved = gzip.open
    try:
        gzip.open = self.fake_open
        result = fileinput.hook_compressed('test.gz', 3)
    finally:
        gzip.open = saved
    # Exactly one delegation, with the original arguments passed through.
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, (('test.gz', 3), {}))
def test_bz2_ext_fake(self):
    """A '.bz2' filename must be routed to bz2.BZ2File with its mode untouched."""
    saved = bz2.BZ2File
    try:
        bz2.BZ2File = self.fake_open
        result = fileinput.hook_compressed('test.bz2', 4)
    finally:
        bz2.BZ2File = saved
    # Exactly one delegation, with the original arguments passed through.
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, (('test.bz2', 4), {}))
def test_gz_ext_fake(self):
    """hook_compressed delegates '.gz' names straight to gzip.open."""
    prev_gzip_open, gzip.open = gzip.open, self.fake_open
    try:
        result = fileinput.hook_compressed("test.gz", 3)
    finally:
        gzip.open = prev_gzip_open
    count = self.fake_open.invocation_count
    call = self.fake_open.last_invocation
    self.assertEqual(count, 1)
    self.assertEqual(call, (("test.gz", 3), {}))
def test_bz2_ext_fake(self):
    """hook_compressed delegates '.bz2' names straight to bz2.BZ2File."""
    prev_bz2file, bz2.BZ2File = bz2.BZ2File, self.fake_open
    try:
        result = fileinput.hook_compressed("test.bz2", 4)
    finally:
        bz2.BZ2File = prev_bz2file
    count = self.fake_open.invocation_count
    call = self.fake_open.last_invocation
    self.assertEqual(count, 1)
    self.assertEqual(call, (("test.bz2", 4), {}))
def read_lines_old(file, myvalidator):
    """Parse every line of *file* (transparently gunzipped by
    fileinput.hook_compressed) into Line objects.

    Only lines accepted by line_obj.parse_line(..., myvalidator) are kept.
    Raises Exception with a readable message when the file cannot be opened.
    """
    # Bug fix: the original referenced `fname` in the except handler
    # without ever defining it, turning a missing file into a NameError.
    fname = os.path.split(file)[1]
    Lines = []
    try:
        for line in fileinput.hook_compressed(file, "r"):
            line_obj = Line()
            if line_obj.parse_line(file, line, myvalidator):
                Lines.append(line_obj)
        return Lines
    except IOError:
        raise Exception(fname + ": File is not found. Ignoring this file ")
def read_lines(file, myvalidator):
    """Parse every line of *file* (transparently gunzipped) into Line objects.

    Lines that carry their own date are parsed normally; date-less lines
    (e.g. smf files) inherit the datetime/format of the previous dated
    line.  Raises Exception when the file cannot be opened.
    """
    # Bug fix: `fname` was only assigned inside the date-less branch of
    # the loop, so the except handler could raise NameError (e.g. when
    # the file does not exist).  Hoist it so it is always defined.
    fname = os.path.split(file)[1]
    Lines = []
    try:
        # Epoch fallback used until the first dated line is seen.
        prevvalue = "%Y %b %d %H:%M:%S"
        prevdate = datetime.strptime("1970 Jan 01 00:00:00", prevvalue)
        for line in fileinput.hook_compressed(file, "r"):
            line_obj = Line()
            if line_obj.isDateFound(line):
                if line_obj.parse_line(file, line, myvalidator):
                    prevdate = line_obj.linedatetime
                    prevvalue = line_obj.datetimeformat
                    Lines.append(line_obj)
            else:
                # Date-less line: synthesize a record from the last
                # known timestamp, marked fake.
                line_obj.set(fname, prevdate, prevvalue,
                             line[:-1].lstrip(), fake=True)
                Lines.append(line_obj)
        return Lines
    except IOError:
        raise Exception(fname + ": File is not found. Ignoring this file ")
def supress_repeated(supress_lines):
    """Echo every non-blank line of the file *supress_lines* to stdout.

    The file is opened through fileinput.hook_compressed, so gzip/bz2
    inputs are decompressed transparently.  Lines that are empty after
    stripping trailing whitespace are dropped; kept lines go through
    print(), which appends a second newline on top of the one already
    present in the line.
    """
    for text in fileinput.hook_compressed(supress_lines, 'r+'):
        if not text.rstrip():
            continue
        print(text)
output_file = sys.argv[2] # Now ensure that these all exist and we're allowed to write the output # if we fail because of this, we want to fail before doing a lot of work if not os.path.exists(input_file): print 'input_file "' + input_file + '" does not exist' exit() try: output_fid = open(output_file, 'w') except: print 'Error opening output file ' + output_file exit() if input_file[-3:] == '.gz': input_fid = fileinput.hook_compressed(input_file, 'r') else: input_fid = open(input_file, 'r') IDtoQUERY = {} input_array = [] plates_to_remap = set() max_plate_id = 0 for line in input_fid: line = line.split() input_array.append(line) query = line[0] plate = line[4] if int(plate) > max_plate_id: max_plate_id = int(plate)
# NOTE(review): fragment of a Python 2 script; help(), header() and the
# imports (sys, os, hook_compressed) come from earlier chunks.
if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count(
        '--help') > 0:
    help()
    exit()
if sys.argv.count('-header') > 0:
    header()
    exit()
if len(sys.argv) < 2:
    print 'too few arguments (try "-h" for help)'
    exit()

input_file = sys.argv[1]
# Transparently decompress gzip input.  hook_compressed is used bare,
# so it was presumably imported via `from fileinput import
# hook_compressed` -- confirm against the file header.
if input_file[-3:] == '.gz':
    input_fid = hook_compressed(input_file, 'r')
else:
    input_fid = open(input_file, 'r')

queries = set()
arrays = set()
counts = [0, 0, 0, 0]  # neg pos insig nan
labels = ['FG30', 'FG26', 'TS30', 'TS26', 'SCI']
exp_counts = {}
for i in range(len(labels)):
    exp_counts[labels[i]] = 0

input_fid.readline()  # toss header
# Collect the first tab-separated column of every data row.
for line in input_fid:
    line = line.strip().split('\t')
    queries.add(line[0])
# NOTE(review): fragment -- the leading help()/exit() pair closes an
# `if` from the previous chunk; help(), os, sys and fileinput are
# defined/imported earlier.
    help()
    exit()
if len(sys.argv) < 2:
    print('too few arguments (try "-h" for help)')
    exit()

sga_file = sys.argv[1]
# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(sga_file):
    print('sga_file "' + sga_file + '" does not exist')
    exit()

sga_fid = fileinput.hook_compressed(sga_file, 'r')

# default to "raw" input files
QUERY_COL = 0
ARRAY_COL = 1
PLATE_COL = 2
UNIQE_COL = 4
BATCH_COL = 5
# Column layout differs between "raw" and "release" exports; pick the
# mapping from a substring of the filename.
if 'raw' in sga_file:
    QUERY_COL = 0
    ARRAY_COL = 1
    PLATE_COL = 2
    UNIQE_COL = 4
    BATCH_COL = 5
elif 'release' in sga_file:
    QUERY_COL = 0
    # NOTE(review): the rest of this elif branch is in the next chunk.
output_file = sys.argv[2] # Now ensure that these all exist and we're allowed to write the output # if we fail because of this, we want to fail before doing a lot of work if not os.path.exists(combined_data_file): print 'combined_data_file "' + combined_data_file + '" does not exist' exit() try: output_fid = open(output_file, 'w') except: print 'Error opening output file: ' + output_file exit() ## Step 1: Split each line and add _SETID to the first field if combined_data_file[-3:] == '.gz': combined_data_fid = fileinput.hook_compressed(combined_data_file, 'r') else: combined_data_fid = open(combined_data_file, 'r') SEEN_QUERIES = set() for line in combined_data_fid: line = line.split() query = line[SGA_QUERY_COL] set = line[SGA_SET_COL] # Add _set to query name queryset = query + '_' + set line[SGA_QUERY_COL] = queryset output_fid.write('\t'.join(line)) output_fid.write('\n') combined_data_fid.close()
#!/usr/bin/python
"""Concatenate the files named on the command line to stdout.

Each argument is opened with fileinput.hook_compressed, so '.gz' and
'.bz2' files are decompressed transparently; everything else is copied
through verbatim, in 32 KiB chunks.
"""
import fileinput
import sys

CHUNK_SIZE = 32768

for path in sys.argv[1:]:
    # hook_compressed picks gzip/bz2/plain open() from the extension.
    fh = fileinput.hook_compressed(path, 'r')
    try:
        while True:
            data = fh.read(CHUNK_SIZE)
            if not data:
                break
            sys.stdout.write(data)
    finally:
        # Bug fix: the original leaked one open handle per argument;
        # also renamed the loop variable, which shadowed builtin `file`.
        fh.close()
    # NOTE(review): this `return` closes a function whose definition is
    # in an earlier chunk.
    return


################ MAIN FUNCTION
if sys.argv.count('-h') + sys.argv.count('-help') + sys.argv.count('--help') > 0:
    help()
    sys.exit()
if len(sys.argv) != 3:
    print('Wrong number of arguments, try "-help"', file=sys.stderr)
    sys.exit()

SGAfile = sys.argv[1]
OtherData = sys.argv[2]

fid_1 = fileinput.hook_compressed(SGAfile, 'r')
fid_2 = fileinput.hook_compressed(OtherData, 'r')

max_plate = 0
max_batch = 0
# Scan the SGA file for the largest plate (col 4) and batch (col 5) ids.
for line in fid_1:
    # Presumably hook_compressed yields bytes for the gzip case here,
    # hence the explicit decode -- behavior is Python-version dependent,
    # confirm against the runtime in use.
    if SGAfile[-3:] == '.gz':
        line = line.decode('utf-8').strip()
    else:
        line = line.strip()
    split_line = line.split('\t')
    plate = int(split_line[4])
    batch = int(split_line[5])
    if plate > max_plate:
        max_plate = plate
def update_event(self, inp=-1):
    """Node update handler: open the filename on input 0 with the mode
    on input 1 via fileinput.hook_compressed, and publish the resulting
    stream on output 0."""
    filename = self.input(0)
    mode = self.input(1)
    stream = fileinput.hook_compressed(filename, mode)
    self.set_output_val(0, stream)
## Step 3: For each query, split the set ids into two groups group1 = [int(x) - 1 for x in split_param.split(',')] size1 = len(group1) size2 = {} setA = {} for query in replicate_queries: setA[query] = [list(query_setids[query])[x] for x in group1] size2[query] = len(query_setids[query]) - size1 ## Step 4: Iterate through the scorefile # keep replicate queries, renameing them # keep anything in keep_batches # Result can be appended to a short set. if big_data_file[-3:] == '.gz': big_data_fid = fileinput.hook_compressed(big_data_file, 'r') else: big_data_fid = open(big_data_file, 'r') for line in big_data_fid: line = line.strip().split('\t') if line[0] in replicate_queries: if line[3] in setA[line[0]]: line[0] = line[0] + '_A' + str(size1) else: line[0] = line[0] + '_B' + str(size2[line[0]]) print('\t'.join(line)) #elif line[5] in keep_batches: #print('\t'.join(line))
# NOTE(review): fragment -- the first expression continues an
# np.save(...) call opened in the previous chunk, inside an if-chain
# that picks the output name by substring of `filename`.
            save_path + 'clean_signal-dijet' + "_" + feature_type + batch_number,
            data)
    elif 'bg' in filename:
        np.save(
            save_path + 'clean_bg-dijet' + "_" + feature_type + batch_number,
            data)
    else:
        assert 1 == 0  # Files were not generated. There is a problem with the source filename

#load_path = './'
load_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/"
#filename = "dijet-bg.txt.gz"
#filename = "all-signal.json"
#filename = 'ten_line_signal.json'
filename = 'one_line_signal.json'
filename = load_path + filename

#save_path = './saved_batches/'
save_path = "/phys/groups/tev/scratch4/users/kaifulam/dguest/gjj-pheno/v1/high_mid_low_and_covariance/numpy_data/batches_5000/"

if filename[-3:] == '.gz':  # Check if the file is compressed or not and open accordingly
    fid = fileinput.hook_compressed(filename, 'r')
else:
    fid = open(filename)

clean_and_merge_lines(fid, 1, filename, save_path)
# NOTE(review): Python 2 fragment; os, sys and fileinput are imported in
# an earlier chunk.
if len(sys.argv) < 3:
    print 'too few arguments (try "-h" for help)'
    exit()

SGA_file = sys.argv[1]
# Batch ids come straight from the remaining command-line arguments.
batch_ids = sys.argv[2:]

#BATCH_file = sys.argv[2]
#batch_ids = set()
#b_fid = open(BATCH_file,'r')
#for line in b_fid:
#    batch_ids.add(line.strip())

# Now ensure that these all exist and we're allowed to write the output
# if we fail because of this, we want to fail before doing a lot of work
if not os.path.exists(SGA_file):
    print 'SGA_file "' + SGA_file + '" does not exist'
    exit()

# Transparently handle gzip-compressed score files.
if SGA_file[-3:] == '.gz':
    SGA_fid = fileinput.hook_compressed(SGA_file, 'r')
else:
    SGA_fid = open(SGA_file, 'r')

# Emit only the rows whose batch id (column 5) was requested.
for line in SGA_fid:
    line = line.strip()
    parsed = line.split('\t')
    #if parsed[5] not in batch_ids:
    if parsed[5] in batch_ids:
        print(line)
#! /usr/bin/env python3
# Minimal cat(1) clone built on fileinput: echoes stdin (or each file
# named on the command line) to stdout unchanged.
import fileinput

with fileinput.input() as stream:
    for record in stream:
        # `record` keeps its trailing newline, so suppress print()'s own.
        print(record, end='')

# Example session:
# ls | ./file_input.py
# __pycache__
# file_input.py
# file_input_multi_files.py

'''
The two following opening hooks are provided by this module:

fileinput.hook_compressed(filename, mode)

    Transparently opens files compressed with gzip and bzip2
    (recognized by the extensions '.gz' and '.bz2') using the gzip and
    bz2 modules. If the filename extension is not '.gz' or '.bz2', the
    file is opened normally (ie, using open() without any
    decompression).

    Usage example: fi =
    fileinput.FileInput(openhook=fileinput.hook_compressed)

fileinput.hook_encoded(encoding, errors=None)

    Returns a hook which opens each file with open(), using the given
    encoding and errors to read the file.

    Usage example: fi =
    fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
    "surrogateescape"))

    Changed in version 3.6: Added the optional errors parameter.
'''