def main():
    sequenceFile = "./../RAW_DATA/seq/seq.tab"
    outputPath = "./../P_DATA/loc_files/from_python/"
    print("processing files ... ")
    start_time = time.time()
    numberOfProbes = 0
    numberOfGstackProbes = 0
    with open(sequenceFile, encoding="utf-8") as seqfile:
        for lineno, line in enumerate(seqfile):
            if lineno == 0:
                continue  # skip the header line so it is not counted as a probe
            (p_id, pset_id, p_x, p_y, assembly, seqname, start, stop,
             strand, p_seq, tar_std, cat) = line.split()
            pg_loc = FindNTstack(p_seq, 'G')
            if pg_loc > 0 and cat.find("main") != -1:
                numberOfGstackProbes += 1
                probe = Probe(pset_id, p_x, p_y, cat)
                probe.WriteProbes(outputPath, pg_loc, True)
            numberOfProbes += 1
    textToRatioFile = (
        "Total No. of Probes in Seq File : \t" + str(numberOfProbes)
        + "\nTotal No. of G-Stack Probes in Seq File : \t" + str(numberOfGstackProbes)
        + "\nPercentage of G-Stack Probes : \t"
        + str(numberOfGstackProbes / numberOfProbes * 100)
        + "\nTotal no. of Probe set containing G-Stack Probes : \t"
        + str(ProbeSetWithGstack(outputPath + "combined.txt"))
        + "\n\nProgram took " + str(round(time.time() - start_time, 2))
        + " seconds to execute."
    )
    WriteToFile(outputPath + "ratio.txt", textToRatioFile)
    print("done processing ... ")
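# Hypothetical sketch only -- FindNTstack is not included in this listing.  A
# stand-in consistent with the "pg_loc > 0" test in main() could return the
# 1-based start of a run of identical nucleotides (a G-stack when nt='G') and
# 0 when no such run exists; the run length of 4 is an assumption.
def find_nt_stack(seq, nt, run_length=4):
    for i in range(len(seq) - run_length + 1):
        if seq[i:i + run_length] == nt * run_length:
            return i + 1      # 1-based position of the first run
    return 0                  # no run of `nt` of the requested length

# e.g. find_nt_stack("ACGGGGTA", 'G') -> 3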
def run(): f = open('../input.txt', 'r') paths = [line.strip().split(',') for line in f.readlines()] f.close() probe = Probe(paths) print("Closest: {} | Fewest: {}".format(probe.closest_intersection(), probe.fewest_steps()))
def __helper__(self, a_link, current_path, root_page_path): probe = Probe() probe.parse_string(current_path, ''' <html><head></head> <body> <a href="%s"></a> </body></html> ''' % a_link) return probe.get_internal_url(root_page_path)
def start(): plot = Plot() probe = Probe() channel = Channel() plot.add_channel(channel) while not plot.closed: samples = probe.get_samples(1024) # samples = integrate_samples(samples) channel.add_samples(samples) plot.update_plot()
def plot_separatrix_crossings(self, axes, sepmode='patch', **kw):
    if sepmode == 'patch':
        try:
            xsep = self._get_xsep(**kw).reshape((-1, 2))
            for ax in axes:
                for xlim in xsep:
                    vrect(ax, xlim, **kw)
        except Exception:
            warn("Could not generate patches, falling back to lines")
            Probe.plot_separatrix_crossings(self, axes, **kw)
    elif sepmode == 'lines':
        Probe.plot_separatrix_crossings(self, axes, **kw)
    else:
        raise ValueError("Unknown sepmode: '%s'" % sepmode)
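# Hypothetical helper, assuming matplotlib axes: a `vrect`-style function that
# shades the interval between two separatrix crossings as a vertical patch.
# The colour and alpha are illustrative; this is not the original vrect.
def vrect(ax, xlim, color='0.8', alpha=0.5, **kw):
    ax.axvspan(xlim[0], xlim[1], facecolor=color, alpha=alpha)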
def __init__ (self, ram_size, reg_count, code, consolog): self._ram = Memory(ram_size) self._reg = Memory(reg_count) self.__ra = reg_count - 1 self.__sp = reg_count - 2 self._code = code self._ip = 0 # init instruction pointer self._max_ip = len(self._code) self._reg[self.__sp] = self._ram._size # init stack pointer self._reg[self.__ra] = self._max_ip # init return address self._probe = Probe(consolog)
class TestProbeCheckInternal(unittest.TestCase): def setUp(self): self.probe = Probe() self.probe.url = 'http://www.google.co.kr' @unittest.skip('fail skip') def test_fail(self): self.assertEqual(1, 2) def test_full_internal_url(self): root_page_url = 'http://www.google.co.kr' current_url = 'http://www.google.co.kr/foo' result = self.probe.check_internal_url(root_page_url, current_url) self.assertTrue(result) def test_different_netloc(self): root_page_url = 'http://www.google.co.kr' current_url = 'http://foogle.co.kr/foo' result = self.probe.check_internal_url(root_page_url, current_url) self.assertFalse(result) def test_relative(self): root_page_url = 'http://www.google.co.kr' current_url = 'sub' result = self.probe.check_internal_url(root_page_url, current_url) self.assertTrue(result) def test_relative2(self): root_page_url = 'http://www.google.co.kr/sub' current_url = 'sub' result = self.probe.check_internal_url(root_page_url, current_url) self.assertTrue(result) def test_absolute_fail(self): root_page_url = 'http://www.google.co.kr/sub' current_url = '/othersub' result = self.probe.check_internal_url(root_page_url, current_url) self.assertFalse(result)
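# Hypothetical sketch of the behaviour the tests above pin down -- not the
# project's actual Probe.check_internal_url.  Relative paths count as internal,
# absolute URLs must share the root's host, and a rooted path such as
# '/othersub' must stay under the root page's path.
from urllib.parse import urlparse

def check_internal_url(root_page_url, current_url):
    root = urlparse(root_page_url)
    cur = urlparse(current_url)
    if cur.netloc:                                   # absolute URL: same host only
        return cur.netloc == root.netloc
    if current_url.startswith('/'):                  # rooted path: keep it under root.path
        return bool(root.path) and current_url.startswith(root.path)
    return True                                      # plain relative path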
def __init__(self, shn, shot=None, head=None, dig=None, eqi=None): if shot is None: shot = self.find_shot(shn) if head is None: head = shot.head if dig is None: dig = shot.dig self.shot = shot digitizer = DigitizerClasses[dig](shn) try: viewers = eqi.get_viewers(self) except AttributeError: viewers = () Probe.__init__(self, head, digitizer, R0=1.645, z0=-0.966, eqi=eqi, viewers=viewers)
def gauges(da): from probe import Probe entries = [ [0, 0], [34, 10], [34, 34], [10, 34], [0, 15], ] prb = Probe(da, entries) return prb
def main(files, out, threshold): out_files = [] for file_path in list(files): if Path(file_path).is_dir(): out_files.extend(handle_folder(file_path)) else: out_files.append(file_path) analyse_clips = Analysis(out_files).summary() output_timeline = Timeline().create_timeline(settings=analyse_clips) speech_detector = SpeechDetection(batch_size=8, threshold=threshold, progress_hook=progress) speech_detector_cpu = SpeechDetection(batch_size=8, device="cpu", threshold=threshold, progress_hook=progress) output_file = open(out, 'w+') for i, file_path in enumerate(out_files): file_properties = Probe(file_path).run().extract_summary() temp_dir = tempfile.TemporaryDirectory() temp_audio_file = Encoder.wav_audio_temp(file_path, temp_dir.name) print(f'Processing File {i + 1} of {len(out_files)}: {file_path}') for _ in range(0, 3): # Try two more times if there's an error. try: speech = speech_detector.run(temp_audio_file).to_frames( file_properties['video']['frame_rate']) break except Exception as e: print(e) try: speech = speech_detector_cpu.run( temp_audio_file).to_frames( file_properties['video']['frame_rate']) print("Processed using CPU") break except Exception as e: pass output_timeline.add_file(file_path, speech) output_file.seek(0) output_file.truncate() output_file.write(output_timeline.export()) temp_dir.cleanup() output_file.close()
def run_test_resource(resource): """tests a service and provides run metrics""" result = ResourceResult(resource) result.start() probes = resource.probe_vars for probe in probes: result.add_result(Probe.run(resource, probe)) result.stop() return result
def create_probe_grid(self): positions = [] delta = 20 margin = 20 for x in range(margin, self.width - margin, delta): for y in range(margin, self.height - margin, delta): positions.append((x, y)) for pos in positions: probe = Probe() probe.body.position = pos # Create the probe's sensors probe.range_scanner = RangeScanner( "RangeScan:nonlandmarks", WALL_MASK | ROBOT_MASK | ANY_PUCK_MASK | ANY_LANDMARK_MASK, WALL_MASK | ROBOT_MASK | RED_PUCK_MASK) probe.landmark_scanner = RangeScanner( "RangeScan:landmarks", WALL_MASK | ANY_LANDMARK_MASK, WALL_MASK | ANY_LANDMARK_MASK) self.probes.append(probe)
def _gendata(): jdict = Sensor.main() jdict.update(Probe()) insert_str = f"""\ INSERT INTO pi_probe( plc_mac, scan_data ) VALUES( '58:52:8a:d6:69:a1', '{json.dumps(jdict)}' ); """ send_d = {"sql": {"db": "piscan", "query": insert_str, "commit": True}} return send_d
def calib(self): Probe.calib(self) s = slice(5000) self.norm_to_region(s) S = self.S S['Rs'] = S['R'].smooth(100, mode='gaussian') S['tip1+tip2'] = S['tip1'] + S['tip2'] S['tip1+tip2'].update(label='Mach tips sum') if not S['tip1+tip2'].V.is_swept: mask = (S['tip1+tip2'].V > -150.) | (S['tip1+tip2'] > 1.9) S['tip1+tip2'] = S['tip1+tip2'].masked(mask) S['tip1'] = S['tip1'].masked(mask) S['tip2'] = S['tip2'].masked(mask) tips = self.head.tips A = [0.5*tips[0].area, 0.5*tips[1].area, tips[2].area] A.append(A[0] + A[1]) for tip, area in zip(('tip1', 'tip2', 'tip3', 'tip1+tip2'), A): S[tip + 'j'] = S[tip] / area S[tip + 'j'].update(type='Current density', units='A / m**2')
def run_test_resource(resource): """tests a service and provides run metrics""" result = ResourceResult(resource) if not resource.active: result.message = 'Skipped' return result result.start() probes = resource.probe_vars for probe in probes: result.add_result(Probe.run(resource, probe)) result.stop() return result
def sample_space(self, iterations, output_filename): outfile = open(output_filename, 'w') outfile.write( 'Probe 5 trunc,Probe 3 trunc,Probe* 5 trunc,Probe* 3 trunc,Probe blunt end,Sink 5 trunc,Sink 3 trunc,Sink* 5 trunc,Sink* 3 trunc,beta,self.SNP activation,self.WT activation,Background activation\n' ) for i in range(iterations): print('Iteration: ' + str(i)) p = Probe.Probe(self.SNP, self.WT, self.minlength, self.concentrations, self.params, self.mutation_rate) p.display() for t in p.truncations: outfile.write(str(t) + ',') for b in p.beta: outfile.write(str(b) + ',') outfile.write('\n') outfile.close()
def getProbePoints(self):
    print('This will take time....')
    with open(self.probefile, 'r') as probe:
        i = 0
        readData = csv.reader(probe)
        # Truncate the output files before appending to them.
        MPoutputdata = open("Partition6467MatchedPoints.csv", 'w')
        MPoutputdata.close()
        SPoutputdata = open("SlopeOutput.csv", 'w')
        SPoutputdata.close()
        data = 'sampleID, dateTime, sourceCode, latitude, longitude, altitude, speed, heading, linkPVID, direction, distFromRef, distFromLink'
        MPoutputdata = open("Partition6467MatchedPoints.csv", 'a')
        MPoutputdata.write(data + "\n")
        for data in readData:
            pdata = Probe(int(data[0]), str(data[1]), int(data[2]), float(data[3]),
                          float(data[4]), float(data[5]), float(data[6]), float(data[7]))
            if i == 0:
                self.probepoints.append(pdata)
                i = i + 1
            elif self.probepoints[i - 1].sampleID == pdata.sampleID:
                self.probepoints.append(pdata)
                i = i + 1
            else:
                self.probepoints = []
                self.probepoints.append(pdata)
                i = 1
            link = self.plotProbePoint(pdata)
            linkPVID = link.linkPVID
            distFromLink = link.perDistance(pdata)
            distFromRef = link.haversine(pdata)
            slope = self.slope(link, pdata)
            direction = link.directionOfTravel
            if direction == 'B':
                direction = 'X'
            data = "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s" % (
                pdata.sampleID, pdata.dateTime, pdata.sourceCode, pdata.latitude,
                pdata.longitude, pdata.altitude, pdata.speed, pdata.heading,
                linkPVID, direction, distFromRef, distFromLink)
            MPoutputdata.write(data + "\n")
            print(str(pdata) + ' Link ID ' + str(linkPVID) + ',' + str(direction) + ',' +
                  str(distFromRef) + ',' + str(distFromLink) + ',' + '\n' + slope + '\n')
        MPoutputdata.close()
def sample_space_random(self, iterations, output_filename): outfile = open(output_filename, 'w') outfile.write( 'Probe 5 trunc,Probe 3 trunc,Probe* 5 trunc,Probe* 3 trunc,Probe blunt end,Sink 5 trunc,Sink 3 trunc,Sink* 5 trunc,Sink* 3 trunc,beta,self.SNP activation,self.WT activation,Background activation, self.SNP Sequence, self.WT Sequence, self.SNP Index\n' ) for i in range(iterations): print('Iteration: ' + str(i)) sequences = self.generate_target() self.SNP = sequences[0] self.WT = sequences[1] self.SNP_index = sequences[2] p = Probe.Probe(self.SNP, self.WT, self.minlength, self.concentrations, self.params, self.mutation_rate) p.display() for t in p.truncations: outfile.write(str(t) + ',') for b in p.beta: outfile.write(str(b) + ',') outfile.write(self.SNP + ',') outfile.write(self.WT + ',') outfile.write(str(self.SNP_index)) outfile.write('\n') outfile.close()
def plot(self, *args, **kw): kw.setdefault('sepmode', 'patch') return Probe.plot(self, *args, **kw)
def mk_R_axis(self, axes, Rj=None, tlab=None, xlab='Probe R (cm)', **kw): if tlab is None: tlab = np.r_[np.arange(131, 135), np.arange(135, 160, 5)] if Rj is None: Rj = 0.01*tlab return Probe.mk_R_axis(self, axes, Rj, tlab=tlab, xlab=xlab, **kw)
class CPU:
    def __init__(self, ram_size, reg_count, code, consolog):
        self._ram = Memory(ram_size)
        self._reg = Memory(reg_count)
        self.__ra = reg_count - 1
        self.__sp = reg_count - 2
        self._code = code
        self._probe = Probe(consolog)

    def value(self, val):
        if val[0] == 'imm':
            return val[1]
        if val[0] == 'reg':
            return self._reg[val[1]]
        if val[0] == 'mem':
            return self._ram[val[1]]
        if val[0] == 'ref':
            v = self.value(val[1])
            if len(val) == 3:
                v += self.value(val[2])
            return self._ram[v]

    def write(self, addr, val):
        if addr[0] == 'imm':
            raise WriteError
        if addr[0] == 'reg':
            self._reg[addr[1]] = val
        if addr[0] == 'mem':
            self._ram[addr[1]] = val
        if addr[0] == 'ref':
            a = self.value(addr[1])
            if len(addr) == 3:
                a += self.value(addr[2])
            self._ram[a] = val

    def cycle(self, ip):
        instr = self._code[ip][0]
        opcode = instr[0]
        # opcodes with no arguments
        if opcode == 'nop':
            pass
        elif opcode == 'ret':
            return self.value(('reg', self.__ra))
        else:
            # opcode with 1 argument
            dst = instr[1]
            if opcode == 'cal':
                self.write(('reg', self.__ra), ip + 1)
                return self.value(dst)
            if opcode == 'jmp':
                return self.value(dst)
            if opcode == 'dbg':
                self._probe.dbg(dst)
            elif opcode == 'prn':
                print(self.value(dst), end='')
            elif opcode == 'prx':
                print(format(self.value(dst), '02x'), end='')
            elif opcode == 'prX':
                print(format(self.value(dst), '04x'), end='')
            elif opcode == 'prc':
                print(chr(self.value(dst)), end='')
            else:
                # opcodes with 2 arguments
                val1 = instr[2]
                if opcode == 'prs':
                    ptr = self.value(dst)
                    for i in range(0, self.value(val1)):
                        print(chr(self._ram[ptr + i]), end='')
                    print('')
                elif opcode == 'mov':
                    self.write(dst, self.value(val1))
                elif opcode == 'not':
                    self.write(dst, 0xffff ^ self.value(val1))
                else:
                    # opcodes with 3 arguments
                    val2 = instr[3]
                    if opcode == 'beq':
                        if self.value(val1) == self.value(val2):
                            return self.value(dst)
                    elif opcode == 'bne':
                        if self.value(val1) != self.value(val2):
                            return self.value(dst)
                    elif opcode == 'and':
                        self.write(dst, self.value(val1) & self.value(val2))
                    elif opcode == 'orr':
                        self.write(dst, self.value(val1) | self.value(val2))
                    elif opcode == 'xor':
                        self.write(dst, self.value(val1) ^ self.value(val2))
                    elif opcode == 'lsl':
                        self.write(dst, self.value(val1) << self.value(val2))
                    elif opcode == 'lsr':
                        self.write(dst, self.value(val1) >> self.value(val2))
                    elif opcode == 'min':
                        self.write(dst, min(self.value(val1), self.value(val2)))
                    elif opcode == 'max':
                        self.write(dst, max(self.value(val1), self.value(val2)))
                    elif opcode == 'add':
                        self.write(dst, self.value(val1) + self.value(val2))
                    elif opcode == 'sub':
                        self.write(dst, self.value(val1) - self.value(val2))
                    elif opcode == 'mul':
                        self.write(dst, self.value(val1) * self.value(val2))
                    elif opcode == 'div':
                        self.write(dst, self.value(val1) // self.value(val2))
                    elif opcode == 'mod':
                        self.write(dst, self.value(val1) % self.value(val2))
                    elif opcode == 'cmp':
                        v1 = self.value(val1)
                        v2 = self.value(val2)
                        if v1 < v2:
                            self.write(dst, 1)
                        elif v1 > v2:
                            self.write(dst, -1)
                        else:
                            self.write(dst, 0)
        return None

    def run(self):
        try:
            max_ip = len(self._code)
            self._reg[self.__sp] = self._ram._size  # init stack pointer
            self._reg[self.__ra] = max_ip           # init return address
            self._ip = 0                            # init instruction pointer
            while self._ip >= 0 and self._ip < max_ip:
                ip = self.cycle(self._ip)
                if ip is not None:
                    self._ip = ip
                else:
                    self._ip += 1
            self._probe.read(self._ram.get_activity())
            self._probe.read(self._reg.get_activity())
            self._probe.output_activity()
            sys.stdout.flush()
        except AddrError as e:
            print('Invalid address ' + str(e.addr) + self.dbg(self._ip))
        except ValError as e:
            print('Invalid value ' + str(e.val) + self.dbg(self._ip))
        except WriteError:
            print('Invalid write ' + self.dbg(self._ip))

    def dbg(self, ip):
        return (' on line ' + str(self._code[ip][1][1]) +
                ' of file ' + self._code[ip][1][0])
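# Illustrative only: the operand encoding that value() and write() expect.
# Each code entry is (instruction, (source_file, line)); operands are tagged
# tuples -- ('imm', n) literal, ('reg', i) register, ('mem', a) RAM address,
# ('ref', base[, offset]) indirect through RAM.  The tiny program below, which
# would print "7", is a made-up example, not taken from the original assembler.
demo_code = [
    (('mov', ('reg', 0), ('imm', 3)),             ('demo.asm', 1)),
    (('add', ('reg', 0), ('reg', 0), ('imm', 4)), ('demo.asm', 2)),
    (('prn', ('reg', 0)),                         ('demo.asm', 3)),
    (('ret',),                                    ('demo.asm', 4)),
]
# cpu = CPU(ram_size=256, reg_count=8, code=demo_code, consolog=True)
# cpu.run()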
socketio = SocketIO(app) config = settings.Config() MAT_SERIAL_IDENTIFIER = "M" LIGHT_SERIAL_IDENTIFIER = "L" ZERO_CROSS_IDENTIFIER = "Z" ON = 1 OFF = 0 MAT_TEMPERATURE_APPROACH_DELTA_LIMIT = 0.12 AMBIENT_TEMPERATURE_APPROACH_DELTA_LIMIT = 0.2 display = Display() probe = Probe(config.get('probe', 'READ_DIRECTORY'), 'Probe') dht1_temp = DHT22(gpio.DHT_SENSOR1_PIN, 1, 'Sensor1 (temperature)') dht2_humidity = DHT22(gpio.DHT_SENSOR2_PIN, 2, 'Sensor2 (humidity)') mat = DutyCycle("Mat", MAT_SERIAL_IDENTIFIER) light = DutyCycle("Light", LIGHT_SERIAL_IDENTIFIER) serialConnection = serial.Serial('/dev/serial0', 9600) poll_sensors = True probe_temp = 0.0 ambient_temp = 0.0 sensor_values = [{'temp': 0.0, 'hum': 0.0}, {'temp': 0.0, 'hum': 0.0}] parser = argparse.ArgumentParser() parser.add_argument('--gpio_disabled', action="store_true")
def mainProgram(): createLogger() logger.info("Starting wine fermentation data collection...") secrets = loadJSONConfig('/'.join([dirName, "secrets.json"])) settings = loadJSONConfig('/'.join([dirName, "settings.json"])) losantHelper.init(secrets["deviceId"], secrets["key"], secrets["secret"]) s1 = Probe('/sys/bus/w1/devices/28-000004d109a8/w1_slave') s2 = Probe('/sys/bus/w1/devices/28-0416841e1fff/w1_slave') s3 = Probe('/sys/bus/w1/devices/28-031683b233ff/w1_slave') s4 = Probe('/sys/bus/w1/devices/28-0316839543ff/w1_slave') s5 = Probe('/sys/bus/w1/devices/28-000004d0bcce/w1_slave') s6 = Probe('/sys/bus/w1/devices/28-04168411d7ff/w1_slave') logger.info("Initialization done."); while True: # get a reading from the plant state = {"S1": s1.read_temp(), "S2": s2.read_temp(), "S3": s3.read_temp(), "S4": s4.read_temp(), "S5": s5.read_temp(), "S6": s6.read_temp()} logger.debug(state) losantHelper.sendMeasurement(state) # delay between readings time.sleep(5) #close endMeasurements()
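# Hypothetical sketch: the /sys/bus/w1/devices/28-*/w1_slave paths above are
# the Linux 1-Wire interface for DS18B20 sensors, so a read_temp() along these
# lines is plausible -- this is not the project's actual Probe class.
class W1TemperatureProbe:
    def __init__(self, device_file):
        self.device_file = device_file

    def read_temp(self):
        with open(self.device_file) as f:
            lines = f.readlines()
        if not lines or not lines[0].strip().endswith('YES'):
            return None                              # CRC check failed / no reading
        pos = lines[1].find('t=')
        if pos == -1:
            return None
        return float(lines[1][pos + 2:]) / 1000.0    # millidegrees C -> degrees C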
def __init__(self, shn, plunge=1): digitizer = DigitizerRCP(shn, plunge) Probe.__init__(self, digitizer)
def matchData(): target = open(matchedData, "w") recentID = None candidate = [] # Iterate Probe data to match it against links for cnt, line in enumerate(open(probeData).readlines()): probe = Probe(line) # Just for status if cnt % 500 == 0: print(cnt) # Small - current smallest distance # linkID - holds the smallest distance # dist_point_refnode - the distance from the point to ref node of the link latlong = GeoPoint(probe.latitude + "/" + probe.longitude) # Check if probe sampleID is already matched or not if probe.sampleID != recentID: recentID = probe.sampleID # Iterate through each link to match with current probe for key in links.keys(): for link in links[key]: # Calculate distance of probe lat-long from link distance = link.calculateDistance(latlong) if not probe.distFromRef or distance < probe.distFromRef: probe.distFromRef, probe.linkID = distance, link.id # Calculating distance of current probe from Link probe.distFromLink = links[ probe.linkID][0].calculateDistanceFromLink(latlong) # Determine direction probe.getDirection(float(probe.heading), link.radian) # saving candidate with reference point and its respective candidate = [link.ref_point, link.non_ref_point] else: for candidate_point in candidate: for link in points[candidate_point.ID]: distance = link.calculateDistance(latlong) if not probe.distFromRef or distance < probe.distFromRef: probe.distFromRef, probe.linkID = distance, link.id # Calculating distance of current probe from Link probe.distFromLink = links[ probe.linkID][0].calculateDistanceFromLink(latlong) # Determine direction probe.getDirection(float(probe.heading), link.radian) # distance from the reference node to the map-matched probe point location on the link in decimal meters. probe.distFromRef = math.sqrt(probe.distFromRef) * AVG_EARTH_RADIUS_MLT # perpendicular distance from the map-matched probe point location on the link to the probe point in decimal meters. probe.distFromLink = probe.distFromLink * AVG_EARTH_RADIUS_MLT # Writing comma seperated string into file target.write(probe.toString()) target.close()
def predict(model_prefix, probes_dir, preds_dir, data_dir, data_file, layers, batch_size, hidden_dim, max_seq_length, device): # Extract examples tokenizer = AutoTokenizer.from_pretrained(model_prefix) processor = SquadV2Processor() dev_examples = processor.get_dev_examples(data_dir=data_dir, filename=data_file) # Extract dev features print("Loading dev features") dev_features, dev_dataset = squad_convert_examples_to_features( examples=dev_examples, tokenizer=tokenizer, max_seq_length=max_seq_length, doc_stride=128, max_query_length=64, is_training=False, return_dataset="pt", threads=1) # Initialize config and model config = AutoConfig.from_pretrained(model_prefix, output_hidden_states=True) model = AutoModelForQuestionAnswering.from_pretrained(model_prefix, config=config) # multi-gpu evaluate model = torch.nn.DataParallel(model) # Load probe for each layer print("Loading probes") probes = [] for i in range(layers): p = Probe(hidden_dim) p.load(probes_dir, i + 1, device) probes.append(p) # Extract IDs print("Extracting dev IDs") n = len(dev_examples) q_ids = [] for i in range(n): q_ids.append(dev_examples[i].qas_id) # Initialize dev data loader eval_sampler = SequentialSampler(dev_dataset) eval_dataloader = DataLoader(dev_dataset, sampler=eval_sampler, batch_size=batch_size) # Initialize predictions predictions = [] for i in range(layers): pred = pd.DataFrame() pred['Id'] = q_ids pred['Predicted'] = [""] * len(dev_examples) pred['Question'] = [""] * len(dev_examples) pred['Score'] = [0] * len(dev_examples) predictions.append(pred) # List to keep track of how many unique questions we've seen in each df, questions with # contexts longer than max seq len get split into multiple features based on doc_stride # a good alternative we may implement later is recording for all features, then simplifying with groupby and max # e.g. something like df.sort_values('Score', ascending=False).drop_duplicates(['Question']) question_ids = [0] * layers # Evaluation batches print("Predicting on dev set") for batch in tqdm(eval_dataloader, desc="Evaluating"): model.eval() batch = tuple(t.to(device) for t in batch) with torch.no_grad(): inputs = { "input_ids": batch[0], "attention_mask": batch[1], "token_type_ids": batch[2], } # Distil does not use token type ids if "distil" in model_dir: inputs.pop('token_type_ids') # ALBERT/BERT/Distilibert forward pass idx = batch[3] outputs = model(**inputs) attention_hidden_states = outputs[2][1:] # Compute prediction on eval indices for j, index in enumerate(idx): index = int(index.item()) # Extract tokens for the current batch tokens = tokenizer.convert_ids_to_tokens(batch[0][j]) # Find where context starts and ends, since we want to predict in context context_start = int(max_seq_length - torch.argmax( torch.flip(batch[2][j], [0])).item()) - 1 context_end = int(torch.argmax(batch[2][j]).item()) # Find the question, starting right after [CLS] and subtracting 1 to chop off the [SEP] token question_start = 1 question_end = context_start question = tokenizer.convert_tokens_to_string( tokens[question_start:question_end - 1]) # For each layer ... 
for i, p in enumerate(probes): # Extract predicted indicies score, start_idx, end_idx = p.predict( attention_hidden_states[i][j].unsqueeze(0), device, threshold=0, context_start=context_start, context_end=context_end) start_idx = int(start_idx[0]) end_idx = int(end_idx[0]) # Extract predicted answer, converting start tokens to empty strings (no answer) answer = tokenizer.convert_tokens_to_string( tokens[start_idx:end_idx + 1]) if answer == '[CLS]': answer = '' # Check if the question is the same as the last one, if it is go back to the last question id and keep the higher score. # If the question is not already in the dataframe, then assign it to the dataframe. # Note we first handle the case where there are no prior questions by storing since we know there are no duplicates if question_ids[i] == 0: predictions[i].loc[question_ids[i], 'Question'] = question predictions[i].loc[question_ids[i], 'Predicted'] = answer predictions[i].loc[question_ids[i], 'Score'] = score elif (predictions[i].loc[int(question_ids[i] - 1), 'Question'] == question): question_ids[i] -= 1 old_score = predictions[i].loc[question_ids[i], 'Score'] if score > old_score: predictions[i].loc[question_ids[i], 'Predicted'] = answer predictions[i].loc[question_ids[i], 'Score'] = score else: predictions[i].loc[question_ids[i], 'Question'] = question predictions[i].loc[question_ids[i], 'Predicted'] = answer predictions[i].loc[question_ids[i], 'Score'] = score # Increment to new question id (note, for duplicate answers this gets us back to where we were) question_ids[i] += 1 # Save predictions for each layer print("Saving predictions") if not os.path.exists(preds_dir): os.mkdir(preds_dir) for i, pred in enumerate(predictions): pred[['Id', 'Predicted']].to_csv(preds_dir + "/layer_" + str(i + 1) + ".csv", index=False)
def __init__(self, data_limits_for_women): self.data_limits_for_women = data_limits_for_women self.dates = [] self.probes = { "RBC": Probe("RBC", "[mln/µl]", [4.5, 5.9], [4.2, 5.4], u"Ilość czerwonych krwinek (erytrocytów)"), "HGB": Probe("HGB", "[g/dl]", [13.5, 18.5], [12, 16], u"Ilość hemoglobiny w ogólnej masie erytrocytów"), "HCT": Probe("HCT", "[%]", [40, 54], [40, 51], u"Hematokryt - procentowa objętość erytrocytów we krwi"), "MCV": Probe("MCV", "[fl]", [80, 98], [76, 102.9], u"Średnia objętość erytrocytu"), "MCH": Probe("MCH", "[pg]", [27, 34], [25.7, 35.7], u"Stężenie hemoglobiny w erytrocycie"), "MCHC": Probe("MCHC", "[g/dl]", [31, 37], [29.5, 38.8], u"Stężenie hemoglobiny w ogólnej masie erytrocytów"), "RDW": Probe("RDW", "[%]", [11.4, 14.5], [10.3, 15.9], u"Anizocytoza erytrocytów - rozpiętość rozkładu objętości erytrocytów"), "PLT": Probe("PLT", "[tys/µl]", [150, 450], [135, 495], u"Ilość płytek krwi (trombocytów)"), "PDW": Probe("PDW", "[%]", [11, 18], [11, 18], u"Anizocytoza trombocytów - rozpiętość rozkładu objętości trombocytów"), "MPV": Probe("MPV", "[fl]", [6, 11], [6, 11], u"Średnia objętość trombocytu"), "WBC": Probe("WBC", "[tys/µl]", [4, 10], [3.8, 10.5], u"Ilość białych krwinek (leukocytów)"), "PCT": Probe("PCT", "[%]", [0.2, 0.5], [0.2, 0.5], u"Poziom prokalcytoniny (białka tarczycy) w osoczu krwi"), "LYM%": Probe("LYM%", "[%]", [20, 40], [18, 44], u"Procentowy udział limfocytów w leukocytach"), "MON%": Probe("MON%", "[%]", [4, 8], [2.8, 10.4], u"Procentowy udział monocytów w leukocytach"), "NEU%": Probe("NEU%", "[%]", [55, 70], [49.5, 77], u"Procentowy udział neutrofili w leukocytach"), "BAS%": Probe("BAS%", "[%]", [0, 2], [0.0, 3.0], u"Procentowy udział bazofili w leukocytach"), "EOS%": Probe("EOS%", "[%]", [1, 5], [0.7, 6.5], u"Procentowy udział eozynofili w leukocytach"), "LYM": Probe("LYM", "[tys/µl]", [1, 4], [0.9, 4.4], u"Ilościowy udział limfocytów w leukocytach"), "MON": Probe("MON", "[tys/µl]", [0.2, 1], [0.1, 1.3], u"Ilościowy udział monocytów w leukocytach"), "NEU": Probe("NEU", "[tys/µl]", [2, 7.5], [1.8, 8.25], u"Ilościowy udział neutrofili w leukocytach"), "BAS": Probe("BAS", "[tys/µl]", [0, 0.2], [0, 0.3], u"Ilościowy udział bazofili w leukocytach"), "EOS": Probe("EOS", "[tys/µl]", [0, 0.5], [0, 0.65], u"Ilościowy udział eozynofili w leukocytach"), "ALY%": Probe("ALY%", "[%]", [0, 2.0], [0, 2.0], u"Procent limfocytów atypowych (reaktywnych)"), "ALY": Probe("ALY", "[tys/µl]", [0, 0.25], [0, 0.25], u"Ilość limfocytów atypowych (reaktywnych)"), "LIC": Probe("LIC", "[tys/µl]", [0, 0.3], [0, 0.3], u"Ilość dużych niedojrzałych komórek (limfoblastów)"), }
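# Illustrative only: each Probe above is constructed with a name, a unit, what
# look like reference ranges for men and for women, and a description.  A
# check against the appropriate range could look like this; the argument names
# are assumptions, not the class's real API.
def in_reference_range(value, limits_for_men, limits_for_women, for_woman):
    low, high = limits_for_women if for_woman else limits_for_men
    return low <= value <= high

# e.g. a haemoglobin (HGB) result of 12.8 g/dl against the ranges above:
# in_reference_range(12.8, [13.5, 18.5], [12, 16], for_woman=True)   -> True
# in_reference_range(12.8, [13.5, 18.5], [12, 16], for_woman=False)  -> False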
def add_file(self, file_path, cuts): metadata = Probe(file_path).run() summary = metadata.extract_summary() timeline_rate = { "ntsc": str(is_ntsc(summary['video']['frame_rate'])).upper(), "timebase": str(summary['video']['frame_rate']) } masterclip_id = "masterclip-" + str(self._masterclip) clip = otio.schema.ExternalReference( target_url=file_path, available_range=otio.opentime.TimeRange( start_time=otio.opentime.RationalTime( value=0, rate=float(summary['video']['frame_rate'])), duration=otio.opentime.RationalTime( value=int(metadata.extract_video_data('nb_frames')), rate=float(summary['video']['frame_rate']))), metadata={ "fcp_xml": { "masterclipid": masterclip_id, "media": { "audio": { "channelcount": str(summary['audio']['channels']), "samplecharacteristics": { "depth": "16", "samplerate": str(summary['audio']['sample_rate']) } }, "video": { "samplecharacteristics": { "anamorphic": "FALSE", "fielddominance": "none", "width": str(summary['video']['width']), "height": str(summary['video']['height']), "pixelaspectratio": "square", "rate": timeline_rate } } }, "rate": timeline_rate, "timecode": { "displayformat": "DF" if is_ntsc(summary['video']['frame_rate']) else "NDF", "rate": timeline_rate } } }) for cut in cuts: self.video_track_1.append( otio.schema.Clip( name=Path(file_path).stem, media_reference=clip, source_range=otio.opentime.TimeRange( start_time=otio.opentime.RationalTime( value=cut['start'], rate=float(summary['video']['frame_rate'])), duration=otio.opentime.RationalTime( value=cut['end'] - cut['start'], rate=float(summary['video']['frame_rate']))), metadata={ "fcp_xml": { "masterclipid": masterclip_id, "pixelaspectratio": "square", } })) self.audio_track_1.append( otio.schema.Clip( name=Path(file_path).stem, media_reference=clip, source_range=otio.opentime.TimeRange( start_time=otio.opentime.RationalTime( value=cut['start'], rate=float(summary['video']['frame_rate'])), duration=otio.opentime.RationalTime( value=cut['end'] - cut['start'], rate=float(summary['video']['frame_rate']))), metadata={ "fcp_xml": { "@premiereChannelType": summary['audio']['channel_layout'].capitalize(), "masterclipid": masterclip_id, "sourcetrack": { "mediatype": "audio", "trackindex": "1" } } })) self._masterclip += 1 return self
aux_bc_pml(pml,pml_type,xi,xf,yi,yf,nx,ny) #from matplotlib import pylab #for i in range(8): # pylab.figure() # pylab.contourf(pml[i,:,:].copy()) # pylab.colorbar() #pylab.show() from probe import Probe entries=[[ 0, 0], [34,10], [34,34], [10,34], [0,15], ] prb = Probe(da, entries) def write(Q1,Q2,Q3,filename): io = PETSc.Viewer().createBinary(filename,mode="w") Q1.view(io) Q2.view(io) Q3.view(io) io.destroy() draw = PETSc.Viewer.DRAW() for t in range(1,100): if t == 1: qinit(Q1,Q2,Q3,da) qbc(Q1,Q2,Q3,da)
heading, linkPVID, direction, distFromRef, distFromLink, slope'''
header = header.replace(" ", "")
header = header.replace("\n", "")
header = header.replace("\t", "")
result_data.write(header + "\n")
prev_probe = None
print("Calculating the slope at each map matched point")
print("This will take some time..")
for line in probe_data:
    probe = Probe(line)
    if not prev_probe:
        probe.slope = 'X'
    elif probe.linkPVID != prev_probe.linkPVID:
        probe.slope = 'X'
    else:
        ####################################################################
        # LOGIC TO CALCULATE SLOPE OF LINK                                 #
        ####################################################################
        # Slope is the arctangent of the altitude change over the
        # horizontal (haversine) distance between consecutive probes.
        opposite = float(probe.altitude) - float(prev_probe.altitude)
        start = list(map(float, [probe.longitude, probe.latitude]))
        end = list(map(float, [prev_probe.longitude, prev_probe.latitude]))
        hypotenuse = haversine.haversine(start[0], start[1], end[0], end[1]) / 1000
        probe.slope = math.atan(opposite / hypotenuse)
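# Worked example of the slope formula above (values are hypothetical): a pair
# of consecutive probes 10 m apart horizontally with a 0.5 m altitude gain.
import math

rise = 0.5                               # altitude change in metres
run = 10.0                               # horizontal (haversine) distance in metres
slope_rad = math.atan(rise / run)
print(round(slope_rad, 4), round(math.degrees(slope_rad), 2))   # 0.05 2.86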
def get_sub_url(self, base_url, url): probe = Probe() probe.open(url) return probe.get_internal_url(base_url)
def probing(self, qs=2): pb = Probe(self.root, self.name) for frame_id, opt in pb.diff_next(qs=qs): self.elog.info("Qs: {}, FrameID: {}, time: {}".format( qs, frame_id, opt)) self.save("dn/size_{}".format(qs), pb.pdf)
def train(model_prefix, model_dir, data_dir, data_file, epochs, layers, batch_size, hidden_dim, max_seq_length, device): # Extract examples tokenizer = AutoTokenizer.from_pretrained(model_prefix) processor = SquadV2Processor() train_examples = processor.get_train_examples(data_dir=data_dir, filename=data_file) # Extract train features print("Loading train features") train_features, train_dataset = squad_convert_examples_to_features( examples=train_examples, tokenizer=tokenizer, max_seq_length=max_seq_length, doc_stride=128, max_query_length=64, is_training=True, return_dataset="pt", threads=1, ) # Initialize model config = AutoConfig.from_pretrained(model_prefix, output_hidden_states=True) model = AutoModelForQuestionAnswering.from_pretrained(model_prefix, config=config) # multi-gpu evaluate model = torch.nn.DataParallel(model) # Initialize probes print("Initializing probes") probes = [] for i in range(layers): p = Probe(hidden_dim) probes.append(p) # Training epochs for epoch in range(epochs): print("Training epoch: {}".format(epoch + 1)) # Initialize train data loader train_sampler = RandomSampler(train_dataset) train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size) # Training batches for batch in tqdm(train_dataloader, desc="Iteration"): # Get batch on the right device and prepare input dict batch = tuple(t.to(device) for t in batch) inputs = { "input_ids": batch[0], "attention_mask": batch[1], "token_type_ids": batch[2], "start_positions": batch[3], "end_positions": batch[4], } # Distil does not use token type ids if "distil" in model_dir: inputs.pop('token_type_ids') # ALBERT/BERT/Distilibert forward pass model.eval() with torch.no_grad(): outputs = model(**inputs) # Extract hiddent states all_layer_hidden_states = outputs[3][ 1:] # (layers, batch_size, max_seq_len, hidden_size) # Get labels, and update probes for batch start = batch[3] # (batch_size) end = batch[4] # (batch_size) for i, p in enumerate(probes): hiddens = all_layer_hidden_states[ i] # (batch_size, max_seq_len, hidden_size) p.train(hiddens, start, end, device) # Save probes after each epoch print("Epoch complete, saving probes") epoch_dir = model_dir + "/epoch_" + str(epoch + 1) if not os.path.exists(epoch_dir): os.mkdir(epoch_dir) probes_dir = epoch_dir + "/probes" if not os.path.exists(probes_dir): os.mkdir(probes_dir) # Save probes for each layer, both start and end index for i, p in enumerate(probes): p.save(probes_dir, i + 1)