def upload():
    alphabet = string.digits + string.ascii_letters + '$%. ♠♥♦♣'
    recognizer_alphabet = ''.join(sorted(set(alphabet.lower())))
    blank_label_idx = len(recognizer_alphabet)
    file = request.files['file']
    if file and allowed_file(file.filename):
        filename = file.filename
        # save received image
        img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(img_path)
        img = tools.read(img_path)
        img = utils.compute_input(img)
        img = np.expand_dims(img, 0)
        # call the TensorFlow Serving API (via a Celery task) for detection
        masks_async = detects_calculation.delay(img.tolist())
        predictions = masks_async.get()
        bboxes = utils.getBoxes([np.array(predictions[0])],
                                detection_threshold=0.7,
                                text_threshold=0.4,
                                link_threshold=0.4,
                                size_threshold=10)
        # re-read the original (unnormalized) image for recognition and annotation
        img = tools.read(img_path)
        recognized_res = recognition_task.delay(img.tolist(), bboxes[0].tolist())
        res = recognized_res.get()
        # drop blank/padding labels and map indices back to characters
        res = [
            ''.join([
                recognizer_alphabet[idx] for idx in row
                if idx not in [blank_label_idx, -1]
            ]) for row in res
        ]
        prediction_groups = [
            list(zip(predictions, boxes))
            for predictions, boxes in zip([res], bboxes)
        ]
        fig, axs = plt.subplots(nrows=1, figsize=(10, 10))
        tools.drawAnnotations(img, predictions=prediction_groups[0], ax=axs)
        fig.savefig(os.path.join(app.config['UPLOAD_FOLDER'], 'ocr_' + filename))
        return send_from_directory(app.config['UPLOAD_FOLDER'], 'ocr_' + filename)
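# The .delay()/.get() round-trip above implies a Celery worker task fronting
# TensorFlow Serving. A minimal sketch of what the worker side might look like;
# the broker/backend URLs and the model name 'craft' are assumptions, not the
# app's actual configuration.
from celery import Celery
import requests

celery_app = Celery('ocr_tasks',
                    broker='redis://localhost:6379/0',
                    backend='redis://localhost:6379/0')

@celery_app.task
def detects_calculation(img_list):
    """Forward the batched image to TensorFlow Serving's REST predict endpoint."""
    resp = requests.post('http://localhost:8501/v1/models/craft:predict',
                         json={'instances': img_list})
    return resp.json()['predictions']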
def main():
    # raw_input exists only on Python 2; bind one name that works on both.
    # (Assigning to `input` inside the function would make it a local and
    # raise UnboundLocalError on Python 3.)
    try:
        read_input = raw_input
    except NameError:
        read_input = input
    print('Block request http://www.douyu.com/lapi/live/getPlay/(roomid)')
    print('Enter getPlay request query:')
    req = read_input()
    if req:
        rtmp_url = getRtmpUrl(req)
        print('### rtmp_url: ', rtmp_url)
        tmpl = tools.read(conf.manual_tmpl_path).replace('<rtmp_url>', rtmp_url)
    else:
        tmpl = tools.read(conf.now_tmpl_path)
    tools.write(conf.douyutv_plug_path, tmpl)
    print('\n\ncopied douyutv.py plugin ok')
    print('streamlink http://www.douyutv.com/cold medium -o ')
def create_bboxes_array(images, box_groups, **kwargs) -> np.ndarray:
    """Build the batched crop array for recognition from lists of bounding boxes.

    Args:
        images: A list of image filepaths (or arrays readable by tools.read).
        box_groups: A list of groups of boxes, one group per image.
    """
    assert len(box_groups) == len(images), \
        'You must provide the same number of box groups as images.'
    crops = []
    start_end = []
    for image_path, boxes in zip(images, box_groups):
        image = tools.read(image_path)
        # if self.prediction_model.input_shape[-1] == 1 and image.shape[-1] == 3:
        #     # Convert color to grayscale
        #     image = cv2.cvtColor(image, code=cv2.COLOR_RGB2GRAY)
        for box in boxes:
            crops.append(
                tools.warpBox(image=image,
                              box=box,
                              target_height=31,
                              target_width=200))
        start = 0 if not start_end else start_end[-1][1]
        start_end.append((start, start + len(boxes)))
    if not crops:
        return [[] for image in images]
    X = np.float32(crops) / 255
    if len(X.shape) == 3:
        X = X[..., np.newaxis]
    return X
def recognize_from_boxes(self, images, box_groups,
                         **kwargs) -> typing.List[typing.List[str]]:
    """Recognize text from images using lists of bounding boxes.

    Args:
        images: A list of input images, supplied as numpy arrays with shape (H, W, 3).
        box_groups: A list of groups of boxes, one group per image.
    """
    assert len(box_groups) == len(images), \
        'You must provide the same number of box groups as images.'
    crops = []
    start_end = []
    for image, boxes in zip(images, box_groups):
        image = tools.read(image)
        if self.prediction_model.input_shape[-1] == 1 and image.shape[-1] == 3:
            # Convert color to grayscale
            image = cv2.cvtColor(image, code=cv2.COLOR_RGB2GRAY)
        for box in boxes:
            crops.append(
                tools.warpBox(image=image,
                              box=box,
                              target_height=self.model.input_shape[1],
                              target_width=self.model.input_shape[2]))
        start = 0 if not start_end else start_end[-1][1]
        start_end.append((start, start + len(boxes)))
    if not crops:
        return [[] for image in images]
    X = np.float32(crops) / 255
    if len(X.shape) == 3:
        X = X[..., np.newaxis]
    # Decode each prediction row: drop blank/padding labels, map indices to characters
    predictions = [
        ''.join([
            self.alphabet[idx] for idx in row
            if idx not in [self.blank_label_idx, -1]
        ]) for row in self.prediction_model.predict(X, **kwargs)
    ]
    return [predictions[start:end] for start, end in start_end]
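# The decoding step above drops the blank label and the -1 padding before
# mapping indices back to characters. A toy, self-contained illustration of
# that index-to-text step; the alphabet and prediction rows are invented.
import numpy as np

toy_alphabet = 'abc'                    # blank label sits at index len(alphabet)
toy_blank_label_idx = len(toy_alphabet)

toy_rows = np.array([[0, 3, 1, -1],     # 'a', blank, 'b', padding -> "ab"
                     [2, 2, 3, -1]])    # 'c', 'c', blank, padding -> "cc"
decoded = [
    ''.join(toy_alphabet[idx] for idx in row
            if idx not in [toy_blank_label_idx, -1])
    for row in toy_rows
]
print(decoded)  # ['ab', 'cc'] -- blanks/padding removed; repeats are not collapsed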
def detectChars(self,
                images: typing.List[typing.Union[np.ndarray, str]],
                detection_threshold=0.7,
                thickness=3,
                text_threshold=0.4,
                link_threshold=0.4,
                size_threshold=10,
                tolerance=0.1,
                **kwargs):
    """Recognize the characters in a set of images.

    Args:
        images: Can be a list of numpy arrays of shape HxWx3 or a list of filepaths.
        detection_threshold: We want to avoid including boxes that may have
            represented large regions of low-confidence text predictions. To do
            this, we do a final check for each word box to make sure the maximum
            confidence value exceeds some detection threshold. This is the
            threshold used for that check.
        text_threshold: When the text map is processed, it is converted from
            confidence values (floats from zero to one) to classifications
            (0 for not text, 1 for text) using binary thresholding. The
            threshold value determines the breakpoint at which a value is
            converted to a 1 or a 0. For example, if the threshold is 0.4 and
            the value for a particular point on the text map is 0.5, that value
            gets converted to a 1. The higher this value is, the less likely it
            is that characters will be merged together into a single word. The
            lower this value is, the more likely it is that non-text will be
            detected. Therein lies the balance.
        link_threshold: The same as `text_threshold`, but applied to the link
            map instead of the text map.
        size_threshold: The minimum area for a word.
    """
    original_images = [tools.read(image) for image in images]
    images = [compute_input(image) for image in original_images]
    results = self.model.predict(np.array(images), **kwargs)
    boxes = getBoxes(results,
                     detection_threshold=detection_threshold,
                     text_threshold=text_threshold,
                     link_threshold=link_threshold,
                     size_threshold=size_threshold)
    # Resize each raw text map back to the input size and scale to uint8
    resized_results = [
        (cv2.resize(result[..., 0],
                    (images[cnt].shape[1], images[cnt].shape[0])) * 255).astype(np.uint8)
        for cnt, result in enumerate(results)
    ]
    all_transformed_chars, all_images = [], []
    for image, resized_result, box in zip(original_images, resized_results, boxes):
        transformed_chars, transformed_coors = [], []
        for words in box:
            transformed_char, transformed_coor = tools.warpChars(
                image, resized_result, words, tolerance)
            transformed_coors.extend(transformed_coor)
            transformed_chars.extend(transformed_char)
        transformed_coors = np.array(transformed_coors).astype(np.int32)
        new_img = tools.drawBoxes(image, transformed_coors, thickness=thickness)
        all_images.append(new_img)
        all_transformed_chars.append(transformed_chars)
    # NOTE: transformed_coors holds only the coordinates of the last image
    return all_images, all_transformed_chars, transformed_coors
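# The text_threshold behavior described in the docstring above is plain binary
# thresholding of the confidence map. A self-contained numpy illustration with
# invented values (whether the real comparison is strict or >= is an
# implementation detail of getBoxes).
import numpy as np

demo_text_threshold = 0.4
demo_text_map = np.array([[0.10, 0.50, 0.90],    # fake per-pixel confidences
                          [0.39, 0.40, 0.41]])
# Values above the threshold become text pixels (1), the rest 0
binary_map = (demo_text_map > demo_text_threshold).astype(np.uint8)
print(binary_map)
# [[0 1 1]
#  [0 0 1]]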
def test_read_string_ven_changeunits():
    '''Test reading data from an existing ven file and changing to English units.'''
    df = tools.read('tests/tabs_V_ven', units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tDir [deg T]\tWaterT [deg F]\tTx\tTy\tSpeed [kts]\tAcross [kts]\tAlong [kts]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.04, 0.46, 170, 75.2, 0.0, -2.0, 0.46, -0.45, 0.1]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_string_met():
    '''Test reading data from an existing met file.'''
    df = tools.read('tests/tabs_V_met')
    # test column names
    dfcolumns = 'East [m/s]\tNorth [m/s]\tAirT [deg C]\tAtmPr [MB]\tGust [m/s]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [m/s]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-4.29, -0.45, 15.70, 1015.74, 6.34, 169.50, 0, 0, 0.00, 108.20, 4.31, 84.00]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_string_wave():
    '''Test reading data from an existing wave file.'''
    df = tools.read('tests/tabs_V_wave')
    # test column names
    dfcolumns = 'WaveHeight [m]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.86, 4.26, 5.13]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_string_wave_changeunits():
    '''Test reading data from an existing wave file and changing to English units.'''
    df = tools.read('tests/tabs_V_wave', units='E')
    # test column names
    dfcolumns = 'WaveHeight [ft]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[2.8, 4.26, 5.13]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_string_ven():
    '''Test reading data from an existing ven file.'''
    df = tools.read('tests/tabs_V_ven')
    # test column names
    dfcolumns = 'East [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[2.06, 23.59, 170., 24.02, 0., -2., 23.68, -23.16, 4.92]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_string_met_changeunits():
    '''Test reading data from an existing met file and changing to English units.'''
    df = tools.read('tests/tabs_V_met', units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tAirT [deg F]\tAtmPr [inHg]\tGust [kts]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [kts]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-8.34, -0.87, 60.3, 29.99, 12.32, 169.5, 0., 0., 0., 108.2, 8.38, 84.]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_met():
    '''Test reading 1st line of met data buoy V from mysql database.'''
    engine = tools.engine()
    query = 'select * from tabs_V_met limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'East [m/s]\tNorth [m/s]\tAirT [deg C]\tAtmPr [MB]\tGust [m/s]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [m/s]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-1.45, 1.07, 24.4, 1020.14, 8.33, 21.3, 0., -3., 0., 91.8, 1.8, 126.]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_ven():
    '''Test reading 1st line of ven data buoy V from mysql database.'''
    engine = tools.engine()
    query = 'select * from tabs_V_ven limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'East [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[15.11, -3.58, 256., 23.54, 0., -1., 15.53, 5.39, 14.56]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_met_changeunits():
    '''Test reading 1st line of met data buoy V from mysql database and changing to English units.'''
    engine = tools.engine()
    query = 'select * from tabs_V_met limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tAirT [deg F]\tAtmPr [inHg]\tGust [kts]\tComp [deg M]\tTx\tTy\tPAR \tRelH [%]\tSpeed [kts]\tDir from [deg T]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[-2.82, 2.08, 75.9, 30.12, 16.19, 21.3, 0., -3., 0., 91.8, 3.5, 126.]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_eng():
    '''Test reading 1st line of eng data buoy V from mysql database.'''
    engine = tools.engine()
    query = 'select * from tabs_V_eng limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'VBatt [Oper]\tSigStr [dB]\tComp [deg M]\tNping\tTx\tTy\tADCP Volt\tADCP Curr\tVBatt [sleep]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[13.9, -3.69, 256., 121., 0., -1., 29.94, 6.45, 13.9]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_wave_changeunits():
    '''Test reading 1st line of wave data buoy V from mysql database and changing to English units.'''
    engine = tools.engine()
    query = 'select * from tabs_V_wave limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'WaveHeight [ft]\tMeanPeriod [s]\tPeakPeriod [s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[3.8, 5.0539, 6.6667]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_salt():
    '''Test reading 1st line of salt data buoy V from mysql database.'''
    engine = tools.engine()
    query = 'select * from tabs_V_salt limit 0,1'
    df = tools.read([query, engine])
    # test column names
    dfcolumns = 'Temp [deg C]\tCond [ms/cm]\tSalinity\tDensity [kg/m^3]\tSoundVel [m/s]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[32.52, -0.01, 0.01, 99.9999, 99.9999]])
    assert np.allclose(dftail1, df.tail(1).values)
def test_read_mysql_ven_changeunits():
    '''Test reading 1st line of ven data buoy V from mysql database and change to English units.'''
    engine = tools.engine()
    query = 'select * from tabs_V_ven limit 0,1'
    df = tools.read([query, engine], units='E')
    # test column names
    dfcolumns = 'East [kts]\tNorth [kts]\tDir [deg T]\tWaterT [deg F]\tTx\tTy\tSpeed [kts]\tAcross [kts]\tAlong [kts]'
    assert '\t'.join(df.columns.values) == dfcolumns
    # test values in final line
    dftail1 = np.array([[0.29, -0.07, 256, 74.4, 0.0, -1.0, 0.3, 0.1, 0.28]])
    assert np.allclose(dftail1, df.tail(1).values)
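# Taken together, the tests above exercise two calling conventions for
# tools.read: a filepath string for flat files, and a [query, engine] pair for
# MySQL, each optionally with units='E'. A condensed restatement of both forms,
# using the same paths and table names as the tests:
import tools

# From a flat file, metric units (the default)
df_file = tools.read('tests/tabs_V_ven')

# Same file, converted to English units
df_file_e = tools.read('tests/tabs_V_ven', units='E')

# From MySQL: pass a [query, engine] pair instead of a path
engine = tools.engine()
df_sql = tools.read(['select * from tabs_V_ven limit 0,1', engine], units='E')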
def get_icdar_2013_detector_dataset(labels, skip_illegible=False):
    """DEPRECATED

    Get the ICDAR 2013 text segmentation dataset for detector training. Only
    the training set has the necessary annotations. For the test set, only
    segmentation maps are provided, which do not provide the necessary
    information for affinity scores.

    Args:
        labels: A list of (image_path, gt_filepath) pairs.
        skip_illegible: Whether to skip illegible characters.

    Yields:
        (image, character_bboxes, characters, mask, confidences) tuples.
        Confidence is always 1 for this dataset. We record confidence to allow
        for future support of weakly supervised cases.
    """
    for index in itertools.cycle(range(len(labels))):
        image_path, gt_filepath = labels[index]
        image = tools.read(image_path)
        lines = []
        character_bboxes = []
        characters = []
        confidences = []
        with open(gt_filepath, 'r') as f:
            current_line = []
            current_bbox = []
            for row in f.read().split('\n'):
                if row == '':
                    lines.append(current_line)
                    current_line = []
                else:
                    row = row.split(' ')[5:]
                    character = row[-1][1:-1]
                    if character == '' and skip_illegible:
                        continue
                    x1, y1, x2, y2 = map(int, row[:4])
                    current_bbox.append([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
                    characters.append(character)
        # Some lines contain only illegible characters; with skip_illegible=True
        # those lines end up blank.
        character_bboxes.append(np.array(current_bbox))
        confidences.append(1.0)
        yield image, character_bboxes, characters, np.ones(
            (image.shape[0], image.shape[1]), np.float32), confidences
def test_present():
    '''Test functionality of present.

    http://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call
    '''
    df = tools.read('tests/tabs_V_ven')
    f = io.StringIO()
    with redirect_stdout(f):
        tools.present(df)
    out = f.getvalue()
    assert out  # make sure not empty
    assert isinstance(out, str)
def test_make_text():
    '''Test file writing.'''
    df = tools.read('tests/tabs_V_ven')
    fname = 'tests/write_tabs_V_ven'
    run_daily.make_text(df, fname)
    assert open(fname).readlines() == [
        'Dates [UTC]\tEast [cm/s]\tNorth [cm/s]\tDir [deg T]\tWaterT [deg C]\tTx\tTy\tSpeed [cm/s]\tAcross [cm/s]\tAlong [cm/s]\n',
        '2017-01-05 00:00:00\t-2.50\t28.27\t139.00\t24.03\t0\t-1\t28.38\t-28.36\t0.96\n',
        '2017-01-05 00:30:00\t0.69\t25.37\t144.00\t24.03\t0\t-1\t25.38\t-25.10\t3.78\n',
        '2017-01-05 01:00:00\t1.06\t24.70\t161.00\t24.02\t0\t-1\t24.72\t-24.39\t4.06\n',
        '2017-01-05 01:30:00\t2.06\t23.59\t170.00\t24.02\t0\t-2\t23.68\t-23.16\t4.92\n',
    ]
    # remove file after checking
    remove(fname)
def recognize(self, images, detection_kwargs=None, recognition_kwargs=None):
    """Run the pipeline on one or multiple images.

    Args:
        images: The images to parse (a list of actual images or a list of filepaths).
        detection_kwargs: Arguments to pass to the detector call.
        recognition_kwargs: Arguments to pass to the recognizer call.

    Returns:
        A list of lists of (text, box) tuples.
    """
    # Make sure we have an image array to start with.
    if not isinstance(images, np.ndarray):
        images = [tools.read(image) for image in images]
    # This turns images into (image, scale) tuples temporarily.
    images = [
        tools.resize_image(image, max_scale=self.scale, max_size=self.max_size)
        for image in images
    ]
    max_height, max_width = np.array(
        [image.shape[:2] for image, scale in images]).max(axis=0)
    scales = [scale for _, scale in images]
    images = np.array([
        tools.pad(image, width=max_width, height=max_height)
        for image, _ in images
    ])
    if detection_kwargs is None:
        detection_kwargs = {}
    if recognition_kwargs is None:
        recognition_kwargs = {}
    box_groups = self.detector.detect(images=images, **detection_kwargs)
    prediction_groups = self.recognizer.recognize_from_boxes(
        images=images, box_groups=box_groups, **recognition_kwargs)
    # Undo the resize so boxes refer to the original image coordinates.
    box_groups = [
        tools.adjust_boxes(boxes=boxes, boxes_format='boxes', scale=1 / scale)
        if scale != 1 else boxes for boxes, scale in zip(box_groups, scales)
    ]
    return [
        list(zip(predictions, boxes))
        for predictions, boxes in zip(prediction_groups, box_groups)
    ]
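# A minimal end-to-end sketch of calling this pipeline, assuming a Pipeline
# class that wires a detector and recognizer together as above (the class name
# and the image path are assumptions for illustration).
pipeline = Pipeline()  # assumed wrapper holding self.detector and self.recognizer
prediction_groups = pipeline.recognize(['photo_of_sign.jpg'])

# Each entry is a list of (text, box) tuples for one input image
for text, box in prediction_groups[0]:
    print(text, box.tolist())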
def get_detector_image_generator(labels,
                                 width,
                                 height,
                                 augmenter=None,
                                 area_threshold=0.5):
    """Generate augmented (image, lines) tuples from a list of
    (filepath, lines, confidence) tuples. Confidence is not used right now but
    is included for a future release that uses semi-supervised data.

    Args:
        labels: A list of (image, lines, confidence) tuples.
        augmenter: An augmenter to apply to the images.
        width: The width to use for output images.
        height: The height to use for output images.
        area_threshold: The area threshold to use to keep characters in
            augmented images.
    """
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        # confidence (the third element, if present) is currently unused
        image_filepath, lines = labels[index][:2]
        image = tools.read(image_filepath)
        if augmenter is not None:
            image, lines = tools.augment(boxes=lines,
                                         boxes_format='lines',
                                         image=image,
                                         area_threshold=area_threshold,
                                         augmenter=augmenter)
        image, scale = tools.fit(image,
                                 width=width,
                                 height=height,
                                 mode='letterbox',
                                 return_scale=True)
        lines = tools.adjust_boxes(boxes=lines, boxes_format='lines', scale=scale)
        bboxes = [line[0] for line in lines]
        words = ''.join(line[1] for line in lines)
        yield (image[np.newaxis, ...],
               np.array(bboxes)[np.newaxis, ...],
               np.array(words)[np.newaxis, ...],
               np.ones((image.shape[0], image.shape[1]), np.float32)[np.newaxis, ...],
               np.ones(len(words), np.float32)[np.newaxis, ...])
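# A sketch of pulling one batch from this generator. `detector_labels` is
# assumed to have been built elsewhere as a list of (filepath, lines,
# confidence) tuples; the shapes follow the yield above, with the batch axis
# added by np.newaxis.
detector_labels = ...  # hypothetical; built by a dataset loader
gen = get_detector_image_generator(detector_labels, width=640, height=640)
image, bboxes, words, mask, word_confidences = next(gen)
print(image.shape)  # (1, 640, 640, 3) after letterbox fitting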
def run_continue(filename, generations, save_interval):
    """Run a set number of tournaments, saving along the way every
    save_interval generations."""
    start = time.time()
    mga = read(filename)
    # Run X generations and save every Y generations
    for g in range(int(generations / save_interval)):
        print('Running generations %i - %i...' %
              (g * save_interval, (g + 1) * save_interval))
        # Run simulation
        mga.runTournaments(save_interval * mga.popsize, report=True)
        # Save data
        generation = int(mga.generationsRun)
        date = mga.dateEdited
        filename = '%s_G%i_%s' % (filename[:-14], generation, date)
        save(filename, mga)
        print('%f sec elapsed so far \n' % (time.time() - start))
def get_recognizer_image_generator(labels, height, width, alphabet, augmenter=None):
    """Generate augmented (image, text) tuples from a list of
    (filepath, box, label) tuples.

    Args:
        labels: A list of (filepath, box, label) tuples.
        height: The height of the images to return.
        width: The width of the images to return.
        alphabet: The alphabet which limits the characters returned.
        augmenter: The augmenter to apply to images.
    """
    n_with_illegal_characters = sum(
        any(c not in alphabet for c in text) for _, _, text in labels)
    if n_with_illegal_characters > 0:
        print(f'{n_with_illegal_characters} / {len(labels)} instances have '
              'illegal characters.')
    labels = labels.copy()
    for index in itertools.cycle(range(len(labels))):
        if index == 0:
            random.shuffle(labels)
        filepath, box, text = labels[index]
        cval = np.random.randint(low=0, high=255, size=3).astype('uint8')
        if box is not None:
            image = tools.warpBox(image=tools.read(filepath),
                                  box=box.astype('float32'),
                                  target_height=height,
                                  target_width=width,
                                  cval=cval)
        else:
            image = tools.read_and_fit(filepath_or_array=filepath,
                                       width=width,
                                       height=height,
                                       cval=cval)
        text = ''.join([c for c in text if c in alphabet])
        if not text:
            continue
        if augmenter:
            image = augmenter.augment_image(image)
        yield (image, text)
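# A sketch of drawing one sample from the generator above, with a made-up
# single-item label list (the path and label text are hypothetical). With
# box=None, the tools.read_and_fit branch is taken.
import string

recognizer_labels = [('samples/word.png', None, 'hello')]
gen = get_recognizer_image_generator(recognizer_labels,
                                     height=31,
                                     width=200,
                                     alphabet=string.ascii_lowercase)
image, text = next(gen)
print(image.shape, text)  # e.g. (31, 200, 3) 'hello'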
def remake_file(buoys=None, tables=None, remaketype='hdf', remakefrom='txt'):
    '''Remake file from another file if messed up.

    Overwrites existing remaketype files.

    buoys (list): buoys to remake
    tables (list): tables to remake (just for TABS buoys). If buoys is None,
        tables will be read in for each buoy to cover all options.
    remaketype (str), default 'hdf': which type of file to remake
    remakefrom (str), default 'txt': which type of existing file to use to
        remake the other file from.
    Options for both are 'hdf' and 'txt'.
    '''
    if buoys is None:
        buoys = bys.index
    # loop through buoys
    for buoy in buoys:
        # pull out the non-nan table values to loop over valid table names
        if len(buoy) == 1 and tables is None:
            tables = [
                bys.loc[buoy, table] for table in tablekeys
                if not pd.isnull(bys.loc[buoy, table])
            ]
        elif tables is None:
            tables = ['unused']
        # loop through tables for each buoy
        for table in tables:
            if len(buoy) == 1:
                assert table is not None, 'need to input table when using TABS buoy'
                fname = path.join('..', 'daily', 'tabs_' + buoy + '_' + table + '_all')
            else:
                fname = path.join('..', 'daily', buoy + '_all')
            # read from remakefrom file, write to remaketype file
            df = tools.read(fname, remakefrom)
            tools.write_file(df, fname, filetype=remaketype, mode='w', append=False)
def getRoomObjList():
    global room_obj_list
    global files
    global isinit
    if isinit:
        return room_obj_list
    logging.info('init room obj list')
    md = read(conf.videolist_path)
    lines = md.split('\n')
    for l in lines:
        match = re.match(r'\[(.*)\]\((.*)\)', l)
        if l and match:
            room_obj = {
                'file_name': match.group(1) + '.mp4',
                'url': match.group(2)
            }
            if room_obj.get('file_name', '') in files:
                logging.info(room_obj.get('file_name', '') + ' already exists')
            else:
                room_obj_list.append(room_obj)
    isinit = True
    return room_obj_list
def __init__(self, source) -> None:
    self.source = tools.read(source)
    if isinstance(self.source, str):
        self.source = self.source.splitlines()
    parts = []  # List[str]
    self.individuals = []  # List['ParseDisplayOutput.Individual']
    in_header = True
    for line in self.source:
        line = line.strip()
        # skip header, until a line starts with Version
        if in_header:
            if line.startswith('Version'):
                in_header = False
            else:
                continue
        if line.startswith('Version'):
            mpos = line.index(', Market:')
            self.version = line[8:mpos].strip()
            self.market = line[mpos + 9:].strip()
            continue
        if line.startswith('First'):
            last = line.find('Last:') - 1
            while last > 0 and line[last] == ' ':
                last = last - 1
            if last > 0 and line[last] != ',':
                line = line[:last + 1] + ',' + line[last + 1:]
        if len(line) > 0:
            parts.extend(map(str.strip, line.split(',')))
        else:
            individual = self.parse_individual(parts)
            if individual is not None:
                self.individuals.append(individual)
            parts = []
def load_data(*filenames):
    features = None
    labels = None
    for name in filenames:
        data = read(name)
        # One feature row per board state: the unpacked bits plus a turn-parity flag
        file_features = np.array([
            np.append(np.unpackbits(state), i % 2)
            for game in data for i, state in enumerate(game[0])
        ])
        file_labels = np.array([game[1] for game in data for _ in game[0]])
        if features is None:
            features = file_features
            labels = file_labels
        else:
            features = np.append(features, file_features, axis=0)
            labels = np.append(labels, file_labels, axis=0)
    return features, labels
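# The feature encoding above unpacks each byte-packed board state into
# individual bits and appends a turn-parity flag. A tiny self-contained
# demonstration of that transform; the two-byte state is invented.
import numpy as np

packed_state = np.array([0b10100000, 0b00000001], dtype=np.uint8)  # fake board
move_index = 3                                                     # fourth state
feature = np.append(np.unpackbits(packed_state), move_index % 2)
print(feature)
# [1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1]  <- 16 bits + parity flag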
def detect(self,
           images: typing.List[typing.Union[np.ndarray, str]],
           detection_threshold=0.7,
           text_threshold=0.4,
           link_threshold=0.4,
           size_threshold=10,
           **kwargs):
    """Recognize the text in a set of images.

    Args:
        images: Can be a list of numpy arrays of shape HxWx3 or a list of filepaths.
    """
    images = [compute_input(tools.read(image)) for image in images]
    boxes = []
    for image in images:
        boxes.append(
            getBoxes(self.model.predict(image[np.newaxis], **kwargs),
                     detection_threshold=detection_threshold,
                     text_threshold=text_threshold,
                     link_threshold=link_threshold,
                     size_threshold=size_threshold)[0])
    return boxes
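# A minimal usage sketch, assuming a Detector class exposing the detect()
# method above (the class name and image path are assumptions).
detector = Detector()  # assumed wrapper holding self.model
box_groups = detector.detect(images=['storefront.jpg'],
                             detection_threshold=0.7,
                             text_threshold=0.4)
print(len(box_groups[0]), 'word boxes found in the first image')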
def remake_file(buoys=None, tables=None, remaketype='hdf', remakefrom='txt'):
    '''Remake file from another file if messed up.

    Overwrites existing remaketype files.

    buoys (list): buoys to remake
    tables (list): tables to remake (just for TABS buoys). If buoys is None,
        tables will be read in for each buoy to cover all options.
    remaketype (str), default 'hdf': which type of file to remake
    remakefrom (str), default 'txt': which type of existing file to use to
        remake the other file from.
    Options for both are 'hdf' and 'txt'.
    '''
    if buoys is None:
        buoys = bys.keys()
    # loop through buoys
    for buoy in buoys:
        # pull out the non-nan table values to loop over valid table names
        if len(buoy) == 1 and tables is None:
            tables = [
                bys[buoy][table] for table in tablekeys
                if not pd.isnull(bys[buoy][table])
            ]
        elif tables is None:
            tables = ['unused']
        # loop through tables for each buoy
        for table in tables:
            if len(buoy) == 1:
                assert table is not None, 'need to input table when using TABS buoy'
                fname = path.join('..', 'daily', 'tabs_' + buoy + '_' + table + '_all')
            else:
                fname = path.join('..', 'daily', buoy + '_all')
            # read from remakefrom file, write to remaketype file
            df = tools.read(fname, remakefrom)
            tools.write_file(df, fname, filetype=remaketype, mode='w', append=False)
def readwrite(buoy, table=None, dstart=pd.Timestamp('1980-1-1', tz='utc')):
    '''Creates or updates buoy data files.

    Reads through yesterday so that, when appended to, everything is
    consistent. This will take a long time to run if none of the files exist.
    Note that dstart is ignored if the buoy data file already exists.
    '''
    # Bring data in file up through yesterday. This way files are
    # consistent regardless of what time of day the script is run.
    dend = pd.Timestamp('now', tz='UTC').normalize()
    # file write flags
    mode = 'w'
    append = False  # for hdf file
    if len(buoy) == 1:
        assert table is not None, 'need to input table when using TABS buoy'
        fname = path.join('..', 'daily', 'tabs_' + buoy + '_' + table + '_all')
    else:
        fname = path.join('..', 'daily', buoy + '_all')
    # if buoy is inactive and its "all" file exists, don't read
    if buoy in bys.keys() and not bys[buoy]['active'] and path.exists(fname):
        return
    # two types of files
    Types = ['txt', 'hdf']
    # if any of the files exist, make sure they are consistent
    if np.asarray([path.exists(fname + '.' + Type) for Type in Types]).any():
        lastrows = []
        for Type in Types:
            # get last row in file
            try:
                lastrows.append(tools.read(fname, Type, lastlineonly=True))
            # if can't get last row, remake file
            except:
                logging.warning('Could not access existing file %s of type %s. Will remake.'
                                % (fname, Type))
                # try the other type of file to remake this file if needed
                othertype = [temp for temp in Types if temp != Type]
                try:
                    remake_file(buoys=[buoy], tables=[table],
                                remaketype=Type, remakefrom=othertype[0])
                    logging.warning('Remade file of type %s from type %s for buoy %s'
                                    % (Type, othertype[0], buoy))
                except:
                    logging.warning('Could not remake file for buoy %s' % (buoy))
                # now the file should exist, so the last row can be read
                lastrows.append(tools.read(fname, Type, lastlineonly=True))
        # if last rows are not the same, remake the shorter file
        if not lastrows[0] == lastrows[1]:
            lastrow, lastrow2 = lastrows
            Type, Type2 = Types
            if lastrow < lastrow2:
                remake_file(buoys=[buoy], remaketype=Type, remakefrom=Type2)
                logging.warning('File type %s for buoy %s was short and remade with file type %s.'
                                % (Type, buoy, Type2))
            elif lastrow2 < lastrow:
                remake_file(buoys=[buoy], remaketype=Type2, remakefrom=Type)
                logging.warning('File type %s for buoy %s was short and remade with file type %s.'
                                % (Type2, buoy, Type))
    # now files should be consistent at this point if they already exist.
    # if file already exists, overwrite dstart with the day after the last line
    if path.exists(fname + '.hdf'):
        # read the last timestamp from the hdf file ('hdf' rather than the
        # leftover loop variable Type, which may be undefined here)
        dstart = tools.read(fname, 'hdf', lastlineonly=True).normalize() \
                      .tz_localize('UTC') + pd.Timedelta('1 days')
        mode = 'a'     # overwrite write mode
        append = True  # overwrite append mode for hdf
    df = read.read(buoy, dstart, dend, table=table, units='M', tz='UTC',
                   usemodel=False, userecent=False)
    # can't append to file with empty dataframe
    if df is not None and not (mode == 'a' and df.empty):
        for Type in Types:
            try:
                tools.write_file(df, fname, filetype=Type, mode=mode, append=append)
            except:
                logging.warning('Could not write to file %s of type %s. Will remake.'
                                % (fname, Type))
                # try the other type of file to remake this file if needed
                othertype = [temp for temp in Types if temp != Type]
                try:
                    remake_file(buoys=[buoy], tables=[table],
                                remaketype=Type, remakefrom=othertype[0])
                    logging.warning('Remade file of type %s from type %s for buoy %s'
                                    % (Type, othertype[0], buoy))
                except:
                    logging.warning('Could not remake file for buoy %s' % (buoy))
    else:
        logging.warning('No new data has been read in for buoy ' + buoy +
                        ' table ' + table)
os.chdir(lookup_dir)

# print welcome message
print 'markdown to html conversion for all txt files in this directory'
print 'and all subdirectories - for usage see comments in the python source'
print 'start: ' + str(datetime.time(datetime.now()))

# clean and recreate install directory
if os.path.isdir(install_dir):
    shutil.rmtree(install_dir)
os.mkdir(install_dir)

# load header
header = template_dir + os.sep + header_file
if os.path.exists(header) and os.path.isfile(header):
    header = tools.read(header)
else:
    header = u''

# copy stylesheet if it exists
css = template_dir + os.sep + style_file
if os.path.exists(css) and os.path.isfile(css):
    text = tools.read(css)
    # must save as ascii because the Java CSS import in JEditorPane does not read utf-8
    tools.write(install_dir + os.sep + style_file, text, 'ascii')

# locate all lookup files
print "locating markdown files"
folders, files = tools.locate(lookup_files)
number = len(files)
print "found %d files" % number