def init_name_proposals(self):
    """Build the table of name proposals for new measurements.

    Known names are gathered from the ``name_index.tsv`` of the databases that
    keep one and, for the remaining databases, through ``NameIndex.read_files``
    on their data CSV glob. After duplicates are removed, every usable true
    name that starts with a known manufacturer is split into manufacturer and
    model.

    The result is stored in ``self.name_proposals`` as a DataFrame with the
    columns ``form``, ``manufacturer`` and ``model``; nothing is returned.
    """
    name_proposals = NameIndex()
    for db in ['crinacle', 'oratory1990', 'rtings', 'referenceaudioanalyzer']:
        name_index = NameIndex.read_tsv(os.path.join(DIR_PATH, db, 'name_index.tsv'))
        name_proposals.concat(name_index)
    for db in ['innerfidelity', 'headphonecom']:
        name_index = NameIndex.read_files(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'))
        name_proposals.concat(name_index)
    name_proposals.remove_duplicates()

    # Escape every name so that regex metacharacters in a manufacturer name
    # are matched literally instead of being interpreted as pattern syntax.
    alternatives = '|'.join(re.escape(m[0]) for m in self.manufacturers.manufacturers)
    manufacturer_pattern = re.compile(rf'^({alternatives})', flags=re.IGNORECASE)

    proposal_data = {'form': [], 'manufacturer': [], 'model': []}
    for item in name_proposals.items:
        if not item.true_name or item.form == 'ignore':
            continue
        match = manufacturer_pattern.search(item.true_name)
        if not match:
            continue
        manufacturer = match[0]
        proposal_data['form'].append(item.form)
        proposal_data['manufacturer'].append(manufacturer)
        # The pattern is anchored at the start, so the model is whatever follows
        # the matched prefix. Slicing (rather than str.replace) leaves later
        # repetitions of the manufacturer text inside the model name intact.
        proposal_data['model'].append(item.true_name[len(manufacturer):].strip())

    self.name_proposals = pd.DataFrame(proposal_data)
def main():
    """Report data entries that are missing from their database's name index.

    For each listed database, every path matching ``data/*/*`` is looked up in
    the name index by its own name and by the name of its parent directory
    (used as the form). Entries without a match are printed with a
    ``Removing:`` prefix; this function only prints and deletes nothing.
    """
    for db_name in ('crinacle', 'oratory1990', 'rtings'):
        db_dir = os.path.join(DIR_PATH, db_name)
        index = NameIndex.read_tsv(os.path.join(db_dir, 'name_index.tsv'))
        for entry in glob(os.path.join(db_dir, 'data', '*', '*')):
            entry_name = os.path.basename(entry)
            # The directory directly above the entry names its form
            form = os.path.basename(os.path.dirname(entry))
            if index.find(true_name=entry_name, form=form):
                continue
            print(f'Removing: "{entry}"')
def rename_manufacturers():
    """Apply manufacturer name replacement to name indexes and data files.

    For every database in ``DBS``:

    * if a ``name_index.tsv`` exists, each non-ignored item with a true name is
      passed through ``ManufacturerIndex.replace``; changed names are updated
      in the index, which is then written back to the same file;
    * every ``*.csv`` below ``data`` is moved to
      ``<parent of its directory>/<true_name>/<true_name>.csv`` when that
      differs from its current location, and the old directory is removed if
      it can be (best effort).

    Names for which ``replace`` returns ``None`` are reported and skipped.
    """
    manufacturers = ManufacturerIndex()
    for db in DBS:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            # Rename entries in name index if such exists
            name_index = NameIndex.read_tsv(index_path)
            for item in name_index.items:
                if item.form == 'ignore' or not item.true_name:
                    continue
                true_name = manufacturers.replace(item.true_name)
                if true_name is None:
                    # Report the item's own name; no file name is in scope in
                    # this loop, so referring to one would fail or be stale.
                    print(f'"{item.true_name}" not found in manufacturers')
                    continue
                if true_name == item.true_name:
                    continue
                print(f'Renamed "{item.true_name}" with "{true_name}"')
                name_index.update(
                    NameItem(item.false_name, true_name, item.form),
                    item.false_name, item.true_name, item.form
                )
            name_index.write_tsv(index_path)

        # Rename existing files
        existing_files = list(glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True))
        for fp in existing_files:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            # Compare normalised paths so a file already in place is not moved
            if os.path.normcase(os.path.normpath(new_file_path)) != os.path.normcase(os.path.normpath(fp)):
                print(f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"')
                shutil.move(fp, new_file_path)
                try:
                    # Best effort: the old directory may still hold other files
                    os.rmdir(dir_path)
                except OSError:
                    pass
def get_name_proposals(self):
    """Collect name proposals for new measurements.

    Merges the ``name_index.tsv`` of the databases that keep one with the
    result of ``NameIndex.read_files`` on the data CSV glob of the others,
    then removes duplicates.

    Returns:
        NameIndex
    """
    indexed_dbs = ('crinacle', 'oratory1990', 'rtings', 'referenceaudioanalyzer')
    file_only_dbs = ('innerfidelity', 'headphonecom')

    proposals = NameIndex()
    for db_name in indexed_dbs:
        tsv_path = os.path.join(DIR_PATH, db_name, 'name_index.tsv')
        proposals.concat(NameIndex.read_tsv(tsv_path))
    for db_name in file_only_dbs:
        csv_glob = os.path.join(DIR_PATH, db_name, 'data', '**', '*.csv')
        proposals.concat(NameIndex.read_files(csv_glob))

    proposals.remove_duplicates()
    return proposals
def main():
    """Apply manufacturer name replacement to name indexes and data files.

    For each listed database the name index is read from ``name_index.tsv``
    (an empty ``NameIndex`` is used when the file does not exist). Each
    non-ignored item with a true name is passed through
    ``ManufacturerIndex.replace`` and updated when the name changes; the index
    is written back only if it is truthy.

    Afterwards every ``*.csv`` below ``data`` is moved to
    ``<parent of its directory>/<true_name>/<true_name>.csv`` when that differs
    from its current location, and the old directory is removed if it can be
    (best effort). Names for which ``replace`` returns ``None`` are reported
    and skipped.
    """
    manufacturers = ManufacturerIndex()
    for db in ['crinacle', 'headphonecom', 'innerfidelity', 'oratory1990', 'rtings']:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            name_index = NameIndex.read_tsv(index_path)
        else:
            name_index = NameIndex()

        for item in name_index.items:
            if item.form == 'ignore' or not item.true_name:
                continue
            true_name = manufacturers.replace(item.true_name)
            if true_name is None:
                # Report the item's own name; no file name is in scope in this
                # loop, so referring to one would fail or be stale.
                print(f'"{item.true_name}" not found in manufacturers')
                continue
            if true_name == item.true_name:
                continue
            print(f'Renamed "{item.true_name}" with "{true_name}"')
            name_index.update(
                NameItem(item.false_name, true_name, item.form),
                item.false_name, item.true_name, item.form
            )
        if name_index:
            name_index.write_tsv(index_path)

        existing = list(glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True))
        for fp in existing:
            dir_path, name = os.path.split(fp)
            name = name.replace('.csv', '')
            true_name = manufacturers.replace(name)
            if true_name is None:
                print(f'"{name}" not found in manufacturers')
                continue
            new_dir_path = os.path.abspath(os.path.join(dir_path, os.pardir, true_name))
            new_file_path = os.path.join(new_dir_path, f'{true_name}.csv')
            os.makedirs(new_dir_path, exist_ok=True)
            # Compare normalised paths so a file already in place is not moved
            if os.path.normcase(os.path.normpath(new_file_path)) != os.path.normcase(os.path.normpath(fp)):
                print(f'Moved "{os.path.relpath(fp, DIR_PATH)}" to "{os.path.relpath(new_file_path, DIR_PATH)}"')
                shutil.move(fp, new_file_path)
                try:
                    # Best effort: the old directory may still hold other files
                    os.rmdir(dir_path)
                except OSError:
                    pass
def read_name_index():
    """Read ``name_index.tsv`` located directly under ``DIR_PATH``.

    Returns:
        The result of ``NameIndex.read_tsv`` for that file.
    """
    index_path = os.path.join(DIR_PATH, 'name_index.tsv')
    return NameIndex.read_tsv(index_path)
def _form_from_path(path):
    """Return the deepest component of *path* that names a form, or None if there is none."""
    remaining = path
    while True:
        parent, component = os.path.split(remaining)
        if component in ('onear', 'inear', 'earbud'):
            return component
        if parent == remaining:
            # Splitting no longer makes progress (root or empty path reached)
            return None
        remaining = parent


def rename_groups(databases=DBS):
    """Rename measurements according to the groups in ``name_groups.tsv``.

    Each line of ``name_groups.tsv`` is tab separated: the first column is the
    true name and every following column is a false name that maps to it.
    For every false name -> true name pair and every database:

    * index items whose true name is the false name are updated to the new
      name, or have their form set to ``'ignore'`` when the new name is the
      literal ``'ignore'``;
    * data files whose name equals the false name (case-insensitively) are
      moved to the path obtained by substituting the new name, or their
      directory is deleted when the new name is ``'ignore'``;
    * an index item is added when a file was handled but the index had no
      corresponding entry.

    All name indexes are written back to their ``name_index.tsv`` at the end.

    Args:
        databases: Names of the database directories under ``DIR_PATH``.
    """
    with open(os.path.join(DIR_PATH, 'name_groups.tsv'), 'r', encoding='utf-8') as fh:
        lines = fh.read().strip().split('\n')

    # First column is always the true name
    # Create dict with each false name as key and its true name as value
    name_map = dict()
    for line in lines:
        true_name, *false_names = line.split('\t')
        for false_name in false_names:
            name_map[false_name] = true_name

    # Read name indexes and existing files for all supported measurement databases
    dbs = []
    for db in databases:
        index_path = os.path.join(DIR_PATH, db, 'name_index.tsv')
        if os.path.isfile(index_path):
            # Read name index
            name_index = NameIndex.read_tsv(index_path)
        else:
            # No name index, create one anew
            name_index = NameIndex()
        # Read all the existing files for the database
        files = [
            {'name': os.path.split(file)[1].replace('.csv', ''), 'path': file}
            for file in glob(os.path.join(DIR_PATH, db, 'data', '**', '*.csv'), recursive=True)
        ]
        # Save both to dbs
        dbs.append({'name': db, 'name_index': name_index, 'files': files})

    for old_name, new_name in name_map.items():
        print(f'"{old_name}" -> "{new_name}"')
        for db in dbs:
            name_index = db['name_index']

            # Replace true names in name index with the new name
            updated_item = False
            matches = name_index.find(true_name=old_name)
            for item in matches.items:
                if new_name == 'ignore':
                    name_index.update(
                        NameItem(false_name=item.false_name, true_name=item.true_name, form='ignore'),
                        true_name=old_name)
                    print(f' Updated item: "{item.false_name}", "{new_name}", "ignore"')
                else:
                    name_index.update(
                        NameItem(false_name=item.false_name, true_name=new_name, form=item.form),
                        true_name=old_name)
                    print(f' Updated item: "{item.false_name}", "{new_name}", "{item.form}"')
                updated_item = True

            # Rename existing files
            matching_files = [
                (f['name'], f['path']) for f in db['files']
                if f['name'].lower() == old_name.lower()
            ]
            for name, path in matching_files:
                if new_name == 'ignore':
                    print(f' Removing "{os.path.split(path)[0]}"')
                    shutil.rmtree(os.path.split(path)[0])
                    if not updated_item:
                        name_index.add(NameItem(false_name=old_name, true_name=None, form='ignore'))
                        print(f' Added item: "{old_name}", "", "ignore"')
                    continue

                # Plain substring replacement: the new name must be inserted
                # literally, not interpreted as a regex replacement template
                # (where backslash sequences would be expanded).
                new_path = path.replace(name, new_name)
                print(f' Moving "{os.path.relpath(path, DIR_PATH)}" to "{os.path.relpath(new_path, DIR_PATH)}"')
                os.makedirs(os.path.split(new_path)[0], exist_ok=True)
                shutil.move(path, new_path)
                try:
                    # Remove the directory the file used to live in. Best
                    # effort: it may still contain other files, or be the very
                    # directory the file now lives in on a case-insensitive
                    # file system.
                    os.rmdir(os.path.dirname(path))
                except OSError:
                    pass

                if not name_index.find(true_name=new_name):
                    form = _form_from_path(path)
                    if form is None:
                        # Without a form directory in the path there is nothing
                        # sensible to record; report it instead of looping forever.
                        print(f' Could not determine form from "{path}", item not added')
                        continue
                    name_index.add(NameItem(false_name=old_name, true_name=new_name, form=form))
                    print(f' Added item: "{old_name}", "{new_name}", "{form}"')
        print()

    for db in dbs:
        db['name_index'].write_tsv(os.path.join(DIR_PATH, db['name'], 'name_index.tsv'))