def visualizeMarks(marksPath, userInput=False):
    """Display annotated frames with their bounding boxes drawn on top.

    Args:
        marksPath: path to a marks JSON file. Frame images are looked up in the
            ``const.frames`` directory that sits next to this file.
        userInput: when True, frame paths are read from stdin one per line and
            an empty line (or EOF) ends the session. When False, every frame
            listed in the marks file is replayed in a loop.

    Pressing ``q`` or ``Esc`` in the image window stops the visualization.
    """
    import cv2

    quitKeys = (27, ord("q"))  # Esc, q

    def showResult(imagePath, box):
        """Show one frame; return the pressed key code or None if unreadable."""
        image = cv2.imread(imagePath)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            print(f"Unable to read image {imagePath}")
            return None

        y1, x1, y2, x2 = box  # coordinates are unpacked as (y1, x1, y2, x2)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0))
        cv2.imshow("image", image)
        # waitKey returns -1 when nothing was pressed; mask to the low byte
        return cv2.waitKey(100) & 0xFF

    marks = openJsonSafely(marksPath)
    framesPath = os.path.join(os.path.dirname(marksPath), const.frames)

    show = True
    while show:
        if userInput:
            try:
                framePath = input()
            except EOFError:
                break
            if not framePath:
                break

            frame = extractBasename(framePath)
            if frame not in marks:
                print(f"No marks found for {framePath}")
                continue

            key = showResult(framePath, marks[frame][const.coords])
            if key in quitKeys:
                show = False
        else:
            displayed = False
            for frame in marks:
                frameName = marks[frame][const.image]
                box = marks[frame][const.coords]
                key = showResult(os.path.join(framesPath, frameName), box)
                if key is None:
                    continue
                displayed = True
                if key in quitKeys:
                    show = False
                    break

            if not displayed:
                # Nothing could be shown (no marks or no readable images):
                # stop instead of spinning in an endless empty loop.
                show = False

    cv2.destroyAllWindows()
def updateCategoriesIndices(datasetPath, categories):
    """Rewrite the category index stored in every marks file of a dataset.

    Marks files are located with ``walk``; each result is a sequence of path
    components relative to ``datasetPath`` whose elements at positions -3 and
    -2 are used as the category and subcategory. Files whose full category is
    not in ``categories`` are left untouched. For the others, every entry gets
    ``const.ctgIdx`` set to the position of its ``const.fullCategory`` inside
    ``categories``.

    Args:
        datasetPath: root directory of the dataset.
        categories: ordered list of full category names.

    Errors for a single file are printed and do not stop the processing of the
    remaining files.
    """
    from utils import walk, makeJSONname
    from verifier import getFullCategory

    found = walk(datasetPath, targetFiles=makeJSONname(const.marks)).get("files")
    marksFiles = found or []  # a missing "files" entry means nothing to do

    for mrk in marksFiles:
        try:
            marksPath = os.path.join(datasetPath, *mrk)
            category, subcategory = mrk[-3:-1]

            if getFullCategory(category, subcategory) not in categories:
                continue

            marks = openJsonSafely(marksPath)
            for value in marks.values():
                value[const.ctgIdx] = categories.index(value[const.fullCategory])

            # The context manager guarantees the file is flushed and closed
            with open(marksPath, "w") as marksFile:
                json.dump(marks, marksFile, indent=3)

            print(f"{Fore.BLUE}JSON file {marksPath} has been fixed{Style.RESET_ALL}")
        except Exception as e:
            # best effort: report the problem and move on to the next file
            print(e)
def summarizeInfo(rawPath=Path.raw, summarizedPath=Path.summarizedRaw, allowedCategories=None,
                  allowedSubCtgList=None, overwrite=True):
    """Collect per-subcategory statistics for raw videos into one JSON file.

    The resulting structure is ``summarized[category][subcategory]`` holding
    ``const.overall`` (number of marks), ``const.ctgIdx`` (index assigned in
    order of first appearance), ``const.videos`` (marks per video) and
    ``const.parent`` (the category). The next free index is kept under
    ``const.maxIdx``.

    Args:
        rawPath: directory containing the ``const.videos`` and ``const.json``
            sub-directories.
        summarizedPath: path of the summary JSON file to write.
        allowedCategories: categories to process; None means all of them.
        allowedSubCtgList: subcategories to process; None means all of them.
        overwrite: when False, the existing summary is loaded and extended
            instead of being rebuilt from scratch.
    """
    summarized = {} if overwrite else openJsonSafely(summarizedPath)

    rawVideosPath = os.path.join(rawPath, const.videos)
    rawJsonsPath = os.path.join(rawPath, const.json)

    rawVideos = sorted(j for j in os.listdir(rawVideosPath) if j.endswith(Extensions.videos()))
    videosCount = len(rawVideos)

    maxIdx = summarized.get(const.maxIdx, 0)
    for i, video in enumerate(rawVideos):
        print(f"\rProcessing {video} ({i + 1} out of {videosCount})", end="")

        category, name = extractCategory(video)
        # None means "no restriction", mirroring allowedSubCtgList
        if allowedCategories is not None and category not in allowedCategories:
            continue

        categoryInfo = summarized.get(category, {})

        videoJson = os.path.join(rawJsonsPath, makeJSONname(name))
        videoMarks = getVideoMarks(os.path.join(rawVideosPath, video), videoJson)

        for subctg, subctgMarks in videoMarks.items():
            if allowedSubCtgList is not None and subctg not in allowedSubCtgList:
                continue

            if subctg in categoryInfo:
                curSubctgMarks = categoryInfo[subctg]
            else:
                curSubctgMarks = {
                    const.overall: 0,
                    const.ctgIdx: maxIdx,
                    const.videos: {},
                    const.parent: category
                }
                maxIdx += 1

            # Count a video only once so repeated runs do not inflate totals
            if video not in curSubctgMarks[const.videos]:
                curSubctgMarks[const.videos][video] = subctgMarks
                curSubctgMarks[const.overall] += len(subctgMarks)

            categoryInfo[subctg] = curSubctgMarks

        if categoryInfo:
            summarized[category] = categoryInfo

    summarized[const.maxIdx] = maxIdx

    with open(summarizedPath, "w") as summarizedFile:
        json.dump(summarized, summarizedFile, indent=3)

    print(f"\n{Fore.GREEN}Summarized info file {summarizedPath} has been updated{Style.RESET_ALL}")
def fixFrameNumbers(jsonPath):
    """Shift frame keys in every JSON marks file of a directory.

    For each file the offset reported by ``getKeysOffset`` is subtracted from
    the number of every frame key, and the entries are written back under the
    new ``frame_<number>`` keys. Empty files and files whose offset is already
    zero are skipped.

    Args:
        jsonPath: directory containing the JSON marks files.
    """
    jsons = [j for j in os.listdir(jsonPath) if j.endswith(Extensions.json)]

    for js in jsons:
        path = os.path.join(jsonPath, js)
        marks = openJsonSafely(path)
        if not marks:
            continue

        offset = getKeysOffset(marks.keys())
        if offset == 0:
            continue

        fixedMarks = {
            "frame_{}".format(getFrameNumber(frame) - offset): info
            for frame, info in marks.items()
        }

        # The context manager guarantees the file is flushed and closed
        with open(path, "w") as marksFile:
            json.dump(fixedMarks, marksFile, indent=3)

        print(f"{Fore.BLUE}JSON file {path} has been fixed{Style.RESET_ALL}")
def frameVideo(filePath, marksPath, datasetPath, actualInfo, overwrite=False, extension=Extensions.jpg,
               params=None, ctgLimit=None):
    """Cut a marked video into frames and store them with their marks.

    Frames that have marks are written to
    ``<datasetPath>/<const.original>/<category>/<subcategory>/<const.frames>``
    and their annotations are merged into the marks JSON file of the same
    subcategory directory. Categories that were not known before are appended
    to the file at ``Path.categories``.

    Args:
        filePath: path to the video file.
        marksPath: directory with the JSON marks file of the video.
        datasetPath: root directory of the dataset being built.
        actualInfo: nested counters of already extracted frames; it is modified
            in place through ``updateNested``.
        overwrite: rewrite frames that already exist on disk.
        extension: image extension for the stored frames.
        params: encoding parameters forwarded to ``cv2.imwrite``.
        ctgLimit: stop once the frame index reaches this value; None disables
            the limit.
    """
    categories = readLines(Path.categories)
    basename = extractBasename(filePath)

    try:
        jsonName = makeJSONname(basename)
        with open(os.path.join(marksPath, jsonName), "r") as marksFile:
            marks = json.load(marksFile)
    except Exception:
        # best effort: a missing or broken marks file just skips this video
        print(
            f"{Fore.RED}There is no json file {marksPath} for {filePath} {Style.RESET_ALL}"
        )
        return

    framesGenerator = generateFrames(filePath)
    offset = getKeysOffset(marks.keys())

    marksSeparated = {}
    total = 0
    globalIdx = None  # resolved on the first marked frame, which may not be frame 0

    for idx, frame in enumerate(framesGenerator):
        frameMarks = getFrameMarks(idx, marks, offset)
        if not frameMarks:
            continue

        category = frameMarks[const.category]
        subcategory = frameMarks[const.subcategory]
        countKeys = [const.original, category, subcategory]

        if globalIdx is None:
            globalIdx = getNested(dictionary=actualInfo, keys=countKeys, default=0)

        localIdx = idx + globalIdx
        # ">=" because unmarked frames are skipped, so the index can jump over the limit
        if ctgLimit is not None and localIdx >= ctgLimit:
            break

        frameID = f"frame_{localIdx}"
        fullCategory = getFullCategory(category, subcategory)

        if fullCategory not in categories:
            categories.append(fullCategory)
        ctgIdx = categories.index(fullCategory)

        frameName = f"{fullCategory}{const.separator}{frameID}{const.separator}{const.original}"
        imageName = extendName(frameName, extension)

        framesPath = os.path.join(datasetPath, const.original, category, subcategory, const.frames)
        framePath = os.path.join(framesPath, imageName)

        # NOTE(review): presumably increments the per-subcategory counter - confirm
        updateNested(dictionary=actualInfo, keys=countKeys, value=1)

        if not overwrite and os.path.exists(framePath):
            print("\rFrame #{} has been passed".format(idx), end="")
            continue

        os.makedirs(framesPath, exist_ok=True)

        frameInfo = {
            const.image: imageName,
            const.coords: fitCoords(frameMarks[const.coords], frame.shape[:2]),
            const.fullCategory: fullCategory,
            const.ctgIdx: ctgIdx,
            const.imageShape: frame.shape[:2]
        }

        keySet = countKeys + [frameName]  # ["original", category, subcategory, frameName]
        putNested(dictionary=marksSeparated, keys=keySet, value=frameInfo)

        cv2.imwrite(framePath, frame, params)
        total += 1

        print("\rFrame #{} has been added".format(idx), end="")

    # No frame written means the "original" level was never created
    marksSeparated = marksSeparated.get(const.original, {})
    print()

    for ctg, value in marksSeparated.items():
        for subctg, subctgMarks in value.items():
            subctgMarksJson = os.path.join(
                datasetPath, const.original, ctg, subctg, extendName(const.marks, Extensions.json))

            oldMarks = openJsonSafely(subctgMarksJson)
            for k, v in subctgMarks.items():
                oldMarks[k] = v

            with open(subctgMarksJson, "w") as subctgMarksFile:
                json.dump(oldMarks, subctgMarksFile, indent=3)

            print(
                f"{Fore.GREEN}Added marks to {subctgMarksJson} {Style.RESET_ALL}"
            )

    writeLines(categories, Path.categories)
    print(
        f"{Fore.GREEN}Updated categories file {Path.categories} {Style.RESET_ALL}"
    )
    print(f"{Fore.GREEN}Added {total} frames in total {Style.RESET_ALL}")
def extract(ctg, ctgInfo, videosPath=Path.rawVideos, extractionPath=Path.original, extension=Extensions.jpg,
            limit=None, augmentFunc=None, augmentations=None, augmentationName=const.augmented,
            augmentationPath=None, overwriteOriginal=False, overwriteAugmented=True):
    """Extract frames of one subcategory and optionally augment them.

    Original frames are written below ``<extractionPath>/<parent>/<ctg>`` and,
    when ``augmentFunc`` is given, augmented copies below
    ``<augmentationPath>/<parent>/<ctg>``. Marks for both sets are merged into
    the marks JSON file of the respective directory.

    Args:
        ctg: subcategory name.
        ctgInfo: summary of the subcategory; ``const.videos``,
            ``const.overall`` and ``const.ctgIdx`` are read from it, and
            ``const.parent`` if present.
        videosPath: directory with the raw videos.
        extractionPath: root directory for the original frames.
        extension: image extension for the stored frames.
        limit: maximum number of frames to take; None means all
            ``const.overall`` frames.
        augmentFunc: callable taking ``(frame, coords)`` and returning
            ``(augFrame, augCoords)``; None disables augmentation.
        augmentations: requested number of augmented frames; None means as
            many as there are original frames.
        augmentationName: suffix and directory name of the augmented set.
        augmentationPath: root directory for augmented frames; None derives it
            from ``extractionPath``.
        overwriteOriginal: rewrite original frames that already exist.
        overwriteAugmented: rewrite augmented frames that already exist.

    Any exception is printed rather than raised, so a failure in one
    subcategory does not propagate to the caller.
    """
    try:
        parent = ctgInfo.get(const.parent, "")

        fullExtractionPath = os.path.join(extractionPath, parent, ctg)
        os.makedirs(os.path.join(fullExtractionPath, const.frames), exist_ok=True)

        videos = ctgInfo[const.videos]
        overall = ctgInfo[const.overall]
        limit = limit if limit is not None else overall
        expectedFrames = min(limit, overall)

        # Defaults for the no-augmentation path so reporting never hits unbound names
        augRepeats = 0
        existingAugs = 0
        augMarks = {}
        totalAugs = 0
        fullAugmentationPath = None

        if augmentFunc is not None:
            augmentFunc = proxifyAugmentFunc(augmentFunc)

            augmentations = int(augmentations) if augmentations is not None else expectedFrames
            # When the limit exceeds the available frames, cover the gap with augmentations
            augmentations = max(augmentations, augmentations + limit - overall)
            augRepeats = ceil(augmentations / expectedFrames) if expectedFrames > 0 else 0

            if augmentationPath is None:
                augmentationPath = extractionPath.replace(const.original, augmentationName)

            fullAugmentationPath = os.path.join(augmentationPath, parent, ctg)
            augFramesPath = os.path.join(fullAugmentationPath, const.frames)
            os.makedirs(augFramesPath, exist_ok=True)

            existingAugs = len(os.listdir(augFramesPath))
        else:
            augmentations = 0  # nothing will be augmented; also keeps the report formattable

        fullCategory = getFullCategory(parent, ctg)

        print(
            "Cutting videos: {:>50} \t expected orig frames {:>10} \t expected aug frames \t {:>10} process id: {:>10}"
            .format(fullCategory, expectedFrames, augmentations, os.getpid()))
        sleep(0.5)

        generator = createGenerator(videosPath, videos, overall, limit)

        marks = {}
        total = 0
        for idx, (frame, frameName, coords) in enumerate(generator):
            fullFrameName = const.separator.join((fullCategory, frameName, const.original))
            imageName = extendName(fullFrameName, extension)
            framePath = os.path.join(fullExtractionPath, const.frames, imageName)

            coords = fitCoords(coords, frame.shape[:2])

            status = "passed"
            if overwriteOriginal or not os.path.exists(framePath):
                status = "added"
                cv2.imwrite(framePath, frame)
                marks[frameName] = {
                    const.fullCategory: fullCategory,
                    const.ctgIdx: ctgInfo[const.ctgIdx],
                    const.image: imageName,
                    const.coords: coords,
                    const.imageShape: frame.shape[:2]
                }
                total += 1

            frameAugments = 0
            if augmentFunc is not None:
                for i in range(augRepeats):
                    enoughAugs = totalAugs >= augmentations or (
                        existingAugs >= augmentations and not overwriteAugmented)
                    if enoughAugs:
                        break

                    augFrameName = f"{fullCategory}{const.separator}{frameName}_{i}{const.separator}{augmentationName}"
                    augImageName = extendName(augFrameName, extension)
                    augFramePath = os.path.join(fullAugmentationPath, const.frames, augImageName)

                    if os.path.exists(augFramePath) and not overwriteAugmented:
                        continue

                    augFrame, augCoords = augmentFunc(frame, coords)

                    cv2.imwrite(augFramePath, augFrame)
                    augMarks[augFrameName] = {
                        const.fullCategory: fullCategory,
                        const.image: augImageName,
                        const.ctgIdx: ctgInfo[const.ctgIdx],
                        const.coords: fitCoords(augCoords, augFrame.shape[:2]),
                        const.imageShape: augFrame.shape[:2]
                    }
                    frameAugments += 1

                totalAugs += frameAugments

            print("\rFrame #{} has been {} with {} augmentations".format(
                idx + 1, status, frameAugments), end="")

        marksPath = os.path.join(fullExtractionPath, makeJSONname(const.marks))
        # Merge before opening for writing so a merge failure cannot truncate the file
        mergedMarks = updateMarks(openJsonSafely(marksPath), marks, overwriteOriginal)
        with open(marksPath, "w") as marksFile:
            json.dump(mergedMarks, marksFile, indent=3, sort_keys=True)
        print(
            f"\n{Fore.GREEN}Added marks to {fullExtractionPath} {Style.RESET_ALL}"
        )

        if augmentFunc is not None:
            augMarksPath = os.path.join(fullAugmentationPath, makeJSONname(const.marks))
            mergedAugMarks = updateMarks(openJsonSafely(augMarksPath), augMarks, overwriteAugmented)
            with open(augMarksPath, "w") as augMarksFile:
                json.dump(mergedAugMarks, augMarksFile, indent=3, sort_keys=True)
            print(
                f"{Fore.GREEN}Added marks to {fullAugmentationPath} {Style.RESET_ALL}"
            )

        print(
            f"{Fore.GREEN}Added {total} pure frames and {totalAugs} augmented frames in total {Style.RESET_ALL}"
        )
    except Exception as e:
        # best effort: report the failure instead of raising it to the caller
        print(e)
def extractCategories(videosPath=Path.rawVideos, summarizedPath=Path.summarizedRaw, categoriesList=None,
                      extractionPath=Path.original, subcategories=None, framesLimit=None,
                      augmentationsLimit=None, augmentationFunc=None, augmentationName="augmented",
                      augmentationPath=None, parallel=True, threads=8, overwriteOriginal=False,
                      overwriteAugmented=False):
    """Run ``extract`` for every requested subcategory using a process pool.

    Args:
        videosPath: directory with the raw videos.
        summarizedPath: path of the summary JSON file describing categories.
        categoriesList: categories to process; None means every category of
            the summary.
        extractionPath: root directory for the original frames.
        subcategories: None for all subcategories, a list/tuple applied to
            every category, or a dict mapping a category to its subcategories.
        framesLimit: forwarded to ``extract`` as ``limit``.
        augmentationsLimit: forwarded to ``extract`` as ``augmentations``.
        augmentationFunc: forwarded to ``extract`` as ``augmentFunc``.
        augmentationName: forwarded to ``extract``.
        augmentationPath: forwarded to ``extract``.
        parallel: use up to ``threads`` worker processes; otherwise one.
        threads: upper bound for the number of worker processes.
        overwriteOriginal: forwarded to ``extract``.
        overwriteAugmented: forwarded to ``extract``.

    Raises:
        TypeError: if ``subcategories`` is not None, a list, a tuple or a dict.
    """
    summarized = openJsonSafely(summarizedPath)

    requested = summarized.keys() if categoriesList is None else categoriesList
    # Build a new list: drops the service key without mutating the caller's argument
    categoriesList = [category for category in requested if category != const.maxIdx]

    # A pool needs at least one worker
    workers = max(1, min(threads, mp.cpu_count())) if parallel else 1

    threadsList = []
    with mp.Pool(workers) as pool:
        for category in categoriesList:
            categoryInfo = summarized[category]

            neededSubcategories = list(categoryInfo.keys())
            if subcategories is not None:
                if isinstance(subcategories, (list, tuple)):
                    neededSubcategories = subcategories
                elif isinstance(subcategories, dict):
                    neededSubcategories = subcategories.get(category, neededSubcategories)
                else:
                    raise TypeError(
                        "subcategories must be None, a list, a tuple or a dict, got {}".format(
                            type(subcategories).__name__))

            for subctg in neededSubcategories:
                if subctg not in categoryInfo:
                    continue

                threadsList.append(
                    pool.apply_async(
                        extract,
                        args=(subctg, categoryInfo[subctg]),
                        kwds={
                            "videosPath": videosPath,
                            "extractionPath": extractionPath,
                            "extension": Extensions.jpg,
                            "limit": framesLimit,
                            "augmentFunc": augmentationFunc,
                            "augmentations": augmentationsLimit,
                            "augmentationName": augmentationName,
                            "augmentationPath": augmentationPath,
                            "overwriteOriginal": overwriteOriginal,
                            "overwriteAugmented": overwriteAugmented
                        }))

        # Wait inside the "with" block: leaving it terminates the pool
        for r in threadsList:
            r.get()
def downloadActualInfo():
    """Return whatever ``openJsonSafely`` reads from ``Path.actualInfo``."""
    actualInfo = openJsonSafely(Path.actualInfo)
    return actualInfo