def frameFolderSmart(folderPath, ctgLimit):
    """List the video files of ``folderPath`` and hand them to ``getSameCtgVideo``.

    Unfinished: nothing is returned and ``ctgLimit`` is not consulted yet.
    """
    # Both lists are loaded up front; only the full categories are consumed so far.
    processedVideos = readLines(Path.processedFiles)
    fullCategories = readLines(Path.fullCategories)

    videos = []
    for entry in os.listdir(folderPath):
        if not entry:
            continue
        if entry.endswith(Extensions.videos()):
            videos.append(entry)

    videosByCtgs = getSameCtgVideo(fullCategories, videos)
def __init__(self, input_path, output_path):
    """Load the data listed in the two files, size the network and train it.

    ``input_path`` is read with ``utils.readLines`` and passed through
    ``utils.getImages``; ``output_path`` is read with ``utils.readLines``.
    """
    self.n = FeedForwardNetwork()
    self.pix_size = 50

    inputLines = utils.readLines(input_path)
    self.input_value = utils.getImages(inputLines)
    self.output_value = utils.readLines(output_path)

    # Layer sizes: one input unit per pixel of a pix_size x pix_size image,
    # one output unit per entry of `results` (resolved outside this method).
    self.inputUnits = self.pix_size ** 2
    self.nbHiddenLayers = 1
    self.hiddenUnits = 500
    self.outputUnits = len(results)

    self.ds = SupervisedDataSet(self.inputUnits, self.outputUnits)

    self.initializeDataSet()
    self.initilizeNetwork()
    self.trainingOnDataSet()
def processVideoFolder(folderPath=Path.rawVideos,
                       marksPath=Path.rawJson,
                       datasetPath=Path.dataset,
                       overwrite=False,
                       extension=Extensions.jpg,
                       params=None):
    """Run ``frameVideo`` on every video of ``folderPath`` not processed before.

    A file is picked up when its name ends with ``Extensions.mov`` or
    ``Extensions.mp4`` and it is not listed in ``Path.processedFiles``.
    The list of processed files is written back even if one of the videos
    raises, so the videos finished before the failure are not repeated on
    the next run.

    Args:
        folderPath: directory that is scanned for videos.
        marksPath: forwarded to ``frameVideo``.
        datasetPath: forwarded to ``frameVideo``.
        overwrite: forwarded to ``frameVideo``.
        extension: forwarded to ``frameVideo``.
        params: forwarded to ``frameVideo``.
    """
    processedVideos = readLines(Path.processedFiles)
    # Set copy for O(1) membership tests while filtering the directory.
    alreadyProcessed = set(processedVideos)

    videos = [
        video for video in os.listdir(folderPath)
        if video not in alreadyProcessed and (
            video.endswith(Extensions.mov) or video.endswith(Extensions.mp4))
    ]

    actualInfo = downloadActualInfo()

    try:
        for video in videos:
            # NOTE(review): this uses Path.dataset rather than the datasetPath
            # argument - confirm that is intended.
            actualizeInfoWithFrames(Path.dataset)

            filePath = os.path.join(folderPath, video)
            print(
                f"\n{Fore.GREEN}Video {filePath} is being processed {Style.RESET_ALL}"
            )

            frameVideo(filePath=filePath,
                       marksPath=marksPath,
                       datasetPath=datasetPath,
                       actualInfo=actualInfo,
                       overwrite=overwrite,
                       extension=extension,
                       params=params)

            processedVideos.append(video)
    finally:
        # Persist progress whether or not the batch completed.
        writeLines(set(processedVideos), Path.processedFiles)
def makeDividedSets():
    """Call ``makeSets`` once per group of 35 categories from ``Path.categories``.

    For group ``i`` the sets are written under ``Path.sets/part_<i>`` and the
    categories of that group are saved next to them in ``set_categories.txt``.
    """
    import os
    from math import ceil
    from utils import readLines, writeLines
    from verifier import splitFullCategory

    ctgInPart = 35
    allCategories = readLines(Path.categories)
    partsCount = ceil(len(allCategories) / ctgInPart)

    for partIdx in range(partsCount):
        chunk = allCategories[partIdx * ctgInPart:(partIdx + 1) * ctgInPart]

        # Every category contributes its original and its augmented directory.
        pathsList = []
        for fullCategory in chunk:
            category, subcategory = splitFullCategory(fullCategory)
            for kind in (Constants.original, Constants.augmented):
                pathsList.append(
                    os.path.join(Path.dataset, kind, category, subcategory))

        setPath = os.path.join(Path.sets, f"part_{partIdx}")
        makeSets(pathsList, wpath=setPath, trainPart=0.9, validPart=0.05)
        writeLines(chunk, os.path.join(setPath, "set_categories.txt"))
def get_genome_seqs( self ):
    """Return a dict mapping each distinct contig of the features to its sequence.

    The sequence is the first line of ``./cache/<species>_<contig>``,
    stripped of surrounding whitespace and upper-cased.
    """
    contigs = np.unique( self.get_features().contig )

    def load( contig ):
        first_line = ut.readLines( './cache/' + self.species + '_' + contig )[0]
        return first_line.strip().upper()

    return { contig: load( contig ) for contig in contigs }
def fixJsons():
    """Rewrite the category index of every entry in each ``marks.json`` of the dataset.

    For every ``marks.json`` found under ``Path.dataset`` each entry gets
    ``Constants.ctgIdx`` set to the position of its ``Constants.fullCategory``
    in the list read from ``Path.categories``; the file is then written back
    in place.

    Raises:
        ValueError: if an entry's full category is not in the category list.
    """
    import os
    from utils import walk, readLines

    categories = readLines(Path.categories)
    jsons = walk(Path.dataset, targetFiles="marks.json").get("files")

    for i, jsn in enumerate(jsons):
        print(f"\rProcessing {i} json file", end="")
        path = os.path.join(Path.dataset, *jsn)

        # Context managers guarantee the handle is closed (and the rewritten
        # content flushed) before moving on to the next file.
        with open(path, "r") as src:
            marks = json.load(src)

        for items in marks.values():
            items[Constants.ctgIdx] = categories.index(
                items[Constants.fullCategory])

        with open(path, "w") as dst:
            json.dump(marks, dst, indent=4)
def purifySets():
    """Remove entries that no longer exist on disk from the train/valid/test lists.

    Each list file under ``Path.sets`` is rewritten with only the paths for
    which ``os.path.exists`` is true, and the number of dropped entries is
    printed.
    """
    listFiles = {
        name: os.path.join(Path.sets, extendName(name, Extensions.txt))
        for name in (const.train, const.valid, const.test)
    }

    for path in listFiles.values():
        listed = readLines(path)
        existing = list(filter(os.path.exists, listed))
        writeLines(existing, path)
        print(f"Cleaned {len(listed) - len(existing)} from {path}")
def main():
    """Run ``extractCategories`` for the categories listed in ``Path.categories``.

    Only the first component returned by ``splitFullCategory`` is kept for
    each listed entry, so duplicates collapse into one category.
    """
    from utils import readLines
    from verifier import splitFullCategory

    categories = {
        splitFullCategory(ctg)[0] for ctg in readLines(Path.categories)
    }

    options = dict(
        videosPath=Path.rawVideos,
        summarizedPath=Path.summarizedRaw,
        categoriesList=categories,
        subcategories=(const.avers, ),
        extractionPath=Path.original,
        framesLimit=2000,
        augmentationsLimit=2000,
        augmentationFunc=const.default,
        augmentationName="augmented",
        augmentationPath=None,
        parallel=True,
        threads=8,
        overwriteOriginal=False,
        overwriteAugmented=False,
    )
    extractCategories(**options)
def main():
    """Pass the category list from ``Path.categories`` to ``updateCategoriesIndices``."""
    from utils import readLines

    ctgList = readLines(Path.categories)
    updateCategoriesIndices(Path.dataset, ctgList)
def makeSets(directories,
             wpath=Path.sets,
             trainPart=0.9,
             validPart=0.05,
             ignoreOld=False,
             matchWithMarks=True):
    """Distribute the images found in ``directories`` over train/valid/test lists.

    Args:
        directories: directories that are walked for images (and marks).
        wpath: directory the three list files are written to; created if missing.
        trainPart: share of the found images appended to the train list.
        validPart: share appended to the valid list; the test list gets
            ``1 - trainPart - validPart``.
        ignoreOld: when true the existing list files are not read, so each
            list starts empty instead of being extended.
        matchWithMarks: when true the images are passed through ``matchLists``
            against the ``.txt`` files found in the same directories.
    """
    assert 0 < trainPart + validPart <= 1
    os.makedirs(wpath, exist_ok=True)

    shares = (
        (const.train, trainPart),
        (const.valid, validPart),
        (const.test, 1 - trainPart - validPart),
    )
    sets = {
        name: {
            "path": os.path.join(wpath, extendName(name, Extensions.txt)),
            "part": share,
            "content": []
        }
        for name, share in shares
    }

    # Previous content of the list files (kept unless ignoreOld is set).
    inUse = []
    for info in sets.values():
        if ignoreOld:
            info["content"] = []
        else:
            info["content"] = readLines(info["path"])
        inUse.extend(info["content"])

    images, marks = [], []
    for dirIdx, path in enumerate(directories):
        progress = dirIdx / len(directories) * 100
        print(
            "\rSearching for images and marks in listed directories, {:.1f}% has been done"
            .format(progress),
            end="")

        foundImages = walk(
            path, targetExtensions=Extensions.images()).get("extensions")
        images.extend([os.path.join(path, *img) for img in foundImages])

        if not matchWithMarks:
            continue

        foundMarks = walk(
            path, targetExtensions=Extensions.txt).get("extensions")
        marks.extend([os.path.join(path, *mrk) for mrk in foundMarks])

    if matchWithMarks:

        def toMarkPath(imagePath):
            return changeExtension(imagePath, Extensions.txt)

        print("Matching images to marks, please wait...")
        images = matchLists(master=marks, slave=images, transformer=toMarkPath)

    # Exclusion of paths that are already in use is currently disabled:
    # _, images = matchLists(master=inUse, slave=images, getMismatched=True)

    images = permutate(images)

    start = 0
    for set_, info in sets.items():
        end = start + int(info["part"] * len(images))
        total = end - start

        info["content"].extend(images[start:end])
        info["content"] = permutate(info["content"])
        writeLines(lines=info["content"], path=info["path"])
        print(f"\n{Fore.GREEN}Added {total} paths to {set_} {Style.RESET_ALL}")

        start = end
from utils import readLines, manahattanDistance4d def checkConstellation(s, cs): for c in cs: if s in c: return True return False lines = readLines("constellation.txt") stars = [] for l in lines: items = list(map(int, l.split(","))) stars.append(items) constellations = [] while len(stars) > 0: #if checkConstellation(s1, constellations): # continue s1 = stars[0] temp = [s1] stars.remove(s1) i = 0 while len(stars) > 0: s2 = stars[i] for t in temp: if manahattanDistance4d(t, s2) <= 3: temp.append(s2)
def frameVideo(filePath,
               marksPath,
               datasetPath,
               actualInfo,
               overwrite=False,
               extension=Extensions.jpg,
               params=None,
               ctgLimit=None):
    """Write the marked frames of one video to the dataset and record their marks.

    Each frame for which ``getFrameMarks`` returns marks is saved under
    ``<datasetPath>/<const.original>/<category>/<subcategory>/<const.frames>``
    and its description is merged into the marks JSON of that subcategory.
    Categories not yet present in ``Path.categories`` are appended and the
    file is rewritten at the end.

    Args:
        filePath: path of the video to process.
        marksPath: directory holding the JSON marks file of the video.
        datasetPath: root directory the frames and marks are written to.
        actualInfo: nested counters; read through ``getNested`` for the first
            marked frame and updated through ``updateNested`` for every
            marked frame.
        overwrite: when false, frames whose image file already exists are
            skipped.
        extension: extension of the written image files.
        params: passed to ``cv2.imwrite`` as its third argument.
        ctgLimit: when not ``None``, processing stops at the frame whose
            computed index equals this value.

    Returns:
        None. If the marks JSON cannot be opened or parsed, a message is
        printed and nothing is written.
    """
    categories = readLines(Path.categories)
    basename = extractBasename(filePath)

    try:
        jsonName = makeJSONname(basename)
        with open(os.path.join(marksPath, jsonName), "r") as marksFile:
            marks = json.load(marksFile)
    except (OSError, ValueError):
        # OSError: file missing or unreadable; ValueError: malformed JSON.
        print(
            f"{Fore.RED}There is no json file {marksPath} for {filePath} {Style.RESET_ALL}"
        )
        return

    framesGenerator = generateFrames(filePath)
    offset = getKeysOffset(marks.keys())

    marksSeparated = {}
    total = 0
    # Looked up on the first frame that has marks. Binding it only when
    # idx == 0 left the name undefined whenever frame 0 carried no marks.
    globalIdx = None

    for idx, frame in enumerate(framesGenerator):
        frameMarks = getFrameMarks(idx, marks, offset)
        if not frameMarks:
            continue

        category = frameMarks[const.category]
        subcategory = frameMarks[const.subcategory]
        countKeys = [const.original, category, subcategory]

        if globalIdx is None:
            globalIdx = getNested(dictionary=actualInfo,
                                  keys=countKeys,
                                  default=0)

        localIdx = idx + globalIdx
        if ctgLimit is not None and localIdx == ctgLimit:
            break

        frameID = f"frame_{localIdx}"
        fullCategory = getFullCategory(category, subcategory)

        if fullCategory not in categories:
            categories.append(fullCategory)
        ctgIdx = categories.index(fullCategory)

        frameName = f"{fullCategory}{const.separator}{frameID}{const.separator}{const.original}"
        dirPath = os.path.join(datasetPath, const.original, category,
                               subcategory)
        framesPath = os.path.join(dirPath, const.frames)
        framePath = os.path.join(framesPath, extendName(frameName, extension))

        # The counter is updated even for frames that end up being skipped.
        updateNested(dictionary=actualInfo, keys=countKeys, value=1)

        if not overwrite and os.path.exists(framePath):
            print("\rFrame #{} has been passed".format(idx), end="")
            continue

        os.makedirs(framesPath, exist_ok=True)

        frameInfo = {
            const.image: extendName(frameName, extension),
            const.coords: fitCoords(frameMarks[const.coords], frame.shape[:2]),
            const.fullCategory: fullCategory,
            const.ctgIdx: ctgIdx,
            const.imageShape: frame.shape[:2]
        }

        # ["original", category, subcategory, frameName]
        keySet = countKeys + [frameName]
        putNested(dictionary=marksSeparated, keys=keySet, value=frameInfo)

        cv2.imwrite(framePath, frame, params)
        total += 1

        print("\rFrame #{} has been added".format(idx), end="")

    # Empty when no frame was written (e.g. every frame already existed).
    marksSeparated = marksSeparated.get(const.original, {})
    print()

    for ctg, value in marksSeparated.items():
        for subctg, subctgMarks in value.items():
            subctgMarksJson = os.path.join(
                datasetPath, const.original, ctg, subctg,
                extendName(const.marks, Extensions.json))

            # Merge the new frames into whatever marks were stored before.
            oldMarks = openJsonSafely(subctgMarksJson)
            for k, v in subctgMarks.items():
                oldMarks[k] = v

            with open(subctgMarksJson, "w") as marksFile:
                json.dump(oldMarks, marksFile, indent=3)

            print(
                f"{Fore.GREEN}Added marks to {subctgMarksJson} {Style.RESET_ALL}"
            )

    writeLines(categories, Path.categories)
    print(
        f"{Fore.GREEN}Updated categories file {Path.categories} {Style.RESET_ALL}"
    )
    print(f"{Fore.GREEN}Added {total} frames in total {Style.RESET_ALL}")
import sys

from utils import readLines

# Script: apply the integer changes listed in corrections.txt over and over
# until the running total takes a value it has already had, then report it.
inputs = readLines("corrections.txt")
if not inputs:
    # With no changes the loop below could never terminate.
    sys.exit("corrections.txt is empty: no frequency can repeat")

freq = 0
# Values seen so far, kept in a set for O(1) lookups. The starting value is
# included so that returning to it counts as a repetition.
freqs = {freq}

while True:
    for i in inputs:
        freq = freq + int(i)
        if freq in freqs:
            print("PRIMA RIPETUTA: ", freq)
            sys.exit()
        freqs.add(freq)
    # Running total after each complete pass over the input.
    print(freq)
from utils import readLines import networkx as nx lines = readLines("instructions.txt") # instr = [] # temp = [] # for l in lines: # items = l.split() # temp.append((items[1], items[7])) # for i in range(3): # for t in temp: # if t[0] not in instr and t[1] not in instr: # instr.append(t[0]) # instr.append(t[1]) # elif t[0] not in instr: # index = instr.index(t[1]) # i = 1 # while ord(instr[index-i]) > ord(t[0]): # i = i + 1 # instr.insert(index-i, t[0]) # elif t[1] not in instr: # index = instr.index(t[0]) # i = 1 # while index+i < len(instr) and ord(instr[index+i]) < ord(t[1]): # i = i + 1 # instr.insert(index+i, t[1]) # else: # index0 = instr.index(t[0])
from utils import readLines ids = readLines("ids.txt") letters2 = [] letters3 = [] for id in ids: temp = list(id) in2 = False in3 = False for c in temp: count = temp.count(c) if count == 2 and not in2: in2 = True letters2.append(id) if count == 3 and not in3: in3 = True letters3.append(id) print("With 2: ", len(letters2)) print("With 3: ", len(letters3)) print(len(letters2) * len(letters3)) ids = list(set(letters2 + letters3)) right_ids = [] for i1 in range(len(ids) - 1): id1 = ids[i1] for i2 in range(i1, len(ids)): id2 = ids[i2] diff = 0 for i in range(len(id1)):
import pandas as pd
import os
import numpy as np
from utils import findFiles, readLines, letterToTensor, lineToTensor
from tqdm import tqdm

data_dir = '../../../data/data'
csv_file_name = 'train.csv'

if __name__ == '__main__':
    # Build a shuffled (name, target) table from the names/*.txt files; the
    # target of every line is the base name of the file it was read from.
    rows = []
    for filename in tqdm(findFiles(os.path.join(data_dir, 'names/*.txt')),
                         desc='Creating csv file'):
        category = os.path.splitext(os.path.basename(filename))[0]
        rows.extend(
            {'name': line, 'target': category} for line in readLines(filename)
        )

    # Build the frame once: appending row by row copies the whole frame on
    # every call, and DataFrame.append no longer exists in pandas >= 2.0.
    df = pd.DataFrame(rows, columns=['name', 'target'])

    # shuffle datasets
    df = df.sample(frac=1).reset_index(drop=True)

    # save dataframe as csv file
    df.to_csv(os.path.join(data_dir, csv_file_name), index=False)
    print(f"[INFO] {csv_file_name} saved to {data_dir}")
from utils import readLines, manahattanDistance3d
from operator import itemgetter

# Script: count the bots that lie within the radius of the bot with the
# largest radius.
lines = readLines("nanobots.txt")

# Each bot is stored as an (x, y, z, r) tuple.
bots = []
for entry in lines:
    radius = int(entry.split("r=")[1])
    inside = entry.split("=<")[1].split(">")[0]
    x, y, z = map(int, inside.split(","))
    bots.append((x, y, z, radius))

max_r = max(bots, key=itemgetter(3))

inRange = sum(
    1 for bot in bots if manahattanDistance3d(bot, max_r) <= max_r[3]
)
print (inRange)

# part 2
# xmin = min(bots,key=itemgetter(0))[0]
# xmax = max(bots,key=itemgetter(0))[0]
# ymin = min(bots,key=itemgetter(1))[1]
# ymax = max(bots,key=itemgetter(1))[1]
# zmin = min(bots,key=itemgetter(2))[2]
# zmax = max(bots,key=itemgetter(2))[2]
#