def promptUserForInput():
    """Show the main menu, read one choice from stdin and run that task.

    The raw answer is cast twice through ``SafeCastUtil.safeCast``: once to
    ``int`` (to match the numeric options) and once to ``str`` with ``"Q"``
    passed as the third argument (presumably the fallback value -- confirm
    against ``SafeCastUtil``).

    Choices:
        * ``Q`` (exact match): return without doing anything.
        * ``0``: ask for an input folder and run the cell line analysis.
        * ``1``: ask for a folder of MATLAB files and convert them to CSV.
        * ``2``: ask for an input folder and fetch drug recommendations.

    Any other answer matches no branch, so the function simply returns.
    """
    menu_text = (
        "-------Main Menu-------\n"
        "Choose your task:\n"
        "\t0: Analysis of cell lines\n"
        "\t1: Convert MATLAB to CSV file\n"
        "\t2: Dr.S Analysis (Drug Recommendations System)\n"
        "\tQ: Quit\n"
    )
    answer = input(menu_text)
    answer_as_int = SafeCastUtil.safeCast(answer, int)
    answer_as_string = SafeCastUtil.safeCast(answer, str, "Q")

    # Quitting takes precedence over every numeric option.
    if answer_as_string == "Q":
        return

    if answer_as_int == 0:
        analysis_folder = recursivelyPromptUser(
            "Enter path of input folder:\n", str)
        runMainCellLineAnalysis(analysis_folder)
    elif answer_as_int == 1:
        matlab_folder = recursivelyPromptUser(
            "Enter folder path of the matlab files:\n", str)
        FileConverter.convertMatLabToCSV(matlab_folder)
    elif answer_as_int == 2:
        recommendations_folder = recursivelyPromptUser(
            "Enter folder path of the input folder:\n", str)
        fetchRecommendations(recommendations_folder)
def main():
    """Dispatch on the command-line arguments.

    * No arguments: fall back to the interactive menu.
    * Exactly two arguments ``<task> <folder>`` with task ``'0'``, ``'1'`` or
      ``'2'``: run the cell line analysis, the MATLAB-to-CSV conversion or
      the recommendations lookup on ``<folder>`` respectively.
    * Anything else: log an error and return.
    """
    arguments = sys.argv[1:]

    if len(arguments) == 0:
        promptUserForInput()
        return

    if len(arguments) == 2:
        task, target_folder = arguments
        if task == '0':
            runMainCellLineAnalysis(target_folder)
            return
        if task == '1':
            FileConverter.convertMatLabToCSV(target_folder)
            return
        if task == '2':
            fetchRecommendations(target_folder)
            return

    # Wrong argument count or unknown task id.
    log.error("Exiting program, invalid data sent in target folder.")
def testMatlabFileConversionProperlyFormatsMatrices(self):
    """Convert the MATLAB fixtures and check no CSV line holds list brackets.

    Runs ``FileConverter.convertMatLabToCSV`` on ``self.input_folder`` and
    then scans every entry of ``self.createdFolder`` whose name contains
    ``".csv"``. Each line must contain neither ``['`` nor ``']``.
    """
    FileConverter.convertMatLabToCSV(self.input_folder)

    generated_csvs = [
        name for name in os.listdir(self.createdFolder) if ".csv" in name
    ]
    for generated_csv in generated_csvs:
        csv_path = os.path.join(self.createdFolder, generated_csv)
        # The ``with`` block closes the handle on every exit path, so no
        # explicit close() is needed. The handle is deliberately not named
        # ``csv`` so it cannot shadow the standard-library module.
        with open(csv_path) as csv_file:
            try:
                for line in csv_file:
                    assert "['" not in line
                    assert "']" not in line
            except ValueError as valueError:
                # AssertionError is not a ValueError, so failed assertions
                # still propagate and fail the test. Only ValueErrors raised
                # while reading (which include text decoding errors) are
                # logged here, as in the original implementation.
                self.log.error(valueError)
def __DownloadTargetTransfers(self):
    """
    * Pull all targeted XML transfer configs from gsfts url.
    Walks every page of the transfer grid, clicks the download link of each
    row whose second cell matches the target regex, then closes the driver
    and records the downloaded file paths.
    """
    nextButtonXPath = '//*[@id="next_xferpager"]/span'
    nextButton = self.__driver.find_element_by_xpath(nextButtonXPath)
    pagerCell = self.__driver.find_element_by_xpath(
        '//*[@id="xferpager_center"]/table/tbody/tr/td[4]')
    pageCount = int(pagerCell.find_element_by_id('sp_1_xferpager').text)

    # One pass per page, first page through last page inclusive:
    for _ in range(pageCount):
        # Snapshot the grid rows before any download link is clicked:
        gridRows = [
            row for row in self.__driver.find_elements_by_tag_name('tr')
            if row.get_attribute('role') == 'row'
        ]
        for row in gridRows:
            cells = row.find_elements_by_tag_name('td')
            if not self.__targetregex.match(cells[1].text):
                continue
            # Click button to download file to temporary location:
            link = cells[9].find_element_by_tag_name('a')
            link.find_element_by_tag_name("span").click()
        # Proceed to next page, then look the button up again:
        nextButton.click()
        nextButton = self.__driver.find_element_by_xpath(nextButtonXPath)

    # Wait so all files are downloaded:
    sleep(3)
    self.__driver.close()
    self.__driver = None

    # Get all paths to downloaded xml files:
    self.__paths = FileConverter.GetAllFilePaths(
        self.__downloaddir, FileTransferServiceAggregator.__xferFileSig)
def GetDataAttributes(self, folder_path, fileExp, dateFormat, sheets=None, delim=None, recursive=False, skiprows=None):
    """
    * Get all column attributes for files matching expression at path.
    Inputs:
    * folder_path: String to folder containing files.
    * fileExp: Compiled regex or regex string corresponding to files
    representing data source. A string is validated and compiled here.
    * dateFormat: None, or a dictionary that has both the 'regex' and
    'dateformat' keys.
    Optional:
    * sheets: Sheets to use if using xls/xlsx file (will create one
    ETL/table definition per sheet).
    * delim: String delimiter used in csv file (not read in this method).
    * recursive: Search for all folders within folder to find matching files.
    * skiprows: Not read in this method.
    Raises:
    * Exception: if any argument fails validation (all messages are joined
    with newlines) or if no file matches the expression.
    """
    errs = []
    if not isinstance(folder_path, str):
        errs.append("path must be a string.")
    if dateFormat is not None:
        if not isinstance(dateFormat, dict):
            errs.append(
                "dateFormat must be a dictionary with keys ['regex', 'dateformat']."
            )
        elif 'regex' not in dateFormat or 'dateformat' not in dateFormat:
            # Both keys are required, so a single missing key is an error.
            errs.append(
                "dateFormat must have 'regex' and 'dateformat' keys.")
    if not isinstance(fileExp, (DataColumnAttributes.__regType, str)):
        errs.append(
            "fileExp must be a regular expression object or string regular expression."
        )
    elif isinstance(fileExp, str):
        if not CheckRegex(fileExp):
            errs.append('fileExp must be valid regular expression.')
        else:
            fileExp = re.compile(fileExp)
    if sheets is not None and not isinstance(sheets, list):
        errs.append('sheets must be a list if provided.')
    if errs:
        raise Exception("\n".join(errs))

    # Reset the per-sheet flags only when sheets were supplied:
    if sheets is not None:
        self.__hasuniques = {sheet: False for sheet in sheets}
    self.__sheets = sheets
    self.__dateFormat = dateFormat

    # Get all files that match data file expression at provided path.
    # The folder parameter is named folder_path; the previous code passed
    # the undefined name 'path' here, which raised NameError.
    filePaths = FileConverter.GetAllFilePaths(folder_path, fileExp, recursive)
    if not filePaths:
        raise Exception(
            'Could not find any matching files matching regex.')

    # Get column attributes of all target files:
    self.__GetAllColumnAttributes(filePaths)
    # Generate single column definition that has least restrictive types:
    self.__AssignLeastRestrictive()
def __GetChromeDriverPaths(self, chromedriverpath):
    """
    * Get all chromedriver.exe versions stored locally.
    The path is read as <root>/<folder regex>/<file name>: the second-to-last
    component is compiled as a regular expression and used to look up
    folders under <root>; the file name is then joined onto each result.
    """
    versionDir, exeName = os.path.split(chromedriverpath)
    rootDir, versionPattern = os.path.split(versionDir)
    versionRegex = re.compile(versionPattern)
    matchedDirs = FileConverter.GetAllFolderPaths(rootDir, versionRegex)

    driverPaths = []
    for matchedDir in matchedDirs:
        driverPaths.append(os.path.join(matchedDir, exeName))
    self.__chromedriverpaths = driverPaths
def __FindIssues(self, servicelogfolder):
    """
    * Find all etl issues that occurred.
    Groups the jobs of every matching log file under servicelogfolder, runs
    issue detection per job, then stores the collected data as a DataFrame
    sorted by 'TimeStamp', newest first.
    """
    # Start from the class-level data dictionary:
    self.__data = DynamicETLServiceIssueParser.__dataDict

    # Find matching files (indexed by key to reach each path):
    logPaths = FileConverter.GetAllFilePaths(
        servicelogfolder, DynamicETLServiceIssueParser.__logfileSig)
    for logName in logPaths:
        with open(logPaths[logName], 'r') as logFile:
            jobGroups = DynamicETLServiceIssueParser.__GroupAllJobs(logFile)
            for jobKey in jobGroups:
                self.__DetermineIssues(jobKey, jobGroups[jobKey])

    issueFrame = DataFrame(self.__data)
    self.__data = issueFrame.sort_values('TimeStamp', ascending=False)
def __GetAllMatchingFiles(self, datafolder, fileregex):
    """
    * Return all files matching regular expression for use in ETL.
    Delegates to FileConverter.GetAllFilePaths and returns its result
    unchanged.
    """
    matchingFiles = FileConverter.GetAllFilePaths(datafolder, fileregex)
    return matchingFiles
def GetDataAttributes(self, path, fileExp, dateFormat=None, filePaths=None, sheets=None, delim=None, recursive=False, skiprows=None):
    """
    * Get all column attributes in files at path or at provided paths.
    Inputs:
    * path: String to folder.
    * fileExp: Regular expression object to select files, or None.
    (The original documentation states that all files in the folder are
    chosen when it is not supplied; that depends on
    FileConverter.GetAllFilePaths -- confirm there.)
    Optional:
    * dateFormat: None, or a dictionary that has both the 'regex' and
    'dateformat' keys.
    * filePaths: Dictionary mapping { FileName -> Path }. When given, the
    folder is not searched.
    * sheets: Sheets to use if using xls/xlsx file (will create one
    ETL/table definition per sheet).
    * delim: String delimiter used in csv file.
    * recursive: Search for all folders within folder to find matching files.
    * skiprows: Forwarded unchanged to the extraction helpers.
    Raises:
    * Exception: if any argument fails validation (all messages are joined
    with newlines) or if there are no files to process.
    """
    errs = []
    if not isinstance(path, str):
        errs.append("path must be a string.")
    if dateFormat is not None:
        if not isinstance(dateFormat, dict):
            errs.append(
                "dateFormat must be a dictionary with keys ['regex', 'dateformat']."
            )
        elif 'regex' not in dateFormat or 'dateformat' not in dateFormat:
            # Both keys are required, so a single missing key is an error.
            errs.append(
                "dateFormat must have 'regex' and 'dateformat' keys.")
    if fileExp and not isinstance(fileExp, DataColumnAttributes.__regType):
        errs.append(
            "fileExp must be a regular expression object, or None.")
    if filePaths is not None and not isinstance(filePaths, dict):
        errs.append(
            'filePaths must be a dictionary mapping { FileName -> Path } or None.'
        )
    if sheets is not None and not isinstance(sheets, list):
        errs.append('sheets must be a list if provided.')
    if errs:
        raise Exception("\n".join(errs))

    # Reset the per-sheet flags only when sheets were supplied:
    if sheets is not None:
        self.__hasuniques = {sheet: False for sheet in sheets}
    self.__sheets = sheets
    self.__dateFormat = dateFormat

    # Get all files that match data file expression at provided path if not supplied:
    if filePaths is None:
        filePaths = FileConverter.GetAllFilePaths(path, fileExp, recursive)
    if len(filePaths) == 0:
        raise Exception(
            'Could not find any matching files matching regex.')

    # Get column attributes of all target files. A separate local name is
    # used so the 'path' argument is not overwritten by the loop.
    for fileName in filePaths:
        filePath = filePaths[fileName]
        if self.__sheets is None:
            self.__ExtractFile(filePath, delim, skiprows)
        else:
            self.__ExtractAllSheets(filePath, skiprows)
    self.__filepaths = {filePaths[fileName] for fileName in filePaths}

    # Determine if columns have changed (needs at least two entries):
    if len(self.__dateToAttrs) > 1:
        if self.__sheets is None:
            prevAttrs = None
            for dt in self.__dateToAttrs:
                currAttrs = self.__dateToAttrs[dt]
                if prevAttrs is not None and currAttrs != prevAttrs:
                    self.__columnChgDates[
                        currAttrs.FileDate] = currAttrs - prevAttrs
                prevAttrs = currAttrs
        else:
            # Determine if columns have changed for each sheet.
            # NOTE(review): prevAttrs restarts for every date, so each sheet
            # is compared with the previous sheet of the same date rather
            # than with the same sheet of the previous date -- confirm that
            # this is intended.
            for dt in self.__dateToAttrs:
                prevAttrs = None
                for sheetname in self.__dateToAttrs[dt]:
                    currAttrs = self.__dateToAttrs[dt][sheetname]
                    if prevAttrs is not None and currAttrs != prevAttrs:
                        self.__columnChgDates[currAttrs.FileDate][
                            sheetname] = currAttrs - prevAttrs
                    prevAttrs = currAttrs