Пример #1
0
def promptUserForInput():
    """Show the main menu once and run the task the user picks.

    The raw answer is passed through ``SafeCastUtil.safeCast`` twice: once
    with ``int`` to recognise a numbered task, and once with ``str`` (and
    ``"Q"`` as the third argument) to recognise the quit option. An answer
    that matches none of the options does nothing and the function returns.

    Each numbered task first asks for a folder path through
    ``recursivelyPromptUser`` and then hands that path to the task's entry
    point.
    """
    answer = input("-------Main Menu-------\n"
                   "Choose your task:\n"
                   "\t0: Analysis of cell lines\n"
                   "\t1: Convert MATLAB to CSV file\n"
                   "\t2: Dr.S Analysis (Drug Recommendations System)\n"
                   "\tQ: Quit\n")

    numeric_choice = SafeCastUtil.safeCast(answer, int)
    text_choice = SafeCastUtil.safeCast(answer, str, "Q")

    # Quit takes priority over every numbered option.
    if text_choice == "Q":
        return

    if numeric_choice == 0:
        folder = recursivelyPromptUser("Enter path of input folder:\n", str)
        runMainCellLineAnalysis(folder)
        return

    if numeric_choice == 1:
        folder = recursivelyPromptUser(
            "Enter folder path of the matlab files:\n", str)
        FileConverter.convertMatLabToCSV(folder)
        return

    if numeric_choice == 2:
        folder = recursivelyPromptUser(
            "Enter folder path of the input folder:\n", str)
        fetchRecommendations(folder)
Пример #2
0
def main():
    """Entry point: dispatch on command-line arguments.

    With no arguments the interactive menu is shown. With exactly two
    arguments the first selects the task ('0', '1' or '2') and the second
    is the folder handed to that task. Any other combination is logged as
    an error and nothing is run.
    """
    cli_args = sys.argv[1:]

    # No arguments: fall back to the interactive prompt.
    if not cli_args:
        promptUserForInput()
        return

    if len(cli_args) == 2:
        task, target = cli_args
        if task == '0':
            runMainCellLineAnalysis(target)
            return
        if task == '1':
            FileConverter.convertMatLabToCSV(target)
            return
        if task == '2':
            fetchRecommendations(target)
            return

    # Wrong argument count or unknown task selector.
    log.error("Exiting program, invalid data sent in target folder.")
    return
Пример #3
0
 def testMatlabFileConversionProperlyFormatsMatrices(self):
     """
     Convert the MATLAB files in self.input_folder, then check that no
     line of any generated CSV in self.createdFolder contains the
     substrings "['" or "']".
     A ValueError raised while reading a file is logged, not re-raised.
     """
     FileConverter.convertMatLabToCSV(self.input_folder)
     csv_names = [
         name for name in os.listdir(self.createdFolder) if ".csv" in name
     ]
     for csv_name in csv_names:
         csv_path = self.createdFolder + "/" + csv_name
         # The with-block closes the file on exit.
         with open(csv_path) as handle:
             try:
                 for row in handle:
                     assert "['" not in row
                     assert "']" not in row
             except ValueError as valueError:
                 self.log.error(valueError)
Пример #4
0
 def __DownloadTargetTransfers(self):
     """
     * Page through the transfers grid and click the download link of
     every row whose second cell matches self.__targetregex, then collect
     the paths of the downloaded files.
     Side effects:
     * Closes the web driver and sets self.__driver to None.
     * Sets self.__paths to the result of FileConverter.GetAllFilePaths
     over self.__downloaddir.
     """
     movebutton = self.__driver.find_element_by_xpath(
         '//*[@id="next_xferpager"]/span')
     pageindicator = self.__driver.find_element_by_xpath(
         '//*[@id="xferpager_center"]/table/tbody/tr/td[4]')
     currpage = 1
     # Total number of pages is read from the pager's 'sp_1_xferpager' element:
     maxpage = int(pageindicator.find_element_by_id('sp_1_xferpager').text)
     while currpage <= maxpage:
         # Collect every <tr role="row"> on the current page before clicking anything:
         elems = [
             elem for elem in self.__driver.find_elements_by_tag_name('tr')
             if elem.get_attribute('role') == 'row'
         ]
         for elem in elems:
             cells = elem.find_elements_by_tag_name('td')
             # Second cell is matched against the target regex
             # (presumably the transfer name -- TODO confirm against the page):
             if self.__targetregex.match(cells[1].text):
                 # Click button to download file to temporary location:
                 a = cells[9].find_element_by_tag_name('a')
                 a.find_element_by_tag_name("span").click()
         # Proceed to next page (note: this is also clicked after the last page):
         movebutton.click()
         # Look the button up again after the click
         # (NOTE(review): presumably the old handle goes stale when the grid reloads -- confirm):
         movebutton = self.__driver.find_element_by_xpath(
             '//*[@id="next_xferpager"]/span')
         currpage += 1
     # Wait so all files are downloaded:
     sleep(3)
     self.__driver.close()
     self.__driver = None
     # Get all paths to downloaded xml files:
     self.__paths = FileConverter.GetAllFilePaths(
         self.__downloaddir, FileTransferServiceAggregator.__xferFileSig)
 def GetDataAttributes(self,
                       folder_path,
                       fileExp,
                       dateFormat,
                       sheets=None,
                       delim=None,
                       recursive=False,
                       skiprows=None):
     """
     * Get all column attributes for files matching expression at path.
     Inputs:
     * folder_path: String to folder containing files.
     * fileExp: Regex string or compiled regular expression selecting the
     files that represent the data source. A string is compiled here.
     * dateFormat: None, or dictionary with both keys 'regex' and 'dateformat'.
     Optional:
     * sheets: Sheets to use if using xls/xlsx file (will create one ETL/table definition per sheet).
     * delim: String delimiter used in csv file.
     (NOTE(review): accepted but not used inside this method -- confirm.)
     * recursive: Search for all folders within folder to find matching files.
     * skiprows: NOTE(review): accepted but not used inside this method -- confirm.
     Raises:
     * Exception: If any argument fails validation (all problems are reported
     together, one per line) or if no file matches fileExp.
     """
     errs = []
     if not isinstance(folder_path, str):
         errs.append("path must be a string.")
     if dateFormat is not None:
         if not isinstance(dateFormat, dict):
             errs.append(
                 "dateFormat must be a dictionary with keys ['regex', 'dateformat']."
             )
         # Both keys are required, so report when either one is missing:
         elif 'regex' not in dateFormat or 'dateformat' not in dateFormat:
             errs.append(
                 "dateFormat must have 'regex' and 'dateformat' keys.")
     if not isinstance(fileExp, (DataColumnAttributes.__regType, str)):
         errs.append(
             "fileExp must be a regular expression object or string regular expression."
         )
     elif isinstance(fileExp, str):
         if not CheckRegex(fileExp):
             errs.append('fileExp must be valid regular expression.')
         else:
             fileExp = re.compile(fileExp)
     if sheets is not None and not isinstance(sheets, list):
         errs.append('sheets must be a list if provided.')
     if errs:
         raise Exception("\n".join(errs))
     # Reset the per-sheet flags only when sheets were supplied:
     self.__hasuniques = ({sheet: False
                           for sheet in sheets}
                          if sheets is not None else self.__hasuniques)
     self.__sheets = sheets
     self.__dateFormat = dateFormat
     # Get all files that match data file expression at provided path:
     filePaths = FileConverter.GetAllFilePaths(folder_path, fileExp,
                                               recursive)
     if len(filePaths) == 0:
         raise Exception(
             'Could not find any matching files matching regex.')
     # Get column attributes of all target files:
     self.__GetAllColumnAttributes(filePaths)
     # Generate single column definition that has least restrictive types:
     self.__AssignLeastRestrictive()
Пример #6
0
 def __GetChromeDriverPaths(self, chromedriverpath):
     """
     * Build self.__chromedriverpaths: one driver path per folder returned
     by FileConverter.GetAllFolderPaths.
     Inputs:
     * chromedriverpath: Path whose last component is the driver file name
     and whose second-to-last component is compiled as a regular
     expression for the folder search.
     """
     parentdir, drivername = os.path.split(chromedriverpath)
     rootdir, folderpattern = os.path.split(parentdir)
     matcher = re.compile(folderpattern)
     matches = FileConverter.GetAllFolderPaths(rootdir, matcher)
     # Append the driver file name to every folder that was found:
     driverpaths = []
     for match in matches:
         driverpaths.append(os.path.join(match, drivername))
     self.__chromedriverpaths = driverpaths
Пример #7
0
 def __FindIssues(self, servicelogfolder):
     """
     * Read every matching log file in the folder, group its jobs and pass
     each group to self.__DetermineIssues. Afterwards self.__data is
     replaced by a DataFrame sorted by 'TimeStamp', newest first.
     """
     # Start from the class-level dictionary
     # (NOTE(review): this is the same object, not a copy -- confirm intended):
     self.__data = DynamicETLServiceIssueParser.__dataDict
     logpaths = FileConverter.GetAllFilePaths(
         servicelogfolder, DynamicETLServiceIssueParser.__logfileSig)
     for logname in logpaths:
         logpath = logpaths[logname]
         with open(logpath, 'r') as logfile:
             jobgroups = DynamicETLServiceIssueParser.__GroupAllJobs(logfile)
             for jobkey in jobgroups:
                 jobentries = jobgroups[jobkey]
                 self.__DetermineIssues(jobkey, jobentries)
     # Convert the collected data to a frame, then order it:
     issues = DataFrame(self.__data)
     self.__data = issues.sort_values('TimeStamp', ascending=False)
 def __GetAllMatchingFiles(self, datafolder, fileregex):
     """
     * Look up the files in datafolder that match fileregex by delegating
     to FileConverter.GetAllFilePaths, and return its result unchanged.
     """
     matches = FileConverter.GetAllFilePaths(datafolder, fileregex)
     return matches
Пример #9
0
 def GetDataAttributes(self,
                       path,
                       fileExp,
                       dateFormat=None,
                       filePaths=None,
                       sheets=None,
                       delim=None,
                       recursive=False,
                       skiprows=None):
     """
     * Get all column attributes in files at path or at provided paths,
     then record where the column attributes changed.
     Inputs:
     * path: String to folder.
     * fileExp: Compiled regular expression to select files, or None
     (per the original notes, all files in the folder are then chosen --
     this depends on FileConverter.GetAllFilePaths; confirm).
     Optional:
     * dateFormat: None, or dictionary with both keys 'regex' and 'dateformat'.
     * filePaths: Dictionary mapping { FileName -> Path }. When supplied,
     the folder at path is not searched.
     * sheets: Sheets to use if using xls/xlsx file (will create one ETL/table definition per sheet).
     * delim: String delimiter used in csv file (only passed on when sheets is None).
     * recursive: Search for all folders within folder to find matching files.
     * skiprows: Passed through to the file / sheet extraction helpers.
     Raises:
     * Exception: If any argument fails validation (all problems are reported
     together, one per line) or if the folder search finds no files.
     """
     errs = []
     if not isinstance(path, str):
         errs.append("path must be a string.")
     if dateFormat is not None:
         if not isinstance(dateFormat, dict):
             errs.append(
                 "dateFormat must be a dictionary with keys ['regex', 'dateformat']."
             )
         # Both keys are required, so report when either one is missing:
         elif 'regex' not in dateFormat or 'dateformat' not in dateFormat:
             errs.append(
                 "dateFormat must have 'regex' and 'dateformat' keys.")
     if fileExp and not isinstance(fileExp, DataColumnAttributes.__regType):
         errs.append(
             "fileExp must be a regular expression object, or None.")
     if filePaths is not None and not isinstance(filePaths, dict):
         errs.append(
             'filePaths must be a dictionary mapping { FileName -> Path } or None.'
         )
     if sheets is not None and not isinstance(sheets, list):
         errs.append('sheets must be a list if provided.')
     if errs:
         raise Exception("\n".join(errs))
     # Reset the per-sheet flags only when sheets were supplied:
     self.__hasuniques = ({sheet: False
                           for sheet in sheets}
                          if sheets is not None else self.__hasuniques)
     self.__sheets = sheets
     self.__dateFormat = dateFormat
     # Get all files that match data file expression at provided path if not supplied:
     if filePaths is None:
         filePaths = FileConverter.GetAllFilePaths(path, fileExp, recursive)
         if len(filePaths) == 0:
             raise Exception(
                 'Could not find any matching files matching regex.')
     # Get column attributes of all target files
     # (a separate name is used so the 'path' argument is not overwritten):
     for fileName in filePaths:
         filePath = filePaths[fileName]
         if self.__sheets is None:
             self.__ExtractFile(filePath, delim, skiprows)
         else:
             self.__ExtractAllSheets(filePath, skiprows)
     self.__filepaths = {filePaths[key] for key in filePaths}
     # Determine if columns have changed:
     prevAttrs = None
     if len(self.__dateToAttrs) > 1 and self.__sheets is None:
         # Compare each entry with the one before it, in iteration order:
         for dt in self.__dateToAttrs:
             currAttrs = self.__dateToAttrs[dt]
             if prevAttrs is not None and currAttrs != prevAttrs:
                 self.__columnChgDates[
                     currAttrs.FileDate] = currAttrs - prevAttrs
             prevAttrs = currAttrs
     elif len(self.__dateToAttrs) > 1 and self.__sheets is not None:
         # Determine if columns have changed for each sheet:
         for dt in self.__dateToAttrs:
             # The comparison restarts for every date:
             prevAttrs = None
             for sheetname in self.__dateToAttrs[dt]:
                 currAttrs = self.__dateToAttrs[dt][sheetname]
                 if prevAttrs is not None and currAttrs != prevAttrs:
                     self.__columnChgDates[currAttrs.FileDate][
                         sheetname] = currAttrs - prevAttrs
                 prevAttrs = currAttrs