Python csv_to_dict示例，lib.cfel_filetools.csv_to_dict Python示例

示例#1

0

显示文件

文件： cheetah-gui.py 项目： tatkeller/cheetah

    def refresh_table(self):
        """Reload 'crawler.txt' and rebuild the on-screen run table.

        Disables the refresh button while working, stores the loaded dict on
        ``self.crawler_txt`` for use by other functions, fills ``self.table``
        cell by cell, then re-enables the button and restarts
        ``self.refresh_timer``.
        """
        print('Table refreshed at ', str(datetime.datetime.now()))

        # Mark the button as busy while the table is rebuilt
        self.ui.button_refresh.setEnabled(False)

        table_data = cfel_file.csv_to_dict('crawler.txt')

        # Legacy crawler.txt files name two columns '#Run' and 'H5 Directory';
        # move them to the current 'Run' / 'H5Directory' names, both as dict
        # keys and inside the 'fieldnames' list.
        if 'Run' not in table_data and '#Run' in table_data:
            table_data['Run'] = table_data['#Run']
            table_data['H5Directory'] = table_data['H5 Directory']
            del table_data['#Run']
            del table_data['H5 Directory']
            headers = table_data['fieldnames']
            headers[headers.index('#Run')] = 'Run'
            headers[headers.index('H5 Directory')] = 'H5Directory'

        # One of the keys is the 'fieldnames' list rather than a column
        n_columns = len(table_data) - 1
        self.crawler_txt = table_data

        # The 'Run' column determines the number of rows
        n_rows = len(table_data['Run'])
        self.table.setRowCount(n_rows)
        self.table.setColumnCount(n_columns)
        self.table.updateGeometry()

        # Fill the table column by column, in 'fieldnames' order
        for col, key in enumerate(table_data['fieldnames']):
            for row, text in enumerate(table_data[key]):
                if not text.isnumeric():
                    cell = PyQt4.QtGui.QTableWidgetItem(text)
                else:
                    # Strings passing str.isnumeric() are stored as floats
                    cell = PyQt4.QtGui.QTableWidgetItem()
                    cell.setData(PyQt4.QtCore.Qt.DisplayRole, float(text))
                self.table.setItem(row, col, cell)

        # Cosmetic table settings
        # TODO: Make columns resizeable
        self.table.setWordWrap(False)
        self.table.setHorizontalHeaderLabels(table_data['fieldnames'])
        self.table.verticalHeader().setVisible(False)
        self.table.resizeRowsToContents()
        self.table.show()

        # Done: release the button and schedule the next refresh
        self.ui.button_refresh.setEnabled(True)
        self.refresh_timer.start(60000)

示例#2

0

显示文件

文件： cheetah-gui.py 项目： tatkeller/cheetah

    def relabel_dataset(self):
        """Ask for a new dataset label and apply it to all selected runs.

        For each selected run the table cells in columns 1 and 5 are updated,
        the matching row of 'datasets.csv' is updated (or a new row appended),
        and, when the run already has an output directory, that directory is
        renamed with ``mv`` through ``self.spawn_subprocess``.  Finally
        'datasets.csv' is written back.
        """
        # Simple input dialog, see
        # http://www.tutorialspoint.com/pyqt/pyqt_qinputdialog_widget.htm
        text, ok = PyQt4.QtGui.QInputDialog.getText(
            self, 'Change dataset label', 'New label:')
        if not ok:
            return
        newlabel = str(text)
        print('New label is: ', newlabel)

        dataset_csv = cfel_file.csv_to_dict('datasets.csv')

        # Apply the label to every selected run
        runs = self.selected_runs()
        for idx, run in enumerate(runs['run']):

            # '---' marks a run without an output directory; such runs keep
            # the placeholder and nothing is renamed on disk.
            olddir = runs['directory'][idx]
            has_directory = olddir != '---'
            if has_directory:
                newdir = 'r{:04d}'.format(int(run)) + '-' + newlabel
            else:
                newdir = '---'

            # Reflect the change in the GUI table
            row = runs['row'][idx]
            self.table.setItem(row, 1,
                               PyQt4.QtGui.QTableWidgetItem(newlabel))
            self.table.setItem(row, 5,
                               PyQt4.QtGui.QTableWidgetItem(newdir))

            # Reflect the change in the datasets table
            run_column = dataset_csv['Run']
            if run not in run_column:
                run_column.append(run)
                dataset_csv['DatasetID'].append(newlabel)
                dataset_csv['Directory'].append(newdir)
                dataset_csv['iniFile'].append('---')
            else:
                pos = run_column.index(run)
                dataset_csv['DatasetID'][pos] = newlabel
                dataset_csv['Directory'][pos] = newdir

            # Rename the directory itself
            if has_directory:
                source = self.config['hdf5dir'] + '/' + olddir
                target = self.config['hdf5dir'] + '/' + newdir
                self.spawn_subprocess(['mv', source, target])

        # Save datasets file
        cfel_file.dict_to_csv('datasets.csv', dataset_csv,
                              ['Run', 'DatasetID', 'Directory', 'iniFile'])

示例#3

0

显示文件

文件： crawler_merge.py 项目： clydeph/cheetah

def crawler_merge(info):
    """Merge the crawler status tables into a single 'crawler.txt' table.

    Reads 'data_status.csv', 'cheetah_status.csv', 'crystfel_status.csv' and
    'datasets.csv' through ``cfel_file.csv_to_dict``, collates one output row
    per unique run identifier (using '---' wherever a table has no entry for
    that run) and writes the result with ``cfel_file.dict_to_csv``.

    If a legacy 'datasets.txt' exists and 'datasets.csv' does not, the legacy
    file is first converted to 'datasets.csv'.

    Args:
        info: object with a ``datatype`` attribute.  When it equals 'XTC' or
            'exfel', run identifiers are converted to integers before the
            tables are merged.

    Returns:
        None.  The merged table is written to 'crawler.txt'.
    """
    # Compare by value: identity (`is`) on strings only works by accident of
    # interning and would silently skip the conversion below.
    runs_are_numeric = info.datatype in ('XTC', 'exfel')

    #
    #   Fix legacy issue with old datasets.txt format the first time we encounter it
    #
    if os.path.exists('datasets.txt') and not os.path.exists('datasets.csv'):
        print('Updating old datasets.txt format to new datasets.csv format')
        oldstyle = cfel_file.csv_to_dict('datasets.txt')

        oldstyle.update({'Run': oldstyle['# Run']})
        oldstyle.update({'iniFile': ['---'] * len(oldstyle['Run'])})
        del oldstyle['# Run']

        cfel_file.dict_to_csv('datasets.csv', oldstyle,
                              ['Run', 'DatasetID', 'Directory', 'iniFile'])

    #
    #   Read .csv files
    #
    # run,status
    data = cfel_file.csv_to_dict('data_status.csv')
    # run,status,directory,processed,hits,hitrate%
    cheetah = cfel_file.csv_to_dict('cheetah_status.csv')
    # run,status,directory,processed,indexed,indexrate%
    crystfel = cfel_file.csv_to_dict('crystfel_status.csv')
    # Run, DatasetID, Directory, iniFile
    datasets = cfel_file.csv_to_dict('datasets.csv')

    #
    # Update for P11:
    #   Run identifier should be a string and not a number, so the conversion
    #   is skipped there.  Eventually adopt this at SLAC too...
    #
    # Otherwise convert 'r0002' (string) to 2 (integer) so that the run is in
    # the same format in every table.  The datasets table stores the bare
    # number, so no prefix character is skipped for it.
    #
    if runs_are_numeric:
        _runs_to_int(data, 'run', 1)
        _runs_to_int(cheetah, 'run', 1)
        _runs_to_int(crystfel, 'run', 1)
        _runs_to_int(datasets, 'Run', 0)

    # Find unique run identifiers
    # (some runs may be missing from some of the tables)
    all_runs = []
    for table, key in ((data, 'run'), (cheetah, 'run'),
                       (crystfel, 'run'), (datasets, 'Run')):
        if key in table:
            all_runs += table[key]
    uniq_runs = sorted(set(all_runs))

    # Output columns, in the order they are written to crawler.txt
    keys_to_save = [
        'Run', 'Dataset', 'Rawdata', 'Cheetah', 'CrystFEL', 'H5Directory',
        'Nprocessed', 'Nhits', 'Nindex', 'Hitrate%', 'Recipe', 'Calibration'
    ]
    result = {key: [] for key in keys_to_save}

    #
    # Loop through all possible runs and collate information
    #   being sensible when data is not in one of the other files
    #
    for run in uniq_runs:

        # Raw data status (columns: run, status)
        datastatus = '---'
        if data and run in data['run']:
            datastatus = data['status'][data['run'].index(run)]

        # Datasets file (columns: Run, DatasetID, Directory, iniFile and,
        # optionally, calibFile)
        dataset = h5dir = inifile = calibfile = '---'
        if datasets and run in datasets['Run']:
            i = datasets['Run'].index(run)
            dataset = datasets['DatasetID'][i].strip()
            h5dir = datasets['Directory'][i].strip()
            inifile = datasets['iniFile'][i].strip()
            if 'calibFile' in datasets:
                calibfile = datasets['calibFile'][i].strip()

        # Cheetah status file
        # (columns: run, status, directory, processed, hits, hitrate%)
        cheetahstatus = nprocessed = nhits = hitrate = '---'
        if cheetah:
            i = _find_status_row(cheetah, run, h5dir)
            if i is not None:
                cheetahstatus = cheetah['status'][i].strip()
                nprocessed = cheetah['processed'][i].strip()
                nhits = cheetah['hits'][i].strip()
                hitrate = cheetah['hitrate%'][i].strip()

            # Normalise plain decimal hit rates to two decimal places
            if hitrate.replace('.', '', 1).isnumeric():
                hitrate = '{:0.2f}'.format(float(hitrate))

        # CrystFEL status file
        # (columns: run, status, directory, processed, indexed, indexrate%)
        crystfel_status = indexrate = '---'
        if crystfel:
            i = _find_status_row(crystfel, run, h5dir)
            if i is not None:
                crystfel_status = crystfel['status'][i].strip()
                indexrate = crystfel['indexrate%'][i].strip()

        # Append this run's row to the output columns
        result['Run'].append(run)
        result['Dataset'].append(dataset)
        result['Rawdata'].append(datastatus)
        result['Cheetah'].append(cheetahstatus)
        result['CrystFEL'].append(crystfel_status)
        result['H5Directory'].append(h5dir)
        result['Nprocessed'].append(nprocessed)
        result['Nhits'].append(nhits)
        # NOTE(review): the 'Nindex' column carries the 'indexrate%' value,
        # not the 'indexed' count -- kept as-is, confirm this is intended.
        result['Nindex'].append(indexrate)
        result['Hitrate%'].append(hitrate)
        result['Recipe'].append(inifile)
        result['Calibration'].append(calibfile)

    # Write dict to CSV file
    cfel_file.dict_to_csv('crawler.txt', result, keys_to_save)


def _runs_to_int(table, key, skip_chars):
    """Convert the run identifiers in ``table[key]`` to int, in place.

    ``skip_chars`` leading characters (e.g. the 'r' of 'r0002') are dropped
    before conversion.  This is best-effort: an empty table is left alone and
    a missing column or an unparsable identifier stops the conversion without
    raising, leaving any entries already converted in place.
    """
    if not table:
        return
    try:
        runs = table[key]
        for i, run in enumerate(runs):
            runs[i] = int(run[skip_chars:]) if skip_chars else int(run)
    except (KeyError, IndexError, TypeError, ValueError):
        # Deliberately tolerated: identifiers that are not in the expected
        # 'rNNNN' / 'NNNN' form are left unconverted.
        pass


def _find_status_row(table, run, h5dir):
    """Return the row index of ``table`` describing this run, or None.

    A row whose 'directory' equals ``h5dir`` is preferred (this handles one
    run having several output directories), but it is only accepted when its
    'run' entry also matches, which guards against matching '---' entries.
    If ``h5dir`` is not in the directory column at all, the first row with a
    matching 'run' is used.

    NOTE(review): when ``h5dir`` is found but belongs to a different run,
    there is no fallback to a run match -- this mirrors the previous
    if/elif logic; confirm that is the intended behaviour.
    """
    if h5dir in table['directory']:
        i = table['directory'].index(h5dir)
        return i if table['run'][i] == run else None
    if run in table['run']:
        return table['run'].index(run)
    return None

示例#4

0

显示文件

文件： cheetah-gui.py 项目： tatkeller/cheetah

    def run_cheetah(self):
        """Ask for a dataset label and .ini file, then process selected runs.

        For every selected run the configured process script is launched via
        ``self.spawn_subprocess``, the GUI table row is updated (dataset in
        column 1, 'Submitted' in column 3, output directory in column 5) and
        the run's row in 'datasets.csv' is updated or appended.  The datasets
        file is written back once all runs have been submitted.

        Returns early, without side effects, if the dialog is cancelled.
        """
        # Find .ini files for dropdown list
        inifile_list = [
            os.path.basename(path)
            for path in glob.iglob('../process/*.ini')
        ]

        # Info needed for the dialog box
        dialog_info = {
            'inifile_list': inifile_list,
            'lastini': self.lastini,
            'lasttag': self.lasttag
        }
        # Dialog box for dataset label and ini file
        gui, ok = gui_dialogs.run_cheetah_gui.cheetah_dialog(dialog_info)

        # Exit if cancel was pressed.  This is checked before touching the
        # returned dict so a cancelled dialog can never fail on a missing key.
        if not ok:
            return

        # Extract values from return dict
        dataset = gui['dataset']
        inifile = gui['inifile']

        dataset_csv = cfel_file.csv_to_dict('datasets.csv')

        # Remember the choices (read back as the dialog's 'lasttag'/'lastini')
        self.lasttag = dataset
        self.lastini = inifile

        # Process all selected runs
        runs = self.selected_runs()
        for i, run in enumerate(runs['run']):
            print('------------ Start Cheetah process script ------------')
            cmdarr = [self.config['process'], run, inifile, dataset]
            self.spawn_subprocess(cmdarr)

            # Output directory name, e.g. 'r0012-<dataset>'
            outdir = 'r{:04d}'.format(int(run)) + '-' + dataset

            # Update Dataset and Cheetah status in table
            table_row = runs['row'][i]
            self.table.setItem(table_row, 1,
                               PyQt4.QtGui.QTableWidgetItem(dataset))
            self.table.setItem(table_row, 5,
                               PyQt4.QtGui.QTableWidgetItem(outdir))
            self.table.setItem(table_row, 3,
                               PyQt4.QtGui.QTableWidgetItem('Submitted'))

            # Update dataset file
            if run in dataset_csv['Run']:
                ds_indx = dataset_csv['Run'].index(run)
                dataset_csv['DatasetID'][ds_indx] = dataset
                dataset_csv['Directory'][ds_indx] = outdir
                dataset_csv['iniFile'][ds_indx] = inifile
            else:
                dataset_csv['Run'].append(run)
                dataset_csv['DatasetID'].append(dataset)
                dataset_csv['Directory'].append(outdir)
                dataset_csv['iniFile'].append(inifile)
            print('------------ Finish Cheetah process script ------------')

        # Save datasets file
        keys_to_save = ['Run', 'DatasetID', 'Directory', 'iniFile']
        cfel_file.dict_to_csv('datasets.csv', dataset_csv, keys_to_save)

示例#5

0

显示文件

    def run_cheetah(self):
        """Ask for dataset label, .ini and calibration file; process runs.

        For every selected run the configured process script is launched via
        ``cfel_file.spawn_subprocess``, the GUI table row is updated and the
        run's row in 'datasets.csv' is updated or appended.  The datasets file
        is written back once all runs have been submitted.

        Returns early, without submitting anything, if the dialog is
        cancelled or if 'datasets.csv' cannot be read or has no entries.
        """
        # Find .ini files for dropdown list
        inifile_list = [
            os.path.basename(path)
            for path in glob.iglob('../process/*.ini')
        ]

        # Info needed for the dialog box
        dialog_info = {
            'inifile_list': inifile_list,
            'lastini': self.lastini,
            'lastcalib': self.lastcalib,
            'lasttag': self.lasttag
        }
        # Dialog box for dataset label and ini file
        gui, ok = gui_dialogs.run_cheetah_gui.cheetah_dialog(dialog_info)

        # Exit if cancel was pressed
        if not ok:
            return

        # Extract values from return dict and remember them for next time
        dataset = gui['dataset']
        inifile = gui['inifile']
        calibfile = gui['calibfile']
        self.lasttag = dataset
        self.lastini = inifile
        self.lastcalib = calibfile

        try:
            dataset_csv = cfel_file.csv_to_dict('datasets.csv')
        except Exception:
            # Any read/parse failure aborts the action; unlike a bare
            # `except:` this no longer swallows KeyboardInterrupt/SystemExit.
            print('Error occured reading datasets.csv (blank file?)')
            print('Check file contents.  Will return and do nothing.')
            return

        # Failing to read the dataset file loses all information (bad):
        # writing it back below would overwrite the file with an empty table.
        if not dataset_csv['DatasetID']:
            print("Error reading datasets.csv (blank file)")
            print("Try again...")
            return

        if 'calibFile' not in dataset_csv:
            print('Adding calibFile to datasets.csv')
            # Copy the column.  Assigning the same list object to both keys
            # would make every later write to 'calibFile' also modify
            # 'iniFile' (and each new run would be appended twice).
            dataset_csv['calibFile'] = list(dataset_csv['iniFile'])

        # Locations whose output directories are named after the run number
        numbered_sites = ('LCLS', 'max-exfl', 'max-cfel')

        # Process all selected runs
        runs = self.selected_runs()
        for i, run in enumerate(runs['run']):
            print('------------ Start Cheetah process script ------------')
            cmdarr = [self.config['process'], run, inifile, calibfile, dataset]
            # NOTE(review): an argument list combined with shell=True is
            # unusual for subprocess-style APIs -- confirm how
            # cfel_file.spawn_subprocess treats it.
            cfel_file.spawn_subprocess(cmdarr, shell=True)

            # Format output directory string
            # This clumsily selects between using run numbers and using
            # directory names.  Need to fix this up sometime
            location = self.compute_location['location']
            print("Location: ", location)
            if any(site in location for site in numbered_sites):
                outdir = 'r{:04d}'.format(int(run))
            else:
                outdir = run
            outdir += '-' + dataset
            print('Output directory: ', outdir)

            # Update Dataset and Cheetah status in table
            table_row = runs['row'][i]
            self.table.setItem(table_row, 1,
                               PyQt5.QtWidgets.QTableWidgetItem(dataset))
            self.table.setItem(table_row, 3,
                               PyQt5.QtWidgets.QTableWidgetItem('Submitted'))
            self.table.setItem(table_row, 5,
                               PyQt5.QtWidgets.QTableWidgetItem(outdir))

            self.table.setItem(table_row, 10,
                               PyQt5.QtWidgets.QTableWidgetItem(inifile))
            self.table.setItem(table_row, 11,
                               PyQt5.QtWidgets.QTableWidgetItem(calibfile))

            # Highlight the freshly submitted status cell
            self.table.item(table_row,
                            3).setBackground(PyQt5.QtGui.QColor(255, 255, 100))

            # Update dataset file
            if run in dataset_csv['Run']:
                ds_indx = dataset_csv['Run'].index(run)
                dataset_csv['DatasetID'][ds_indx] = dataset
                dataset_csv['Directory'][ds_indx] = outdir
                dataset_csv['iniFile'][ds_indx] = inifile
                dataset_csv['calibFile'][ds_indx] = calibfile
            else:
                dataset_csv['Run'].append(run)
                dataset_csv['DatasetID'].append(dataset)
                dataset_csv['Directory'].append(outdir)
                dataset_csv['iniFile'].append(inifile)
                dataset_csv['calibFile'].append(calibfile)
            print('------------ Finish Cheetah process script ------------')

        # Save datasets file
        keys_to_save = [
            'Run', 'DatasetID', 'Directory', 'iniFile', 'calibFile'
        ]
        cfel_file.dict_to_csv('datasets.csv', dataset_csv, keys_to_save)