Example #1
def post_gentemplate_cleanup(argsdict):
    # this portion of the above function has been separated to allow for individual testing.

    # normally, we combine chunks, but in the case of styles generation, this is not needed except for roismap.

    logs.sts("gentemplates_by_tasklists completed.\n", 3)
    
    #import pdb; pdb.set_trace()

    if argsdict['include_maprois']:
        #styles_completed = DB.list_subdirs_with_filepat('styles', file_pat=r'\.json$', s3flag=None)
        #attempted_but_failed_styles = [s for s in styles_on_input if s not in styles_completed]

        logs.sts("Combining roismap for each style into a single .csv file.", 3)
        DB.combine_dirname_chunks(dirname='styles', subdir="roismap", dest_name='roismap.csv', file_pat=r'_roismap\.csv')

        good_map_num = logs.get_and_merge_s3_logs(dirname='styles', rootname='map_report', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs_good_maps')
        fail_map_num = logs.get_and_merge_s3_logs(dirname='styles', rootname='map_report', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs_failed_maps')
        
        logs.sts(f"{good_map_num} styles successfully mapped; {fail_map_num} styles did not fully map.", 3)
    
    # style logs are placed in one folder in styles
    # logs are like exc_11010_styles_chunk_84.txt
    # downloads file_pat=fr"{rootname}_{chunk_pat}\.txt"
    logs.get_and_merge_s3_logs(dirname='styles', rootname='log', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs')
    logs.get_and_merge_s3_logs(dirname='styles', rootname='exc', chunk_pat=r'\d+_styles_chunk_\d+', subdir='logs')
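# --- A quick, illustrative check of how the log download pattern resolves.
# Per the comment above, get_and_merge_s3_logs builds its pattern as
# fr"{rootname}_{chunk_pat}\.txt"; the filenames below are made up.
import re

rootname = 'exc'
chunk_pat = r'\d+_styles_chunk_\d+'
file_pat = fr"{rootname}_{chunk_pat}\.txt"

assert re.fullmatch(file_pat, 'exc_11010_styles_chunk_84.txt')
assert not re.fullmatch(file_pat, 'log_11010_styles_chunk_84.txt')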
Example #2
def save_style_ballot_images(ballots: list, style_num):
    for ballot in ballots:
        utils.sts(f"Saving images for ballot {ballot.ballotdict['ballot_id']}",
                  3)

        DB.save_data_list(data_list=ballot.ballotimgdict['images'],
                          dirname='styles',
                          name=ballot.ballotdict['ballot_id'],
                          format='.png',
                          subdir=style_num)
    def save_ballot_images(self):
        """Method to save the ballot pages as image (PNG) files.
            This saves them by precinct rather than by style.
        """

        DB.save_data_list(data_list=self.ballotimgdict['images'],
                          dirname='styles',
                          name=self.ballotdict['ballot_id'],
                          format='.png',
                          subdir=self.ballotdict['precinct'])
    def save_ballot(self):
        """
        Saves ballot data to a JSON file. It converts ballot attributes
        to a dictionary on its own with the 'get_ballot_data' helper, or
        uses a passed 'data' dictionary.
        """

        DB.save_data(data_item=self.ballotdict,
                     dirname='results',
                     name=self.ballotdict['ballot_id'] + '.json',
                     subdir=self.ballotdict['precinct'])
Example #5
    def load_excel_to_df(argsdict: dict, filename_list: list,
                         column_names_list: list):
        """
        Reads a CVR excel file and saves it as a pandas data frame.
        Combines multiple CVR files and assumes columns are identical.
        Renames unnamed columns by duplicating last column name.
        This is specific to ES&S cvr files.
        """
        for idx, file_name in enumerate(filename_list):
            utils.sts(f"Reading cvr file {file_name}...")
            if not idx:
                #CVR.data_frame = pd.read_excel(file, engine='xlrd')
                CVR.data_frame = DB.load_data(dirname='archives',
                                              name=file_name,
                                              user_format=True)
            else:
                # df = pd.read_excel(file, engine='xlrd')
                df = DB.load_data(dirname='archives',
                                  name=file_name,
                                  user_format=True)
                # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
                CVR.data_frame = pd.concat([CVR.data_frame, df], ignore_index=True)

        if argsdict.get('convert_cvr_image_cells_to_writein', False):
            CVR.set_cells_with_images_to_writeins(argsdict['cvr'])

        if column_names_list:
            utils.sts(
                "replacing column names with replacement column names provided."
            )
            # use the replacement column headers instead of those provided.
            orig_col_names = CVR.data_frame.columns
            if len(orig_col_names) != len(column_names_list):
                utils.sts(
                    "replacement column header list does not match the number of columns in the CVR"
                )
                sys.exit(1)
            # we will replace any "blank" col names with "Unnamed: XXX" so we can remove them later.
            for i, orig_col_name in enumerate(orig_col_names):
                if re.match(r'Unnamed:', orig_col_name):
                    column_names_list[i] = orig_col_name
            CVR.data_frame.columns = column_names_list

        utils.sts("Checking for duplicate column names.")
        # at this point, there should be no duplicate column names.
        column_name_set = len(set(CVR.data_frame.columns))
        column_name_list = len(list(CVR.data_frame.columns))
        if not column_name_set == column_name_list:
            utils.sts("Column Names are duplicated")
            sys.exit(1)

        utils.sts(
            "Replacing 'Unnamed' columns with the prior named column name.")
        CVR.data_frame.columns = CVR.rename_unnamed(
            list(CVR.data_frame.columns))
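# --- CVR.rename_unnamed is not shown in this excerpt. A minimal sketch of what
# such a helper might do, assuming pandas labels blank headers 'Unnamed: N' and
# each should inherit the nearest named column to its left (sketch only, not
# necessarily the project's actual implementation):
import re

def rename_unnamed(columns: list) -> list:
    renamed = []
    last_named = ''
    for col in columns:
        if re.match(r'Unnamed:', str(col)):
            renamed.append(last_named)   # duplicate the prior named column
        else:
            last_named = col
            renamed.append(col)
    return renamed

# rename_unnamed(['Precinct', 'Unnamed: 1', 'Style']) -> ['Precinct', 'Precinct', 'Style']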
def combine_dirname_chunks_each_archive(argsdict, dirname):
    """ combine all the chunks in a specific dirname into {archive_rootname}_{dirname}.csv files, one per archive.
        Do this in the dirname folder.
    """

    for archive_idx, source in enumerate(argsdict['source']):
        archive_rootname = os.path.splitext(os.path.basename(source))[0]
        DB.combine_dirname_chunks(
            dirname=dirname,
            subdir='chunks',
            dest_name=f"{archive_rootname}_{dirname}.csv",
            file_pat=fr"{archive_rootname}_{dirname}_chunk_\d+\.csv")
def accept_delegation_task_chunk(request_id, task_args):
    """ This is a locally callable function to allow debugging.
        right after args are unpacked.
    """
    args.argsdict = argsdict = task_args['argsdict']

    argsdict['on_lambda'] = True
    DB.set_DB_mode()
    chunk_name = task_args['chunk_name']
    #dirname         = task_args['dirname']
    #subdir          = task_args['subdir']
    job_name = argsdict['job_name']

    # Lambda containers may be reused: if one lambda finishes work of the same
    # kind and another is started in the same container, leftover files and data
    # structures may still exist, so state must be treated as indeterminate.

    # no need to report that the Lambda is 'Running' -- we already know that.
    # LambdaTracker.lambda_report_status(task_args, request_id, status='Running')

    try:
        launch_task(task_args, s3flag=True)

        # pylint: disable=broad-except
        # We need to catch broad exception.
    except Exception as err:
        error_info = {
            'error_type': err.__class__.__name__,
            'error_message': repr(err),
            'error_stack': traceback.format_tb(err.__traceback__),
            'task_args': task_args,
        }
        LambdaTracker.lambda_report_status(task_args,
                                           request_id,
                                           status="Failed",
                                           error_info=error_info)
        msg = f"{job_name} Failed"
    else:
        LambdaTracker.lambda_report_status(task_args,
                                           request_id,
                                           status='Completed')
        error_info = None
        msg = f"{job_name} Completed"

    return {
        'body':
        json.dumps({
            'msg': msg,
            'error_info': error_info,
            'chunk_name': chunk_name,
        })
    }
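# --- Because accept_delegation_task_chunk is locally callable, it can be driven
# directly from a debugger with hand-built task_args. Field names follow the
# unpacking above; values are illustrative, and launch_task() will expect more
# task-specific fields than shown here.
if __name__ == '__main__':
    sample_task_args = {
        'argsdict': {'job_name': 'debug_job'},
        'chunk_name': 'archive1_bif_chunk_0',
        # ... plus whatever launch_task() needs for the task being debugged
    }
    result = accept_delegation_task_chunk('local-debug-request', sample_task_args)
    print(result['body'])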
    def save_ballot_pdf(self):
        """Extracts the ballot PDF file so it can be viewed in the web browser.
            This appears to be unused.
        """
        precinct = self.ballotdict['precinct']
        ballot_id = self.ballotdict['ballot_id']
        pdf_file = self.ballotimgdict['pdf_file']

        DB.save_data(data_item=pdf_file.get('bytes_array'),
                     dirname='disagreements',
                     name=f'{ballot_id}.pdf',
                     format='.pdf',
                     subdir=precinct)
Example #9
 def getByLogin(self, login):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         # params must be a one-element tuple: (login,) -- (login) is just a string
         c.execute('SELECT * FROM clientes WHERE login = %s', (login,))
         for linha in c:
             self.id = linha[0]
             self.login = linha[1]
             self.senha = linha[2]
             self.grupo = linha[3]
             self.nome = linha[4]
             self.endereco = linha[5]
             self.numero = linha[6]
             self.observacao = linha[7]
             self.cep = linha[8]
             self.bairro = linha[9]
             self.cidade = linha[10]
             self.estado = linha[11]
             self.telefone = linha[12]
             self.email = linha[13]  # was linha[12], which duplicated telefone
         c.close()
         if not self.id:
             return 'User not found!'
         return 'Search completed successfully!'
     except Exception:
         return 'An error occurred while searching for the user'
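# --- A pitfall worth flagging for all the cursor.execute() calls in these DB
# classes: the DB-API params argument must be a sequence. '(login)' is just
# login wrapped in parentheses; a one-element tuple needs the trailing comma.
params_wrong = ('alice')    # -> the str 'alice', not a tuple
params_right = ('alice',)   # -> a 1-tuple, as execute() expects
assert isinstance(params_wrong, str)
assert isinstance(params_right, tuple)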
Example #10
    def set_cells_with_images_to_writeins(file_paths):
        """Reads CVR spreadsheet as a ZIP and extracts information from
        the .xml file about the cells that have images in them.
        Then sets null cells in CVR data frame to write-in, if the cell
        has an image within.
        :param file_paths: Path, or list of paths, to the CVR file(s).
        @TODO: Need to fix for s3 operation.
                probably first download the file and then perform the work.
        """
        dirpath = DB.dirpath_from_dirname('archives')
        if dirpath.startswith('s3'):
            utils.sts("Cannot convert images to writeins on s3")
            sys.exit(1)

        if isinstance(file_paths, str):
            file_paths = [file_paths]
        for file_path in file_paths:
            xml_path = 'xl/drawings/drawing1.xml'
            with ZipFile(file_path, 'r') as archive:
                try:
                    xml_file = archive.read(xml_path)
                except KeyError:
                    utils.sts(f"Couldn't find {xml_path} in {file_path}")
                    break
            doc = xml.dom.minidom.parseString(xml_file.decode())
            for cellAnchorElement in doc.getElementsByTagName(
                    'xdr:twoCellAnchor'):
                fromElement = cellAnchorElement.getElementsByTagName(
                    'xdr:from')[0]
                row = fromElement.getElementsByTagName(
                    'xdr:row')[0].firstChild.data
                col = fromElement.getElementsByTagName(
                    'xdr:col')[0].firstChild.data
                # xdr coordinates are 0-based; row 0 is the sheet's header row,
                # which is not in the data frame, hence the -1 row offset.
                CVR.data_frame.iat[int(row) - 1, int(col)] = 'write-in:'
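# --- For reference, a trimmed, illustrative fragment of the
# xl/drawings/drawing1.xml structure the loop above walks (element names per
# the OOXML spreadsheetDrawing schema; coordinate values made up):
import xml.dom.minidom

SAMPLE_DRAWING_XML = (
    '<xdr:wsDr xmlns:xdr="http://schemas.openxmlformats.org/'
    'drawingml/2006/spreadsheetDrawing">'
    '<xdr:twoCellAnchor>'
    '<xdr:from><xdr:col>7</xdr:col><xdr:row>42</xdr:row></xdr:from>'
    '<xdr:to><xdr:col>8</xdr:col><xdr:row>43</xdr:row></xdr:to>'
    '</xdr:twoCellAnchor>'
    '</xdr:wsDr>'
)

doc = xml.dom.minidom.parseString(SAMPLE_DRAWING_XML)
from_el = doc.getElementsByTagName('xdr:twoCellAnchor')[0] \
             .getElementsByTagName('xdr:from')[0]
print(from_el.getElementsByTagName('xdr:row')[0].firstChild.data)  # '42'
print(from_el.getElementsByTagName('xdr:col')[0].firstChild.data)  # '7'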
Example #11
def load_bof_df(argsdict):
    """returns conversions for ballot options.
    This function implements the Ballot Options File (BOF)
    """
    bof_columns = [
        'official_contest_name',
        # official contest name used as a means to look up the ballot option.
        'official_option',
        # one option per record used as a second index to look up the ballot option
        'ballot_option',
        # ballot options as shown on the ballot, and only provided if the ballot
        # option differs from the official option.
    ]
    bof_filename = argsdict.get('bof')
    if not bof_filename:
        return None

    bof_df = DB.load_data(dirname='EIFs',
                          name=bof_filename,
                          silent_error=False,
                          user_format=True)

    bof_df = check_table(bof_df,
                         table_name=bof_filename,
                         required_columns_list=bof_columns,
                         strip_cols=bof_columns)

    utils.sts(f"BOF {bof_filename} loaded.")
    return bof_df
def read_settings_csv_file(dirname, name, argspecs_dod, name_field='name', value_field='value'):
    """ reads settings with columns name_field and value_field into dict[name] = value
    """

    inputdict = {}  
    error_flag = False
    if not name:
        return {}

    print(f"Input file specified. Reading input from file '{name}'...")
    
    # need to be able to load from s3 or local.
    # note: the dirname parameter is currently unused; 'input_files' is hardcoded.
    settings_df = DB.load_data(dirname='input_files', name=name, format='.csv', user_format=True, s3flag=False)
    
    settings_lod = settings_df.to_dict(orient='records')
    
    for setting_dict in settings_lod:
        name = setting_dict[name_field].strip(' ')
        
        if name not in argspecs_dod:
            print(f"{name_field} '{name}' not supported.")
            error_flag = True
            continue
            
        add_value_of_type(
            inputdict, 
            name=name, 
            spec_type=argspecs_dod[name]['type'], 
            valstr=setting_dict[value_field]
            )
            
    if error_flag:
        sys.exit(1)
            
    return inputdict
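# --- Illustrative shapes of the two inputs read_settings_csv_file expects.
# The spec fields are assumptions based only on the lookups above
# (argspecs_dod[name]['type']); the real argspecs schema may carry more.
example_argspecs_dod = {
    'job_name':    {'type': 'str'},
    'use_lambdas': {'type': 'bool'},
}
# settings .csv file (name_field='name', value_field='value'):
#   name,value
#   job_name,precinct_audit
#   use_lambdas,true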
def get_dirname_results(dirname, s3flag=None):
    """ return list of s3paths or file_paths to result files, one per archive.
    """

    file_pat=f".*_{dirname}\\.csv"
    file_paths = DB.list_filepaths_in_dirname_filtered(dirname, file_pat=file_pat, s3flag=s3flag)
    return file_paths
Example #14
def get_replacement_cvr_header(argsdict: dict) -> list:
    """
    :param args_dict: Dict of arguments passed on script input.
    """
    utils.sts("Loading EIF...", 3)

    eif_filename = argsdict.get('eif')

    eif_df = DB.load_data(dirname='EIFs', name=eif_filename, user_format=True)

    eif_df = check_table(eif_df,
                         table_name=eif_filename,
                         required_columns_list=EIF_REQUIRED_COLS,
                         strip_cols=EIF_STRIP_COLS)

    cvr_replacement_header_list = list(eif_df['official_contest_name'])
    expected_initial_cvr_cols = argsdict.get(
        'initial_cvr_cols', ['Cast Vote Record', 'Precinct', 'Ballot Style'])
    if not all(item in cvr_replacement_header_list
               for item in expected_initial_cvr_cols):
        expected_cols = ','.join(expected_initial_cvr_cols)
        utils.sts(
            f"ERROR: CVR does not have the expected fields in the header {expected_cols}",
            0)
        sys.exit(1)
    return cvr_replacement_header_list
def merge_csv_dirname_local(dirname,
                            subdir,
                            dest_name,
                            dest_dirname=None,
                            file_pat=None):
    """ merge all csv files in local dirname meeting file_pat into one to dest_name
        uses header line from first file, discards header is subsequent files.
        all csv files must have the same format.
    """

    if dest_dirname is None: dest_dirname = dirname

    sts(f"Merging csv from {dirname} to {dest_dirname}/{dest_name}", 3)

    src_dirpath = DB.dirpath_from_dirname(dirname, subdir=subdir, s3flag=False)
    dest_dirpath = DB.dirpath_from_dirname(dest_dirname, s3flag=False)
    destpath = os.path.join(dest_dirpath, dest_name)

    first_pass = True
    infilepath_list = glob.glob(f"{src_dirpath}*.csv")

    for idx, infilepath in enumerate(infilepath_list):
        basename = os.path.basename(infilepath)
        if file_pat is not None and not re.search(file_pat, basename):
            # skip any files that are not the lambda download format, including the one being built
            continue
        if infilepath == destpath:
            # make sure we are not appending dest to itself.
            continue
        #sts(f"Appending result #{idx} from {infilepath}", 3)
        if first_pass:
            shutil.copyfile(infilepath, destpath)
            # first file just copy to new name
            fa = open(destpath, 'a+', encoding="utf8")
            first_pass = False
            continue
        # the rest of the chunks, first strip header, and append
        with open(infilepath, encoding="utf8") as fi:
            buff = fi.read()
            lines = re.split(r'\n', buff)  # .decode('utf-8')
            non_header_lines = '\n'.join(lines[1:])  # skip header line
            fa.write(non_header_lines)

    try:
        fa.close()
    except UnboundLocalError:
        pass
def delegated_build_bif_chunk(dirname, task_args, s3flag=None):
    """ this function is suitable for execution in lambda after delegation
        can also use by local machine even if s3 is used for output.
    """

    # task_args: argsdict, archive_basename, chunk_idx, filelist
    args.argsdict = argsdict = task_args['argsdict']
    
    chunk_idx   = task_args['chunk_idx']
    filelist    = task_args['filelist']                         # the list of files to be processed in this chunk.
    subdir      = task_args['subdir']
    chunk_name  = task_args['chunk_name']
    
    archive_basename = task_args['group_name']
    archive = open_archive(argsdict, archive_basename)          # if using s3, this will open the archive on s3.
    full_file_list = get_file_paths(archive)
    if not full_file_list:
        raise LookupError(f"archive {archive_basename} appears empty")

    pstyle_region_dict = argsdict.get('pstyle_region')
    pstyle_pattern = argsdict.get('pstyle_pattern', '')

    df_dict = {}        # to save time, we will build the dataframe as a dict of dicts, then in one swoop create the dataframe.
                        # format is {0: {colname: value, ...}, 1: {...}, ...}
    
    #filelist = filelist[0:5]
    for index, file_paths in enumerate(filelist):
    
        ballot_file_paths = re.split(r';', file_paths)
        _, _, ballot_id = analyze_ballot_filepath(ballot_file_paths[0])

        df_dict[index] = create_bif_dict_by_reading_ballot(argsdict, 
                                                            ballot_id, 
                                                            index, 
                                                            archive_basename, 
                                                            archive, 
                                                            ballot_file_paths,
                                                            pstyle_region_dict, 
                                                            pstyle_pattern,
                                                            chunk_idx)
    # create the dataframe all at once.
    #print(df_dict)
    chunk_df = pd.DataFrame.from_dict(df_dict, "index")

    DB.save_data(data_item=chunk_df, dirname=dirname, subdir=subdir, name=chunk_name, format='.csv', s3flag=s3flag)
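# --- A standalone illustration of the dict-of-dicts pattern used above:
# build all rows in a plain dict, then create the DataFrame in one call,
# instead of appending to a DataFrame once per ballot (row values made up).
import pandas as pd

df_dict = {
    0: {'ballot_id': '00001', 'archive': 'archive1', 'chunk_idx': 0},
    1: {'ballot_id': '00002', 'archive': 'archive1', 'chunk_idx': 0},
}
chunk_df = pd.DataFrame.from_dict(df_dict, "index")
print(chunk_df)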
def remove_dirname_files_by_pattern(dirname, file_pat=None):
    """ remove files from dirpath that match regex file_pat
    """
    dirpath = DB.dirpath_from_dirname(dirname)
    for filename in os.listdir(dirpath):
        full_path = os.path.join(dirpath, filename)
        if os.path.isfile(full_path) and not (file_pat and bool(
                re.search(file_pat, filename))):
            os.remove(full_path)
def write_html_summary(html_doc, filename='summary'):
    summary_path = DB.dirpath_from_dirname(filename)
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    html_file_path = f"{summary_path}{filename}.html"
    with open(html_file_path, 'w') as html_file:
        html_file.write(html_doc.render())
    return os.path.abspath(html_file_path)
def get_logfile_pathname(rootname='log'):
    """ lambdas can only open files in /tmp
        Used only within this module.
    """
    if utils.on_lambda():
        return f"/tmp/{rootname}.txt"
    else:
        dirpath = DB.dirpath_from_dirname('logs', s3flag=False)   # this also creates the dir
        return f"{dirpath}{rootname}.txt"
Example #20
 def delete(self):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('DELETE FROM produtos WHERE id = %s', (self.id,))
         banco.conexao.commit()
         c.close()
         return 'Product deleted successfully!'
     except Exception:
         return 'An error occurred while deleting the product'
def load_one_marks_df(df_file):
    """
    prior operation creates a separate NNNNN_marks_df.csv file for each ballot.
    now creating .csv file
    This supports incremental operation.
    """
    #utils.sts(f"Loading df chunk {df_file}")
    #marks_df = DB.load_df(name=df_file, dirname='results')
    marks_df = DB.load_data(dirname='marks', name=df_file, format='.csv')
    return marks_df
Example #22
 def getAll(self):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('SELECT * FROM produtos')
         result = c.fetchall()
         c.close()
         return result
     except Exception:
         return None
def extractvote_by_tasklists(argsdict: dict):
    """
    ACTIVE
    This replaces the extractvotes function.
    given tasklists which exist in the extraction_tasks folder,

    Tasklists are generated by reviewing the BIF tables.
    Each tasklist creates a separate f"marks_{tasklist_name}.csv" file in the results folder.

    """
    logs.sts('Extracting marks from extraction tasklists', 3)

    tasklists = DB.list_files_in_dirname_filtered(dirname='marks', subdir='tasks', file_pat=r'^[^~].*\.csv$', fullpaths=False)
    total_num = len(tasklists)
    utils.sts(f"Found {total_num} taskslists", 3)

    use_lambdas = argsdict['use_lambdas']

    if use_lambdas:
        LambdaTracker.clear_requests()
        #clear_instructions(config_d.TASKS_BUCKET, Job.get_path_name())

    biflist = get_biflist(no_ext=True)

    for bif_idx, bifname in enumerate(biflist):
        archive_name = re.sub(r'_bif', '', bifname)
        genmarks_tasks = [t for t in tasklists if t.startswith(archive_name)]
    
        for chunk_idx, tasklist_name in enumerate(genmarks_tasks):
        
            #----------------------------------
            # this call may delegate to lambdas and return immediately
            # if 'use_lambdas' is enabled.
            # otherwise, it blocks until the chunk is completed.
            
            build_one_chunk(argsdict, 
                dirname='marks', 
                chunk_idx=chunk_idx, 
                filelist=[tasklist_name], 
                group_name=bifname,
                task_name='extractvote', 
                incremental=False)

            #----------------------------------

            if not chunk_idx and not bif_idx and argsdict['one_lambda_first']:
                if not wait_for_lambdas(argsdict, task_name='extractvote'):
                    utils.exception_report("task 'extractvote' failed delegation to lambdas.")
                    sys.exit(1)           

    wait_for_lambdas(argsdict, task_name='extractvote')

    utils.combine_dirname_chunks_each_archive(argsdict, dirname='marks')
    logs.get_and_merge_s3_logs(dirname='marks', rootname='log', chunk_pat=r"_chunk_\d+", subdir="chunks")
    logs.get_and_merge_s3_logs(dirname='marks', rootname='exc', chunk_pat=r"_chunk_\d+", subdir="chunks")
Example #24
 def getByName(self, nome):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('SELECT * FROM clientes WHERE nome LIKE %s',
                   ('%' + nome + '%',))
         result = c.fetchall()
         c.close()
         return result
     except Exception:
         return None
def combine_archive_bifs():
    """
    BIF tables are constructed for each archive. Combine these into a single BIF table.
    Returns full_bif_df. 
    
    NOTE! This function does not create any new files.
    
    """
    utils.sts("Combining archive bifs", 3)
    
    return DB.combine_dirname_dfs(dirname='bif', file_pat=r'_bif\.csv')
Example #26
 def deleteByPedido(self, order_id):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('DELETE FROM pedidos_produtos WHERE pedidos_id = %s',
                   (order_id,))
         banco.conexao.commit()
         c.close()
         return True
     except Exception:
         return False
Example #27
def gen_style_filepaths(style_num):
    #style_dict = DB.load_style(**{'name': style_num})
    style_dict = DB.load_data(dirname='styles',
                              subdir=style_num,
                              name=f'{style_num}_style',
                              silent_error=True)

    try:
        return style_dict['filepaths']
    except TypeError:
        return None
Example #28
 def filterByName(self, name):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('SELECT * FROM produtos WHERE descricao LIKE %s',
                   ('%' + name + '%',))
         result = c.fetchall()
         c.close()
         return result
     except Exception:
         return None
Example #29
 def getByDesc(self, desc):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute('SELECT * FROM produtos WHERE descricao LIKE %s',
                   ('%' + desc + '%',))
         result = c.fetchall()
         c.close()
         return result
     except Exception:
         return None
Example #30
 def update(self):
     banco = DB()
     try:
         c = banco.conexao.cursor()
         c.execute(
             'UPDATE produtos SET descricao = %s , valor = %s , imagem = %s WHERE id = %s',
             (self.descricao, self.valor, self.imagem, self.id))
         banco.conexao.commit()
         c.close()
         return 'Product updated successfully!'
     except Exception:
         return 'An error occurred while updating the product'