示例#1
0
 def do_all_steps(self, df):
     """Run every step listed in ``self.steps`` against ``df``.

     Each step name is logged before it is executed through
     ``self._do_single_step``.  A truthy result is treated as an error
     description and logged; the remaining steps are still executed.
     """
     for step_name in self.steps:
         mylog.info("Step {0}".format(step_name))
         failure = self._do_single_step(step_name, df)
         if not failure:
             continue
         mylog.error("Step {0} is not completed. Error: {1}".format(
             step_name, failure))
示例#2
0
    def __init__(self,
                 driver_exe_path,
                 browser='firefox',
                 profile_path='',
                 browser_binary=None):
        """Start a browser session, logging (not raising) any failure.

        Args:
            driver_exe_path: Forwarded to ``wd.Firefox`` as
                ``executable_path``.
            browser: Browser name; only ``'firefox'`` is accepted.
            profile_path: When non-empty, a ``wd.FirefoxProfile`` is built
                from it and used for the session.
            browser_binary: Forwarded to ``wd.Firefox`` as
                ``firefox_binary``.

        On any exception ``self.webdriver`` keeps its initial value of
        ``None``.
        """
        mylog.debug("Init WebClicker")

        self.webdriver = None
        self.profile = None
        try:
            # Reject unsupported browsers first so the happy path stays flat.
            if browser != 'firefox':
                raise ClickerException(
                    'Not supported browser {0}'.format(browser))

            if profile_path:
                self.profile = wd.FirefoxProfile(profile_path)

            self.webdriver = wd.Firefox(firefox_profile=self.profile,
                                        executable_path=driver_exe_path,
                                        firefox_binary=browser_binary)
        except Exception as e:
            # Start-up problems are reported through the log only.
            mylog.error(e)
            mylog.error('Fail to initialize {0}'.format(browser))
示例#3
0
 def clear(self, name, value, partial=False):
     """Clear the element located by ``name``/``value``.

     The element is looked up with ``self.find_element``; when nothing is
     found an error is logged and nothing else happens.
     """
     target = self.find_element(name, value, partial)
     if not target:
         mylog.error("Can't clear element {0}={1}".format(name, value))
         return

     mylog.debug("Clear element {0}={1}".format(name, value))
     target.clear()
示例#4
0
def include_if_match_string(df, col_name, val_list):
    """Keep only the rows whose ``col_name`` text contains every pattern.

    The patterns in ``val_list`` are applied one after another with
    ``Series.str.contains``, so a row survives only if it matches all of
    them.  When ``col_name`` is not a column of ``df`` an error is logged
    and ``df`` is returned unchanged.
    """
    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    narrowed = df
    for pattern in val_list:
        keep_mask = narrowed[col_name].str.contains(pattern)
        narrowed = narrowed.loc[keep_mask]

    return narrowed
示例#5
0
 def switch_to_frame(self, name, value):
     """Switch the driver into the frame located by ``name``/``value``.

     Returns:
         True after switching, False when ``self.find_element`` finds
         nothing (an error is logged in that case).
     """
     frame_element = self.find_element(name, value)
     if not frame_element:
         mylog.error("Can't switch to frame {0}={1}".format(name, value))
         return False

     self.webdriver.switch_to.frame(frame_element)
     mylog.debug('Switching to frame {0}={1}'.format(name, value))
     return True
示例#6
0
 def get_website(self, url):
     """Load ``url`` in the browser, defaulting to the https scheme.

     Args:
         url: Address to open.  ``https://`` is prepended only when the
             address carries neither an ``http://`` nor an ``https://``
             prefix.

     Returns:
         True when the page load call succeeds, False when the driver
         raises ``WebDriverException`` (the failure is logged).
     """
     # Checking for "https://" alone turned "http://host" into the
     # invalid "https://http://host"; accept both schemes as already complete.
     if not url.startswith(('http://', 'https://')):
         url = 'https://' + url
     mylog.debug("Trying load '{0}'".format(url))
     try:
         self.webdriver.get(url)
     except WebDriverException:
         mylog.error("Can't open page: '{0}'".format(url))
         return False
     return True
示例#7
0
 def send_ctrl_key(self, name, value, key):
     """Send a Ctrl+``key`` combination to the element ``name``/``value``.

     Returns:
         True when the keys were sent, False when the element was not
         found (an error is logged in that case).
     """
     target = self.find_element(name, value)
     if not target:
         mylog.error("Can't send Ctrl-{0} to element {1} {2}".format(
             key, name, value))
         return False

     # NOTE(review): the trailing Keys.NULL presumably releases the held
     # Control modifier - confirm against the Selenium Keys documentation.
     key_chord = Keys.CONTROL + key + Keys.NULL
     target.send_keys(key_chord)
     mylog.debug("Sent Ctrl-{0} to element {1} {2}".format(
         key, name, value))
     return True
示例#8
0
def alias_replacement_in_place(working_df: pd.DataFrame, alias_cfg: dict,
                               working_folder: str) -> None:
    """Apply every configured alias file to ``working_df``.

    Each entry of ``alias_cfg`` is indexed with the keys ``'file'``,
    ``'key_col'`` and ``'new_col'``.  The file is read from
    ``working_folder``; a file that cannot be read is logged and skipped.

    NOTE(review): ``alias_cfg`` is annotated as ``dict`` but is iterated as
    a sequence of mappings - confirm the expected type with the caller.
    """
    for entry in alias_cfg:
        alias_path = os.path.join(working_folder, entry['file'])
        alias_table, read_error = read_excel(alias_path, replace_nan='')

        if read_error:
            mylog.error("Can't use alias file: {0} {1}".format(
                alias_path, read_error))
            continue

        add_aliases(working_df, entry['key_col'], entry['new_col'],
                    alias_dict_by_df(alias_table))
示例#9
0
 def send_string(self, name, value, string: str, end='', partial=False):
     """Type ``string`` followed by ``end`` into the located element.

     Returns:
         True when the text was sent, False when ``self.find_element``
         found nothing (an error is logged in that case).
     """
     target = self.find_element(name, value, partial)
     if target:
         target.send_keys(string + end)
         mylog.debug("Sent string '{0}' to element {1} {2}".format(
             string, name, value))
         return True

     mylog.error(
         "Element {0}: {1} NOT found. String '{2}' NOT sent".format(
             name, value, string))
     return False
示例#10
0
 def get_attribute(self, name, value, attribute_name):
     """Read ``attribute_name`` from the element ``name``/``value``.

     Returns:
         Whatever the element's ``get_attribute`` call returns, or ``''``
         when the element was not found (an error is logged then).
     """
     target = self.find_element(name, value)
     if not target:
         mylog.error(
             "Can't read attribute {2}: Element {0}: {1} NOT found.".format(
                 name, value, attribute_name))
         return ''

     attribute = target.get_attribute(attribute_name)
     mylog.debug("Element {0} = '{1}' attribute {2}={3}".format(
         name, value, attribute_name, attribute))
     return attribute
示例#11
0
def exclude_data(df, col_name, val_list):
    """Drop the rows of ``df`` that ``no_matching`` rejects for each value.

    For every value in ``val_list`` the frame is filtered with the mask
    returned by ``no_matching(df[col_name], val)``.  A warning is logged
    when a value removed nothing (the shape did not change).  When
    ``col_name`` is not a column, an error is logged and ``df`` is returned
    unchanged.
    """
    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    for val in val_list:
        remaining = df.loc[no_matching(df[col_name], val)]
        nothing_removed = remaining.shape == df.shape
        if nothing_removed:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
        df = remaining

    return df
示例#12
0
def merge_in_place(working_df: pd.DataFrame, merge_cfg: dict,
                   working_folder) -> None:
    """Add lookup columns to ``working_df`` from every configured merge file.

    Each entry of ``merge_cfg`` is indexed with the keys ``'file'``,
    ``'pos_file_key'``, ``'new_col'``, ``'merge_file_key'`` and
    ``'merge_res_key'``.  The file is read from ``working_folder``; a file
    that cannot be read is logged together with the read error and skipped.

    NOTE(review): ``merge_cfg`` is annotated as ``dict`` but is iterated as
    a sequence of mappings - confirm the expected type with the caller.
    """
    for cfg in merge_cfg:
        merge_file_path = os.path.join(working_folder, cfg['file'])
        merge_df, error = read_excel(merge_file_path, replace_nan='')

        if error:
            # Include the reason, as alias_replacement_in_place does; the
            # path alone does not tell why the file was unusable.
            mylog.error("Can't use merge file: {0} {1}".format(
                merge_file_path, error))
            continue

        lookup_and_add(working_df,
                       key_col=cfg['pos_file_key'],
                       new_col=cfg['new_col'],
                       lookup_dict=lookup_dict_by_df(
                           merge_df, cfg['merge_file_key'],
                           cfg['merge_res_key']))
示例#13
0
    def switch_to_frame_by_index(self, index, time_sec=10):
        """Switch into the frame at ``index``, retrying while it is missing.

        Args:
            index: Frame index handed to ``webdriver.switch_to.frame``.
            time_sec: Number of one-unit ``self.wait(1)`` retries allowed
                after ``NoSuchFrameException``.

        Returns:
            True once the switch succeeds, False when all retries are
            used up (an error is logged in that case).
        """
        t = 0
        while t < time_sec:
            try:
                self.webdriver.switch_to.frame(index)
                mylog.debug('Switching to frame index={0}; time={1}'.format(
                    index, t))
                return True
            except NoSuchFrameException:
                self.wait(1)
                t += 1
                mylog.debug('Waiting frame index={0}; time={1}'.format(
                    index, t))

        # The template was previously logged unformatted, printing a literal
        # "{0}" and the text "time_sec" instead of the actual values.
        mylog.error("Can't switch to frame index = {0}; time = {1}".format(
            index, time_sec))
        return False
示例#14
0
    def _wait_element(self, how: str, what: str, timeout_sec=1):
        """Poll until the element ``how``/``what`` is ready or time runs out.

        ``self.is_element_ready`` is checked up to ``timeout_sec`` times
        with a ``self.sleep(1)`` call after every unsuccessful check.

        Returns:
            True as soon as the element is ready, False after the last
            unsuccessful check (an error is logged in that case).
        """
        for waited in range(timeout_sec):
            if not self.is_element_ready(how, what):
                self.sleep(1)
                mylog.debug('Waiting element {0}: {1} {2} sec'.format(
                    how, what, waited))
                continue

            mylog.debug(
                'Element {0}: {1} found. Waiting time is {2} sec'.format(
                    how, what, waited))
            return True

        mylog.error("Element {0}: {1} NOT found. Timeout = {2} sec".format(
            how, what, timeout_sec))
        return False
示例#15
0
    def click(self, name, value, partial=False, time_sec=1):
        """Click the element ``name``/``value``, retrying on driver errors.

        Args:
            name, value, partial: Forwarded to ``self.find_element``.
            time_sec: Maximum number of click attempts; ``self.wait(1)`` is
                called after each attempt that raises
                ``WebDriverException``.

        Returns:
            True after a successful click; False when the element is not
            found (logged as an error) or every attempt failed.
        """
        target = self.find_element(name, value, partial)
        if not target:
            mylog.error("Can't click element {0}={1}".format(name, value))
            return False

        mylog.debug("Click element {0}={1}".format(name, value))
        failed_attempts = 0
        while failed_attempts < time_sec:
            try:
                target.click()
            except WebDriverException:
                self.wait(1)
                failed_attempts += 1
            else:
                return True

        return False
示例#16
0
    def get_element_value(self, how: By, path: str, timeout_sec=30):
        """Return the ``value`` attribute of an element once it appears.

        ``self.get_element`` is tried up to ``timeout_sec`` times with a
        ``self.sleep(1)`` call after every miss.

        Returns:
            The element's ``'value'`` attribute, or ``''`` when the element
            never appeared (an error is logged in that case).
        """
        for waited in range(timeout_sec):
            found = self.get_element(how, path)
            if found is None:
                self.sleep(1)
                mylog.debug('Waiting element {0}: {1} {2} sec'.format(
                    how, path, waited))
                continue

            mylog.debug(
                'Element {0}: {1} found. Waiting time is {2} sec'.format(
                    how, path, waited))
            return found.get_attribute('value')

        mylog.error("Element {0}: {1} NOT found. Timeout = {2} sec".format(
            how, path, timeout_sec))
        return ''
示例#17
0
def update_excel_sheet(updated_sheet_name: str,
                       file_name: str,
                       df: pd.DataFrame,
                       prompt=False,
                       convert_strings_to_urls=True) -> Error:
    """Write ``df`` into one sheet of ``file_name``, keeping the other sheets.

    Args:
        updated_sheet_name: Sheet to create or overwrite.  When empty, the
            first existing sheet is overwritten; if there is no existing
            sheet to take a name from, ``'Sheet1'`` is used.
        file_name: Workbook path.
        df: Data for the updated sheet.
        prompt: Forwarded to ``write_excel``.
        convert_strings_to_urls: Forwarded to ``write_excel``.

    Returns:
        The error value returned by ``write_excel``.
    """
    # Used only when the caller gave no name and no existing sheet can
    # supply one; matches the pandas default sheet name.
    fallback_sheet_name = 'Sheet1'

    original_sheet_list, error = read_sheet_names(file_name)

    if error:
        # file doesn't exist yet, try to create new.  The sheet list must
        # not be indexed here: it was previously read before this check,
        # which crashed for an empty sheet name instead of creating the file.
        mylog.warning("File {0} doesn't exist. Creating new".format(file_name))
        return write_excel(file_name,
                           df,
                           prompt=prompt,
                           convert_strings_to_urls=convert_strings_to_urls,
                           sheet_name=updated_sheet_name
                           or fallback_sheet_name)

    original_sheet_list = original_sheet_list or []

    # overwrite first sheet if updated_sheet_name is empty
    if not updated_sheet_name:
        if original_sheet_list:
            updated_sheet_name = original_sheet_list[0]
        else:
            updated_sheet_name = fallback_sheet_name

    # read all existing sheets so they can be written back unchanged
    excel_with_sheets_dict = OrderedDict()
    for sheet in original_sheet_list:
        next_sheet, error = read_excel(file_name,
                                       replace_nan='',
                                       sheet_name=sheet)
        if error:
            # NOTE(review): a sheet that fails to load is left out of the
            # rewritten workbook - confirm this loss is acceptable.
            mylog.error("Can't read {0} - {1}: {2}".format(
                file_name, sheet, error))
        else:
            excel_with_sheets_dict[sheet] = next_sheet

    excel_with_sheets_dict[updated_sheet_name] = df

    mylog.debug("excel_with_sheets_dict={0}".format(
        list(excel_with_sheets_dict)))
    return write_excel(file_name,
                       excel_with_sheets_dict,
                       prompt=prompt,
                       convert_strings_to_urls=convert_strings_to_urls)
示例#18
0
    def drop_down(self,
                  by_name,
                  how_value,
                  set_value,
                  time_sec=1,
                  repeat_if_fail=0):
        """Open a drop-down and pick the entry whose link text is ``set_value``.

        The drop-down is clicked via ``by_name``/``how_value`` and then the
        entry is clicked by ``'link_text'``.  The pair of clicks is tried
        ``repeat_if_fail + 1`` times in total.

        Returns:
            True when both clicks succeeded in the same attempt, otherwise
            False (an error is logged in that case).
        """
        attempts_allowed = repeat_if_fail + 1
        for _ in range(attempts_allowed):
            # The second click is only tried when the first one succeeded.
            if (self.click(by_name, how_value, time_sec=time_sec)
                    and self.click('link_text', set_value,
                                   time_sec=time_sec)):
                mylog.debug("Dropdown {0}='{1}' selected '{2}'".format(
                    by_name, how_value, set_value))
                return True

        mylog.error("Dropdown {0}='{1}' FAIL to select '{2}'".format(
            by_name, how_value, set_value))
        return False
def set_multiple_parameters_by_ispn(*, df: pd.DataFrame, destination_col: str,
                                    source_cols: tuple, **options):
    """Set several cells in the row whose ``'Ispn'`` equals ``destination_col``.

    Args:
        df: Frame modified in place; must have an ``'Ispn'`` column.
        destination_col: Ispn value identifying the row (the first match
            is used).
        source_cols: Flat sequence ``(column, value, column, value, ...)``;
            a trailing unpaired item is ignored.
        **options: Accepted and discarded.

    Problems are logged instead of raised: an unknown Ispn stops the call,
    a failing column is skipped.  Overwriting a non-blank cell is logged
    as a warning.
    """
    del options
    try:
        matching_labels = df.index[df['Ispn'] == destination_col].tolist()
        row_label = matching_labels[0]
    except Exception as e:
        mylog.error("Invalid Ispn {0}: {1}".format(destination_col, e))
        return

    # Zipping one iterator with itself walks the flat sequence in pairs.
    flat_items = iter(source_cols)
    for param_name, new_value in zip(flat_items, flat_items):
        try:
            current_value = df.at[row_label, param_name]
            if current_value != '':
                mylog.warning(
                    "Replacing non-blank value at {0} : {1} to {2}".format(
                        destination_col, current_value, new_value))
            df.at[row_label, param_name] = new_value
        except Exception as e:
            mylog.error("Invalid Parameter '{0}' in {1}: {2}".format(
                param_name, destination_col, e))
示例#20
0
def include_only_data(df, col_name, val_list):
    """Keep only the rows of ``df`` selected by ``matching`` for any value.

    For every value in ``val_list`` the rows picked by
    ``matching(df[col_name], val)`` are collected, and the selections are
    concatenated in the order of ``val_list`` (a row selected by several
    values appears once per value).

    Args:
        df: Source frame; it is not modified.
        col_name: Column the values are matched against.
        val_list: Values to keep.

    Returns:
        The filtered frame.  ``df`` itself is returned unchanged when
        ``val_list`` is empty or ``col_name`` is not a column; both cases
        are logged.
    """
    # Local import keeps the block self-contained; DataFrame.append was
    # removed in pandas 2.0 and pd.concat is its replacement.
    import pandas as pd

    if not val_list:
        # The column name used to be passed as a logging %-argument to a
        # str.format template, so the message was never formatted.
        mylog.warning('Warning: Empty value list for "{0}"'.format(col_name))
        # Callers rebind their frame to the result, so never hand back None.
        return df

    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df

    selections = []
    for val in val_list:
        selected = df.loc[matching(df[col_name], val)]
        if selected.empty:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
        selections.append(selected)

    return pd.concat(selections)
示例#21
0
def main():
    """Interactive console loop for expanding and collapsing a data tree.

    Reads the ``--in`` workbook, builds a ``BizDataTree`` and then repeats
    forever: print the tree, write its HTML rendering to ``--out``, ask for
    a node id, and collapse the node if it is expanded or ask for a
    drill-by value and expand it otherwise.  Errors are logged and the loop
    carries on; the loop has no exit condition of its own.
    """
    arg = docopt(__doc__)

    file_in = arg['--in']
    file_out = arg['--out']

    df, error = read_excel(file_in, replace_nan='')
    if error:
        mylog.error(error)
        return

    dt.data_tree = BizDataTree(df, 'POS FY')

    while True:
        dt.data_tree.print_console()
        html = dt.data_tree.render_html(render_method_basic)

        # The HTML file is rewritten on every pass of the loop.
        with open(file_out, "w") as text_file:
            print(html, file=text_file)

        node_id = input("Click on id:")
        expanded, error = dt.data_tree.is_expanded(node_id)

        if error:
            mylog.error(error)
        elif expanded:
            dt.data_tree.collapse(node_id)
        else:
            drill_by = input("Drill by:")
            error = dt.data_tree.expand_id(node_id, drill_by)
            if error:
                mylog.error(error)
示例#22
0
def build_tool1():
    """Build HTML tool pages from the Excel files named on the command line.

    For every file in ``--files`` (resolved against ``--in_folder``):

    1. The ``html_config`` and ``Data`` sheets are read; a file whose
       sheets cannot be read is logged and skipped.
    2. One ``CompleteToolTemplate`` is registered per distinct
       ``output_html`` value, and a main-menu item is added for it.
    3. For every config row the products are filtered, rendered to an HTML
       table and added to the row's output template.
    4. The ``Data`` sheet is written back with a ``_processed`` column set
       to ``'Y'`` for every Ispn that was selected by some row.

    Finally every registered template gets the main menu and the
    ``--date`` info and is written to ``--out_folder`` as UTF-8.
    """
    args = docopt(__doc__)
    mylog.debug(args)

    # Both folders default to '' so os.path.join leaves the bare file name.
    input_folder = ''
    if args['--in_folder']:
        input_folder = args['--in_folder']

    output_folder = ''
    if args['--out_folder']:
        output_folder = args['--out_folder']

    main_menu = MainMenuTemplate()
    # Maps output file name -> template collecting that file's tables.
    output_files_dict = {}

    for input_file_name in args['--files']:

        input_file_full_path = os.path.join(input_folder, input_file_name)

        config_df, error = read_excel(input_file_full_path,
                                      replace_nan='',
                                      sheet_name='html_config')
        if error:
            mylog.error(
                "Can't process file {0} - sheet html_config: {1}".format(
                    input_file_full_path, error))
            continue

        products_df, error = read_excel(input_file_full_path,
                                        replace_nan='',
                                        sheet_name='Data')
        if error:
            mylog.error("Can't process file {0} - sheet Data: {1}".format(
                input_file_full_path, error))
            continue

        # One dict per config row, keyed by the frame's index label.
        config_dict = config_df.to_dict('index')

        row_index_list = list(map(int, list(config_dict)))

        mylog.debug(row_index_list)

        # First pass: register every output file and its menu entry before
        # any table is built.
        for i in row_index_list:
            row = config_dict[i]

            output_file_name = row['output_html']
            if output_file_name not in output_files_dict:
                output_files_dict.update(
                    {output_file_name: CompleteToolTemplate()})
                main_menu.add_item(row['main_menu_item'], output_file_name)

        # Second pass: build one table per config row.
        processed_ispn_list = []
        for i in row_index_list:

            row = config_dict[i]

            mylog.debug("Open data: {0} - {1}".format(input_file_full_path,
                                                      'Data'))

            # NOTE(review): the column_aliases sheet is re-read for every
            # config row although the path does not change inside this loop.
            alias_to_col_name_dict = None
            try:
                mylog.info("Open column alias file: {0} - {1}".format(
                    input_file_full_path, 'column_aliases'))

                col_alias_df, error = read_excel(input_file_full_path,
                                                 replace_nan='',
                                                 sheet_name='column_aliases')
                if error:
                    # Unlike the sheet errors above, this aborts the whole
                    # function: no further rows or files are processed and
                    # no HTML output is written.
                    mylog.error(error)
                    return

                alias_to_col_name_dict = aliases_to_dict(col_alias_df, 'alias')

            except FileNotFoundError as e:
                # Processing continues with alias_to_col_name_dict left as None.
                mylog.error(e)

            mylog.debug(row)

            # Filter settings are optional in the config row.
            row.setdefault('exclude', '')
            row.setdefault('include_only', '')
            row.setdefault('match', '')

            mylog.debug("exclude='{0}' include='{1}' match='{2}'".format(
                row['exclude'], row['include_only'], row['match']))
            selected_products_df = selected_products(
                products_df,
                exclude=row['exclude'],
                include_only=row['include_only'],
                match=row['match'],
                alias_to_col_name_dict=alias_to_col_name_dict)

            # Collected before rendering, so Ispns count as processed even
            # when the table build below reports an error.
            processed_ispn_list.extend(selected_products_df['Ispn'].tolist())

            mylog.debug("Build html for '{0}' -> '{1}' -> '{2}'".format(
                row['category'], row['subcategory'], row['view']))
            table_html, error = product_table_to_html(
                selected_products_df,
                category=row['category'],
                subcategory=row['subcategory'],
                view_name=row['view'],
                main_topic=row['main_topic'],
                tree_attributes=row['tree'],
                part_attributes=row['attributes'],
                datasheet_url=row['datasheet_url'],
                view_type=row['view_type'],
                product_page_url=row['product_page_url'],
                alias_to_col_name_dict=alias_to_col_name_dict)
            if error:
                mylog.error(error)
            else:
                template = output_files_dict[row['output_html']]
                template.add_table(table_html)

        # Mark processed Ispns in the Data sheet and write it back to the
        # input workbook.
        mylog.info("Marking processed {0} Ispns...".format(
            len(processed_ispn_list)))
        products_df['_processed'] = ''
        products_df.loc[products_df['Ispn'].isin(processed_ispn_list),
                        '_processed'] = 'Y'
        error = update_excel_sheet('Data',
                                   input_file_full_path,
                                   products_df,
                                   prompt=True,
                                   convert_strings_to_urls=False)
        if error:
            mylog.error("Can't update {0} with processed Ispns marks".format(
                input_file_full_path))

    mylog.debug(output_files_dict)

    # Finalize every page: menu (with this page selected), date, then write.
    for file_name in output_files_dict:
        output_files_dict[file_name].add_main_menu_html(
            main_menu.make(selected_menu_link=file_name))
        output_files_dict[file_name].add_date_info(args['--date'])
        out_html = output_files_dict[file_name].make()
        with open(os.path.join(output_folder, file_name),
                  "w",
                  encoding='utf-8') as out_html_file:
            out_html_file.write(out_html)
示例#23
0
def main():
    """Convert four XML exports into one merged Excel workbook.

    The documents, document-assignment, parameters and ispn files are
    looked up in ``--folder`` and processed in that order.  Each stage logs
    its file size and derives an estimated item count from it; the first
    stage that reports an error is logged and ends the run.  The three
    resulting frames are merged on ``'Ispn'`` and written to ``--output``.
    """
    arg = docopt(__doc__)

    ispns_fn = arg['--ispn']
    parameters_fn = arg['--parameters']
    docs_fn = arg['--docs']
    docs_assignment_fn = arg['--docs_assignment']
    folder_name = arg['--folder']
    output_fn = arg['--output']

    def sized_path(short_name):
        """Return (full path, size in bytes) for a file and log the size."""
        full_path = os.path.join(folder_name, short_name)
        size_bytes = os.path.getsize(full_path)
        mylog.info("File size {0} Bytes".format(size_bytes))
        return full_path, size_bytes

    mylog.info(arg)

    # --- documents -------------------------------------------------------
    mylog.info('Processing documents...')
    doc_filter = DocFilter()
    docs_path, docs_size = sized_path(docs_fn)
    error = doc_filter.prepare(docs_path,
                               progress_indicator=progress,
                               estimated_items_count=int(docs_size / 1200))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing documents: Done!')

    # --- document assignment ---------------------------------------------
    mylog.info('Processing document assignment...')
    assignment_path, assignment_size = sized_path(docs_assignment_fn)
    doc_info_df, error = xml2excel_params(
        assignment_path,
        row_key='Ispn',
        column_key='DocumentGroup',
        convert2str_method=document_ref_to_str,
        is_filter_pass=doc_filter.is_english,
        progress_indicator=progress,
        estimated_items_count=int(assignment_size / 950))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing document assignment: Done!')

    # --- parameters ------------------------------------------------------
    mylog.info('Processing parameters...')
    parameters_path, parameters_size = sized_path(parameters_fn)
    ispn_param_df, error = xml2excel_params(
        parameters_path,
        row_key='Ispn',
        column_key='ParameterName',
        column_modifier_key='ValueRemark',
        convert2str_method=ispn_xml_parameters_to_str,
        progress_indicator=progress,
        estimated_items_count=int(parameters_size / 870))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing parameters: Done!')

    # --- ispns -----------------------------------------------------------
    mylog.info('Processing ispns...')
    ispns_path, ispns_size = sized_path(ispns_fn)
    ispn_df, error = xml2excel_merge_partnums(
        ispns_path,
        'Ispn',
        progress_indicator=progress,
        estimated_items_count=int(ispns_size / 2250))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing ispns: Done!')

    # --- merge and write -------------------------------------------------
    with_parameters = pd.merge(ispn_df,
                               ispn_param_df,
                               on='Ispn',
                               suffixes=('_1', '_2'))
    merged_df = pd.merge(with_parameters,
                         doc_info_df,
                         on='Ispn',
                         suffixes=('_3', '_4'))

    output_path = os.path.join(folder_name, output_fn)
    error = write_excel(output_path,
                        merged_df,
                        prompt=True,
                        convert_strings_to_urls=False)
    if error:
        print("Can't write excel file. {0}".format(error))
def make_product_tables():
    """Build the per-product-group workbooks from the source workbook.

    The ``--source`` workbook is read once.  Every ``--working_file`` name
    ending in ``.xlsx`` names a product group; for each group (or only the
    one given by ``--only``) a ``ProductTableBuilder`` is configured from
    the group's ``xml_config`` sheet, its steps are run on a copy of the
    source data, and the result is written to the group's ``Data`` sheet.
    With ``--mark_processed`` the source workbook gets a ``_processed``
    column set to ``'Y'`` for every Ispn found in a group result.
    """
    arg = docopt(__doc__)
    mylog.debug(arg)

    in_df, error = read_excel(arg['--source'], replace_nan='')
    if error:
        mylog.error("Can't read file '{0}': {1}".format(arg['--source'], error))
        return

    product_groups = []
    for fn in arg['--working_file']:
        # splitext never raises and splits on the last dot, so a name
        # without an extension is reported below instead of crashing the
        # tuple unpacking, and "a.b.xlsx" is accepted as group "a.b".
        name, ext = os.path.splitext(fn)
        if ext == '.xlsx':
            product_groups.append(name)
        else:
            mylog.error("Wrong filename format {0}".format(fn))

    processed_ispn_list = []

    for p_group in product_groups:

        if arg['--only'] and p_group != arg['--only']:
            continue

        working_df = in_df.copy()

        mylog.info('Initialization "{0}"'.format(p_group))

        builder = ProductTableBuilder()

        group_path = os.path.join(arg['--working_folder'], p_group + ".xlsx")
        sheet_name = 'xml_config'
        mylog.debug("Reading configuration from {0} : {1}".format(group_path, sheet_name))
        error = builder.init_from_file(group_path, sheet_name=sheet_name)
        if error:
            # NOTE(review): this stops the whole run, so later groups are
            # not processed and nothing is marked - confirm this is intended.
            mylog.error("Can't read configuration from {0} - {1}: {2}".format(group_path, sheet_name, error))
            return

        mylog.info('Performing correction steps...')

        builder.do_all_steps(working_df)

        mylog.info("{0} part-numbers processed".format(len(working_df.index)))
        writing_error = update_excel_sheet('Data',
                                           group_path,
                                           working_df,
                                           prompt=True,
                                           convert_strings_to_urls=False)
        if writing_error:
            mylog.error(writing_error)

        processed_ispn_list.extend(working_df['Ispn'].tolist())

    # mark processed Ispns
    if arg['--mark_processed']:
        mylog.info("Marking processes ispns...")
        in_df['_processed'] = ''
        in_df.loc[in_df['Ispn'].isin(processed_ispn_list), '_processed'] = 'Y'
        mylog.info("Writing back to file {0}...".format(arg['--source']))
        # An empty sheet name asks update_excel_sheet to overwrite the
        # first sheet of the source workbook.
        error = update_excel_sheet('', arg['--source'], in_df, prompt=True,
                                   convert_strings_to_urls=False)
        if error:
            mylog.error("Can't update {0} with processed Ispns marks".format(arg['--source']))
示例#25
0
def to_xmind():
    """Turn an Excel table into a topic tree and export it.

    The ``FILE_XLSX`` workbook is read, all cells are converted to strings
    and the ``--include_only``, ``--exclude`` and ``--match`` filters are
    applied.  A tree rooted at the ``--main`` topic is then built from the
    ``--tree`` columns (only when more than one resolves to a header),
    using the ``--ann``, ``--note`` and ``--url`` columns.  Depending on the
    options the tree is printed, saved as an XMind file after a "yes"
    confirmation, and/or written as a UTF-8 HTML file.
    """
    args = docopt(__doc__)

    mylog.debug(sys.argv)
    mylog.debug(args)

    a_info = args['--info']
    a_tree_levels = args['--tree']
    a_annotations = args['--ann']
    a_notes = args['--note']
    a_print = args['--print']
    a_file_xlsx = args['FILE_XLSX']
    a_file_xmind = args['--xmind']
    a_file_html = args['--html']
    a_main_topic_name = args['--main']
    a_url_col = args['--url']
    a_include_only = args['--include_only']
    a_exclude = args['--exclude']
    a_match = args['--match']
    a_add_parameter_names = args['--add_parameter_names']

    try:
        df = pd.read_excel(a_file_xlsx)
    except Exception as e:
        # A bare "except:" also caught KeyboardInterrupt/SystemExit and hid
        # the reason for the failure; catch ordinary errors and report them.
        mylog.error("Can't read file: {0}: {1}".format(a_file_xlsx, e))
        return

    df = df.astype(str)

    header_dict = table_headers_dict(df)
    header_list = df.columns.values.tolist()

    for a in a_include_only:
        col, val = parse_filter_arguments(a)
        df = include_only_data(df, arg_to_header(col, header_dict,
                                                 header_list), val)

    for a in a_exclude:
        col, val = parse_filter_arguments(a)
        df = exclude_data(df, arg_to_header(col, header_dict, header_list),
                          val)

    if a_match:
        col, val = parse_filter_arguments(a_match)
        # NOTE(review): include_if_match_string iterates its third argument;
        # if val[0] is a plain string that loop runs once per character -
        # confirm the shape returned by parse_filter_arguments.
        df = include_if_match_string(
            df, arg_to_header(col, header_dict, header_list), val[0])

    root_node = XMindNode(a_main_topic_name)

    # Arguments that do not resolve to a header are dropped.
    tree_levels = [
        arg_to_header(a, header_dict, header_list) for a in a_tree_levels
    ]
    tree_levels = [a for a in tree_levels if a]

    if a_add_parameter_names:
        parameter_names_tree(tree_levels, root_node)

    if len(tree_levels) > 1:
        anns = [
            arg_to_header(a, header_dict, header_list) for a in a_annotations
        ]
        anns = [a for a in anns if a]

        notes = [arg_to_header(a, header_dict, header_list) for a in a_notes]
        notes = [a for a in notes if a]

        url_col = arg_to_header(a_url_col, header_dict, header_list)

        if a_info:
            print_pretty_tree_plan(tree_levels, anns, notes, url_col)

        table_to_tree(df,
                      tree_levels,
                      root_node,
                      anns,
                      notes,
                      last_level_url_col_name=url_col)

    if a_info:
        print_header_value_variation_stat(df)

    if a_print:
        print_pretty_tree(root_node, 30)

    if a_file_xmind:
        xmb = XMindBuilder(a_file_xmind)
        # Saving replaces the existing file, so ask for explicit consent.
        print('File "{0}" will be overwritten, data can be lost!'.format(
            a_file_xmind))
        answer = input('Type "yes" if agree >>>')
        if answer == 'yes':
            root_node.parent = xmb.central_topic_tree_node
            xmb.build_from_tree(root_node,
                                xmind_central_topic=xmb.central_topic)
            xmb.save(a_file_xmind)
            print('XMIND saved to file "{0}"'.format(a_file_xmind))
        else:
            print('Quited without saving. File was not changed')

    if a_file_html:
        html_data = make_html(root_node, SimpleHtmlTemplate)
        with open(a_file_html, "w", encoding='utf-8') as html_file:
            html_file.write(html_data)