Exemplo n.º 1
0
def concat_xlsx_files_from_folder(xls_folder):
    """
    Load every ``.xlsx`` file found in a project sub-folder and stack the
    results into a single DataFrame.

    Each loaded frame receives a ``file_name`` column holding the source
    file's name without the ``.xlsx`` suffix, so rows remain traceable after
    concatenation.

    :param xls_folder: folder path relative to ``project_folder``.
    :return: one DataFrame with a fresh 0..n-1 index; an empty DataFrame when
        the folder contains no ``.xlsx`` files.
    """
    extension = ".xlsx"
    folder_path = os.path.join(project_folder, xls_folder)
    df_xlsx_list = []

    for file in os.listdir(folder_path):
        # Test the real suffix: a substring check would also accept names
        # such as "report.xlsx.bak" and then cut the wrong characters off.
        if not file.endswith(extension):
            continue
        df_xlsx = load_some_xlsx(file, folder_path)
        df_xlsx["file_name"] = file[:-len(extension)]
        print(df_xlsx)
        df_xlsx_list.append(df_xlsx)

    if not df_xlsx_list:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame()

    return pd.concat(df_xlsx_list, ignore_index=True)
def clean_tenders_names(dataframe):
    """
    Clean the tender and lot names in the "Название тендера и лота" column.

    Find/replace pairs are read from ``replace_names_patterns.xlsx`` (columns
    ``Find`` and ``Replace``) and applied one after another as
    case-insensitive regular expressions; afterwards every name is
    capitalized.

    :param dataframe: frame holding the "Название тендера и лота" column; the
        column is overwritten on this same object.
    :return: the same ``dataframe`` object that was passed in.
    """
    name_col = "Название тендера и лота"

    replace_patterns = load_some_xlsx("replace_names_patterns.xlsx", folder="actual_funcs")
    # Blank cells become '' so that an empty "Replace" means "delete the match".
    replace_patterns = replace_patterns.fillna('')

    for f, r in zip(replace_patterns["Find"], replace_patterns["Replace"]):
        if f == '':
            # An empty pattern matches between every character and would
            # scatter the replacement through the whole name.
            continue
        dataframe[name_col] = dataframe[name_col].str.replace(
            pat=f,
            repl=r,
            case=False,
            # Explicit because pandas 2.0 switched the default to literal
            # matching; the patterns are meant to be regular expressions.
            regex=True,
        )
    dataframe[name_col] = dataframe[name_col].str.capitalize()
    return dataframe
Exemplo n.º 3
0
from non_actual_funcs.concat_xlsx_files import concat_xlsx_files_from_folder
from libs.db_libs.write import write_some_xlsx
from libs.db_libs.load import load_some_xlsx

#cnct = concat_xlsx_files_from_folder("test_xlsxs")
#write_some_xlsx(cnct, "plans_new.xlsx")
from actual_funcs.set_hyperlinks import set_hyperlinlks_in_excel_col

# Read the tender numbers from the plans workbook and turn them into strings.
plans_df = load_some_xlsx("планы.xlsx", folder="")
tender_numbers = plans_df["Номер тендера в системе бикотендер"].tolist()
hyperlinks_adresses = list(map(str, tender_numbers))

# Hand the addresses to the hyperlink writer, targeting column 1 of the
# "hyperlinks" worksheet in hyperlinks.xlsx.
# NOTE(review): presumably each address is combined with `preffix` to build
# the final URL - confirm in set_hyperlinlks_in_excel_col.
set_hyperlinlks_in_excel_col(
    hyperlinks_adresses,
    xls_file="hyperlinks.xlsx",
    xls_worksheet="hyperlinks",
    folder="",
    xl_col=1,
    preffix="http://www.bicotender.ru/tc/tender/show/tender_id/",
    hyperlinks_texts=None,
    tip="",
)
Exemplo n.º 4
0
import pandas as pd
from libs.db_libs.load import load_some_xlsx
from actual_funcs.set_hyperlinks import set_hyperlinlks_in_excel_col

# Load the workbook that holds the supplier links and the winner names.
xls = load_some_xlsx("с регионами.xlsx", "")

# Link targets come from one column, their display texts from another.
links_adresses = xls["Ссылка на поставщика"].tolist()
links_texts = xls["Победитель"].tolist()

# Echo every link target to the console, one per line.
for address in links_adresses:
    print(address)

# No prefix is passed here (the tender-page prefix below stays disabled):
# preffix="https://www.bicotender.ru/tc/tender/show/tender_id/"
set_hyperlinlks_in_excel_col(
    xls_file="winner_hyper_links.xlsx",
    xl_col=1,
    hyperlinks_texts=links_texts,
    hyperlinks_adresses=links_adresses,
)
Exemplo n.º 5
0
import os
from actual_funcs.parse_search_results import download_html, parse_html_tables_from_folder, \
    beatify_parsed_tables
from libs.db_libs.write import write_some_xlsx
from actual_funcs.clean_tenders_and_lots_names import clean_tenders_names
from config.config import project_folder
from actual_funcs.mark_duplicates import mark_duplicates
from libs.db_libs.load import load_some_xlsx

# Search-results page to download.
# NOTE(review): the literal keeps its leading and trailing newline - confirm
# that download_html tolerates them.
my_link = """
https://www.bicotender.ru/crm/analytics/list/?atype=field_1&filter_id=302437&competitorRowNumber[to]=1&showConf[asLot]=2&showConf[competitorFilterMode]=2&submit=1
"""

# Step 1: fetch the result pages.
download_html(my_link, results_qty=4000)

# Step 2: parse -> beautify -> clean names -> mark duplicates.
tables_df = parse_html_tables_from_folder("html_downloads")
pretty_df = beatify_parsed_tables(tables_df)
named_df = clean_tenders_names(pretty_df)
deduped_df = mark_duplicates(named_df)

# Step 3: write the result into the project folder, then load it back and
# print it.
output_path = os.path.join(project_folder, "marked_duplicates.xlsx")
write_some_xlsx(deduped_df, output_path, index=True)
print(load_some_xlsx("marked_duplicates.xlsx", ""))