def annotations_from_xlsx(xslxfile, delimiter='\t', rm_csv=False):
    """Read annotations from an xlsx file.

    The xlsx file is converted to a csv file and then parsed with the
    csv reader.

    :param xslxfile: path to the source xlsx file
    :param delimiter: column delimiter for the intermediate csv file
    :param rm_csv: when True, delete the intermediate csv afterwards
    :return: result of ModelAnnotator.annotations_from_csv
    """
    import os

    csvfile = "{}.csv".format(xslxfile)
    pyexcel.save_as(file_name=xslxfile,
                    dest_file_name=csvfile,
                    dest_delimiter=delimiter)
    try:
        return ModelAnnotator.annotations_from_csv(csvfile,
                                                   delimiter=delimiter)
    finally:
        # Bug fix: remove the intermediate csv even when parsing raises,
        # so the temporary file never lingers when rm_csv is requested.
        if rm_csv:
            os.remove(csvfile)
def create_sample_file1(file):
    """Write a fixed 3x4 sample table to *file*."""
    values = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 1.1, 1]
    # Split the flat value list into rows of four cells each.
    rows = [values[start:start + 4] for start in range(0, len(values), 4)]
    pyexcel.save_as(dest_file_name=file, array=rows)
def save_to_database(self, session=None, table=None, initializer=None,
                     mapdict=None, auto_commit=True, **keywords):
    """
    Save data from a sheet to database

    :param session: a SQLAlchemy session
    :param table: a database table
    :param initializer: a custom table initialization function if you have one
    :param mapdict: the explicit table column names if your excel data do not have the exact column names
    :param keywords: additional keywords to :meth:`pyexcel.Sheet.save_to_database`
    """
    params = self.get_params(**keywords)
    # Default orientation: name columns by the first row, rows unnamed.
    params.setdefault('name_columns_by_row', 0)
    params.setdefault('name_rows_by_column', -1)
    params.update(
        dest_session=session,
        dest_table=table,
        dest_initializer=initializer,
        dest_mapdict=mapdict,
        dest_auto_commit=auto_commit,
    )
    pe.save_as(**params)
def setUp(self):
    """Create the two csv fixtures used by the join/merge tests."""
    self.testfile1 = "testcsv1.csv"
    # Rows pairing 1..8 with letters a..h.
    rows1 = [[number, letter]
             for number, letter in zip(range(1, 9), "abcdefgh")]
    pe.save_as(dest_file_name=self.testfile1, array=rows1)
    self.testfile2 = "testcsv2.csv"
    rows2 = [
        [1, 'a', 'c'],
        [2, 'b', 'h'],
        [3, 'c', 'c'],
        [8, 'h', 'd'],
    ]
    pe.save_as(dest_file_name=self.testfile2, array=rows2)
def create_sample_file1(file):
    """Write a fixed three-row sample table to *file*."""
    cells = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 1.1, 1]
    # Three rows of four cells each.
    table = [cells[0:4], cells[4:8], cells[8:12]]
    pyexcel.save_as(array=table, dest_file_name=file)
def setUp(self):
    """Write one row with a date, a datetime and a microsecond datetime."""
    self.excel_filename = "testdateformat.csv"
    day = datetime.date(2014, 12, 25)
    moment = datetime.datetime(2014, 12, 25, 11, 11, 11)
    moment_us = datetime.datetime(2014, 12, 25, 11, 11, 11, 10)
    self.data = [[day, moment, moment_us]]
    pe.save_as(dest_file_name=self.excel_filename, array=self.data)
def setUp(self):
    """Build a three-column xls fixture keyed by column name."""
    self.data = {
        "1": list(range(1, 9)),
        "3": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8],
        "5": list(range(2, 10)),
    }
    self.testfile = "test.xls"
    pe.save_as(adict=self.data, dest_file_name=self.testfile)
def test_writing_multiline_ods():
    """A cell value containing newlines survives an ods round trip."""
    cell = "2\n3\n4\n993939\na"
    testfile = "writemultiline.ods"
    pyexcel.save_as(array=[[cell, "test"]], dest_file_name=testfile)
    sheet = pyexcel.get_sheet(file_name=testfile)
    assert sheet[0, 0] == cell
    os.unlink(testfile)
def test_new_normal_usage(self):
    """save_as writes a plain 2-D array to the test file."""
    rows = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8, 999],
    ]
    pe.save_as(array=rows, dest_file_name=self.testfile)
    self._check_test_file('new_normal_usage')
def test_new_normal_usage_irregular_columns(self):
    """save_as accepts rows of unequal length."""
    rows = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8],  # deliberately shorter row
    ]
    pe.save_as(array=rows, dest_file_name=self.testfile)
    self._check_test_file('new_normal_usage_irregular_columns')
def test_issue_10(self):
    """Round-tripping an OrderedDict through xls keeps type and order."""
    thedict = OrderedDict()
    thedict.update({"Column 1": [1, 2, 3]})
    thedict.update({"Column 2": [1, 2, 3]})
    thedict.update({"Column 3": [1, 2, 3]})
    pe.save_as(adict=thedict, dest_file_name="issue10.xls")
    newdict = pe.get_dict(file_name="issue10.xls")
    # Idiom fix: assert the boolean directly instead of '== True'.
    assert isinstance(newdict, OrderedDict)
    assert thedict == newdict
def test_no_title_single_sheet(self):
    """dest_write_title=False suppresses the sheet-title line."""
    rows = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8, 999],
    ]
    pe.save_as(
        array=rows,
        dest_file_name=self.testfile,
        dest_write_title=False,
    )
    self._check_test_file('no_title_single_sheet')
def test_save_as_to_database(self):
    """A column dict saved into the database reads back unchanged."""
    columns = {"X": [1, 4], "Y": [2, 5], "Z": [3, 6]}
    pe.save_as(adict=columns,
               dest_session=self.session,
               dest_table=Signature)
    fetched = pe.get_dict(session=self.session, table=Signature)
    assert columns == fetched
def test_save_as_and_append_colnames(self):
    """Saving with colnames prepends a header row to the output file."""
    data = [[1, 2, 3], [4, 5, 6]]
    sheet = pe.Sheet(data)
    testfile = "testfile.xls"
    # Bug fix: testfile2 was also "testfile.xls", so the source file
    # was silently overwritten instead of a second file being written.
    testfile2 = "testfile2.xls"
    sheet.save_as(testfile)
    pe.save_as(file_name=testfile, out_file=testfile2,
               colnames=["X", "Y", "Z"])
    array = pe.get_array(file_name=testfile2)
    assert array == [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
def CVSOutput(self, sortedTempSimilarityTuple):
    """Write (relation-pair, similarity) tuples to testCSV.csv.

    Each item's first element is a "<left>-<right>" key; the output row
    is [left, right, similarity].
    """
    rows = []
    for item in sortedTempSimilarityTuple:
        parts = item[0].split("-")
        rows.append([parts[0], parts[1], item[1]])
    pyexcel.save_as(array=rows, dest_file_name='testCSV.csv')
def test_new_normal_usage(self):
    """The json destination file contains exactly the saved array."""
    expected = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8, 999],
    ]
    pe.save_as(array=expected, dest_file_name=self.testfile)
    with open(self.testfile, "r") as handle:
        assert json.load(handle) == expected
def test_mapping_array(self):
    """Column-oriented data is transposed, then mapped onto the model."""
    columns = [
        ["A", 1, 4],
        ["B", 2, 5],
        ["C", 3, 6],
    ]
    model = FakeDjangoModel()
    pe.save_as(
        array=columns,
        dest_model=model,
        dest_mapdict=["X", "Y", "Z"],
        transpose_before=True,
    )
    assert model.objects.objs == self.result
def test_mapping_array(self):
    """Row-oriented data with a header row maps onto the model fields."""
    rows = [
        ["A", "B", "C"],
        [1, 2, 3],
        [4, 5, 6],
    ]
    model = FakeDjangoModel()
    pe.save_as(array=rows,
               dest_model=model,
               dest_mapdict=["X", "Y", "Z"])
    assert model.objects.objs == self.result
def setUp(self):
    """Six-column fixture pairing numeric values with string forms."""
    self.data = {
        "1": [1, 2, 3, 4, 5, 6, 7, 8],
        "2": ["1", "2", "3", "4", "5", "6", "7", "8"],
        "3": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8],
        # NOTE(review): "7,7" below uses a comma where the rest use a
        # dot — presumably deliberate test data; confirm before fixing.
        "4": ["1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7,7", "8.8"],
        "5": [2, 3, 4, 5, 6, 7, 8, 9],
        "6": ["2", "3", "4", "5", "6", "7", "8", "9"],
    }
    self.testfile = "test.xls"
    pe.save_as(adict=self.data, dest_file_name=self.testfile)
def test_data_frame(self):
    """Row and column names come from the first column and first row."""
    frame = [
        ["", "Column 1", "Column 2", "Column 3"],
        ["Row 1", 1, 2, 3],
        ["Row 2", 4, 5, 6],
        ["Row 3", 7, 8, 9],
    ]
    pe.save_as(
        array=frame,
        name_rows_by_column=0,
        name_columns_by_row=0,
        dest_file_name=self.testfile,
    )
    self._check_test_file('data_frame')
def setUp(self):
    """Write a header row plus five identical data rows to xlsm."""
    self.testfile = "test.xlsm"
    self.content = [["X", "Y", "Z"]] + [[1, 2, 3] for _ in range(5)]
    pe.save_as(dest_file_name=self.testfile, array=self.content)
def test_csvbook_irregular_columns(self):
    """Re-saving a ragged csv keeps the irregular row lengths."""
    rows = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8],  # shorter row on purpose
    ]
    self.testfile2 = "testfile.csv"
    pe.save_as(array=rows, dest_file_name=self.testfile2)
    # Convert the intermediate csv into the final test file.
    pe.save_as(file_name=self.testfile2, dest_file_name=self.testfile)
    self._check_test_file('csvbook_irregular_columns')
def test_row_series(self):
    """The first column supplies the row names."""
    rows = [
        ["Row 1", 1, 2, 3],
        ["Row 2", 4, 5, 6],
        ["Row 3", 7, 8, 9],
    ]
    pe.save_as(array=rows,
               name_rows_by_column=0,
               dest_file_name=self.testfile)
    self._check_test_file('row_series')
def test_column_series_irregular_columns(self):
    """Named columns tolerate a final row that is shorter."""
    rows = [
        ["Column 1", "Column 2", "Column 3"],
        [1, 2, 3],
        [4, 5, 6],
        [7, 8],  # shorter row on purpose
    ]
    pe.save_as(array=rows,
               name_columns_by_row=0,
               dest_file_name=self.testfile)
    self._check_test_file('column_series_irregular_columns')
def test_save_file_as_another_one(self):
    """An xls file can be re-saved as csv with identical content."""
    data = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    testfile = "testfile.xls"
    testfile2 = "testfile2.csv"
    pe.Sheet(data).save_as(testfile)
    pe.save_as(file_name=testfile, out_file=testfile2)
    result = pe.get_sheet(file_name=testfile2)
    # csv stores everything as text; coerce numeric cells back to int.
    result.format(int)
    assert result.to_array() == data
    os.unlink(testfile)
    os.unlink(testfile2)
def setUp(self):
    """Write a ragged, column-keyed fixture to an xlsm file."""
    self.testfile = "test.xlsm"
    # Columns intentionally have different lengths and blank padding.
    self.content = {
        'Series_6': ['', '', '', '', '', 6.0],
        'Series_5': ['', '', '', '', '', 5.0, 5.0, '', '', ''],
        'Series_4': ['', '', '', 4.0, 4.0, 4.0, '', '', '', 4.0, 4.0],
        'Series_3': ['', '', 3.0, 3.0, 3.0, 3.0, '', '', 3.0, 3.0, 3.0],
        'Series_2': ['', 2.0, 2.0, 2.0, 2.0, 2.0],
        'Series_1': [1.0],
    }
    pe.save_as(adict=self.content, dest_file_name=self.testfile)
def test_write_texttable():
    """Render a one-row sheet through the texttable plugin."""
    test_file = "test.texttable"
    expected = dedent("""
        pyexcel_sheet1:
        +---+---+
        | 1 | 2 |
        +---+---+""").strip('\n')
    pe.save_as(array=[[1, 2]], dest_file_name=test_file)
    with open(test_file, 'r') as handle:
        eq_(handle.read(), expected)
    os.unlink(test_file)
def test_new_normal_usage(self):
    """The destination file contains the array's repr-style rendering."""
    content = [
        [1, 2, 3],
        [4, 588, 6],
        [7, 8, 999]
    ]
    pe.save_as(array=content, dest_file_name=self.testfile)
    # Fix: use a context manager so the file is closed even if read()
    # raises (the manual open/read/close was not exception-safe).
    with open(self.testfile, "r") as f:
        written_content = f.read()
    expected = "[[1, 2, 3], [4, 588, 6], [7, 8, 999]]"
    assert written_content == expected
def test_writing_date_format(self):
    """date, time and datetime cells survive an xls round trip."""
    excel_filename = "testdateformat.xls"
    written = [[datetime.date(2014, 12, 25),
                datetime.time(11, 11, 11),
                datetime.datetime(2014, 12, 25, 11, 11, 11)]]
    pe.save_as(dest_file_name=excel_filename, array=written)
    r = pe.Reader(excel_filename)
    # (column, expected type, strftime format, expected rendering)
    checks = [
        (0, datetime.date, "%d/%m/%y", "25/12/14"),
        (1, datetime.time, "%H:%M:%S", "11:11:11"),
        (2, datetime.date, "%d/%m/%y %H:%M:%S", "25/12/14 11:11:11"),
    ]
    for col, expected_type, fmt, expected in checks:
        assert isinstance(r[0, col], expected_type) is True
        assert r[0, col].strftime(fmt) == expected
    os.unlink(excel_filename)
def test_writing_date_format(self):
    """date, time and datetime cells survive an xlsx round trip."""
    excel_filename = "testdateformat.xlsx"
    data = [[datetime.date(2014, 12, 25),
             datetime.time(11, 11, 11),
             datetime.datetime(2014, 12, 25, 11, 11, 11)]]
    pe.save_as(dest_file_name=excel_filename, array=data)
    r = pe.get_sheet(file_name=excel_filename, library="openpyxl")
    # Idiom fix: assert booleans directly instead of comparing '== True'
    # (also matches the style of the xls variant of this test).
    assert isinstance(r[0, 0], datetime.date)
    assert r[0, 0].strftime("%d/%m/%y") == "25/12/14"
    assert isinstance(r[0, 1], datetime.time)
    assert r[0, 1].strftime("%H:%M:%S") == "11:11:11"
    assert isinstance(r[0, 2], datetime.date)
    # NOTE(review): unlike the xls variant, only the date part of the
    # datetime cell is checked here — confirm whether that is intended.
    assert r[0, 2].strftime("%d/%m/%y") == "25/12/14"
    os.unlink(excel_filename)
def write_to_excel3(data, file_full_name):
    """Save *data* (an iterable of record dicts) to *file_full_name*."""
    pyexcel.save_as(dest_file_name=file_full_name, records=data)
import pyexcel
from collections import OrderedDict

url = "http://s.cafef.vn/bao-cao-tai-chinh/VNM/IncSta/2017/3/0/0/ket-qua-hoat-dong-kinh-doanh-cong-ty-co-phan-sua-viet-nam.chn"
conn = urlopen(url)
raw_data = conn.read()
page_content = raw_data.decode("utf8")

soup = BeautifulSoup(page_content, "html.parser")
div = soup.find(
    "div",
    style="overflow:hidden;width:100%;border-bottom:solid 1px #cecece;")
table = div.find("table", id="tableContent")
tr_list = table.find_all("tr")

# Output column name for each cell position (replaces the if/elif
# chain on the index).
# NOTE(review): "Qúy 4-2016" carries a misplaced accent compared with
# the other quarters ("Quý ...") — kept byte-for-byte since it is a
# runtime key; confirm before normalising.
COLUMN_NAMES = ["Hạng mục", "Qúy 4-2016", "Quý 1-2017",
                "Quý 2-2017", "Quý 3-2017"]

news_list = []
for t in tr_list:
    td_list = t.find_all("td")
    kq = {}
    # Only the first five cells are mapped, as in the original chain.
    for i, td in enumerate(td_list[:len(COLUMN_NAMES)]):
        # Idiom fix: compare with None using 'is not', not '!='.
        if td.string is not None:
            kq[COLUMN_NAMES[i]] = td.string.strip()
    if kq != {}:
        news_list.append(kq)
pyexcel.save_as(records=news_list, dest_file_name="Ketqua.xlsx")
import pyexcel
from collections import OrderedDict

# Bug fix: OrderedDict({...}) builds the OrderedDict from a plain dict
# literal, so on Python < 3.7 the key order is not guaranteed.  Build
# it from an explicit sequence of pairs so the name/age/city column
# order is preserved on every Python version.
data = [
    OrderedDict([('name', 'quan'), ('age', '22'), ('city', 'hanoi')]),
    OrderedDict([('name', 'Hong'), ('age', '19'), ('city', 'campuchia')]),
    OrderedDict([('name', 'an'), ('age', '18'), ('city', 'laos')]),
]
pyexcel.save_as(records=data, dest_file_name="asdfgh.xlsx")
# NOTE(review): fragment — picture_link, title, source and new_item are
# defined earlier in the file, outside this view; this appears to run
# inside a scraping loop.  Indentation reconstructed.
# Collect one record for the article currently being processed.
item_content = {
    "picture_link": picture_link,
    "title": title,
    "source": source
}
new_item.append(item_content)
# for pile in block2:
#     pile1 = pile.find_all("article","c-article c-article--summary")
#     for pile2 in pile1:
#         pile3 = pile2.find("div","c-article__container")
#         pile4 = pile3.find("div","c-article__image")
#         pile5 = pile3.find("div","c-article__summary")
#         pile6 = pile4.a
#         picture_link = url+ pile6["href"]
#         pile7 = pile5.h3
#         title = pile7.text
#         pile8 = pile7.a
#         source = url+ pile8["href"]
#         item_content = {
#             "source": source,
#             "picture_link":picture_link,
#             "title":title
#         }
#         new_item.append(item_content)
# Write all collected records to an xlsx workbook.
pyexcel.save_as(records=new_item, dest_file_name="treehugger_right_craw.xlsx")
# Merge every matching xlsx file in input_files into one workbook.
# NOTE(review): fragment — input_files, directory, HEADER, CONTENTS and
# start_time are defined earlier in the file, outside this view.
for filename in input_files:
    # Occasionally non-xlsx files are mixed in; filter them out.
    if ".xlsx" not in filename:
        continue
    # It is an Excel file: read it into a list of rows.
    file = px.get_array(file_name=directory + "/" + filename)
    # Take only the first row of the spreadsheet, i.e. the header.
    header = file[0]
    # Check whether the file's header matches the template.
    if HEADER != header:
        # If it does not match, skip this file.
        continue
    # Append the file's data rows to the CONTENTS list.
    CONTENTS += file[1:]

# Save the merged spreadsheet.
px.save_as(array=CONTENTS, dest_file_name="merged_FILE.xlsx")
# Report how much was merged.
# NOTE(review): len(CONTENTS) - 1 counts merged *rows* minus one, not
# merged files — the message text may be misleading; confirm.
print("Total " + str(len(CONTENTS) - 1) + " files were merged.")
# Report that the job finished.
print("Process Done.")
# Report how many seconds the whole job took.
end_time = time.time()
print("The Job Took " + str(end_time - start_time) + " seconds.")
# NOTE(review): fragment — 'text', 'url' and 'pyexcel' come from
# earlier in the file, outside this view.
# 2: parse the downloaded page and locate the headline list.
soup = BeautifulSoup(text, "html.parser")
ul = soup.find("ul", "ul1 ulnew")
li_list = ul.find_all("li")

# Extract a {Title, Link} record from every list item.
item_list = []
for li in li_list:
    anchor = li.h4.a
    item_list.append({
        "Title": anchor.string,
        "Link": url + anchor['href'],
    })

pyexcel.save_as(records=item_list, dest_file_name="dantri.xlsx")
def _create_a_file(self, file):
    """Persist self.content to *file* via pyexcel."""
    pyexcel.save_as(array=self.content, dest_file_name=file)
import pyexcel
from urllib.request import urlopen
from bs4 import BeautifulSoup
from collections import OrderedDict

url = "http://s.cafef.vn/bao-cao-tai-chinh/VNM/IncSta/2017/3/0/0/ket-qua-hoat-dong-kinh-doanh-cong-ty-co-phan-sua-viet-nam.chn"

# Download and decode the financial-report page.
conn = urlopen(url)
raw_data = conn.read()
content = raw_data.decode("utf8")

# Locate the report table and collect every bordered cell; each cell
# becomes its own single-column record (key is the empty string).
soup = BeautifulSoup(content, "html.parser")
table = soup.find("table", id="tableContent")

table_list = []
for tr in table.find_all("tr"):
    for td in tr.find_all("td", "b_r_c"):
        table_list.append(OrderedDict({"": td.string}))

pyexcel.save_as(records=table_list, dest_file_name="vinamilk.xlsx")
def test_issue_92_verify_save_as():
    """Records with missing keys produce blank csv cells."""
    records = [{"a": 1, "b": 2, "c": 3}, {"b": 2}, {"c": 3}]
    stream = p.save_as(records=records, dest_file_type="csv")
    expected = "a,b,c\r\n1,2,3\r\n,2,\r\n,,3\r\n"
    eq_(stream.getvalue(), expected)
# NOTE(review): the three lines below are the tail of a function
# (presumably get_ip_data) whose 'def' line is above this view;
# indentation reconstructed.
    input_driver = input("What is the driver associated with this device? ")
    d = {"IP": input_ip, "driver": input_driver}
    return d


## This code is left turned off, but might help visualize how pyexcel works with data sets.
## IP is the first column, whereas driver is the second column.
## mylistdict = [ {"IP": "172.16.2.10", "driver": "arista_eos"}, {"IP": "172.16.2.20", "driver": "arista_eos"} ]
## pyexcel.save_as(records=mylistdict, dest_file_name="ip_list.xls")

# Runtime: interactively collect records, then save them as an xls file.
mylistdict = []  # the list of records we turn into a *.xls file
print("Hello! This program will make you a *.xls file")
while (True):
    mylistdict.append(
        get_ip_data()
    )  # add an item to the list returned by get_ip_data() {"IP": value, "driver": value}
    keep_going = input(
        "\nWould you like to add another value? Enter to continue, or enter 'q' to quit: "
    )
    if (keep_going.lower() == 'q'):
        break
filename = input("\nWhat is the name of the *.xls file? ")
pyexcel.save_as(records=mylistdict, dest_file_name=filename)
# NOTE(review): the message below implies ".xls" is appended
# automatically, but save_as uses 'filename' verbatim — the user must
# type the extension themselves; confirm intended behavior.
print("The file " + filename + ".xls should be in your local directory")
# NOTE(review): fragment — 'conn', 'OrderedDict', 'pyexcel' and
# 'YoutubeDL' come from earlier in the file, outside this view.
raw_data = conn.read()
page_content = raw_data.decode("utf8")
soup = BeautifulSoup(page_content, "html.parser")
# Drill down to the chart list: section -> content div -> ul of songs.
section = soup.find("section", "section chart-grid")
div = section.find("div", "section-content")
ul = div.find("ul")
li_list = ul.find_all("li")
top_songs = []
for li in li_list:
    h3 = li.h3.a  # song title anchor
    h4 = li.h4.a  # artist anchor
    name = h3.string
    singer = h4.string
    song = OrderedDict({"Song": name, "composer": singer})
    top_songs.append(song)
pyexcel.save_as(records=top_songs, dest_file_name="itunes.xlsx")
#-------------------------------------------------------------------------------------------
a = top_songs[1]
options = {'default_search': 'ytsearch', 'max_downloads': 1}
dl = YoutubeDL(options)
# NOTE(review): YoutubeDL.download expects a list of URL/search
# strings, but 'a' is an OrderedDict record — this looks like a bug;
# probably intended as dl.download([a["Song"]]).  Confirm.
dl.download(a)
def test_get_dict_from_memory(self):
    """An in-memory xls stream converts back into a column dict."""
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.save_as(dest_file_type="xls", array=rows)
    result = pe.get_dict(file_content=stream.getvalue(), file_type="xls")
    assert result == {"X": [1, 4], "Y": [2, 5], "Z": [3, 6]}
def test_get_sheet_from_file_stream(self):
    """A stream produced by save_as loads via the file_stream keyword."""
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.save_as(dest_file_type="xls", array=rows)
    sheet = pe.get_sheet(file_stream=stream, file_type="xls")
    assert sheet.to_array() == rows
#copy and save
# NOTE(review): fragment — 'ul', 'pyexcel' and 'YoutubeDL' come from
# earlier in the file, outside this view.
li_list = ul.find_all("li")
new_list = []
for li in li_list:
    # Each <li> holds the song title in <h3><a> and the artist in <h4><a>.
    title = li.h3.a.string
    artist = li.h4.a.string
    new_list.append({
        "title": title,
        "artist": artist,
    })
pyexcel.save_as(records=new_list, dest_file_name="demo.xlsx")

#dowload youtube
options = {
    "default_search": "ytsearch",
    # Bug fix: the option key was misspelled "max_dowload"; youtube-dl
    # only recognises "max_downloads", so the limit was being ignored.
    "max_downloads": 100,
    "format": "bestaudio/audio",
}
dl = YoutubeDL(options)
for i in new_list:
    print(i["title"])
    # Bug fix: YoutubeDL.download expects a *list* of URLs/queries;
    # passing the bare string made youtube-dl iterate its characters.
    dl.download([i["title"]])
def test_series_table(self):
    """Saving a dict and re-reading with named columns round-trips."""
    pyexcel.save_as(adict=self.content, dest_file_name=self.testfile)
    sheet = pyexcel.get_sheet(file_name=self.testfile,
                              name_columns_by_row=0)
    eq_(sheet.dict, self.content)
from collections import OrderedDict

url = "https://www.apple.com/itunes/charts/songs"
conn = urlopen(url)
raw_data = conn.read()
page_content = raw_data.decode("utf-8")
soup = BeautifulSoup(page_content, "html.parser")
ul = soup.find("ul", "")
li_list = ul.find_all("li")
itune_list = []
for li in li_list:
    a = li.a
    h3 = li.h3
    h4 = li.h4
    Baihat = h3.string
    Casi = h4.string
    link = url + a["href"]
    # Bug fix: OrderedDict({...}) is built from a plain dict literal,
    # which does not guarantee key order before Python 3.7; build from
    # an explicit sequence of pairs so the column order is stable.
    itune = OrderedDict([
        ("Baihat", Baihat),
        ("Casi", Casi),
        ("link", link),
    ])
    itune_list.append(itune)
pyexcel.save_as(records=itune_list, dest_file_name="Itunes_top_song.xlsx")
def _create_a_file(self, file):
    """Write self.content to *file* using the pyexcel-odsw backend."""
    pyexcel.save_as(
        array=self.content,
        dest_file_name=file,
        dest_library="pyexcel-odsw",
    )
def test_get_array_from_memory(self):
    """Data saved to an in-memory xls stream reads back unchanged."""
    stream = pe.save_as(dest_file_type="xls", array=self.test_data)
    round_tripped = pe.get_array(file_content=stream.getvalue(),
                                 file_type="xls")
    eq_(round_tripped, self.test_data)
import pyexcel

# Export two records; the dict keys become the header row.
# NOTE(review): the "age " key carries a trailing space, which becomes
# the column header verbatim — confirm whether that is intentional.
a_list_of_dic = [
    {"name": "Hieu", "age ": 20},
    {"name": "ha", "age ": 20},
]
pyexcel.save_as(records=a_list_of_dic, dest_file_name="a1.xlsx")
import pyexcel  # make sure you had pyexcel-xls installed

# Export a list of records; the dict keys become the header row.
a_list_of_dictionaries = [
    {"Name": 'Adam', "Age": 28},
    {"Name": 'Beatrice', "Age": 29},
    {"Name": 'Ceri', "Age": 30},
    {"Name": 'Dean', "Age": 26},
]
pyexcel.save_as(records=a_list_of_dictionaries,
                dest_file_name="your_file.xlsx")
import pyexcel

# 1. Prepare the records to export.
data = [
    {"name": "Son", "age": 23},
    {"name": "Trung", "age": 19},
    {"name": "Dung", "age": 21},
]

# 2. Save them to an xlsx workbook.
pyexcel.save_as(records=data, dest_file_name="sample.xlsx")
# NOTE(review): Python 2 fragment — the enclosing loops and 'try'
# block begin above this view; indentation below is reconstructed.
            # Group this site's submissions by the date prefix
            # (first 10 characters) of their 'start' field.
            site_visits = settings.MONGO_DB.instances.aggregate([{"$match":{"fs_site": str(site.id)}}, { "$group" : { "_id" : { "$substr": [ "$start", 0, 10 ] } } }])['result']
            # Fill the trailing summary cells of the site row.
            site_row[-1] = rejected_count
            site_row[-2] = flagged_count
            site_row[-3] = submission_count
            site_row[-4] = len(site_visits)
            data.append(site_row)
        p.save_as(array=data, dest_file_name="media/stage-report/{}_stage_data.xls".format(project.id))
        xl_data = open("media/stage-report/{}_stage_data.xls".format(project.id), "rb")
        #Its only quick fix for now, save it in aws bucket whenever possible.
        project.progress_report = xl_data.name
        project.save()
        count += 1
    except Exception as e:
        # NOTE(review): broad except with print-only handling — errors
        # for one project are swallowed and the loop continues.
        print 'Report Gen Unsuccesfull. %s' % e
        print e.__dict__
self.stdout.write('Created "%s " report for projects with success!' % (count))
print datetime.now()
# NOTE(review): fragment — 'soup', 'h_list' and 'pyexcel' come from
# earlier in the file, outside this view.
h_data_list = []
table = soup.find(id='tableContent')
row = table.find_all('tr')
row_list = []
for item in h_list:
    h_data_list.append(item)
for item in row:
    r_list = item.find_all('td', 'b_r_c')
    r_data_list = []
    for j in r_list:
        j = j.string
        r_data_list.append(j)
    # Bug fix: the extracted row was thrown away, leaving row_list
    # empty — so 'final' below was always empty and nothing was saved.
    if r_data_list:
        row_list.append(r_data_list)
final = []
for item in row_list:
    # First cell is the category label, remaining cells map onto the
    # collected headers.
    val = {"Danh Muc": item[0]}
    for i in range(len(h_data_list)):
        val[h_data_list[i]] = item[1 + i]
    final.append(val)
pyexcel.save_as(records=final, dest_file_name="vinamlk.xlsx")
def test_get_records_from_memory(self):
    """An in-memory xls stream converts back into row records."""
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.save_as(dest_file_type="xls", array=rows)
    records = pe.get_records(file_content=stream.getvalue(),
                             file_type="xls")
    eq_(records, [{"X": 1, "Y": 2, "Z": 3},
                  {"X": 4, "Y": 5, "Z": 6}])
# NOTE(review): fragment — 'text', 'url' and 'pyexcel' come from
# earlier in the file, outside this view.
# Parse the page and locate the news list (region of interest).
soup = BeautifulSoup(text, "html.parser")
ul_news = soup.find("ul", "ul1 ulnew")

# Extract a {Title, Link} record from every list item.
news_items = []
for li in ul_news.find_all("li"):
    anchor = li.h4.a
    news_items.append({
        "Title": anchor.text,
        "Link": url + anchor["href"],
    })
print(news_items)

# Save the collected records.
pyexcel.save_as(records=news_items, dest_file_name="dantri.xlsx")
# html_file.close # 2: Extract ROI (region of interest) soup = BeautifulSoup(html, "html.parser") section = soup.find("section", "section chart-grid") # print(section.prettify()) # 3: Extract info songs = [] li_list = section.find_all("li") for li in li_list: song = {} song['name'] = li.h3.string song['artist'] = li.h4.string songs.append(song) pyexcel.save_as(records=songs, dest_file_name="top_song.xlsx") # *************************************************************************************************** # Part 2 : Search and download to youtube from youtube_dl import YoutubeDL options = { 'default_search': 'ytsearch', # tell download to search instead of directly downloading 'max_download': 1 # tell download to download only the first entry(video) } new_song = [] dl = YoutubeDL(options) for li in li_list:
def test_get_sheet_from_memory(self):
    """Raw bytes from save_as load via the file_content keyword."""
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.save_as(dest_file_type="xls", array=rows)
    sheet = pe.get_sheet(file_content=stream.getvalue(), file_type="xls")
    assert sheet.to_array() == rows
#1.4 Save Html Content # urlretrieve(url, "dantri.html") #Cách dùng thư viện urlretrive # f = open('dantri.html','w') # f.write(html_content) # f.close #2. Extract ROI(Region of Interest) soup = BeautifulSoup(html_content, "html.parser") # print(soup.prettify()) soup.find_all("a", attrs={"class": "sister"}) ul = soup.find("ul", "ul1 ulnew") li_list = ul.find_all("li") list_of_dict = [] for li in li_list: # print(li.prettify()) # print("* " * 20) # h4 = li.find("h4") # a = h4.find("a") dict1 = {} a = li.h4.a dict1["Title"] = a.string dict1["Link"] = url + a["href"] list_of_dict.append(dict1) # print(a.string) # print("* "* 20) # print(url + a["href"]) #3. Extract info pyexcel.save_as(records=list_of_dict, dest_file_name="dantri.xlsx")
def test_out_file_parameter():
    """save_as rejects the unrecognised out_file/colnames keywords."""
    try:
        pe.save_as(array=[[1]], out_file="b", colnames=["X", "Y", "Z"])
    except pe.sources.factory.UnknownParameters as e:
        eq_(str(e), 'No parameters found!')
    else:
        # Bug fix: previously the test silently passed when no
        # exception was raised at all.
        raise AssertionError("UnknownParameters was not raised")
def test_get_sheet_from_memory_compatibility(self):
    """The legacy 'content=' keyword still loads an in-memory stream."""
    rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
    stream = pe.save_as(dest_file_type="xls", array=rows)
    pe.get_sheet(content=stream.getvalue(), file_type="xls")