Пример #1
1
    def annotations_from_xlsx(xslxfile, delimiter='\t', rm_csv=False):
        """Read annotations from an xlsx file.

        The workbook is first converted to ``<xslxfile>.csv`` with pyexcel
        and that csv file is then parsed by
        ``ModelAnnotator.annotations_from_csv``.

        :param xslxfile: path of the xlsx file to read
        :param delimiter: field delimiter used for the intermediate csv file
        :param rm_csv: if True, delete the intermediate csv file afterwards,
            also when parsing it raised an exception
        :return: the result of ``ModelAnnotator.annotations_from_csv``
        """
        import os

        csvfile = "{}.csv".format(xslxfile)
        pyexcel.save_as(file_name=xslxfile, dest_file_name=csvfile, dest_delimiter=delimiter)
        try:
            return ModelAnnotator.annotations_from_csv(csvfile, delimiter=delimiter)
        finally:
            # Clean up in ``finally`` so a parsing error does not leave the
            # temporary csv file behind.
            if rm_csv and os.path.exists(csvfile):
                os.remove(csvfile)
Пример #2
0
def create_sample_file1(file):
    """Write a fixed table of three rows with four cells each to ``file``."""
    cells = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 1.1, 1]
    width = 4
    # Cut the flat cell list into consecutive rows of ``width`` cells
    rows = [cells[start:start + width] for start in range(0, len(cells), width)]
    pyexcel.save_as(dest_file_name=file, array=rows)
Пример #3
0
 def save_to_database(
         self,
         session=None, table=None, initializer=None, mapdict=None,
         auto_commit=True,
         **keywords):
     """
     Save data from a sheet to database

     The parameters returned by ``self.get_params`` are completed with the
     destination settings below and forwarded to ``pe.save_as``.

     :param session: a SQLAlchemy session
     :param table: a database table
     :param initializer: a custom table initialization function if you have one
     :param mapdict: the explicit table column names if your excel data do not have the exact column names
     :param auto_commit: forwarded as ``dest_auto_commit``
     :param keywords: additional keywords to :meth:`pyexcel.Sheet.save_to_database`
     """
     params = self.get_params(**keywords)
     # Only fill in these two when the caller did not supply them
     # (assumes get_params returns a dict -- TODO confirm)
     params.setdefault('name_columns_by_row', 0)
     params.setdefault('name_rows_by_column', -1)
     params.update(
         dest_session=session,
         dest_table=table,
         dest_initializer=initializer,
         dest_mapdict=mapdict,
         dest_auto_commit=auto_commit,
     )
     pe.save_as(**params)
Пример #4
0
    def setUp(self):
        """
        Create two csv fixtures.

        testcsv1.csv pairs the numbers 1..8 with the letters a..h:

        1, a
        2, b
        ...
        8, h

        testcsv2.csv holds four rows of three columns:

        1, a, c
        2, b, h
        3, c, c
        8, h, d
        """
        self.testfile1 = "testcsv1.csv"
        letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
        first_table = [[number, letter] for number, letter in enumerate(letters, 1)]
        pe.save_as(array=first_table, dest_file_name=self.testfile1)

        self.testfile2 = "testcsv2.csv"
        second_table = [
            [1, 'a', 'c'],
            [2, 'b', 'h'],
            [3, 'c', 'c'],
            [8, 'h', 'd'],
        ]
        pe.save_as(array=second_table, dest_file_name=self.testfile2)
Пример #5
0
def create_sample_file1(file):
    """Save a 3x4 sample table (letters a-j followed by 1.1 and 1) to ``file``."""
    values = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 1.1, 1]
    # Three rows of four consecutive values each
    rows = [values[0:4], values[4:8], values[8:12]]
    pyexcel.save_as(array=rows, dest_file_name=file)
Пример #6
0
 def setUp(self):
     """Write a single row holding a date and two datetimes to a csv fixture."""
     self.excel_filename = "testdateformat.csv"
     day = datetime.date(2014, 12, 25)
     to_the_second = datetime.datetime(2014, 12, 25, 11, 11, 11)
     with_microseconds = datetime.datetime(2014, 12, 25, 11, 11, 11, 10)
     self.data = [[day, to_the_second, with_microseconds]]
     pe.save_as(array=self.data, dest_file_name=self.excel_filename)
Пример #7
0
 def setUp(self):
     """Save a dictionary of three numeric columns to the xls fixture."""
     self.testfile = "test.xls"
     self.data = {
         "1": list(range(1, 9)),
         "3": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8],
         "5": list(range(2, 10)),
     }
     pe.save_as(adict=self.data, dest_file_name=self.testfile)
def test_writing_multiline_ods():
    """A cell containing newlines must be read back unchanged from an ods file."""
    content = "2\n3\n4\n993939\na"
    testfile = "writemultiline.ods"
    array = [[content, "test"]]
    pyexcel.save_as(array=array, dest_file_name=testfile)
    try:
        sheet = pyexcel.get_sheet(file_name=testfile)
        assert sheet[0, 0] == content
    finally:
        # Remove the fixture even when reading or the assertion fails
        os.unlink(testfile)
Пример #9
0
    def test_new_normal_usage(self):
        """Save a regular 3x3 array and hand verification to ``_check_test_file``."""
        rows = [[1, 2, 3], [4, 588, 6], [7, 8, 999]]
        pe.save_as(dest_file_name=self.testfile, array=rows)
        self._check_test_file('new_normal_usage')
Пример #10
0
    def test_new_normal_usage_irregular_columns(self):
        """Save an array whose last row is one cell short, then run ``_check_test_file``."""
        ragged_rows = [[1, 2, 3], [4, 588, 6], [7, 8]]
        pe.save_as(dest_file_name=self.testfile, array=ragged_rows)
        self._check_test_file('new_normal_usage_irregular_columns')
Пример #11
0
 def test_issue_10(self):
     """An OrderedDict saved to xls must come back as an equal OrderedDict."""
     thedict = OrderedDict([
         ("Column 1", [1, 2, 3]),
         ("Column 2", [1, 2, 3]),
         ("Column 3", [1, 2, 3]),
     ])
     pe.save_as(adict=thedict, dest_file_name="issue10.xls")
     newdict = pe.get_dict(file_name="issue10.xls")
     # isinstance already yields a bool; comparing it to True adds nothing
     assert isinstance(newdict, OrderedDict)
     assert thedict == newdict
Пример #12
0
    def test_no_title_single_sheet(self):
        """Save a 3x3 array with ``dest_write_title=False`` and run ``_check_test_file``."""
        rows = [[1, 2, 3], [4, 588, 6], [7, 8, 999]]
        pe.save_as(
            array=rows,
            dest_file_name=self.testfile,
            dest_write_title=False,
        )
        self._check_test_file('no_title_single_sheet')
Пример #13
0
 def test_save_as_to_database(self):
     """Round-trip a column dictionary through the ``Signature`` table."""
     columns = {"X": [1, 4], "Y": [2, 5], "Z": [3, 6]}
     pe.save_as(adict=columns, dest_table=Signature, dest_session=self.session)
     stored = pe.get_dict(table=Signature, session=self.session)
     assert columns == stored
Пример #14
0
 def test_save_as_and_append_colnames(self):
     """Re-save an xls file with ``colnames`` and expect them as the first row."""
     # NOTE(review): source and destination are the same path -- confirm this is intended
     source_path = "testfile.xls"
     target_path = "testfile.xls"
     pe.Sheet([[1, 2, 3], [4, 5, 6]]).save_as(source_path)
     pe.save_as(colnames=["X", "Y", "Z"], file_name=source_path, out_file=target_path)
     loaded = pe.get_array(file_name=target_path)
     assert loaded == [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
Пример #15
0
 def CVSOutput(self,sortedTempSimilarityTuple):
     """Write one csv row per item of ``sortedTempSimilarityTuple`` to 'testCSV.csv'.

     Each item is indexed as ``item[0]`` (a "-"-separated string) and
     ``item[1]``; a row consists of the first two parts of ``item[0]``
     followed by ``item[1]``.
     """
     rows = []
     for entry in sortedTempSimilarityTuple:
         parts = entry[0].split("-")
         rows.append([parts[0], parts[1], entry[1]])
     pyexcel.save_as(array = rows, dest_file_name = 'testCSV.csv')
Пример #16
0
 def test_new_normal_usage(self):
     """Save a 3x3 array and expect the file to parse back as the same JSON value."""
     expected = [[1, 2, 3], [4, 588, 6], [7, 8, 999]]
     pe.save_as(dest_file_name=self.testfile, array=expected)
     with open(self.testfile, "r") as handle:
         loaded = json.load(handle)
     assert loaded == expected
 def test_mapping_array(self):
     """Save an array to a fake Django model with ``transpose_before=True``."""
     column_names = ["X", "Y", "Z"]
     rows = [["A", 1, 4], ["B", 2, 5], ["C", 3, 6]]
     fake_model = FakeDjangoModel()
     pe.save_as(
         array=rows,
         dest_model=fake_model,
         dest_mapdict=column_names,
         transpose_before=True,
     )
     assert fake_model.objects.objs == self.result
 def test_mapping_array(self):
     """Save an array whose first row is a header to a fake Django model with a mapdict."""
     header = ["A", "B", "C"]
     body = [[1, 2, 3], [4, 5, 6]]
     fake_model = FakeDjangoModel()
     pe.save_as(
         array=[header] + body,
         dest_model=fake_model,
         dest_mapdict=["X", "Y", "Z"],
     )
     assert fake_model.objects.objs == self.result
Пример #19
0
 def setUp(self):
     """Save six columns, numbers alternating with string columns, to test.xls."""
     self.testfile = "test.xls"
     low_ints = list(range(1, 9))
     high_ints = list(range(2, 10))
     self.data = {
         "1": low_ints,
         "2": [str(number) for number in low_ints],
         "3": [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8],
         # The seventh entry uses a comma, exactly as in the fixture data
         "4": ["1.1", "2.2", "3.3", "4.4", "5.5", "6.6", "7,7", "8.8"],
         "5": high_ints,
         "6": [str(number) for number in high_ints],
     }
     pe.save_as(adict=self.data, dest_file_name=self.testfile)
Пример #20
0
    def test_data_frame(self):
        """Save a table named by its first row and first column, then run ``_check_test_file``."""
        header = ["", "Column 1", "Column 2", "Column 3"]
        body = [
            ["Row 1", 1, 2, 3],
            ["Row 2", 4, 5, 6],
            ["Row 3", 7, 8, 9],
        ]
        pe.save_as(
            array=[header] + body,
            name_rows_by_column=0,
            name_columns_by_row=0,
            dest_file_name=self.testfile,
        )
        self._check_test_file('data_frame')
Пример #21
0
 def setUp(self):
     """Save a header row followed by five identical data rows to test.xlsm."""
     self.testfile = "test.xlsm"
     header = ["X", "Y", "Z"]
     # A fresh list per row, so the rows are independent objects
     self.content = [header] + [[1, 2, 3] for _ in range(5)]
     pe.save_as(array=self.content, dest_file_name=self.testfile)
Пример #22
0
    def test_csvbook_irregular_columns(self):
        """Write a ragged array to csv, convert that csv to ``self.testfile`` and check it."""
        ragged_rows = [[1, 2, 3], [4, 588, 6], [7, 8]]
        self.testfile2 = "testfile.csv"
        # First hop: array -> csv; second hop: csv -> target file
        pe.save_as(dest_file_name=self.testfile2, array=ragged_rows)
        pe.save_as(dest_file_name=self.testfile, file_name=self.testfile2)
        self._check_test_file('csvbook_irregular_columns')
Пример #23
0
    def test_row_series(self):
        """Save a table whose rows are named by column 0, then run ``_check_test_file``."""
        labels = ["Row 1", "Row 2", "Row 3"]
        values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        rows = [[label] + cells for label, cells in zip(labels, values)]
        pe.save_as(dest_file_name=self.testfile, array=rows, name_rows_by_column=0)
        self._check_test_file('row_series')
Пример #24
0
    def test_column_series_irregular_columns(self):
        """Save a headed table with a short last row, then run ``_check_test_file``."""
        header = ["Column 1", "Column 2", "Column 3"]
        body = [[1, 2, 3], [4, 5, 6], [7, 8]]
        pe.save_as(
            array=[header] + body,
            name_columns_by_row=0,
            dest_file_name=self.testfile,
        )
        self._check_test_file('column_series_irregular_columns')
Пример #25
0
 def test_save_file_as_another_one(self):
     """Convert an xls file to csv and expect the same data after int formatting."""
     data = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     testfile = "testfile.xls"
     testfile2 = "testfile2.csv"
     try:
         pe.Sheet(data).save_as(testfile)
         pe.save_as(file_name=testfile, out_file=testfile2)
         sheet = pe.get_sheet(file_name=testfile2)
         sheet.format(int)
         assert sheet.to_array() == data
     finally:
         # Remove whatever was created, even when a step above failed
         for path in (testfile, testfile2):
             if os.path.exists(path):
                 os.unlink(path)
Пример #26
0
 def setUp(self):
     """Save six series of different lengths, padded with empty strings, to test.xlsm."""
     self.testfile = "test.xlsm"
     blank = ''
     self.content = {
         'Series_6': [blank] * 5 + [6.0],
         'Series_5': [blank] * 5 + [5.0] * 2 + [blank] * 3,
         'Series_4': [blank] * 3 + [4.0] * 3 + [blank] * 3 + [4.0] * 2,
         'Series_3': [blank] * 2 + [3.0] * 4 + [blank] * 2 + [3.0] * 3,
         'Series_2': [blank] + [2.0] * 5,
         'Series_1': [1.0],
     }
     pe.save_as(adict=self.content, dest_file_name=self.testfile)
Пример #27
0
def test_write_texttable():
    """A one-row array saved as .texttable must produce the expected ASCII table."""
    content = [[1,2]]
    test_file = "test.texttable"
    expected = dedent("""
    pyexcel_sheet1:
    +---+---+
    | 1 | 2 |
    +---+---+""").strip('\n')
    pe.save_as(array=content, dest_file_name=test_file)
    try:
        with open(test_file, 'r') as f:
            written = f.read()
        eq_(written, expected)
    finally:
        # Remove the output even when the comparison fails
        os.unlink(test_file)
Пример #28
0
 def test_new_normal_usage(self):
     """Save a 3x3 array and compare the raw file text with its list literal."""
     content = [
         [1, 2, 3],
         [4, 588, 6],
         [7, 8, 999]
     ]
     pe.save_as(array=content, dest_file_name=self.testfile)
     # The context manager closes the handle even if read() raises
     with open(self.testfile, "r") as f:
         written_content = f.read()
     content = dedent("""
         [[1, 2, 3], [4, 588, 6], [7, 8, 999]]""").strip('\n')
     assert written_content == content
Пример #29
0
 def test_writing_date_format(self):
     """date, time and datetime cells must keep their types through an xls round trip."""
     excel_filename = "testdateformat.xls"
     data = [[datetime.date(2014, 12, 25),
              datetime.time(11, 11, 11),
              datetime.datetime(2014, 12, 25, 11, 11, 11)]]
     pe.save_as(dest_file_name=excel_filename, array=data)
     try:
         r = pe.Reader(excel_filename)
         assert isinstance(r[0, 0], datetime.date)
         assert r[0, 0].strftime("%d/%m/%y") == "25/12/14"
         assert isinstance(r[0, 1], datetime.time)
         assert r[0, 1].strftime("%H:%M:%S") == "11:11:11"
         assert isinstance(r[0, 2], datetime.date)
         assert r[0, 2].strftime("%d/%m/%y %H:%M:%S") == "25/12/14 11:11:11"
     finally:
         # Remove the fixture even when an assertion fails
         os.unlink(excel_filename)
Пример #30
0
 def test_writing_date_format(self):
     """date, time and datetime cells must keep their types through an xlsx round trip."""
     excel_filename = "testdateformat.xlsx"
     data = [[datetime.date(2014, 12, 25),
              datetime.time(11, 11, 11),
              datetime.datetime(2014, 12, 25, 11, 11, 11)]]
     pe.save_as(dest_file_name=excel_filename, array=data)
     try:
         r = pe.get_sheet(file_name=excel_filename, library="openpyxl")
         assert isinstance(r[0, 0], datetime.date)
         assert r[0, 0].strftime("%d/%m/%y") == "25/12/14"
         assert isinstance(r[0, 1], datetime.time)
         assert r[0, 1].strftime("%H:%M:%S") == "11:11:11"
         assert isinstance(r[0, 2], datetime.date)
         assert r[0, 2].strftime("%d/%m/%y") == "25/12/14"
     finally:
         # Remove the fixture even when an assertion fails
         os.unlink(excel_filename)
Пример #31
0
def write_to_excel3(data, file_full_name):
    """Save ``data`` as records to ``file_full_name`` via ``pyexcel.save_as``."""
    pyexcel.save_as(dest_file_name=file_full_name, records=data)
Пример #32
0
import pyexcel
from collections import OrderedDict
# Scrape the income-statement table of the page below and save it as records.
url = "http://s.cafef.vn/bao-cao-tai-chinh/VNM/IncSta/2017/3/0/0/ket-qua-hoat-dong-kinh-doanh-cong-ty-co-phan-sua-viet-nam.chn"
conn = urlopen(url)
raw_data = conn.read()
page_content = raw_data.decode("utf8")
soup = BeautifulSoup(page_content, "html.parser")
div = soup.find(
    "div", style="overflow:hidden;width:100%;border-bottom:solid 1px #cecece;")
table = div.find("table", id="tableContent")
tr_list = table.find_all("tr")

# Record key for each of the first five cells of a row, in cell order.
# NOTE(review): "Qúy 4-2016" is spelled differently from the other "Quý"
# headers; kept as is because it is the column name written to the file.
column_names = (
    "Hạng mục",
    "Qúy 4-2016",
    "Quý 1-2017",
    "Quý 2-2017",
    "Quý 3-2017",
)

news_list = []
for t in tr_list:
    td_list = t.find_all("td")
    # zip() stops after the named columns, so extra cells are ignored;
    # cells whose .string is None are left out of the record.
    kq = {
        name: td.string.strip()
        for name, td in zip(column_names, td_list)
        if td.string is not None
    }
    if kq:
        news_list.append(kq)
pyexcel.save_as(records=news_list, dest_file_name="Ketqua.xlsx")
import pyexcel
from collections import OrderedDict

# Sample records. OrderedDict is built from key/value pairs rather than a
# dict literal, so the key order does not depend on how the interpreter
# orders plain dicts.
data = [
    OrderedDict([('name', 'quan'), ('age', '22'), ('city', 'hanoi')]),
    OrderedDict([('name', 'Hong'), ('age', '19'), ('city', 'campuchia')]),
    OrderedDict([('name', 'an'), ('age', '18'), ('city', 'laos')]),
]

pyexcel.save_as(records=data, dest_file_name="asdfgh.xlsx")
Пример #34
0
        item_content = {
            "picture_link": picture_link,
            "title": title,
            "source": source
        }
        new_item.append(item_content)

# for pile in block2:
#   pile1 = pile.find_all("article","c-article c-article--summary")
#   for pile2 in pile1:
#     pile3 = pile2.find("div","c-article__container")
#     pile4 = pile3.find("div","c-article__image")
#     pile5 = pile3.find("div","c-article__summary")

#     pile6 = pile4.a
#     picture_link = url+ pile6["href"]

#     pile7 = pile5.h3
#     title = pile7.text
#     pile8 = pile7.a
#     source = url+ pile8["href"]

#     item_content = {
#       "source": source,
#       "picture_link":picture_link,
#       "title":title
#     }
#     new_item.append(item_content)

pyexcel.save_as(records=new_item, dest_file_name="treehugger_right_craw.xlsx")
Пример #35
0
# Number of files whose rows were actually appended to CONTENTS.
merged_file_count = 0

for filename in input_files:
    # Files other than xlsx may be mixed in; filter them out by extension.
    if not filename.endswith(".xlsx"):
        continue

    # It is an Excel file, so read it as a list of rows.
    file = px.get_array(file_name=directory + "/" + filename)

    # An empty workbook has no header row to compare; skip it.
    if not file:
        continue

    # Take only the first row of the Excel file, i.e. the header.
    header = file[0]

    # Check whether the header of the loaded file matches the template.
    if HEADER != header:
        # If it does not match, skip this file.
        continue

    # Append the contents of the Excel file to the CONTENTS list.
    CONTENTS += file[1:]
    merged_file_count += 1

# Save the merged Excel file.
px.save_as(array=CONTENTS, dest_file_name="merged_FILE.xlsx")

# Print how many files were merged (counted per file, not per row).
print("Total " + str(merged_file_count) + " files were merged.")

# Print the completion message.
print("Process Done.")

# Print how many seconds the whole job took.
end_time = time.time()
print("The Job Took " + str(end_time - start_time) + " seconds.")
# print(text)

# dan_tri_file = open("dantri.html","w")
# dan_tri_file.write(text)
# dan_tri_file.close()

#2 find ROI
soup = BeautifulSoup(text, "html.parser")
ul = soup.find("ul", "ul1 ulnew")
li_list = ul.find_all("li")

# Build one {"Title", "Link"} record per list item
item_list = []
for li in li_list:
    a = li.h4.a
    item_list.append({
        "Title": a.string,
        "Link": url + a['href'],
    })
pyexcel.save_as(dest_file_name="dantri.xlsx", records=item_list)
Пример #37
0
 def _create_a_file(self, file):
     """Write ``self.content`` as an array to ``file``."""
     pyexcel.save_as(array=self.content, dest_file_name=file)
import pyexcel
from urllib.request import urlopen
from bs4 import BeautifulSoup
from collections import OrderedDict

url = "http://s.cafef.vn/bao-cao-tai-chinh/VNM/IncSta/2017/3/0/0/ket-qua-hoat-dong-kinh-doanh-cong-ty-co-phan-sua-viet-nam.chn"

# Download the page and decode it as UTF-8
conn = urlopen(url)
raw_data = conn.read()
content = raw_data.decode("utf8")

soup = BeautifulSoup(content, "html.parser")
table = soup.find("table", id="tableContent")
tr_list = table.find_all("tr")

# One single-column record (keyed by the empty string) per "b_r_c" cell
table_list = []
for tr in tr_list:
    cells = tr.find_all("td", "b_r_c")
    table_list.extend(OrderedDict({"": cell.string}) for cell in cells)

pyexcel.save_as(dest_file_name="vinamilk.xlsx", records=table_list)
Пример #39
0
def test_issue_92_verify_save_as():
    """Records lacking some keys must be written with empty csv cells for them."""
    expected = "a,b,c\r\n1,2,3\r\n,2,\r\n,,3\r\n"
    rows = [{"a": 1, "b": 2, "c": 3}, {"b": 2}, {"c": 3}]
    stream = p.save_as(dest_file_type="csv", records=rows)
    eq_(stream.getvalue(), expected)
Пример #40
0
    input_driver = input("What is the driver associated with this device? ")
    d = {"IP": input_ip, "driver": input_driver}
    return d


## This code is left turned off, but might help visualize how pyexcel works with data sets.
## IP is the first column, whereas driver is the second column.
## mylistdict = [ {"IP": "172.16.2.10", "driver": "arista_eos"}, {"IP": "172.16.2.20", "driver": "arista_eos"} ]
## pyexcel.save_as(records=mylistdict, dest_file_name="ip_list.xls")

# Runtime
mylistdict = []  # list of {"IP": value, "driver": value} records that becomes the *.xls file

print("Hello! This program will make you a *.xls file")

while True:
    # get_ip_data() returns one {"IP": value, "driver": value} record
    mylistdict.append(get_ip_data())
    keep_going = input(
        "\nWould you like to add another value? Enter to continue, or enter 'q' to quit: "
    )
    if keep_going.lower() == 'q':
        break

filename = input("\nWhat is the name of the *.xls file? ")

# The closing message promises a ".xls" file, so make sure the saved name
# really carries that extension instead of only claiming it in the message.
if not filename.lower().endswith(".xls"):
    filename += ".xls"

pyexcel.save_as(records=mylistdict, dest_file_name=filename)

print("The file " + filename + " should be in your local directory")
Пример #41
0
raw_data = conn.read()

page_content = raw_data.decode("utf8")

soup = BeautifulSoup(page_content, "html.parser")

section = soup.find("section", "section chart-grid")

div = section.find("div", "section-content")

ul = div.find("ul")

li_list = ul.find_all("li")
top_songs = []
for li in li_list:
    h3 = li.h3.a
    h4 = li.h4.a
    name = h3.string
    singer = h4.string
    # Key/value pairs keep the column order independent of dict ordering
    song = OrderedDict([("Song", name), ("composer", singer)])
    top_songs.append(song)

pyexcel.save_as(records=top_songs, dest_file_name="itunes.xlsx")

#-------------------------------------------------------------------------------------------

a = top_songs[1]
options = {'default_search': 'ytsearch', 'max_downloads': 1}
dl = YoutubeDL(options)
# download() takes a list of URLs/search queries. Passing the record itself
# would iterate its keys ("Song", "composer") instead of the song title.
dl.download([a["Song"]])
Пример #42
0
 def test_get_dict_from_memory(self):
     """An in-memory xls built from an array must read back as a column dictionary."""
     rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     stream = pe.save_as(array=rows, dest_file_type="xls")
     columns = pe.get_dict(file_type="xls", file_content=stream.getvalue())
     assert columns == {"X": [1, 4], "Y": [2, 5], "Z": [3, 6]}
Пример #43
0
 def test_get_sheet_from_file_stream(self):
     """A sheet loaded from the stream returned by save_as must equal the source array."""
     rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     stream = pe.save_as(array=rows, dest_file_type="xls")
     loaded = pe.get_sheet(file_type="xls", file_stream=stream)
     assert loaded.to_array() == rows
Пример #44
0
#copy and save
li_list = ul.find_all("li")
new_list = []
for li in li_list:
    # The link inside <h3> supplies the title, the one inside <h4> the artist
    title = li.h3.a.string
    artist = li.h4.a.string
    songs = {
        "title": title,
        "artist": artist,
    }
    new_list.append(songs)
pyexcel.save_as(records=new_list, dest_file_name="demo.xlsx")
#download from youtube
options = {
    "default_search": "ytsearch",
    # NOTE(review): youtube_dl spells this option "max_downloads" -- confirm
    # whether the key below is a typo before relying on the limit.
    "max_dowload": 100,
    "format": "bestaudio/audio",
}
dl = YoutubeDL(options)
for i in new_list:
    print(i["title"])
    # download() expects a list of URLs/queries; a bare string would be
    # iterated character by character.
    dl.download([i["title"]])
Пример #45
0
 def test_series_table(self):
     """A dictionary saved to file must equal the ``dict`` of the sheet read back."""
     pyexcel.save_as(dest_file_name=self.testfile, adict=self.content)
     sheet = pyexcel.get_sheet(name_columns_by_row=0, file_name=self.testfile)
     eq_(sheet.dict, self.content)
from collections import OrderedDict

url = "https://www.apple.com/itunes/charts/songs"
conn = urlopen(url)

raw_data = conn.read()
page_content = raw_data.decode("utf-8")

soup = BeautifulSoup(page_content, "html.parser")

ul = soup.find("ul", "")

li_list = ul.find_all("li")

itune_list = []
for li in li_list:
    a = li.a
    h3 = li.h3
    h4 = li.h4
    Baihat = h3.string
    Casi = h4.string
    link = url + a["href"]

    # Key/value pairs keep the column order independent of dict ordering
    itune = OrderedDict([
        ("Baihat", Baihat),
        ("Casi", Casi),
        ("link", link),
    ])
    itune_list.append(itune)
pyexcel.save_as(records=itune_list, dest_file_name="Itunes_top_song.xlsx")
Пример #47
0
 def _create_a_file(self, file):
     """Write ``self.content`` to ``file`` with ``dest_library`` set to "pyexcel-odsw"."""
     save_options = {
         "dest_file_name": file,
         "array": self.content,
         "dest_library": "pyexcel-odsw",
     }
     pyexcel.save_as(**save_options)
Пример #48
0
 def test_get_array_from_memory(self):
     """``self.test_data`` saved to an in-memory xls must read back unchanged."""
     stream = pe.save_as(array=self.test_data, dest_file_type="xls")
     loaded = pe.get_array(file_type="xls", file_content=stream.getvalue())
     eq_(loaded, self.test_data)
Пример #49
0
import pyexcel

# Two sample records; note that the second key is "age " with a trailing space
a_list_of_dic = [
    {"name": "Hieu", "age ": 20},
    {"name": "ha", "age ": 20},
]
pyexcel.save_as(dest_file_name="a1.xlsx", records=a_list_of_dic)
Пример #50
0
import pyexcel
# NOTE(review): the output file is .xlsx, which presumably needs the
# pyexcel-xlsx plugin rather than pyexcel-xls -- confirm.
a_list_of_dictionaries = [
    {"Name": 'Adam', "Age": 28},
    {"Name": 'Beatrice', "Age": 29},
    {"Name": 'Ceri', "Age": 30},
    {"Name": 'Dean', "Age": 26},
]

pyexcel.save_as(dest_file_name="your_file.xlsx",
                records=a_list_of_dictionaries)
Пример #51
0
import pyexcel

# 1. Prepare data: one record per person

data = [
    {"name": "Son", "age": 23},
    {"name": "Trung", "age": 19},
    {"name": "Dung", "age": 21},
]

# 2. Save the records as a spreadsheet
pyexcel.save_as(dest_file_name="sample.xlsx", records=data)

                    site_visits = settings.MONGO_DB.instances.aggregate([{"$match":{"fs_site": str(site.id)}},  { "$group" : { 
                          "_id" :  
                            { "$substr": [ "$start", 0, 10 ] }
                          
                       }
                     }])['result']

                    site_row[-1] = rejected_count
                    site_row[-2] = flagged_count
                    site_row[-3] = submission_count
                    site_row[-4] = len(site_visits) 

                    data.append(site_row)

                p.save_as(array=data, dest_file_name="media/stage-report/{}_stage_data.xls".format(project.id))
                xl_data = open("media/stage-report/{}_stage_data.xls".format(project.id), "rb")
                
                #Its only quick fix for now, save it in aws bucket whenever possible.

                project.progress_report = xl_data.name
                project.save()
                count += 1
                
            except Exception as e:
                print 'Report Gen Unsuccesfull. %s' % e
                print e.__dict__
        
        self.stdout.write('Created "%s " report for projects with success!' % (count))
        print datetime.now()
Пример #53
0
# Header labels, copied from h_list (defined earlier in the script)
h_data_list = list(h_list)
table = soup.find(id='tableContent')
row = table.find_all('tr')

# Collect the cell texts of every table row. The rows have to be stored in
# row_list, otherwise the loop below has nothing to work on and the output
# file is always empty.
row_list = []
for item in row:
    r_data_list = [cell.string for cell in item.find_all('td', 'b_r_c')]
    # Rows without any "b_r_c" cell carry no data
    if r_data_list:
        row_list.append(r_data_list)

final = []
for item in row_list:
    # First cell is the category, the remaining cells line up with the headers
    val = {"Danh Muc": item[0]}
    # zip() stops at the shorter side, so a short row cannot raise IndexError
    val.update(zip(h_data_list, item[1:]))
    final.append(val)

pyexcel.save_as(records=final,dest_file_name="vinamlk.xlsx")
Пример #54
0
 def test_get_records_from_memory(self):
     """An in-memory xls built from an array must read back as one record per data row."""
     rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     stream = pe.save_as(array=rows, dest_file_type="xls")
     loaded = pe.get_records(file_type="xls", file_content=stream.getvalue())
     expected = [{"X": 1, "Y": 2, "Z": 3}, {"X": 4, "Y": 5, "Z": 6}]
     eq_(loaded, expected)
Пример #55
0
soup = BeautifulSoup(text, "html.parser")

# 2 Find the ROI: the <ul> selected by "ul1 ulnew"
ul_news = soup.find("ul", "ul1 ulnew")

# 3 Extract data: one {"Title", "Link"} record per list item
li_list = ul_news.find_all("li")
news_items = []
for li in li_list:
    a = li.h4.a
    link = url + a["href"]
    news_items.append({"Title": a.text, "Link": link})
print(news_items)

# 4 Save data
pyexcel.save_as(dest_file_name="dantri.xlsx", records=news_items)

# html_file.close

# 2: Extract ROI (region of interest)
soup = BeautifulSoup(html, "html.parser")
section = soup.find("section", "section chart-grid")

# 3: Extract info -- one {'name', 'artist'} record per list item
li_list = section.find_all("li")
songs = []
for li in li_list:
    song = {'name': li.h3.string, 'artist': li.h4.string}
    songs.append(song)
pyexcel.save_as(dest_file_name="top_song.xlsx", records=songs)

# ***************************************************************************************************

# Part 2 : Search and download to youtube

from youtube_dl import YoutubeDL

# Options handed to the YoutubeDL constructor below.
# NOTE(review): youtube_dl spells the download-limit option 'max_downloads';
# the key used here is 'max_download' -- confirm whether that is a typo and
# whether a limit of 1 is wanted for the loop that follows.
options = {
    'default_search':
    'ytsearch',  # tell download to search instead of directly downloading
    'max_download': 1  # tell download to download only the first entry(video)
}
new_song = []
dl = YoutubeDL(options)
for li in li_list:
Пример #57
0
 def test_get_sheet_from_memory(self):
     """A sheet loaded from in-memory xls content must equal the source array."""
     rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     stream = pe.save_as(array=rows, dest_file_type="xls")
     loaded = pe.get_sheet(file_type="xls", file_content=stream.getvalue())
     assert loaded.to_array() == rows
Пример #58
0
#1.4 Save Html Content
# urlretrieve(url, "dantri.html")  # alternative: save the page with the urlretrieve helper
# f = open('dantri.html','w')
# f.write(html_content)
# f.close

#2. Extract ROI (Region of Interest)
soup = BeautifulSoup(html_content, "html.parser")
ul = soup.find("ul", "ul1 ulnew")
li_list = ul.find_all("li")

# Build one {"Title", "Link"} record per list item
list_of_dict = []
for li in li_list:
    a = li.h4.a
    dict1 = {"Title": a.string, "Link": url + a["href"]}
    list_of_dict.append(dict1)

#3. Save the records
pyexcel.save_as(dest_file_name="dantri.xlsx", records=list_of_dict)
Пример #59
0
def test_out_file_parameter():
    """save_as called with ``out_file`` is expected to raise UnknownParameters."""
    try:
        pe.save_as(array=[[1]], out_file="b", colnames=["X", "Y", "Z"])
    except pe.sources.factory.UnknownParameters as e:
        eq_(str(e), 'No parameters found!')
    else:
        # Without this branch the test would pass silently when nothing is raised
        raise AssertionError("UnknownParameters was not raised")
Пример #60
0
 def test_get_sheet_from_memory_compatibility(self):
     """``get_sheet`` must accept the ``content`` keyword for in-memory xls data."""
     rows = [["X", "Y", "Z"], [1, 2, 3], [4, 5, 6]]
     stream = pe.save_as(array=rows, dest_file_type="xls")
     pe.get_sheet(file_type="xls", content=stream.getvalue())