如何用Python将Pandas数据框保存为EXCEL表到SharePoint

wrrgggsh  于 2022-09-21  发布在  Python
关注(0)|答案(1)|浏览(187)

我正在尝试将Pandas Dataframe 保存为Excel表到一个SharePoint站点。我有两个独立的代码块来实现下面的功能。(感谢Stackoverflow社区)

1.在本地存储中使用ExcelWriter将PandasDF保存为EXCEL表的脚本。
1.可以将本地文件保存到Sharepoint Online的脚本。

我对如何将这两者结合起来将DF保存到SharePoint Online上感到非常困惑,但EXCEL文件应该是一个表,而不仅仅是一系列数据。请帮帮忙

脚本1将EXCEL范围保存为本地表


############################################################################## 

# 

# An example of adding a dataframe to an worksheet table in an xlsx file

# using Pandas and XlsxWriter.

# 

# Tables in Excel are used to group rows and columns of data into a single

# structure that can be referenced in a formula or formatted collectively.

# 

# SPDX-License-Identifier: BSD-2-Clause

# Copyright 2013-2021, John McNamara, jmcnamara@cpan.org

# 

import pandas as pd

# Create a Pandas dataframe from some data.

df = pd.DataFrame({
    'Country':    ['China',    'India',    'United States', 'Indonesia'],
    'Population': [1404338840, 1366938189, 330267887,       269603400],
    'Rank':       [1,          2,          3,               4]})

# Order the columns if necessary.

df = df[['Rank', 'Country', 'Population']]

# Create a Pandas Excel writer using XlsxWriter as the engine.

writer = pd.ExcelWriter('pandas_table.xlsx', engine='xlsxwriter')

# Write the dataframe data to XlsxWriter. Turn off the default header and

# index and skip one row to allow us to insert a user defined header.

df.to_excel(writer, sheet_name='Sheet1', startrow=1, header=False, index=False)

# Get the xlsxwriter workbook and worksheet objects.

workbook = writer.book
worksheet = writer.sheets['Sheet1']

# Get the dimensions of the dataframe.

(max_row, max_col) = df.shape

# Create a list of column headers, to use in add_table().

column_settings = [{'header': column} for column in df.columns]

# Add the Excel table structure. Pandas will add the data.

worksheet.add_table(0, 0, max_row, max_col - 1, {'columns': column_settings})

# Make the columns wider for clarity.

worksheet.set_column(0, max_col - 1, 12)

# Close the Pandas Excel writer and output the Excel file.

writer.save()

将任何文件保存到SharePoint的脚本2

from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
import os 

baseurl = 'https://testsite.sharepoint.com/'
basesite = '/sites/project' # every share point has a home.
siteurl = baseurl + basesite 

localpath = "pandas_table.xlsx"
remotepath = "Shared Documents/General/file.xlsx" # existing folder path under sharepoint site.

ctx_auth = AuthenticationContext(siteurl)
ctx_auth.acquire_token_for_user(<username>, <password>)
ctx = ClientContext(siteurl, ctx_auth) # make sure you auth to the siteurl.

with open(localpath, 'rb') as content_file:
    file_content = content_file.read()

dir, name = os.path.split(remotepath)
file = ctx.web.get_folder_by_server_relative_url(dir).upload_file(name, file_content).execute_query()

我真的不确定如何将Writer.save()与SharePoint连接器一起使用。敬请提前告知感谢。

mzsu5hc0

mzsu5hc01#

假设您的数据在df中,下面的代码将使用O365库写入到SharePoint。

from io import BytesIO
from tempfile import gettempdir

from O365 import Account, FileSystemTokenBackend
import pandas as pd

O365_CLIENT_ID = "client"
O365_SECRET = "secret"
O365_TENANT_ID = "<name>"
O365_SHAREPOINT = "<name>.sharepoint.com"
O365_SITE = "/sites/..."

def save_file(folder_path, filename, data):
    """save file to O365."""
    account = Account(
        (O365_CLIENT_ID, O365_SECRET),
        auth_flow_type="credentials",
        tenant_id=O365_TENANT_ID,
        token_backend=FileSystemTokenBackend(
            token_path=gettempdir(), token_filename="o365_token.txt"
        ),
    )
    if account.authenticate():
        drive = (
            account.sharepoint()
            .get_site(O365_SHAREPOINT, O365_SITE)
            .get_default_document_library()
        )
        subfolders = folder_path.split("/")
        if len(subfolders) != 0:
           items = drive.get_items()
           for subfolder in subfolders:
           try:
               subfolder_drive = list(filter(lambda x, sf=subfolder: sf in x.name, items))[0]
               items = subfolder_drive.get_items()
           except Exception as excep:  # pylint: disable=broad-except
                raise f"Path {folder_path} does not exist." from excep
        else:
            subfolder_drive = drive.get_root_folder()
        subfolder_drive.upload_file(
            item=None,
            item_name=filename,
            stream=data,
            stream_size=data.getbuffer().nbytes,
        )

with BytesIO() as buf:
    df.to_excel(buf, index=False)
    buf.seek(0)
    save_file('folder/to/upload/to', 'filename.xlsx', buf)

写入多张图纸:

with BytesIO() as buf:
    with pd.ExcelWriter(  # pylint: disable=abstract-class-instantiated
        buf,
        engine="xlsxwriter",
    ) as writer:
        df.to_excel(
            writer,
            sheet_name='sheet1',
            index=False,
        )
        df.to_excel(
            writer,
            sheet_name='sheet2',
            index=False,
        )
    buf.seek(0)
    save_file('folder/to/upload/to', 'filename.xlsx', buf)

相关问题