How to read SharePoint Online (Office365) Excel files into Python specifically pandas with Work or School Account?
Question:
The question is very similar to the link below.
How to read SharePoint Online (Office365) Excel files in Python with Work or School Account?
Essentially I would like to import an excel file off SharePoint into pandas for further analysis.
The issue is when I run the code below I get the following error.
XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'rn<!DOCT'
My code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.file import File
url = 'https://companyname.sharepoint.com/SitePages/Home.aspx'
username = '[email protected]'
password = 'password!'
relative_url = '/Shared%20Documents/Folder%20Number1/Folder%20Number2/Folder3/Folder%20Number%Four/Target_Excel_File_v4.xlsx?d=w8f97c2341898_random_numbers_and_letters_a065c12cbcsf=1&e=KXoU4s'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
#this gives me a KeyError: 'Title'
#print("Web title: {0}".format(web.properties['Title']))
print('Authentication Successful')
else:
print(ctx_auth.get_last_error())
import io
import pandas as pd
response = File.open_binary(ctx, relative_url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read file into pandas dataframe
df = pd.read_excel(bytes_file_obj)
print(df)
Answers:
For those of you that ended up like me here at this issue, I found that one has to path the full URL to File
, not just the path:
#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
import pandas as pd
#target url taken from sharepoint and credentials
url = 'https://company.sharepoint.com/Shared%20Documents/Folder%20Number1/Folder%20Number2/Folder3/Folder%20Number4/Target_Excel_File_v4.xlsx?cid=_Random_letters_and_numbers-21dbf74c'
username = '[email protected]'
password = 'Password!'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Authentication successful")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
df = pd.read_excel(bytes_file_obj, sheetname = None)
Maybe worth to note that the official repository holds many examples on common operations for sharepoint, drive and teams.
One note for installation:
pip install Office365-REST-Python-Client
There is also a office365 package but the above one seems to be the correct oneenter link description here
I know it ws 2-3 years ao but someone maybe will know. I’m using this code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import pandas as pd
import io
url = 'some link to sharepoint'
username = 'mail'
password = 'password'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Authentication successful")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
df = pd.read_excel(bytes_file_obj, sheet_name = None, engine='openpyxl')
print(df)
i got error
zipfile.BadZipFile: File is not a zip file
and i can’t find any way to solve this problem. Maybe someone of you know how to figure out ?
The following works with Client ID and Secret Code (Lib: Office365)
# Credential to connect to your SP Site
SITE_URL ='https://XXXXXX.sharepoint.com/sites/yoursitename'
CLIENT_ID = 'xxxxxxxx-xxx-xxxx-xxxxxxxxxxxxxxxxx'
CLIENT_SECRET= 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Establish the connection
context = ClientContext(SITE_URL).with_credentials(ClientCredential(CLIENT_ID, CLIENT_SECRET))
response = File.open_binary(context, '/'.join(['/sites/yoursitename/Shared Documents/Work/OnlyFolderName',
'yourfilename.xlsx']))
df = pd.read_excel(io.BytesIO(response.content))
The question is very similar to the link below.
How to read SharePoint Online (Office365) Excel files in Python with Work or School Account?
Essentially I would like to import an excel file off SharePoint into pandas for further analysis.
The issue is when I run the code below I get the following error.
XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'rn<!DOCT'
My code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.file import File
url = 'https://companyname.sharepoint.com/SitePages/Home.aspx'
username = '[email protected]'
password = 'password!'
relative_url = '/Shared%20Documents/Folder%20Number1/Folder%20Number2/Folder3/Folder%20Number%Four/Target_Excel_File_v4.xlsx?d=w8f97c2341898_random_numbers_and_letters_a065c12cbcsf=1&e=KXoU4s'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
#this gives me a KeyError: 'Title'
#print("Web title: {0}".format(web.properties['Title']))
print('Authentication Successful')
else:
print(ctx_auth.get_last_error())
import io
import pandas as pd
response = File.open_binary(ctx, relative_url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read file into pandas dataframe
df = pd.read_excel(bytes_file_obj)
print(df)
For those of you that ended up like me here at this issue, I found that one has to path the full URL to File
, not just the path:
#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import io
import pandas as pd
#target url taken from sharepoint and credentials
url = 'https://company.sharepoint.com/Shared%20Documents/Folder%20Number1/Folder%20Number2/Folder3/Folder%20Number4/Target_Excel_File_v4.xlsx?cid=_Random_letters_and_numbers-21dbf74c'
username = '[email protected]'
password = 'Password!'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Authentication successful")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
df = pd.read_excel(bytes_file_obj, sheetname = None)
Maybe worth to note that the official repository holds many examples on common operations for sharepoint, drive and teams.
One note for installation:
pip install Office365-REST-Python-Client
There is also a office365 package but the above one seems to be the correct oneenter link description here
I know it ws 2-3 years ao but someone maybe will know. I’m using this code:
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
import pandas as pd
import io
url = 'some link to sharepoint'
username = 'mail'
password = 'password'
ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
ctx = ClientContext(url, ctx_auth)
web = ctx.web
ctx.load(web)
ctx.execute_query()
print("Authentication successful")
response = File.open_binary(ctx, url)
#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start
#read excel file and each sheet into pandas dataframe
df = pd.read_excel(bytes_file_obj, sheet_name = None, engine='openpyxl')
print(df)
i got error
zipfile.BadZipFile: File is not a zip file
and i can’t find any way to solve this problem. Maybe someone of you know how to figure out ?
The following works with Client ID and Secret Code (Lib: Office365)
# Credential to connect to your SP Site
SITE_URL ='https://XXXXXX.sharepoint.com/sites/yoursitename'
CLIENT_ID = 'xxxxxxxx-xxx-xxxx-xxxxxxxxxxxxxxxxx'
CLIENT_SECRET= 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Establish the connection
context = ClientContext(SITE_URL).with_credentials(ClientCredential(CLIENT_ID, CLIENT_SECRET))
response = File.open_binary(context, '/'.join(['/sites/yoursitename/Shared Documents/Work/OnlyFolderName',
'yourfilename.xlsx']))
df = pd.read_excel(io.BytesIO(response.content))