Read excel file from google drive without downloading file
Question:
I wants to read excel sheets from excel file on google drive without downloading on local machine! i searched for google drive api but couldn’t find solution i tried following code please need suggestion:
'''
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file_id = 'abc'
file_name = 'abc.xlsx'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile(file_name)
class TestCase:
def __init__(self, file_name, sheet):
self.file_name = file_name
self.sheet = sheet
testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
print(testcase)
class TestCaseSteps:
def __init__(self, file_name, sheet):
self.file_name = file_name
self.sheet = sheet
testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
print(testcase)
testcases = TestCase(file_name, ‘A’)
steps = TestCaseSteps(file_name, ‘B’)
”’
Answers:
I believe your goal and situation as follows.
- You want to read the XLSX downloaded from Google Drive using
pd.read_excel
.
- You want to achieve this without saving the downloaded XLSX data as a file.
- Your
gauth = GoogleAuth()
can be used for downloading the Google Spreadsheet as the XLSX format.
In this case, I would like to propose the following flow.
- Download the Google Spreadsheet as XLSX format.
- In this case, it directly requests to the endpoint for exporting Spreadsheet as XLSX format using
requests
library.
- The access token is retrieved from
gauth = GoogleAuth()
.
- The downloaded XLSX data is read with
pd.read_excel
.
- In this case,
BytesIO
is used for reading the data.
By this flow, when the Spreadsheet is downloaded as the XLSX data, the XLSX data can be read without saving it as a file. When above flow is reflected to the script, it becomes as follows.
Sample script:
Before you run the script, please set the Spreadsheet ID.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO
spreadsheetId = "###" # <--- Please set the Spreadsheet ID.
# 1. Download the Google Spreadsheet as XLSX format.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + spreadsheetId + "/export?mimeType=application%2Fvnd.openxmlformats-officedocument.spreadsheetml.sheet"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)
References:
Added:
At the following sample script, it supposes that the XLSX file is put to the Google Drive, and the XLSX file is downloaded.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO
file_id = "###" # <--- Please set the file ID of XLSX file.
# 1. Download the XLSX data.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + file_id + "?alt=media"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)
Get the link to the Excel file.
Replace the "/edit?usp=share_link" for "/export?format=xlsx"
import pandas as pd
url = "https://docs.google.com/spreadsheets/d/1py9cdXWCorKrRz7fUp0doPaOTJ2tiexB/edit?usp=share_link&ouid=107751642562197088720&rtpof=true&sd=true"
url_for_pandas = url.replace("/edit?usp=share_link", "/export?format=xlsx")
df = pd.read_excel(url_for_pandas)
print(df)
I wants to read excel sheets from excel file on google drive without downloading on local machine! i searched for google drive api but couldn’t find solution i tried following code please need suggestion:
'''
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file_id = 'abc'
file_name = 'abc.xlsx'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile(file_name)
class TestCase:
def __init__(self, file_name, sheet):
self.file_name = file_name
self.sheet = sheet
testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
print(testcase)
class TestCaseSteps:
def __init__(self, file_name, sheet):
self.file_name = file_name
self.sheet = sheet
testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
print(testcase)
testcases = TestCase(file_name, ‘A’)
steps = TestCaseSteps(file_name, ‘B’)
”’
I believe your goal and situation as follows.
- You want to read the XLSX downloaded from Google Drive using
pd.read_excel
. - You want to achieve this without saving the downloaded XLSX data as a file.
- Your
gauth = GoogleAuth()
can be used for downloading the Google Spreadsheet as the XLSX format.
In this case, I would like to propose the following flow.
- Download the Google Spreadsheet as XLSX format.
- In this case, it directly requests to the endpoint for exporting Spreadsheet as XLSX format using
requests
library. - The access token is retrieved from
gauth = GoogleAuth()
.
- In this case, it directly requests to the endpoint for exporting Spreadsheet as XLSX format using
- The downloaded XLSX data is read with
pd.read_excel
.- In this case,
BytesIO
is used for reading the data.
- In this case,
By this flow, when the Spreadsheet is downloaded as the XLSX data, the XLSX data can be read without saving it as a file. When above flow is reflected to the script, it becomes as follows.
Sample script:
Before you run the script, please set the Spreadsheet ID.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO
spreadsheetId = "###" # <--- Please set the Spreadsheet ID.
# 1. Download the Google Spreadsheet as XLSX format.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + spreadsheetId + "/export?mimeType=application%2Fvnd.openxmlformats-officedocument.spreadsheetml.sheet"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)
References:
Added:
At the following sample script, it supposes that the XLSX file is put to the Google Drive, and the XLSX file is downloaded.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO
file_id = "###" # <--- Please set the file ID of XLSX file.
# 1. Download the XLSX data.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + file_id + "?alt=media"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)
Get the link to the Excel file.
Replace the "/edit?usp=share_link" for "/export?format=xlsx"
import pandas as pd
url = "https://docs.google.com/spreadsheets/d/1py9cdXWCorKrRz7fUp0doPaOTJ2tiexB/edit?usp=share_link&ouid=107751642562197088720&rtpof=true&sd=true"
url_for_pandas = url.replace("/edit?usp=share_link", "/export?format=xlsx")
df = pd.read_excel(url_for_pandas)
print(df)