Appending pandas Data Frame to Google spreadsheet
Question:
Case:
My script returns a data frame that needs to be appended to an existing Google spreadsheet as new rows of data. As of now, I’m appending the data frame as multiple single rows through gspread.
My Code:
import gspread
import pandas as pd
df = pd.DataFrame()
# After some processing a non-empty data frame has been created.
# Here 'SheetName' is the Google spreadsheet and 'xyz' is a sheet in the workbook.
# NOTE(review): `gc` is presumably an authorized gspread client created earlier.
output_conn = gc.open("SheetName").worksheet("xyz")
# One append_row call per data frame row -- the pattern the question wants to avoid.
for _, row in df.iterrows():
    # Pass a plain list of cell values rather than the pandas Series itself.
    output_conn.append_row(row.tolist())
Is there a way to append entire data-frame rather than multiple single rows?
Answers:
If the Google spreadsheet accepts the .csv format, you can convert the pandas dataframe to CSV using df.to_csv() and save it in that format.
I can recommend gspread-dataframe:
import gspread_dataframe as gd
import pandas as pd

# Connecting with `gspread` here
ws = gc.open("SheetName").worksheet("xyz")
# Load the current worksheet contents as a DataFrame.
existing = gd.get_as_dataframe(ws)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
updated = pd.concat([existing, your_new_data])
# Write the combined frame back to the worksheet.
gd.set_with_dataframe(ws, updated)
I came up with the following solution. It does not overwrite the current data but just appends the entire pandas DataFrame df to the end of the sheet named sheet_name in the spreadsheet named spread_sheet.
import gspread
from google.auth.transport.requests import AuthorizedSession
from oauth2client.service_account import ServiceAccountCredentials
def append_df_to_gs(df, spread_sheet: str, sheet_name: str):
    """Append the values of ``df`` to a worksheet without overwriting existing data.

    Args:
        df: pandas DataFrame whose values (no header row) are appended.
        spread_sheet: Title of the spreadsheet to open.
        sheet_name: Name of the worksheet inside that spreadsheet.
    """
    scopes = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    # NOTE(review): `path_to_credentials` is not defined in this snippet; it is
    # presumably the path to the service-account JSON key file -- confirm.
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        path_to_credentials,
        scopes=scopes
    )
    gsc = gspread.authorize(credentials)
    sheet = gsc.open(spread_sheet)
    # USER_ENTERED makes Sheets parse the values as if they were typed in.
    params = {'valueInputOption': 'USER_ENTERED'}
    body = {'values': df.values.tolist()}
    # The original `{sheet_name:str}` used "str" as a format spec, which raises
    # ValueError for string values; plain interpolation is what was intended.
    sheet.values_append(f'{sheet_name}!A1:G1', params, body)
For the valueInputOption parameter, please consult the Google Sheets API documentation. I used USER_ENTERED here because I needed some formulas to be valid once the data is appended to Google Sheets.
Here is the code to write, append(without loading the existing sheet into memory), and read to google sheets.
import gspread_dataframe as gd
import gspread as gs
import pandas as pd

gc = gs.service_account(filename="your/cred/file.json")


def export_to_sheets(worksheet_name, df, mode='r'):
    """Write, append, or read a worksheet of the "SHEET_NAME" spreadsheet.

    Args:
        worksheet_name: Name of the worksheet to operate on.
        df: pandas DataFrame to upload (unused in read mode).
        mode: 'w' clears the worksheet and writes ``df`` with its header,
            'a' appends ``df`` below the existing rows without a header,
            anything else (default 'r') reads the worksheet.

    Returns:
        True after a write or append; the worksheet as a DataFrame otherwise.
    """
    # Use the parameter, not the literal string "worksheet_name".
    ws = gc.open("SHEET_NAME").worksheet(worksheet_name)
    if mode == 'w':
        ws.clear()
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=True, resize=True)
        return True
    if mode == 'a':
        # Fix the anchor row BEFORE growing the grid: once add_rows has updated
        # row_count, row_count + 1 points past the end of the sheet.
        first_new_row = ws.row_count + 1
        ws.add_rows(df.shape[0])
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=False, row=first_new_row,
                              resize=False)
        return True
    return gd.get_as_dataframe(worksheet=ws)


df = pd.DataFrame.from_records([{'a': i, 'b': i * 2} for i in range(100)])
export_to_sheets("worksheet_name", df, 'a')
- Write mode: first, clear the existing worksheet with ws.clear(). Second, upload the dataframe using set_with_dataframe(); note that resize=True strictly sets the worksheet's row and column counts to df.shape, which helps later in the append mode.
- Append mode: first, add rows according to the size of the dataframe. Second, set resize=False, since we are adding the rows ourselves, and pass row=ws.row_count+1 to anchor the starting row for the append.
- Read mode (default): returns the worksheet as a dataframe.
# Open the worksheet "Sheet1" of the spreadsheet titled "sheet title".
ws = gc.open("sheet title").worksheet("Sheet1")
# Simply write your dataframe to the Google Sheet.
gd.set_with_dataframe(ws, dataframe)
I was facing the same problem. Here’s what I did: I converted the dataframe into a list and used gspread’s append_rows().
# Authorize with a service-account key file and open the spreadsheet by its key.
gc = gspread.service_account(filename="credentials.json")
sh = gc.open_by_key('<your_key>')
ws = sh.sheet1
# `data` is the original data frame; convert its values to a list of row lists.
data_list = data.values.tolist()
# Append every row with one append_rows() call.
ws.append_rows(data_list)
The following approach, using gspread, may help one understand the procedure and solve the problem:
- Install the libraries in your environment.
- Import the libraries in the script:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
- Create credentials in the Google API console.
- Add the following to the script to access the Google Sheet:
gc = gspread.service_account(filename='GoogleAPICredentials.json')
sh = gc.open_by_key('GoogleSheetID')
Assuming one wants to add to the first sheet, use 0 in get_worksheet (for the second sheet use 1, and so on):
worksheet = sh.get_worksheet(0)
- Then, to export the dataframe (assuming it is named df) to a Google Sheet:
set_with_dataframe(worksheet, df)
The following doesn’t require external libs other than gspread:
worksheet.update([dataframe.columns.values.tolist()] + dataframe.values.tolist())
I came up with the following solution using a try/except statement: if the worksheet doesn’t exist, it is created and the dataframe is written to it; otherwise the dataframe is appended.
def load_to_sheet(conn_sheet, spreadsheet_name, df):
    """Append ``df`` to a worksheet, creating the worksheet if it is missing.

    Args:
        conn_sheet: Opened spreadsheet object providing ``worksheet`` and
            ``add_worksheet``.
        spreadsheet_name: Title of the worksheet to append to or create.
        df: pandas DataFrame to write.
    """
    from gspread.exceptions import WorksheetNotFound

    # Only the lookup is guarded, so API errors raised while appending are no
    # longer mistaken for "worksheet does not exist".
    try:
        worksheet = conn_sheet.worksheet(spreadsheet_name)
    except WorksheetNotFound:
        worksheet = conn_sheet.add_worksheet(title=spreadsheet_name, rows=100, cols=100)
        set_with_dataframe(worksheet=worksheet, dataframe=df, include_index=False,
                           include_column_header=True, resize=True)
        return

    # Anchor on the first row after the existing grid, taken BEFORE add_rows;
    # the original used row_count after growing, which either overwrote the
    # last row or ran past the grid limits.
    first_new_row = worksheet.row_count + 1
    worksheet.add_rows(df.shape[0])
    set_with_dataframe(worksheet=worksheet, row=first_new_row, dataframe=df,
                       include_index=False, include_column_header=False,
                       resize=False)
Improving upon the solution given by Darsh Shukla
def append_df_to_sheet(g_spread_name, sheet_name, df):
    """Append ``df`` (without header) below the last row of a worksheet.

    Args:
        g_spread_name: Title of the spreadsheet to open.
        sheet_name: Name of the worksheet inside that spreadsheet.
        df: pandas DataFrame to append.

    Returns:
        True once the data has been written.
    """
    # The function takes no `self`, so the module-level gspread client `gc`
    # is used instead of `self.gc` (which raised NameError).
    spreadsheet = gc.open(g_spread_name)
    wks = spreadsheet.worksheet(sheet_name)  # get the sheet
    append_row = wks.row_count + 1  # position where to append
    wks.add_rows(df.shape[0])  # add rows to avoid API range error
    gd.set_with_dataframe(worksheet=spreadsheet.worksheet(sheet_name),  # refresh the sheet
                          dataframe=df,
                          include_index=False,
                          include_column_header=False,
                          row=append_row,
                          resize=False)
    return True
This avoids the error
gspread.exceptions.APIError: {'code': 400, 'message': 'Range (test!A999:B1001) exceeds grid limits. Max rows: 998, max columns: 26', 'status': 'INVALID_ARGUMENT'}
Case:
My script returns a data frame that needs to be appended to an existing Google spreadsheet as new rows of data. As of now, I’m appending the data frame as multiple single rows through gspread.
My Code:
import gspread
import pandas as pd
df = pd.DataFrame()
# After some processing a non-empty data frame has been created.
# Here 'SheetName' is the Google spreadsheet and 'xyz' is a sheet in the workbook.
# NOTE(review): `gc` is presumably an authorized gspread client created earlier.
output_conn = gc.open("SheetName").worksheet("xyz")
# One append_row call per data frame row -- the pattern the question wants to avoid.
for _, row in df.iterrows():
    # Pass a plain list of cell values rather than the pandas Series itself.
    output_conn.append_row(row.tolist())
Is there a way to append entire data-frame rather than multiple single rows?
If the Google spreadsheet accepts the .csv format, you can convert the pandas dataframe to CSV using df.to_csv() and save it in that format.
I can recommend gspread-dataframe:
import gspread_dataframe as gd
import pandas as pd

# Connecting with `gspread` here
ws = gc.open("SheetName").worksheet("xyz")
# Load the current worksheet contents as a DataFrame.
existing = gd.get_as_dataframe(ws)
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
updated = pd.concat([existing, your_new_data])
# Write the combined frame back to the worksheet.
gd.set_with_dataframe(ws, updated)
I came up with the following solution. It does not overwrite the current data but just appends the entire pandas DataFrame df to the end of the sheet named sheet_name in the spreadsheet named spread_sheet.
import gspread
from google.auth.transport.requests import AuthorizedSession
from oauth2client.service_account import ServiceAccountCredentials
def append_df_to_gs(df, spread_sheet: str, sheet_name: str):
    """Append the values of ``df`` to a worksheet without overwriting existing data.

    Args:
        df: pandas DataFrame whose values (no header row) are appended.
        spread_sheet: Title of the spreadsheet to open.
        sheet_name: Name of the worksheet inside that spreadsheet.
    """
    scopes = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    # NOTE(review): `path_to_credentials` is not defined in this snippet; it is
    # presumably the path to the service-account JSON key file -- confirm.
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        path_to_credentials,
        scopes=scopes
    )
    gsc = gspread.authorize(credentials)
    sheet = gsc.open(spread_sheet)
    # USER_ENTERED makes Sheets parse the values as if they were typed in.
    params = {'valueInputOption': 'USER_ENTERED'}
    body = {'values': df.values.tolist()}
    # The original `{sheet_name:str}` used "str" as a format spec, which raises
    # ValueError for string values; plain interpolation is what was intended.
    sheet.values_append(f'{sheet_name}!A1:G1', params, body)
For the valueInputOption parameter, please consult the Google Sheets API documentation. I used USER_ENTERED here because I needed some formulas to be valid once the data is appended to Google Sheets.
Here is the code to write, append(without loading the existing sheet into memory), and read to google sheets.
import gspread_dataframe as gd
import gspread as gs
import pandas as pd

gc = gs.service_account(filename="your/cred/file.json")


def export_to_sheets(worksheet_name, df, mode='r'):
    """Write, append, or read a worksheet of the "SHEET_NAME" spreadsheet.

    Args:
        worksheet_name: Name of the worksheet to operate on.
        df: pandas DataFrame to upload (unused in read mode).
        mode: 'w' clears the worksheet and writes ``df`` with its header,
            'a' appends ``df`` below the existing rows without a header,
            anything else (default 'r') reads the worksheet.

    Returns:
        True after a write or append; the worksheet as a DataFrame otherwise.
    """
    # Use the parameter, not the literal string "worksheet_name".
    ws = gc.open("SHEET_NAME").worksheet(worksheet_name)
    if mode == 'w':
        ws.clear()
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=True, resize=True)
        return True
    if mode == 'a':
        # Fix the anchor row BEFORE growing the grid: once add_rows has updated
        # row_count, row_count + 1 points past the end of the sheet.
        first_new_row = ws.row_count + 1
        ws.add_rows(df.shape[0])
        gd.set_with_dataframe(worksheet=ws, dataframe=df, include_index=False,
                              include_column_header=False, row=first_new_row,
                              resize=False)
        return True
    return gd.get_as_dataframe(worksheet=ws)


df = pd.DataFrame.from_records([{'a': i, 'b': i * 2} for i in range(100)])
export_to_sheets("worksheet_name", df, 'a')
- Write mode: first, clear the existing worksheet with ws.clear(). Second, upload the dataframe using set_with_dataframe(); note that resize=True strictly sets the worksheet's row and column counts to df.shape, which helps later in the append mode.
- Append mode: first, add rows according to the size of the dataframe. Second, set resize=False, since we are adding the rows ourselves, and pass row=ws.row_count+1 to anchor the starting row for the append.
- Read mode (default): returns the worksheet as a dataframe.
# Open the worksheet "Sheet1" of the spreadsheet titled "sheet title".
ws = gc.open("sheet title").worksheet("Sheet1")
# Simply write your dataframe to the Google Sheet.
gd.set_with_dataframe(ws, dataframe)
I was facing the same problem. Here’s what I did: I converted the dataframe into a list and used gspread’s append_rows().
# Authorize with a service-account key file and open the spreadsheet by its key.
gc = gspread.service_account(filename="credentials.json")
sh = gc.open_by_key('<your_key>')
ws = sh.sheet1
# `data` is the original data frame; convert its values to a list of row lists.
data_list = data.values.tolist()
# Append every row with one append_rows() call.
ws.append_rows(data_list)
The following approach, using gspread, may help one understand the procedure and solve the problem:
- Install the libraries in your environment.
- Import the libraries in the script:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
- Create credentials in the Google API console.
- Add the following to the script to access the Google Sheet:
# Authorize with the service-account key file, then open the spreadsheet by its ID.
gc = gspread.service_account(filename='GoogleAPICredentials.json')
sh = gc.open_by_key('GoogleSheetID')
Assuming one wants to add to the first sheet, use 0 in get_worksheet (for the second sheet use 1, and so on):
worksheet = sh.get_worksheet(0)
- Then, to export the dataframe (assuming it is named df) to a Google Sheet:
set_with_dataframe(worksheet, df)
The following doesn’t require external libs other than gspread:
worksheet.update([dataframe.columns.values.tolist()] + dataframe.values.tolist())
I came up with the following solution using a try/except statement: if the worksheet doesn’t exist, it is created and the dataframe is written to it; otherwise the dataframe is appended.
def load_to_sheet(conn_sheet, spreadsheet_name, df):
    """Append ``df`` to a worksheet, creating the worksheet if it is missing.

    Args:
        conn_sheet: Opened spreadsheet object providing ``worksheet`` and
            ``add_worksheet``.
        spreadsheet_name: Title of the worksheet to append to or create.
        df: pandas DataFrame to write.
    """
    from gspread.exceptions import WorksheetNotFound

    # Only the lookup is guarded, so API errors raised while appending are no
    # longer mistaken for "worksheet does not exist".
    try:
        worksheet = conn_sheet.worksheet(spreadsheet_name)
    except WorksheetNotFound:
        worksheet = conn_sheet.add_worksheet(title=spreadsheet_name, rows=100, cols=100)
        set_with_dataframe(worksheet=worksheet, dataframe=df, include_index=False,
                           include_column_header=True, resize=True)
        return

    # Anchor on the first row after the existing grid, taken BEFORE add_rows;
    # the original used row_count after growing, which either overwrote the
    # last row or ran past the grid limits.
    first_new_row = worksheet.row_count + 1
    worksheet.add_rows(df.shape[0])
    set_with_dataframe(worksheet=worksheet, row=first_new_row, dataframe=df,
                       include_index=False, include_column_header=False,
                       resize=False)
Improving upon the solution given by Darsh Shukla
def append_df_to_sheet(g_spread_name, sheet_name, df):
    """Append ``df`` (without header) below the last row of a worksheet.

    Args:
        g_spread_name: Title of the spreadsheet to open.
        sheet_name: Name of the worksheet inside that spreadsheet.
        df: pandas DataFrame to append.

    Returns:
        True once the data has been written.
    """
    # The function takes no `self`, so the module-level gspread client `gc`
    # is used instead of `self.gc` (which raised NameError).
    spreadsheet = gc.open(g_spread_name)
    wks = spreadsheet.worksheet(sheet_name)  # get the sheet
    append_row = wks.row_count + 1  # position where to append
    wks.add_rows(df.shape[0])  # add rows to avoid API range error
    gd.set_with_dataframe(worksheet=spreadsheet.worksheet(sheet_name),  # refresh the sheet
                          dataframe=df,
                          include_index=False,
                          include_column_header=False,
                          row=append_row,
                          resize=False)
    return True
This avoids the error
gspread.exceptions.APIError: {'code': 400, 'message': 'Range (test!A999:B1001) exceeds grid limits. Max rows: 998, max columns: 26', 'status': 'INVALID_ARGUMENT'}