Python XLRD use Range
Question:
I want to assign an excel range to a variable:
import xlrd
# Path separators restored; a raw string keeps the backslashes literal.
file = r"C:\Users\Lisa\Desktop\Frank\export.XLSX"
book = xlrd.open_workbook(file)
sheet = book.sheet_by_index(0)
# The built-in range() only accepts integers, so passing an Excel-style
# reference here is what raises the TypeError shown below.
data = [range("A3:D7")]
, but I get an error:
data = [range("A3:D7")]
TypeError: 'str' object cannot be interpreted as an integer
Any ideas?
Answers:
You could use the following to extract a block of cells from an XLS file:
from itertools import product
import xlrd
# Open the workbook and take its first worksheet.
workbook = xlrd.open_workbook(r"input.xls")
sheet = workbook.sheet_by_index(0)

# Reverse lookup table: Excel cell name -> zero-based (column, row).
# Only cells within the sheet's nrows x ncols get an entry.
cell_refs = {}
for row_index in range(sheet.nrows):
    for col_index in range(sheet.ncols):
        cell_refs[xlrd.cellname(row_index, col_index)] = (col_index, row_index)
def get_cell_range(start_col, start_row, end_col, end_row):
    """Return the rectangular block of cells bounded by the given indices.

    All four arguments are zero-based and inclusive. The result is a list
    with one entry per row, each entry being what ``sheet.row_slice``
    returns for the requested columns of that row. Reads the module-level
    ``sheet``.
    """
    rows = []
    # Both range() and row_slice() take an exclusive end, hence the +1s.
    for row in range(start_row, end_row + 1):
        rows.append(
            sheet.row_slice(row, start_colx=start_col, end_colx=end_col + 1)
        )
    return rows
def get_cells(excel_range):
    """Return the cells covered by an Excel-style range such as "A3:D7".

    Both corner names are looked up in the module-level ``cell_refs``
    table, so a name that is not in the table raises ``KeyError``.
    """
    first_name, last_name = excel_range.split(":")
    first_col, first_row = cell_refs[first_name]
    last_col, last_row = cell_refs[last_name]
    return get_cell_range(first_col, first_row, last_col, last_row)
# Numeric form: zero-based, inclusive column/row indices.
data = get_cell_range(start_col=0, start_row=2, end_col=3, end_row=6)  # A3 to D7
print(data)

# Same block again, this time addressed with an Excel range string.
data = get_cells(excel_range="A3:D7")
print(data)
xlrd works with zero-based column and row numbers instead of Excel cell names. It can only convert numeric row/column indices into an A1-style cell name, using xlrd.cellname(), not the other way around. A workaround is to build your own reverse lookup table using that function.
Note: older versions of xlrd (before 2.0) also supported Excel XLSX files; current versions only read XLS files. For XLSX files, consider using openpyxl or pandas.
I had the same question and solved it this way.
First I wrote an inverse of xlrd.cellname(), and then a function that uses it to get the values of a range:
import xlrd
import re
def parse_cell(cell):
    """Convert an Excel A1-style cell name into zero-based indices.

    Args:
        cell: Cell reference made of column letters followed by a 1-based
            row number, e.g. "A3", "d7" or "AA10". Letters are
            case-insensitive.

    Returns:
        A ``(row, col)`` tuple of zero-based indices, e.g. "A3" -> (2, 0)
        and "AA1" -> (0, 26).

    Raises:
        ValueError: If ``cell`` is not letters followed by digits, or if
            the row number is 0 (Excel rows start at 1).
    """
    # fullmatch rejects trailing junk such as "A3:D7" instead of silently
    # parsing only the "A3" prefix.
    m = re.fullmatch(r"([a-zA-Z]+)([0-9]+)", cell)
    if m is None:
        raise ValueError("Invalid cell reference: {!r}".format(cell))

    number_row = int(m.group(2)) - 1
    if number_row < 0:
        raise ValueError("Invalid cell reference: {!r}".format(cell))

    # Column letters form a bijective base-26 number (A=1 ... Z=26, AA=27),
    # so each letter shifts the running total by one base-26 digit.
    number_col = 0
    for char in m.group(1).upper():
        number_col = number_col * 26 + (ord(char) - ord("A") + 1)

    # Convert the 1-based column number to a zero-based index.
    return (number_row, number_col - 1)
def get_cell_range_values(sheet, start_cell, end_cell):
    """Return the values of a single column between two cells, inclusive.

    The column and first row come from ``start_cell``; only the row of
    ``end_cell`` is used, so the result is whatever ``sheet.col_values``
    returns for that one column, not a matrix.
    """
    first_row, column = parse_cell(start_cell)
    last_row = parse_cell(end_cell)[0]
    # end_rowx is exclusive, so add one to include the end cell's row.
    return sheet.col_values(
        colx=column, start_rowx=first_row, end_rowx=last_row + 1
    )
# Open the first worksheet and read column A from row 3 to row 130.
# Path separators restored; a raw string keeps the backslashes literal.
file = r"C:\Users\Lisa\Desktop\Frank\export.XLSX"
book = xlrd.open_workbook(file)
sheet = book.sheet_by_index(0)
data = get_cell_range_values(sheet, "A3", "A130")
Keep in mind that this only returns linear (single-column) ranges; it does not build a matrix.
I want to assign an excel range to a variable:
import xlrd
# Path separators restored; a raw string keeps the backslashes literal.
file = r"C:\Users\Lisa\Desktop\Frank\export.XLSX"
book = xlrd.open_workbook(file)
sheet = book.sheet_by_index(0)
# The built-in range() only accepts integers, so passing an Excel-style
# reference here is what raises the TypeError shown below.
data = [range("A3:D7")]
, but I get an error:
data = [range("A3:D7")]
TypeError: 'str' object cannot be interpreted as an integer
Any ideas?
You could use the following to extract a block of cells from an XLS file:
from itertools import product
import xlrd
# Open the workbook and take its first worksheet.
workbook = xlrd.open_workbook(r"input.xls")
sheet = workbook.sheet_by_index(0)

# Reverse lookup table: Excel cell name -> zero-based (column, row).
# Only cells within the sheet's nrows x ncols get an entry.
cell_refs = {}
for row_index in range(sheet.nrows):
    for col_index in range(sheet.ncols):
        cell_refs[xlrd.cellname(row_index, col_index)] = (col_index, row_index)
def get_cell_range(start_col, start_row, end_col, end_row):
    """Return the rectangular block of cells bounded by the given indices.

    All four arguments are zero-based and inclusive. The result is a list
    with one entry per row, each entry being what ``sheet.row_slice``
    returns for the requested columns of that row. Reads the module-level
    ``sheet``.
    """
    rows = []
    # Both range() and row_slice() take an exclusive end, hence the +1s.
    for row in range(start_row, end_row + 1):
        rows.append(
            sheet.row_slice(row, start_colx=start_col, end_colx=end_col + 1)
        )
    return rows
def get_cells(excel_range):
    """Return the cells covered by an Excel-style range such as "A3:D7".

    Both corner names are looked up in the module-level ``cell_refs``
    table, so a name that is not in the table raises ``KeyError``.
    """
    first_name, last_name = excel_range.split(":")
    first_col, first_row = cell_refs[first_name]
    last_col, last_row = cell_refs[last_name]
    return get_cell_range(first_col, first_row, last_col, last_row)
# Numeric form: zero-based, inclusive column/row indices.
data = get_cell_range(start_col=0, start_row=2, end_col=3, end_row=6)  # A3 to D7
print(data)

# Same block again, this time addressed with an Excel range string.
data = get_cells(excel_range="A3:D7")
print(data)
xlrd works with zero-based column and row numbers instead of Excel cell names. It can only convert numeric row/column indices into an A1-style cell name, using xlrd.cellname(), not the other way around. A workaround is to build your own reverse lookup table using that function.
Note: older versions of xlrd (before 2.0) also supported Excel XLSX files; current versions only read XLS files. For XLSX files, consider using openpyxl or pandas.
I had the same question and solved it this way.
First I wrote an inverse of xlrd.cellname(), and then a function that uses it to get the values of a range:
import xlrd
import re
def parse_cell(cell):
    """Convert an Excel A1-style cell name into zero-based indices.

    Args:
        cell: Cell reference made of column letters followed by a 1-based
            row number, e.g. "A3", "d7" or "AA10". Letters are
            case-insensitive.

    Returns:
        A ``(row, col)`` tuple of zero-based indices, e.g. "A3" -> (2, 0)
        and "AA1" -> (0, 26).

    Raises:
        ValueError: If ``cell`` is not letters followed by digits, or if
            the row number is 0 (Excel rows start at 1).
    """
    # fullmatch rejects trailing junk such as "A3:D7" instead of silently
    # parsing only the "A3" prefix.
    m = re.fullmatch(r"([a-zA-Z]+)([0-9]+)", cell)
    if m is None:
        raise ValueError("Invalid cell reference: {!r}".format(cell))

    number_row = int(m.group(2)) - 1
    if number_row < 0:
        raise ValueError("Invalid cell reference: {!r}".format(cell))

    # Column letters form a bijective base-26 number (A=1 ... Z=26, AA=27),
    # so each letter shifts the running total by one base-26 digit.
    number_col = 0
    for char in m.group(1).upper():
        number_col = number_col * 26 + (ord(char) - ord("A") + 1)

    # Convert the 1-based column number to a zero-based index.
    return (number_row, number_col - 1)
def get_cell_range_values(sheet, start_cell, end_cell):
    """Return the values of a single column between two cells, inclusive.

    The column and first row come from ``start_cell``; only the row of
    ``end_cell`` is used, so the result is whatever ``sheet.col_values``
    returns for that one column, not a matrix.
    """
    first_row, column = parse_cell(start_cell)
    last_row = parse_cell(end_cell)[0]
    # end_rowx is exclusive, so add one to include the end cell's row.
    return sheet.col_values(
        colx=column, start_rowx=first_row, end_rowx=last_row + 1
    )
# Open the first worksheet and read column A from row 3 to row 130.
# Path separators restored; a raw string keeps the backslashes literal.
file = r"C:\Users\Lisa\Desktop\Frank\export.XLSX"
book = xlrd.open_workbook(file)
sheet = book.sheet_by_index(0)
data = get_cell_range_values(sheet, "A3", "A130")
Keep in mind that this only returns linear (single-column) ranges; it does not build a matrix.