What's the difference between __iter__ and __getitem__?
Question:
This happens in Python 2.7.6 and 3.3.3 for me. When I define a class like this
class foo:
    # Defines only __getitem__ (no __iter__) and has no `return`,
    # so every lookup prints its arguments and evaluates to None.
    def __getitem__(self, *args):
        print(*args)
And then try to iterate (and what I thought would call iter) on an instance,
bar = foo()
# `foo` has no __iter__, so the loop drives __getitem__ with 0, 1, 2, ...
for i in bar:
    print(i)
it just counts up by one for the args and prints None forever. Is this intentional as far as the language design is concerned?
Sample output
0
None
1
None
2
None
3
None
4
None
5
None
6
None
7
None
8
None
9
None
10
None
Answers:
__iter__
is the preferred way to iterate through an iterable object. If it is not defined the interpreter will try to simulate its behavior using __getitem__
. Take a look here
Yes, this is an intended design. It is documented, well-tested, and relied upon by sequence types such as str.
The __getitem__ version is a legacy before Python had modern iterators. The idea was that any sequence (something that is indexable and has a length) would be automatically iterable using the series s[0], s[1], s[2], … until IndexError or StopIteration is raised.
In Python 2.7 for example, strings are iterable because of the __getitem__ method (the str type does not have an __iter__ method).
In contrast, the iterator protocol lets any class be iterable without necessarily being indexable (dicts and sets for example).
Here is how to make an iterable class using the legacy style for sequences:
>>> class A:
def __getitem__(self, index):
if index >= 10:
raise IndexError
return index * 111
>>> list(A())
[0, 111, 222, 333, 444, 555, 666, 777, 888, 999]
Here is how to make an iterable using the __iter__ approach:
>>> class B:
def __iter__(self):
yield 10
yield 20
yield 30
>>> list(B())
[10, 20, 30]
For those who are interested in the details, the relevant code is in Objects/iterobject.c:
/* Produce the next item of a sequence iterator by indexing the underlying
 * sequence at the iterator's current position. Returns NULL when there is
 * nothing more to return or when fetching the item failed. */
static PyObject *
iter_iternext(PyObject *iterator)
{
    seqiterobject *it;
    PyObject *seq;
    PyObject *result;

    assert(PySeqIter_Check(iterator));
    it = (seqiterobject *)iterator;
    seq = it->it_seq;
    /* it_seq is reset to NULL below once the end has been reached. */
    if (seq == NULL)
        return NULL;

    result = PySequence_GetItem(seq, it->it_index);
    if (result != NULL) {
        /* Success: advance so the next call asks for the following index. */
        it->it_index++;
        return result;
    }
    /* IndexError or StopIteration marks the end of the sequence: clear the
     * pending exception and drop the reference to the sequence. Any other
     * exception is left set. */
    if (PyErr_ExceptionMatches(PyExc_IndexError) ||
        PyErr_ExceptionMatches(PyExc_StopIteration))
    {
        PyErr_Clear();
        Py_DECREF(seq);
        it->it_seq = NULL;
    }
    return NULL;
}
and in Objects/abstract.c:
/* Return nonzero if `s` is treated as a sequence, 0 otherwise. */
int
PySequence_Check(PyObject *s)
{
    if (s == NULL)
        return 0;
    /* Instances are judged by whether they expose __getitem__. */
    if (PyInstance_Check(s))
        return PyObject_HasAttrString(s, "__getitem__");
    /* Dicts are explicitly excluded. */
    if (PyDict_Check(s))
        return 0;
    /* Otherwise the type must provide the sq_item slot. */
    return s->ob_type->tp_as_sequence &&
        s->ob_type->tp_as_sequence->sq_item != NULL;
}
To get the result you are expecting, you need to have a data element with limited len and return each in sequence:
class foo:
    """Expose a fixed three-element list through ``__getitem__`` only."""

    def __init__(self):
        self.data = [10, 11, 12]

    def __getitem__(self, arg):
        # Announce the lookup, then delegate to the list; the list's own
        # IndexError for an out-of-range ``arg`` is what ends iteration.
        print('__getitem__ called with arg {}'.format(arg))
        backing = self.data
        return backing[arg]


bar = foo()
for i in iter(bar):
    print('__getitem__ returned {}'.format(i))
Prints:
__getitem__ called with arg 0
__getitem__ returned 10
__getitem__ called with arg 1
__getitem__ returned 11
__getitem__ called with arg 2
__getitem__ returned 12
__getitem__ called with arg 3
Or you can signal the end of the ‘sequence’ by raising IndexError
(although StopIteration
works as well…):
class foo:
    """Yield 0 through 3 via ``__getitem__``, then signal the end."""

    def __getitem__(self, arg):
        print('__getitem__ called with arg {}'.format(arg))
        # Guard clause: anything past 3 is the end of the 'sequence'.
        if arg > 3:
            raise IndexError
        return arg


bar = foo()
for i in iter(bar):
    print('__getitem__ returned {}'.format(i))
Prints:
__getitem__ called with arg 0
__getitem__ returned 0
__getitem__ called with arg 1
__getitem__ returned 1
__getitem__ called with arg 2
__getitem__ returned 2
__getitem__ called with arg 3
__getitem__ returned 3
__getitem__ called with arg 4
The for loop is expecting either IndexError
or StopIteration
to signal the end of the sequence.
You asked,
What is the difference between __getitem__
and __iter__
?
What is __getitem__()
?
Suppose that arr
is a list. Maybe arr = ["A", "B", "C"]
__getitem__
is a function which is executed when you write:
elem = arr[0]
elem = arr[1]
elem = arr[2]
- etc…
Suppose that we have lyst = [0, 111, 222, 333]
Within each row of the table below, the three columns are equivalent in behavior:
+---------------------+------------------------------+--------------+
| `elem = lyst[0]` | `elem = lyst.__getitem__(0)` | `elem = 0` |
| `elem = lyst[1]` | `elem = lyst.__getitem__(1)` | `elem = 111` |
| `elem = lyst[2]` | `elem = lyst.__getitem__(2)` | `elem = 222` |
| `elem = lyst[3]` | `elem = lyst.__getitem__(3)` | `elem = 333` |
+---------------------+------------------------------+--------------+
You can define your own __getitem__
method in any of the classes you write. An example of a custom-made class with a __getitem__
method is shown below:
from functools import *
from copy import *
class BaseArray:
    """Empty base class so ``Array`` overloads can name it in annotations."""


class Array(BaseArray):
    """A small list-backed container illustrating ``__getitem__``.

    The constructor accepts another ``Array``, a ``list``, or any other
    iterable, and always stores an independent deep copy of the elements.
    """

    def __init__(self, obj: object):
        # Every `_init_helper` overload returns the list to store.
        self._lyst = self._init_helper(obj)
        # `lvi` == `last valid index`
        self._lvi = len(self._lyst) - 1

    # NOTE: `self` is deliberately left unannotated in the overloads below.
    # `register` reads the FIRST annotation it finds to pick the dispatch
    # type, so annotating `self` would register every overload for it.
    @singledispatchmethod
    def _init_helper(self, obj: object) -> list:
        """Fallback overload: build the backing list from any iterable.

        Raises:
            NotImplementedError: if ``obj`` cannot be iterated.
        """
        try:
            items = list(obj)
        except TypeError:
            raise NotImplementedError(
                "cannot build an Array from {}".format(type(obj).__name__)
            ) from None
        return deepcopy(items)

    # The overloads are named `_` so that they do not rebind
    # `_init_helper` inside the class body.
    @_init_helper.register
    def _(self, arr: BaseArray) -> list:
        """Overload for copying another array."""
        return deepcopy(arr._lyst)

    @_init_helper.register
    def _(self, lyst: list) -> list:
        """Overload for copying a plain list."""
        return deepcopy(lyst)

    def __getitem__(self, index: int):
        """Return the element at ``index``; runs for ``arr[index]``.

        Raises:
            IndexError: if ``index`` is outside the array.
        """
        if index > self._lvi:
            # `lvi` == `last valid index`
            raise IndexError("index is too large")
        return self._lyst[index]

    def __setitem__(self, index: int, value):
        """Store ``value`` at ``index``; runs for ``arr[index] = value``.

        Raises:
            IndexError: if ``index`` is outside the array.
        """
        if index > self._lvi:
            # `lvi` == `last valid index`
            raise IndexError("index is too large")
        self._lyst[index] = value

    def __iter__(self):
        """Return an iterator over the stored elements."""
        return iter(self._lyst)
You might or might not care about C++, but in C++ __getitem__
is known as operator[]
.
Many different languages have something like python’s __getitem__
method. If you become comfortable with the inner workings of __getitem__
, it will help you write code in other programming languages as well.
// This code is written in C++, not python
// Returns a reference to the element at `index`, so the caller can both
// read it and assign to it. Throws std::invalid_argument when `index`
// is outside the array.
int& Array::operator[](int index)
{
    // In C++ `this` is like the `self` parameter in python
    if (index < 0 || index >= this->size) {
        // The `throw` keyword from C++
        // is known as `raise` in python.
        // Nothing after a `throw` runs, so no `exit` call is needed.
        throw std::invalid_argument( "index is out of range" );
    }
    return this->ptr[index];
}
What is __iter__()
?
Like __getitem__
, __iter__()
is a special ("dunder") method that you define on a class; Python calls it on instances of that class.
__iter__()
is usually used in for
loops.
Suppose that cookie_jar
is a list
like the following:
["oatmeal 1", "chocolate chip 1", "oatmeal 2"]
The following two pieces of code are syntactically different, but are semantically equivalent:
+------------------------------+-----------------------------------------+
| | it = iter(cookie_jar) |
| for cookie in cookie_jar: | while True: |
| print(cookie) | try: |
| | cookie = next(it) |
| | except StopIteration: |
| | break |
| | print(cookie) |
+------------------------------+-----------------------------------------+
Also, both of the loops shown above do the same thing as the following:
cookie_jar = ["oatmeal 1", "chocolate chip 1", "oatmeal 2"]
it = cookie_jar.__iter__()
while True:
    try:
        cookie = it.__next__()
    # A real `for` loop stops on StopIteration only. An IndexError escaping
    # from `__next__` would propagate, so it must not be swallowed here.
    except StopIteration:
        break
    print(cookie)
Many python container classes, such as list
, tuple
, etc… accept any iterable as input.
That is, you can pass anything as input to __init__
provided that that thing has an __iter__
method (or, as a fallback, a __getitem__ method that accepts the indices 0, 1, 2, …).
# Three different kinds of iterable holding the same three letters.
tuppy_the_tuple = ("A", "B", "C")
lizzy_the_list = ["A", "B", "C"]
steve_the_string = "ABC"
chests = [tuppy_the_tuple, lizzy_the_list, steve_the_string]
for chest in chests:
    # `list` is handed a tuple, a list and a string in turn.
    larry_the_new_list = list(chest)
    print(larry_the_new_list)
# treasure chests are an example of a "container"
# __________
# /____;;___
# | / /
# `. ())oo() .
# |( ()*^^()^
# | |---------|
# | )) |
# |_________|
An example of how to define your own __iter__
method is shown below:
class BaseCookieJar:
    """Empty base class so ``CookieIterator`` can annotate its jar argument."""


class CookieJar(BaseCookieJar):
    """A container whose ``__iter__`` hands out a hand-written iterator."""

    class CookieIterator:
        """Walk a jar front to back, remembering the next position."""

        def __init__(self, cookie_jar: BaseCookieJar):
            self._cookie_jar = cookie_jar
            self._index = 0

        def __iter__(self):
            # An iterator returns itself so it can be used in a `for` loop.
            return self

        def __next__(self):
            """Return the next cookie, or raise StopIteration at the end."""
            if self._index >= len(self._cookie_jar):
                raise StopIteration()
            cookie = self._cookie_jar[self._index]
            self._index += 1
            return cookie
    # End of Iterator class
    # Resume `CookieJar` class

    def __init__(self, *ignore_me, **ignore_me_too):
        # Any extra arguments are accepted and ignored.
        self._lyst = [0, 1, 2, 3, 4]

    def __iter__(self):
        # A fresh iterator per call, so nested loops do not interfere.
        return type(self).CookieIterator(self)

    def __len__(self):
        return len(self._lyst)

    def __getitem__(self, index):
        """Return the cookie at ``index``; `CookieIterator` relies on this."""
        return self._lyst[index]
What happens if you define your own __getitem__()
method but do not define a __iter__()
method?
Suppose that you write a class with a __getitem__()
method, but no __iter__()
method.
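Your class does not gain an __iter__()
method, but iter() still works on it: Python falls back to a built-in iterator that calls __getitem__() with 0, 1, 2, … We can emulate that fallback implementation of __iter__
with the following analog: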
# Build a throwaway class that only supports indexing, ask `iter()` for an
# iterator over one of its instances, and keep that iterator's type.
_temp_class = type("TempClass", (object,), {"__getitem__": lambda *args: None})
_temp_iterator = iter(_temp_class())
IteratorClass = type(_temp_iterator)
del _temp_class, _temp_iterator
class ClassChest:
    """
    Something like this happens when you have
        __getitem__()
    but you do not have...
        __iter__()
    """
    # class ChestIterator(IteratorClass):
    #     We are not allowed to subclass `iterator`
    #     TypeError: type 'iterator' is not an acceptable base type
    class ChestIterator:
        """Ask the chest for index 0, 1, 2, ... until it raises IndexError."""

        def __init__(self, chest):
            self._chest = chest
            self._idx = 0

        def __next__(self):
            try:
                # Same as: gold_coin = self._chest.__getitem__(self._idx)
                gold_coin = self._chest[self._idx]
            except IndexError:
                # `__next__` must end with StopIteration; a `for` loop lets
                # any other exception, IndexError included, propagate.
                raise StopIteration from None
            self._idx += 1
            return gold_coin

        def __iter__(self):
            return self
    # End of Iterator
    # Resume Class Chest class

    def __iter__(self):
        return type(self).ChestIterator(self)

    def __getitem__(self, idx: int):
        if idx > 4:
            raise IndexError
        return idx


instance_chest = ClassChest()

for shiny_object in instance_chest:
    print("mi treazure is == ", shiny_object)

# The `for` loop above is shorthand for the loop below. The iterator is
# created once, before the loop; creating it inside would restart at 0
# on every pass and never finish.
# Same as: iterator = instance_chest.__iter__()
iterator = iter(instance_chest)
while True:
    try:
        # Same as: shiny_object = iterator.__next__()
        shiny_object = next(iterator)
    except StopIteration:
        break
    print("mi treazure is == ", shiny_object)
Each of the two loops above prints:
mi treazure is == 0
mi treazure is == 1
mi treazure is == 2
mi treazure is == 3
mi treazure is == 4
Some notes about your code
You wrote,
class foo:
    def __getitem__(self, *args):
        print(*args)
        # Falls off the end without a `return`, so the result is None.


bar = foo()
for i in bar:
    print(i)
In your code, None
is printed because you failed to specify the return value of __getitem__
Both passages of code in the following table are equivalent:
+-------------------------------+-------------------------------+
| no `return` statement | `return None` |
+-------------------------------+-------------------------------+
| def __getitem__(self, *args): | def __getitem__(self, *args): |
| print(*args) | print(*args) |
| | return None |
+-------------------------------+-------------------------------+
Let us modify your code a bit:
class Klass:
    """Indexable class whose ``__getitem__`` echoes back whatever index it gets."""

    def __getitem__(self, idx:int):
        print("Inside __getitem__ `idx` is ", idx)
        return idx
        # WARNING: iter(self).__next__() can cause
        # infinite loops if we do not do one of the following:
        # * raise `StopIteration`
        # * raise `IndexError`


obj = Klass()
# Deliberately never terminates: __getitem__ above never raises.
for item in obj:
    print("Inside of the `for` loop `item` is:", item)
We will have an infinite loop:
[...]
Inside of the `for` loop `item` is: 41875
Inside __getitem__ `idx` is 41876
Inside of the `for` loop `item` is: 41876
Inside __getitem__ `idx` is 41877
Inside of the `for` loop `item` is: 41877
Inside __getitem__ `idx` is 41878
Inside of the `for` loop `item` is: 41878
We can stop the looping by raising a StopIteration
exception.
class Klass:
    """Indexable class that yields 0, 1, 2, 3 and then ends iteration."""

    def __getitem__(self, idx:int):
        # Raising StopIteration from here is what stops the loop below.
        past_the_end = idx > 3
        if past_the_end:
            raise StopIteration()
        print("Inside __getitem__ `idx` is ", idx)
        return idx


obj = Klass()
for item in iter(obj):
    print("Inside of the `for` loop `item` is:", item)
This happens in Python 2.7.6 and 3.3.3 for me. When I define a class like this
class foo:
    # Defines only __getitem__ (no __iter__) and has no `return`,
    # so every lookup prints its arguments and evaluates to None.
    def __getitem__(self, *args):
        print(*args)
And then try to iterate (and what I thought would call iter) on an instance,
bar = foo()
# `foo` has no __iter__, so the loop drives __getitem__ with 0, 1, 2, ...
for i in bar:
    print(i)
it just counts up by one for the args and prints None forever. Is this intentional as far as the language design is concerned?
Sample output
0
None
1
None
2
None
3
None
4
None
5
None
6
None
7
None
8
None
9
None
10
None
__iter__
is the preferred way to iterate through an iterable object. If it is not defined the interpreter will try to simulate its behavior using __getitem__
. Take a look here
Yes, this is an intended design. It is documented, well-tested, and relied upon by sequence types such as str.
The __getitem__ version is a legacy before Python had modern iterators. The idea was that any sequence (something that is indexable and has a length) would be automatically iterable using the series s[0], s[1], s[2], … until IndexError or StopIteration is raised.
In Python 2.7 for example, strings are iterable because of the __getitem__ method (the str type does not have an __iter__ method).
In contrast, the iterator protocol lets any class be iterable without necessarily being indexable (dicts and sets for example).
Here is how to make an iterable class using the legacy style for sequences:
>>> class A:
def __getitem__(self, index):
if index >= 10:
raise IndexError
return index * 111
>>> list(A())
[0, 111, 222, 333, 444, 555, 666, 777, 888, 999]
Here is how to make an iterable using the __iter__ approach:
>>> class B:
def __iter__(self):
yield 10
yield 20
yield 30
>>> list(B())
[10, 20, 30]
For those who are interested in the details, the relevant code is in Objects/iterobject.c:
/* Produce the next item of a sequence iterator by indexing the underlying
 * sequence at the iterator's current position. Returns NULL when there is
 * nothing more to return or when fetching the item failed. */
static PyObject *
iter_iternext(PyObject *iterator)
{
    seqiterobject *it;
    PyObject *seq;
    PyObject *result;

    assert(PySeqIter_Check(iterator));
    it = (seqiterobject *)iterator;
    seq = it->it_seq;
    /* it_seq is reset to NULL below once the end has been reached. */
    if (seq == NULL)
        return NULL;

    result = PySequence_GetItem(seq, it->it_index);
    if (result != NULL) {
        /* Success: advance so the next call asks for the following index. */
        it->it_index++;
        return result;
    }
    /* IndexError or StopIteration marks the end of the sequence: clear the
     * pending exception and drop the reference to the sequence. Any other
     * exception is left set. */
    if (PyErr_ExceptionMatches(PyExc_IndexError) ||
        PyErr_ExceptionMatches(PyExc_StopIteration))
    {
        PyErr_Clear();
        Py_DECREF(seq);
        it->it_seq = NULL;
    }
    return NULL;
}
and in Objects/abstract.c:
/* Return nonzero if `s` is treated as a sequence, 0 otherwise. */
int
PySequence_Check(PyObject *s)
{
    if (s == NULL)
        return 0;
    /* Instances are judged by whether they expose __getitem__. */
    if (PyInstance_Check(s))
        return PyObject_HasAttrString(s, "__getitem__");
    /* Dicts are explicitly excluded. */
    if (PyDict_Check(s))
        return 0;
    /* Otherwise the type must provide the sq_item slot. */
    return s->ob_type->tp_as_sequence &&
        s->ob_type->tp_as_sequence->sq_item != NULL;
}
To get the result you are expecting, you need to have a data element with limited len and return each in sequence:
class foo:
    """Expose a fixed three-element list through ``__getitem__`` only."""

    def __init__(self):
        self.data = [10, 11, 12]

    def __getitem__(self, arg):
        # Announce the lookup, then delegate to the list; the list's own
        # IndexError for an out-of-range ``arg`` is what ends iteration.
        print('__getitem__ called with arg {}'.format(arg))
        backing = self.data
        return backing[arg]


bar = foo()
for i in iter(bar):
    print('__getitem__ returned {}'.format(i))
Prints:
__getitem__ called with arg 0
__getitem__ returned 10
__getitem__ called with arg 1
__getitem__ returned 11
__getitem__ called with arg 2
__getitem__ returned 12
__getitem__ called with arg 3
Or you can signal the end of the ‘sequence’ by raising IndexError
(although StopIteration
works as well…):
class foo:
    """Yield 0 through 3 via ``__getitem__``, then signal the end."""

    def __getitem__(self, arg):
        print('__getitem__ called with arg {}'.format(arg))
        # Guard clause: anything past 3 is the end of the 'sequence'.
        if arg > 3:
            raise IndexError
        return arg


bar = foo()
for i in iter(bar):
    print('__getitem__ returned {}'.format(i))
Prints:
__getitem__ called with arg 0
__getitem__ returned 0
__getitem__ called with arg 1
__getitem__ returned 1
__getitem__ called with arg 2
__getitem__ returned 2
__getitem__ called with arg 3
__getitem__ returned 3
__getitem__ called with arg 4
The for loop is expecting either IndexError
or StopIteration
to signal the end of the sequence.
You asked,
What is the difference between
__getitem__
and __iter__
?
What is __getitem__()
?
Suppose that arr
is a list. Maybe arr = ["A", "B", "C"]
__getitem__
is a function which is executed when you write:
elem = arr[0]
elem = arr[1]
elem = arr[2]
- etc…
Suppose that we have lyst = [0, 111, 222, 333]
Within each row of the table below, the three columns are equivalent in behavior:
+---------------------+------------------------------+--------------+
| `elem = lyst[0]` | `elem = lyst.__getitem__(0)` | `elem = 0` |
| `elem = lyst[1]` | `elem = lyst.__getitem__(1)` | `elem = 111` |
| `elem = lyst[2]` | `elem = lyst.__getitem__(2)` | `elem = 222` |
| `elem = lyst[3]` | `elem = lyst.__getitem__(3)` | `elem = 333` |
+---------------------+------------------------------+--------------+
You can define your own __getitem__
method in any of the classes you write. An example of a custom-made class with a __getitem__
method is shown below:
from functools import *
from copy import *
class BaseArray:
    """Empty base class so ``Array`` overloads can name it in annotations."""


class Array(BaseArray):
    """A small list-backed container illustrating ``__getitem__``.

    The constructor accepts another ``Array``, a ``list``, or any other
    iterable, and always stores an independent deep copy of the elements.
    """

    def __init__(self, obj: object):
        # Every `_init_helper` overload returns the list to store.
        self._lyst = self._init_helper(obj)
        # `lvi` == `last valid index`
        self._lvi = len(self._lyst) - 1

    # NOTE: `self` is deliberately left unannotated in the overloads below.
    # `register` reads the FIRST annotation it finds to pick the dispatch
    # type, so annotating `self` would register every overload for it.
    @singledispatchmethod
    def _init_helper(self, obj: object) -> list:
        """Fallback overload: build the backing list from any iterable.

        Raises:
            NotImplementedError: if ``obj`` cannot be iterated.
        """
        try:
            items = list(obj)
        except TypeError:
            raise NotImplementedError(
                "cannot build an Array from {}".format(type(obj).__name__)
            ) from None
        return deepcopy(items)

    # The overloads are named `_` so that they do not rebind
    # `_init_helper` inside the class body.
    @_init_helper.register
    def _(self, arr: BaseArray) -> list:
        """Overload for copying another array."""
        return deepcopy(arr._lyst)

    @_init_helper.register
    def _(self, lyst: list) -> list:
        """Overload for copying a plain list."""
        return deepcopy(lyst)

    def __getitem__(self, index: int):
        """Return the element at ``index``; runs for ``arr[index]``.

        Raises:
            IndexError: if ``index`` is outside the array.
        """
        if index > self._lvi:
            # `lvi` == `last valid index`
            raise IndexError("index is too large")
        return self._lyst[index]

    def __setitem__(self, index: int, value):
        """Store ``value`` at ``index``; runs for ``arr[index] = value``.

        Raises:
            IndexError: if ``index`` is outside the array.
        """
        if index > self._lvi:
            # `lvi` == `last valid index`
            raise IndexError("index is too large")
        self._lyst[index] = value

    def __iter__(self):
        """Return an iterator over the stored elements."""
        return iter(self._lyst)
You might or might not care about C++, but in C++ __getitem__
is known as operator[]
.
Many different languages have something like python’s __getitem__
method. If you become comfortable with the inner workings of __getitem__
, it will help you write code in other programming languages as well.
// This code is written in C++, not python
// Returns a reference to the element at `index`, so the caller can both
// read it and assign to it. Throws std::invalid_argument when `index`
// is outside the array.
int& Array::operator[](int index)
{
    // In C++ `this` is like the `self` parameter in python
    if (index < 0 || index >= this->size) {
        // The `throw` keyword from C++
        // is known as `raise` in python.
        // Nothing after a `throw` runs, so no `exit` call is needed.
        throw std::invalid_argument( "index is out of range" );
    }
    return this->ptr[index];
}
What is __iter__()
?
Like __getitem__
, __iter__()
is a special ("dunder") method that you define on a class; Python calls it on instances of that class.
__iter__()
is usually used in for
loops.
Suppose that cookie_jar
is a list
like the following:
["oatmeal 1", "chocolate chip 1", "oatmeal 2"]
The following two pieces of code are syntactically different, but are semantically equivalent:
+------------------------------+-----------------------------------------+
| | it = iter(cookie_jar) |
| for cookie in cookie_jar: | while True: |
| print(cookie) | try: |
| | cookie = next(it) |
| | except StopIteration: |
| | break |
| | print(cookie) |
+------------------------------+-----------------------------------------+
Also, both of the loops shown above do the same thing as the following:
cookie_jar = ["oatmeal 1", "chocolate chip 1", "oatmeal 2"]
it = cookie_jar.__iter__()
while True:
    try:
        cookie = it.__next__()
    # A real `for` loop stops on StopIteration only. An IndexError escaping
    # from `__next__` would propagate, so it must not be swallowed here.
    except StopIteration:
        break
    print(cookie)
Many python container classes, such as list
, tuple
, etc… accept any iterable as input.
That is, you can pass anything as input to __init__
provided that that thing has an __iter__
method (or, as a fallback, a __getitem__ method that accepts the indices 0, 1, 2, …).
# Three different kinds of iterable holding the same three letters.
tuppy_the_tuple = ("A", "B", "C")
lizzy_the_list = ["A", "B", "C"]
steve_the_string = "ABC"
chests = [tuppy_the_tuple, lizzy_the_list, steve_the_string]
for chest in chests:
    # `list` is handed a tuple, a list and a string in turn.
    larry_the_new_list = list(chest)
    print(larry_the_new_list)
# treasure chests are an example of a "container"
# __________
# /____;;___
# | / /
# `. ())oo() .
# |( ()*^^()^
# | |---------|
# | )) |
# |_________|
An example of how to define your own __iter__
method is shown below:
class BaseCookieJar:
    """Empty base class so ``CookieIterator`` can annotate its jar argument."""


class CookieJar(BaseCookieJar):
    """A container whose ``__iter__`` hands out a hand-written iterator."""

    class CookieIterator:
        """Walk a jar front to back, remembering the next position."""

        def __init__(self, cookie_jar: BaseCookieJar):
            self._cookie_jar = cookie_jar
            self._index = 0

        def __iter__(self):
            # An iterator returns itself so it can be used in a `for` loop.
            return self

        def __next__(self):
            """Return the next cookie, or raise StopIteration at the end."""
            if self._index >= len(self._cookie_jar):
                raise StopIteration()
            cookie = self._cookie_jar[self._index]
            self._index += 1
            return cookie
    # End of Iterator class
    # Resume `CookieJar` class

    def __init__(self, *ignore_me, **ignore_me_too):
        # Any extra arguments are accepted and ignored.
        self._lyst = [0, 1, 2, 3, 4]

    def __iter__(self):
        # A fresh iterator per call, so nested loops do not interfere.
        return type(self).CookieIterator(self)

    def __len__(self):
        return len(self._lyst)

    def __getitem__(self, index):
        """Return the cookie at ``index``; `CookieIterator` relies on this."""
        return self._lyst[index]
What happens if you define your own __getitem__()
method but do not define a __iter__()
method?
Suppose that you write a class with a __getitem__()
method, but no __iter__()
method.
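Your class does not gain an __iter__()
method, but iter() still works on it: Python falls back to a built-in iterator that calls __getitem__() with 0, 1, 2, … We can emulate that fallback implementation of __iter__
with the following analog: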
# Build a throwaway class that only supports indexing, ask `iter()` for an
# iterator over one of its instances, and keep that iterator's type.
_temp_class = type("TempClass", (object,), {"__getitem__": lambda *args: None})
_temp_iterator = iter(_temp_class())
IteratorClass = type(_temp_iterator)
del _temp_class, _temp_iterator
class ClassChest:
    """
    Something like this happens when you have
        __getitem__()
    but you do not have...
        __iter__()
    """
    # class ChestIterator(IteratorClass):
    #     We are not allowed to subclass `iterator`
    #     TypeError: type 'iterator' is not an acceptable base type
    class ChestIterator:
        """Ask the chest for index 0, 1, 2, ... until it raises IndexError."""

        def __init__(self, chest):
            self._chest = chest
            self._idx = 0

        def __next__(self):
            try:
                # Same as: gold_coin = self._chest.__getitem__(self._idx)
                gold_coin = self._chest[self._idx]
            except IndexError:
                # `__next__` must end with StopIteration; a `for` loop lets
                # any other exception, IndexError included, propagate.
                raise StopIteration from None
            self._idx += 1
            return gold_coin

        def __iter__(self):
            return self
    # End of Iterator
    # Resume Class Chest class

    def __iter__(self):
        return type(self).ChestIterator(self)

    def __getitem__(self, idx: int):
        if idx > 4:
            raise IndexError
        return idx


instance_chest = ClassChest()

for shiny_object in instance_chest:
    print("mi treazure is == ", shiny_object)

# The `for` loop above is shorthand for the loop below. The iterator is
# created once, before the loop; creating it inside would restart at 0
# on every pass and never finish.
# Same as: iterator = instance_chest.__iter__()
iterator = iter(instance_chest)
while True:
    try:
        # Same as: shiny_object = iterator.__next__()
        shiny_object = next(iterator)
    except StopIteration:
        break
    print("mi treazure is == ", shiny_object)
Each of the two loops above prints:
mi treazure is == 0
mi treazure is == 1
mi treazure is == 2
mi treazure is == 3
mi treazure is == 4
Some notes about your code
You wrote,
class foo:
    def __getitem__(self, *args):
        print(*args)
        # Falls off the end without a `return`, so the result is None.


bar = foo()
for i in bar:
    print(i)
In your code, None
is printed because you failed to specify the return value of __getitem__
Both passages of code in the following table are equivalent:
+-------------------------------+-------------------------------+
| no `return` statement | `return None` |
+-------------------------------+-------------------------------+
| def __getitem__(self, *args): | def __getitem__(self, *args): |
| print(*args) | print(*args) |
| | return None |
+-------------------------------+-------------------------------+
Let us modify your code a bit:
class Klass:
    """Indexable class whose ``__getitem__`` echoes back whatever index it gets."""

    def __getitem__(self, idx:int):
        print("Inside __getitem__ `idx` is ", idx)
        return idx
        # WARNING: iter(self).__next__() can cause
        # infinite loops if we do not do one of the following:
        # * raise `StopIteration`
        # * raise `IndexError`


obj = Klass()
# Deliberately never terminates: __getitem__ above never raises.
for item in obj:
    print("Inside of the `for` loop `item` is:", item)
We will have an infinite loop:
[...]
Inside of the `for` loop `item` is: 41875
Inside __getitem__ `idx` is 41876
Inside of the `for` loop `item` is: 41876
Inside __getitem__ `idx` is 41877
Inside of the `for` loop `item` is: 41877
Inside __getitem__ `idx` is 41878
Inside of the `for` loop `item` is: 41878
We can stop the looping by raising a StopIteration
exception.
class Klass:
    """Indexable class that yields 0, 1, 2, 3 and then ends iteration."""

    def __getitem__(self, idx:int):
        # Raising StopIteration from here is what stops the loop below.
        past_the_end = idx > 3
        if past_the_end:
            raise StopIteration()
        print("Inside __getitem__ `idx` is ", idx)
        return idx


obj = Klass()
for item in iter(obj):
    print("Inside of the `for` loop `item` is:", item)