Destructuring dicts and objects in Python
Question:
In JavaScript, I can use destructuring to extract the properties I want from an object in a single line. For example:
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
let { id, username } = this.currentUser;
console.log(id) // 24
console.log(username) //johndoe
Do we have something similar in Python for dicts and objects? Here is how I currently do it for Python objects:
class User:
    """Plain record holding the profile fields of a single user."""

    def __init__(self, id, name, website, description, email, gender, phone_number, username):
        # Attributes are bound in parameter order so vars(self) lists them
        # in the same order as the signature.
        self.id, self.name, self.website, self.description = (
            id, name, website, description,
        )
        self.email, self.gender, self.phone_number, self.username = (
            email, gender, phone_number, username,
        )
current_user = User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "M", "+12345678", "johndoe")
# This is a pain
id = current_user.id
email = current_user.email
gender = current_user.gender
username = current_user.username
print(id, email, gender, username)
Writing those 4 lines (as mentioned in example above) vs writing a single line (as mentioned below) to fetch values I need from an object is a real pain point.
(id, email, gender, username) = current_user
Answers:
You can implement an __iter__
method to enable unpacking:
class User:
    """User built from keyword arguments that unpacks to four fixed fields."""

    def __init__(self, **data):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(data)

    def __iter__(self):
        """Yield id, email, gender and username, in that order."""
        # All four lookups happen before the first value is produced, so a
        # missing attribute fails before anything has been yielded.
        picked = []
        for field in ('id', 'email', 'gender', 'username'):
            picked.append(getattr(self, field))
        yield from picked
current_user = User(**currentUser)
id, email, gender, username = current_user
print([id, email, gender, username])
Output:
[24, '[email protected]', 'M', 'johndoe']
Edit: Python2 solution:
class User:
    """User built from keyword arguments; iteration works on Python 2 too."""

    def __init__(self, **data):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(data)

    def __iter__(self):
        """Return a lazy iterator over id, email, gender and username."""
        return (getattr(self, field) for field in ('id', 'email', 'gender', 'username'))
Edit 2:
Getting select attributes:
class User:
    """User record whose attributes can be fetched several at a time.

    An attribute name that does not exist as-is, such as
    ``user.id_email_gender``, is read as a list of instance attribute names
    joined by underscores and produces their values in that order.
    """

    def __init__(self, **data):
        self.__dict__ = data

    def __getattr__(self, _vals):
        """Resolve ``_vals`` as several attribute names joined by ``_``.

        Python calls this only after normal attribute lookup has failed.

        Returns:
            An iterator over the selected values, ready for tuple unpacking.

        Raises:
            AttributeError: if ``_vals`` cannot be split entirely into
                existing instance attribute names.
        """
        # Never synthesise dunder attributes: hasattr(), copy and pickle
        # probe for them and rely on AttributeError for missing ones.
        if _vals.startswith('__'):
            raise AttributeError(_vals)

        fields = self.__dict__
        parts = _vals.split('_')
        values = []
        start = 0
        while start < len(parts):
            # Try the longest candidate first so that names which contain
            # underscores themselves (e.g. "phone_number") can be selected.
            for end in range(len(parts), start, -1):
                candidate = '_'.join(parts[start:end])
                if candidate in fields:
                    values.append(fields[candidate])
                    start = end
                    break
            else:
                raise AttributeError(
                    f"{type(self).__name__!r} object has no attribute {_vals!r}"
                )
        return iter(values)
current_user = User(**currentUser)
id, email, gender, username = current_user.id_email_gender_username
id, gender = current_user.id_gender
Don’t flatten the arguments in the first place. When you write an 8-ary function like you did with User, you’re bound to make mistakes like passing arguments in the wrong order.
Which of the following will produce the User you intend?
User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "M", "+12345678", "johndoe")
User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "+12345678", "M", "johndoe")
Impossible to know! If your function takes a descriptor, you do not have this problem –
class User:
    """User described by a single "descriptor" dict instead of many arguments."""

    def __init__(self, desc=None):
        # A literal {} default would be created once and then shared by every
        # User() constructed without an argument, so build a fresh dict here.
        self.desc = {} if desc is None else desc  # whitelist items, if necessary

    def __str__(self):
        """Describe the user; fields absent from the descriptor show as None."""
        # invent our own "destructuring" syntax
        # (relies on the module-level destructure() helper)
        name, age, gender = destructure(self.desc, 'name', 'age', 'gender')
        return f"{name} ({gender}) is {age} years old"
# create users with a "descriptor"
u = User({ 'age': 2, 'gender': 'M' })
v = User({ 'gender': 'F', 'age': 3 })
x = User({ 'gender': 'F', 'name': 'Alice', 'age': 4 })
print(u) # None (M) is 2 years old
print(v) # None (F) is 3 years old
print(x) # Alice (F) is 4 years old
We can define our own destructure
as –
def destructure(d, *keys):
    """Return the values of *keys* from *d* as a list, in the order requested.

    A key that is not present in *d* contributes ``None``.
    """
    picked = []
    for key in keys:
        picked.append(d[key] if key in d else None)
    return picked
This still could result in long chains, but the order is dependent on the caller, therefore it’s not fragile like the 8-ary function in the original question –
# Each assignment sits on one line: splitting it after "=" without a
# backslash continuation is a syntax error.
[ name, age, gender ] = destructure(self.desc, 'name', 'age', 'gender')
# works the same as
[ gender, name, age ] = destructure(self.desc, 'gender', 'name', 'age')
Another option is to use keyword arguments –
class User:
    """User described by keyword arguments collected into a descriptor dict."""

    def __init__(self, **desc):
        self.desc = desc  # whitelist items, if necessary

    def __str__(self):
        """Describe the user; fields absent from the descriptor show as None."""
        # Kept on one line: splitting the assignment after "=" without a
        # backslash continuation is a syntax error.
        # (relies on the module-level destructure() helper)
        name, age, gender = destructure(self.desc, 'name', 'age', 'gender')
        return f"{name} ({gender}) is {age} years old"
# create users with keyword arguments
u = User(age = 2, gender = 'M')
v = User(gender = 'F', age = 3)
x = User(gender = 'F', name = 'Alice', age = 4)
print(u) # None (M) is 2 years old
print(v) # None (F) is 3 years old
print(x) # Alice (F) is 4 years old
You can use operator
module from standard library as follows:
from operator import attrgetter
id, email, gender, username = attrgetter('id', 'email', 'gender', 'username')(current_user)
print(id, email, gender, username)
In case you have a dict like from your example
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
just use itemgetter
instead of attrgetter
:
from operator import itemgetter
id, email, gender, username = itemgetter('id', 'email', 'gender', 'username')(currentUser)
print(id, email, gender, username)
Building off of other answers, I would recommend also using Python’s dataclasses
and use __getitem__
to get specific fields:
from dataclasses import astuple, dataclass
@dataclass
class User:
    """User record supporting positional unpacking and selection by name."""

    id: int
    name: str
    website: str
    description: str
    email: str
    gender: str
    phone_number: str
    username: str

    def __iter__(self):
        """Iterate over all field values in declaration order."""
        return iter(astuple(self))

    def __getitem__(self, keys):
        """Select fields by name.

        ``user["id"]`` returns that single field's value, while
        ``user["id", "email"]`` returns an iterator over the requested
        values, suitable for tuple unpacking.
        """
        # A lone string must not be iterated character by character.
        if isinstance(keys, str):
            return getattr(self, keys)
        return (getattr(self, key) for key in keys)
current_user = User(id=24, name="Jon Doe", website="http://mywebsite.com", description="I am an actor", email="[email protected]", gender="M", phone_number="+12345678", username="johndoe")
# Access fields sequentially:
id, _, email, *_ = current_user
# Access fields out of order:
id, email, gender, username = current_user["id", "email", "gender", "username"]
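You can destructure a Python dictionary and extract properties by unpacking the result of its .values() method. This relies on the dict's insertion order, so the position of each target must match the position of its key: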
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
id, _, _, _, _, _, _, username, *other = currentUser.values()
print('distructuring:', { 'id': id, 'username': username })
In this respect JavaScript handles objects more conveniently than Python. You can also write a method or function to replicate the functionality, but JavaScript makes it really easy.
The closest equivalent in Python is the "packing/unpacking" functionality applied to dictionaries (JSON objects).
You can find related documentation on the internet:
https://www.geeksforgeeks.org/packing-and-unpacking-arguments-in-python/
(Ab)using the import system
Python already has a compact destructuring syntax in the form of from x import y
. This can be re-purposed to destructure dicts and objects:
import sys, types
class MyClass:
def __init__(self, a, b):
self.a = a
self.b = b
sys.modules["myobj"] = MyClass(1, 2)
from myobj import a, b
assert a + b == 3
mydict = {"c": 3, "d": 4}
sys.modules["mydict"] = types.SimpleNamespace(**mydict)
from mydict import c, d
assert c + d == 7
Cluttering sys.modules
with our objects isn’t very nice though.
Context manager
A more serious hack would be a context manager that temporarily adds a module to sys.modules
, and makes sure the __getattr__
method of the module points to the __getattribute__
or __getitem__
method of the object/dict in question.
That would let us do:
mydict = {"a": 1, "b": 2}
with obj_as_module(mydict, "mydict"):
from mydict import a, b
assert a + b == 3
assert "mydict" not in sys.modules
Implementation:
import sys, types
from contextlib import contextmanager
@contextmanager
def obj_as_module(obj, name):
    """Temporarily load an object/dict as a module, to import its attributes/values.

    While the ``with`` block is active, ``from <name> import x`` resolves
    ``x`` to ``obj[x]`` when *obj* is a dict and to ``obj.x`` otherwise.

    Args:
        obj: dict or arbitrary object whose items/attributes are exposed.
        name: module name to register in ``sys.modules`` for the duration.

    Yields:
        The temporary module object.

    Raises:
        Exception: if *name* is already present in ``sys.modules``.
    """
    if name in sys.modules:
        raise Exception(f"Name '{name}' already in sys.modules")

    if isinstance(obj, dict):
        def lookup(attr):
            # Attribute lookup (and therefore the import machinery) only
            # treats AttributeError as "not available"; a raw KeyError would
            # escape from hasattr() and from ``from name import x``.
            try:
                return obj[attr]
            except KeyError:
                raise AttributeError(attr) from None
    else:
        lookup = obj.__getattribute__

    module = types.ModuleType(name)
    # A module-level __getattr__ is consulted for names missing from the module.
    module.__getattr__ = lookup
    sys.modules[name] = module
    try:
        yield module
    finally:
        # The entry may have been removed or replaced inside the with-block;
        # only delete it if it is still the module registered above.
        if sys.modules.get(name) is module:
            del sys.modules[name]
This was my first time playing around with the import system, and I have no idea if this might break something, or what the performance is like. But I think it is a valuable observation that the import
statement already provides a very convenient destructuring syntax.
Replacing sys.modules
entirely
Using an even more questionable hack, we can arrive at an even more compact syntax:
with from_(mydict): import a, b
Implementation:
import sys
@contextmanager
def from_(target):
    """Temporarily replace the sys.modules dict with target dict or its __dict__."""
    namespace = target if isinstance(target, dict) else target.__dict__
    saved = sys.modules
    sys.modules = namespace
    try:
        yield
    finally:
        # Put the original mapping back even if the with-block raised.
        sys.modules = saved
Class decorator
For working with classes we could use a decorator:
def self_as_module(cls):
    """Class decorator for those who like to write self-less methods.

    Adds an ``as_module()`` method that exposes the instance as a temporary
    module named "self" by delegating to ``obj_as_module``.
    """
    def as_module(self):
        return obj_as_module(self, "self")

    cls.as_module = as_module
    return cls
Then we can unpack attributes without cluttering our methods with lines like a = self.a
:
@self_as_module
class MyClass:
def __init__(self):
self.a = 1
self.b = 2
def check(self):
with self.as_module():
from self import a, b
assert a + b == 3
MyClass().check()
For classes with many attributes and math-heavy methods, this is quite nice.
In Python 3.10 and upwards you can do it using match
, which allows variable names to be created in the pattern definition after case
, as seen here:
from dataclasses import dataclass
@dataclass
class User:
id: int
username: str
height: float
current_user = User(23, 'hobycat', 24.6)
match current_user:
case User(id=id, username=username):
# From here, id = current_user.id, username = current_user.username
print(f"Found user with id {id} called {username}")
print(f"Outside the block, {username} is still bound")
As is visible above, the variables remain bound after the case
block, so this can be used as a form of assignment:
match current_user:
case User(id=id, username=username): pass
print(f"User {username} has id {id}")
Note: it’s also possible to use positional class matching: case User(id, username), but positional patterns are matched by field order rather than by name, so case User(username, id) will still match and silently bind username to the id field and id to the username field.
And it’s also possible to give the matched attributes different names: case User(id=found_id, username=found_username)
, in which case found_id
and found_username
will be the variables accessible from the case
block.
See the Python documentation on match statements, and PEP 634 for the precise definition of class patterns.
In JavaScript, I can use destructuring to extract the properties I want from an object in a single line. For example:
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
let { id, username } = this.currentUser;
console.log(id) // 24
console.log(username) //johndoe
Do we have something similar in Python for dicts and objects? Here is how I currently do it for Python objects:
class User:
    """Plain record holding the profile fields of a single user."""

    def __init__(self, id, name, website, description, email, gender, phone_number, username):
        # Attributes are bound in parameter order so vars(self) lists them
        # in the same order as the signature.
        self.id, self.name, self.website, self.description = (
            id, name, website, description,
        )
        self.email, self.gender, self.phone_number, self.username = (
            email, gender, phone_number, username,
        )
current_user = User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "M", "+12345678", "johndoe")
# This is a pain
id = current_user.id
email = current_user.email
gender = current_user.gender
username = current_user.username
print(id, email, gender, username)
Writing those 4 lines (as mentioned in example above) vs writing a single line (as mentioned below) to fetch values I need from an object is a real pain point.
(id, email, gender, username) = current_user
You can implement an __iter__
method to enable unpacking:
class User:
    """User built from keyword arguments that unpacks to four fixed fields."""

    def __init__(self, **data):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(data)

    def __iter__(self):
        """Yield id, email, gender and username, in that order."""
        # All four lookups happen before the first value is produced, so a
        # missing attribute fails before anything has been yielded.
        picked = []
        for field in ('id', 'email', 'gender', 'username'):
            picked.append(getattr(self, field))
        yield from picked
current_user = User(**currentUser)
id, email, gender, username = current_user
print([id, email, gender, username])
Output:
[24, '[email protected]', 'M', 'johndoe']
Edit: Python2 solution:
class User:
    """User built from keyword arguments; iteration works on Python 2 too."""

    def __init__(self, **data):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(data)

    def __iter__(self):
        """Return a lazy iterator over id, email, gender and username."""
        return (getattr(self, field) for field in ('id', 'email', 'gender', 'username'))
Edit 2:
Getting select attributes:
class User:
    """User record whose attributes can be fetched several at a time.

    An attribute name that does not exist as-is, such as
    ``user.id_email_gender``, is read as a list of instance attribute names
    joined by underscores and produces their values in that order.
    """

    def __init__(self, **data):
        self.__dict__ = data

    def __getattr__(self, _vals):
        """Resolve ``_vals`` as several attribute names joined by ``_``.

        Python calls this only after normal attribute lookup has failed.

        Returns:
            An iterator over the selected values, ready for tuple unpacking.

        Raises:
            AttributeError: if ``_vals`` cannot be split entirely into
                existing instance attribute names.
        """
        # Never synthesise dunder attributes: hasattr(), copy and pickle
        # probe for them and rely on AttributeError for missing ones.
        if _vals.startswith('__'):
            raise AttributeError(_vals)

        fields = self.__dict__
        parts = _vals.split('_')
        values = []
        start = 0
        while start < len(parts):
            # Try the longest candidate first so that names which contain
            # underscores themselves (e.g. "phone_number") can be selected.
            for end in range(len(parts), start, -1):
                candidate = '_'.join(parts[start:end])
                if candidate in fields:
                    values.append(fields[candidate])
                    start = end
                    break
            else:
                raise AttributeError(
                    f"{type(self).__name__!r} object has no attribute {_vals!r}"
                )
        return iter(values)
current_user = User(**currentUser)
id, email, gender, username = current_user.id_email_gender_username
id, gender = current_user.id_gender
Don’t flatten the arguments in the first place. When you write an 8-ary function like you did with User, you’re bound to make mistakes like passing arguments in the wrong order.
Which of the following will produce the User you intend?
User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "M", "+12345678", "johndoe")
User(24, "Jon Doe", "http://mywebsite.com", "I am an actor", "[email protected]", "+12345678", "M", "johndoe")
Impossible to know! If your function takes a descriptor, you do not have this problem –
class User:
    """User described by a single "descriptor" dict instead of many arguments."""

    def __init__(self, desc=None):
        # A literal {} default would be created once and then shared by every
        # User() constructed without an argument, so build a fresh dict here.
        self.desc = {} if desc is None else desc  # whitelist items, if necessary

    def __str__(self):
        """Describe the user; fields absent from the descriptor show as None."""
        # invent our own "destructuring" syntax
        # (relies on the module-level destructure() helper)
        name, age, gender = destructure(self.desc, 'name', 'age', 'gender')
        return f"{name} ({gender}) is {age} years old"
# create users with a "descriptor"
u = User({ 'age': 2, 'gender': 'M' })
v = User({ 'gender': 'F', 'age': 3 })
x = User({ 'gender': 'F', 'name': 'Alice', 'age': 4 })
print(u) # None (M) is 2 years old
print(v) # None (F) is 3 years old
print(x) # Alice (F) is 4 years old
We can define our own destructure
as –
def destructure(d, *keys):
    """Return the values of *keys* from *d* as a list, in the order requested.

    A key that is not present in *d* contributes ``None``.
    """
    picked = []
    for key in keys:
        picked.append(d[key] if key in d else None)
    return picked
This still could result in long chains, but the order is dependent on the caller, therefore it’s not fragile like the 8-ary function in the original question –
# Each assignment sits on one line: splitting it after "=" without a
# backslash continuation is a syntax error.
[ name, age, gender ] = destructure(self.desc, 'name', 'age', 'gender')
# works the same as
[ gender, name, age ] = destructure(self.desc, 'gender', 'name', 'age')
Another option is to use keyword arguments –
class User:
    """User described by keyword arguments collected into a descriptor dict."""

    def __init__(self, **desc):
        self.desc = desc  # whitelist items, if necessary

    def __str__(self):
        """Describe the user; fields absent from the descriptor show as None."""
        # Kept on one line: splitting the assignment after "=" without a
        # backslash continuation is a syntax error.
        # (relies on the module-level destructure() helper)
        name, age, gender = destructure(self.desc, 'name', 'age', 'gender')
        return f"{name} ({gender}) is {age} years old"
# create users with keyword arguments
u = User(age = 2, gender = 'M')
v = User(gender = 'F', age = 3)
x = User(gender = 'F', name = 'Alice', age = 4)
print(u) # None (M) is 2 years old
print(v) # None (F) is 3 years old
print(x) # Alice (F) is 4 years old
You can use operator
module from standard library as follows:
from operator import attrgetter
id, email, gender, username = attrgetter('id', 'email', 'gender', 'username')(current_user)
print(id, email, gender, username)
In case you have a dict like from your example
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
just use itemgetter
instead of attrgetter
:
from operator import itemgetter
id, email, gender, username = itemgetter('id', 'email', 'gender', 'username')(currentUser)
print(id, email, gender, username)
Building off of other answers, I would recommend also using Python’s dataclasses
and use __getitem__
to get specific fields:
from dataclasses import astuple, dataclass
@dataclass
class User:
    """User record supporting positional unpacking and selection by name."""

    id: int
    name: str
    website: str
    description: str
    email: str
    gender: str
    phone_number: str
    username: str

    def __iter__(self):
        """Iterate over all field values in declaration order."""
        return iter(astuple(self))

    def __getitem__(self, keys):
        """Select fields by name.

        ``user["id"]`` returns that single field's value, while
        ``user["id", "email"]`` returns an iterator over the requested
        values, suitable for tuple unpacking.
        """
        # A lone string must not be iterated character by character.
        if isinstance(keys, str):
            return getattr(self, keys)
        return (getattr(self, key) for key in keys)
current_user = User(id=24, name="Jon Doe", website="http://mywebsite.com", description="I am an actor", email="[email protected]", gender="M", phone_number="+12345678", username="johndoe")
# Access fields sequentially:
id, _, email, *_ = current_user
# Access fields out of order:
id, email, gender, username = current_user["id", "email", "gender", "username"]
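You can destructure a Python dictionary and extract properties by unpacking the result of its .values() method. This relies on the dict's insertion order, so the position of each target must match the position of its key: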
currentUser = {
"id": 24,
"name": "John Doe",
"website": "http://mywebsite.com",
"description": "I am an actor",
"email": "[email protected]",
"gender": "M",
"phone_number": "+12345678",
"username": "johndoe",
"birth_date": "1991-02-23",
"followers": 46263,
"following": 345,
"like": 204,
"comments": 9
}
id, _, _, _, _, _, _, username, *other = currentUser.values()
print('distructuring:', { 'id': id, 'username': username })
In this respect JavaScript handles objects more conveniently than Python. You can also write a method or function to replicate the functionality, but JavaScript makes it really easy.
The closest equivalent in Python is the "packing/unpacking" functionality applied to dictionaries (JSON objects).
You can find related documentation on the internet:
https://www.geeksforgeeks.org/packing-and-unpacking-arguments-in-python/
(Ab)using the import system
Python already has a compact destructuring syntax in the form of from x import y
. This can be re-purposed to destructure dicts and objects:
import sys, types
class MyClass:
def __init__(self, a, b):
self.a = a
self.b = b
sys.modules["myobj"] = MyClass(1, 2)
from myobj import a, b
assert a + b == 3
mydict = {"c": 3, "d": 4}
sys.modules["mydict"] = types.SimpleNamespace(**mydict)
from mydict import c, d
assert c + d == 7
Cluttering sys.modules
with our objects isn’t very nice though.
Context manager
A more serious hack would be a context manager that temporarily adds a module to sys.modules
, and makes sure the __getattr__
method of the module points to the __getattribute__
or __getitem__
method of the object/dict in question.
That would let us do:
mydict = {"a": 1, "b": 2}
with obj_as_module(mydict, "mydict"):
from mydict import a, b
assert a + b == 3
assert "mydict" not in sys.modules
Implementation:
import sys, types
from contextlib import contextmanager
@contextmanager
def obj_as_module(obj, name):
    """Temporarily load an object/dict as a module, to import its attributes/values.

    While the ``with`` block is active, ``from <name> import x`` resolves
    ``x`` to ``obj[x]`` when *obj* is a dict and to ``obj.x`` otherwise.

    Args:
        obj: dict or arbitrary object whose items/attributes are exposed.
        name: module name to register in ``sys.modules`` for the duration.

    Yields:
        The temporary module object.

    Raises:
        Exception: if *name* is already present in ``sys.modules``.
    """
    if name in sys.modules:
        raise Exception(f"Name '{name}' already in sys.modules")

    if isinstance(obj, dict):
        def lookup(attr):
            # Attribute lookup (and therefore the import machinery) only
            # treats AttributeError as "not available"; a raw KeyError would
            # escape from hasattr() and from ``from name import x``.
            try:
                return obj[attr]
            except KeyError:
                raise AttributeError(attr) from None
    else:
        lookup = obj.__getattribute__

    module = types.ModuleType(name)
    # A module-level __getattr__ is consulted for names missing from the module.
    module.__getattr__ = lookup
    sys.modules[name] = module
    try:
        yield module
    finally:
        # The entry may have been removed or replaced inside the with-block;
        # only delete it if it is still the module registered above.
        if sys.modules.get(name) is module:
            del sys.modules[name]
This was my first time playing around with the import system, and I have no idea if this might break something, or what the performance is like. But I think it is a valuable observation that the import
statement already provides a very convenient destructuring syntax.
Replacing sys.modules
entirely
Using an even more questionable hack, we can arrive at an even more compact syntax:
with from_(mydict): import a, b
Implementation:
import sys
@contextmanager
def from_(target):
    """Temporarily replace the sys.modules dict with target dict or its __dict__."""
    namespace = target if isinstance(target, dict) else target.__dict__
    saved = sys.modules
    sys.modules = namespace
    try:
        yield
    finally:
        # Put the original mapping back even if the with-block raised.
        sys.modules = saved
Class decorator
For working with classes we could use a decorator:
def self_as_module(cls):
    """Class decorator for those who like to write self-less methods.

    Adds an ``as_module()`` method that exposes the instance as a temporary
    module named "self" by delegating to ``obj_as_module``.
    """
    def as_module(self):
        return obj_as_module(self, "self")

    cls.as_module = as_module
    return cls
Then we can unpack attributes without cluttering our methods with lines like a = self.a
:
@self_as_module
class MyClass:
def __init__(self):
self.a = 1
self.b = 2
def check(self):
with self.as_module():
from self import a, b
assert a + b == 3
MyClass().check()
For classes with many attributes and math-heavy methods, this is quite nice.
In Python 3.10 and upwards you can do it using match
, which allows variable names to be created in the pattern definition after case
, as seen here:
from dataclasses import dataclass
@dataclass
class User:
id: int
username: str
height: float
current_user = User(23, 'hobycat', 24.6)
match current_user:
case User(id=id, username=username):
# From here, id = current_user.id, username = current_user.username
print(f"Found user with id {id} called {username}")
print(f"Outside the block, {username} is still bound")
As is visible above, the variables remain bound after the case
block, so this can be used as a form of assignment:
match current_user:
case User(id=id, username=username): pass
print(f"User {username} has id {id}")
Note: it’s also possible to use positional class matching: case User(id, username), but positional patterns are matched by field order rather than by name, so case User(username, id) will still match and silently bind username to the id field and id to the username field.
And it’s also possible to give the matched attributes different names: case User(id=found_id, username=found_username)
, in which case found_id
and found_username
will be the variables accessible from the case
block.
See the Python documentation on match statements, and PEP 634 for the precise definition of class patterns.