django – prefetch only the newest record?
Question:
I am trying to prefetch only the latest record against the parent record.
my models are as such
class LinkTargets(models.Model):
device_circuit_subnet = models.ForeignKey(DeviceCircuitSubnets, verbose_name="Device", on_delete=models.PROTECT)
interface_index = models.CharField(max_length=100, verbose_name='Interface index (SNMP)', blank=True, null=True)
get_bgp = models.BooleanField(default=False, verbose_name="get BGP Data?")
dashboard = models.BooleanField(default=False, verbose_name="Display on monitoring dashboard?")
class LinkData(models.Model):
link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
...
The below query fails with the error
AttributeError: 'LinkData' object has no attribute '_iterable_class'
Query:
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.all().order_by('-id')[0]
)
)
I thought about getting LinkData instead and doing a select related but ive no idea how to get only 1 record for each link_target_id
link_data = LinkData.objects.filter(link_target__dashboard=True)
.select_related('link_target')..?
EDIT:
using rtindru’s solution, the pre fetched seems to be empty. there is 6 records in there currently, atest 1 record for each of the 3 LinkTargets
>>> link_data[0]
<LinkTargets: LinkTargets object>
>>> link_data[0].linkdata_set.all()
<QuerySet []>
>>>
Answers:
The reason is that Prefetch
expects a Django Queryset
as the queryset
parameter and you are giving an instance of an object.
Change your query as follows:
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk=LinkData.objects.latest('id').pk)
)
)
This does have the unfortunate effect of undoing the purpose of Prefetch
to a large degree.
Update
This prefetches exactly one record globally; not the latest LinkData
record per LinkTarget
.
To prefetch the max LinkData for each LinkTarget you should start at LinkData: you can achieve this as follows:
LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id'))
This will return a dictionary of {link_target
: 12, max_id
: 3223}
You can then use this to return the right set of objects; perhaps filter LinkData based on the values of max_id
.
That will look something like this:
latest_link_data_pks = LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id')).values_list('max_id', flat=True)
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk__in=latest_link_data_pks)
)
)
LinkData.objects.all().order_by('-id')[0]
is not a queryset, it is an model object, hence your error.
You could try LinkData.objects.all().order_by('-id')[0:1]
which is indeed a QuerySet, but it’s not going to work. Given how prefetch_related
works, the queryset argument must return a queryset that contains all the LinkData
records you need (this is then further filtered, and the items in it joined up with the LinkTarget objects). This queryset only contains one item, so that’s no good. (And Django will complain “Cannot filter a query once a slice has been taken” and raise an exception, as it should).
Let’s back up. Essentially you are asking an aggregation/annotation question – for each LinkTarget, you want to know the most recent LinkData object, or the ‘max’ of an ‘id’ column. The easiest way is to just annotate with the id, and then do a separate query to get all the objects.
So, it would look like this (I’ve checked with a similar model in my project, so it should work, but the code below may have some typos):
linktargets = (LinkTargets.objects
.filter(dashboard=True)
.annotate(most_recent_linkdata_id=Max('linkdata_set__id'))
# Now, if we need them, lets collect and get the actual objects
linkdata_ids = [t.most_recent_linkdata_id for t in linktargets]
linkdata_objects = LinkData.objects.filter(id__in=linkdata_ids)
# And we can decorate the LinkTarget objects as well if we want:
linkdata_d = {l.id: l for l in linkdata_objects}
for t in linktargets:
if t.most_recent_linkdata_id is not None:
t.most_recent_linkdata = linkdata_d[t.most_recent_linkdata_id]
I have deliberately not made this into a prefetch that masks linkdata_set
, because the result is that you have objects that lie to you – the linkdata_set
attribute is now missing results. Do you really want to be bitten by that somewhere down the line? Best to make a new attribute that has just the thing you want.
The following works on PostgreSQL. I understand it won’t help OP, but it might be useful to somebody else.
from django.db.models import Count, Prefetch
from .models import LinkTargets, LinkData
link_data_qs = LinkData.objects.order_by(
'link_target__id',
'-id',
).distinct(
'link_target__id',
)
qs = LinkTargets.objects.prefetch_related(
Prefetch(
'linkdata_set',
queryset=link_data_qs,
)
).all()
Tricky, but it seems to work:
class ForeignKeyAsOneToOneField(models.OneToOneField):
def __init__(self, to, on_delete, to_field=None, **kwargs):
super().__init__(to, on_delete, to_field=to_field, **kwargs)
self._unique = False
class LinkData(models.Model):
# link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
link_target = ForeignKeyAsOneToOneField(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT, related_name='linkdata_helper')
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_helper',
queryset=LinkData.objects.all().order_by('-id'),
'linkdata'
)
)
# Now you can access linkdata:
link_data[0].linkdata
Ofcourse with this approach you can’t use linkdata_helper to get related objects.
This is not a direct answer to you question, but solves the same problem. It is possible annotate newest object with a subquery, which I think is more clear. You also don’t have to do stuff like Max("id")
to limit the prefetch query.
It makes use of django.db.models.functions.JSONObject
(added in Django 3.2) to combine multiple fields:
MainModel.objects.annotate(
last_object=RelatedModel.objects.filter(mainmodel=OuterRef("pk"))
.order_by("-date_created")
.values(
data=JSONObject(
id="id", body="body", date_created="date_created"
)
)[:1]
)
I am trying to prefetch only the latest record against the parent record.
my models are as such
class LinkTargets(models.Model):
device_circuit_subnet = models.ForeignKey(DeviceCircuitSubnets, verbose_name="Device", on_delete=models.PROTECT)
interface_index = models.CharField(max_length=100, verbose_name='Interface index (SNMP)', blank=True, null=True)
get_bgp = models.BooleanField(default=False, verbose_name="get BGP Data?")
dashboard = models.BooleanField(default=False, verbose_name="Display on monitoring dashboard?")
class LinkData(models.Model):
link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
...
The below query fails with the error
AttributeError: 'LinkData' object has no attribute '_iterable_class'
Query:
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.all().order_by('-id')[0]
)
)
I thought about getting LinkData instead and doing a select related but ive no idea how to get only 1 record for each link_target_id
link_data = LinkData.objects.filter(link_target__dashboard=True)
.select_related('link_target')..?
EDIT:
using rtindru’s solution, the pre fetched seems to be empty. there is 6 records in there currently, atest 1 record for each of the 3 LinkTargets
>>> link_data[0]
<LinkTargets: LinkTargets object>
>>> link_data[0].linkdata_set.all()
<QuerySet []>
>>>
The reason is that Prefetch
expects a Django Queryset
as the queryset
parameter and you are giving an instance of an object.
Change your query as follows:
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk=LinkData.objects.latest('id').pk)
)
)
This does have the unfortunate effect of undoing the purpose of Prefetch
to a large degree.
Update
This prefetches exactly one record globally; not the latest LinkData
record per LinkTarget
.
To prefetch the max LinkData for each LinkTarget you should start at LinkData: you can achieve this as follows:
LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id'))
This will return a dictionary of {link_target
: 12, max_id
: 3223}
You can then use this to return the right set of objects; perhaps filter LinkData based on the values of max_id
.
That will look something like this:
latest_link_data_pks = LinkData.objects.filter(link_target__dashboard=True).values('link_target').annotate(max_id=Max('id')).values_list('max_id', flat=True)
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_set',
queryset=LinkData.objects.filter(pk__in=latest_link_data_pks)
)
)
LinkData.objects.all().order_by('-id')[0]
is not a queryset, it is an model object, hence your error.
You could try LinkData.objects.all().order_by('-id')[0:1]
which is indeed a QuerySet, but it’s not going to work. Given how prefetch_related
works, the queryset argument must return a queryset that contains all the LinkData
records you need (this is then further filtered, and the items in it joined up with the LinkTarget objects). This queryset only contains one item, so that’s no good. (And Django will complain “Cannot filter a query once a slice has been taken” and raise an exception, as it should).
Let’s back up. Essentially you are asking an aggregation/annotation question – for each LinkTarget, you want to know the most recent LinkData object, or the ‘max’ of an ‘id’ column. The easiest way is to just annotate with the id, and then do a separate query to get all the objects.
So, it would look like this (I’ve checked with a similar model in my project, so it should work, but the code below may have some typos):
linktargets = (LinkTargets.objects
.filter(dashboard=True)
.annotate(most_recent_linkdata_id=Max('linkdata_set__id'))
# Now, if we need them, lets collect and get the actual objects
linkdata_ids = [t.most_recent_linkdata_id for t in linktargets]
linkdata_objects = LinkData.objects.filter(id__in=linkdata_ids)
# And we can decorate the LinkTarget objects as well if we want:
linkdata_d = {l.id: l for l in linkdata_objects}
for t in linktargets:
if t.most_recent_linkdata_id is not None:
t.most_recent_linkdata = linkdata_d[t.most_recent_linkdata_id]
I have deliberately not made this into a prefetch that masks linkdata_set
, because the result is that you have objects that lie to you – the linkdata_set
attribute is now missing results. Do you really want to be bitten by that somewhere down the line? Best to make a new attribute that has just the thing you want.
The following works on PostgreSQL. I understand it won’t help OP, but it might be useful to somebody else.
from django.db.models import Count, Prefetch
from .models import LinkTargets, LinkData
link_data_qs = LinkData.objects.order_by(
'link_target__id',
'-id',
).distinct(
'link_target__id',
)
qs = LinkTargets.objects.prefetch_related(
Prefetch(
'linkdata_set',
queryset=link_data_qs,
)
).all()
Tricky, but it seems to work:
class ForeignKeyAsOneToOneField(models.OneToOneField):
def __init__(self, to, on_delete, to_field=None, **kwargs):
super().__init__(to, on_delete, to_field=to_field, **kwargs)
self._unique = False
class LinkData(models.Model):
# link_target = models.ForeignKey(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT)
link_target = ForeignKeyAsOneToOneField(LinkTargets, verbose_name="Link Target", on_delete=models.PROTECT, related_name='linkdata_helper')
interface_description = models.CharField(max_length=200, verbose_name='Interface Description', blank=True, null=True)
link_data = LinkTargets.objects.filter(dashboard=True)
.prefetch_related(
Prefetch(
'linkdata_helper',
queryset=LinkData.objects.all().order_by('-id'),
'linkdata'
)
)
# Now you can access linkdata:
link_data[0].linkdata
Ofcourse with this approach you can’t use linkdata_helper to get related objects.
This is not a direct answer to you question, but solves the same problem. It is possible annotate newest object with a subquery, which I think is more clear. You also don’t have to do stuff like Max("id")
to limit the prefetch query.
It makes use of django.db.models.functions.JSONObject
(added in Django 3.2) to combine multiple fields:
MainModel.objects.annotate(
last_object=RelatedModel.objects.filter(mainmodel=OuterRef("pk"))
.order_by("-date_created")
.values(
data=JSONObject(
id="id", body="body", date_created="date_created"
)
)[:1]
)