Django: aggregate django fields to avoid N + 1 problem
Question:
I have 3 tables/classes that are relevant to each other:
- CourseStudent – represents Student signed up to the course
- Presence – represents the attendance list of the CourseStudent
- CourseStudentPayment – represents the payments list for CourseStudent
In the code it looks like this:
class CourseStudentPayment(models.Model):
    """A payment belonging to one CourseStudent.

    Stores the CourseStudent it was made for, a ``start_date`` and a
    ``price``.
    """

    course_student = models.ForeignKey(
        "CourseStudent",
        on_delete=models.CASCADE,
        related_name="course_student_payments",
    )
    start_date = models.DateField(db_index=True)
    # other fields: price, currency, etc
    price = models.DecimalField(default=0, max_digits=10, decimal_places=2)

    def lessons_complete(self) -> int:
        """Return the number of Presence rows dated on or after ``start_date``.

        Only Presence rows of this payment's CourseStudent are counted.
        Every call issues its own COUNT query, so calling it for each row
        of a payment list costs one query per payment.
        """
        # Compare on the raw foreign-key column: reading
        # ``self.course_student`` would first load the related
        # CourseStudent row when it is not already cached, adding a second
        # query to every call.
        return Presence.objects.filter(
            course_student_id=self.course_student_id,
            date__gte=self.start_date,
        ).count()
class Presence(models.Model):
    """One attendance entry (a date) recorded for a CourseStudent."""

    course_student = models.ForeignKey(
        "CourseStudent",
        on_delete=models.CASCADE,
    )
    date = models.DateField()
    # some other fields
class CourseStudent(models.Model):
    """A student signed up to a course."""

    # some course-related information
    student = models.CharField(...)

    def last_payment(self) -> "CourseStudentPayment | None":
        """Return this student's payment with the latest ``start_date``.

        Returns ``None`` when the student has no payments, because
        ``QuerySet.first()`` yields ``None`` on an empty result.
        """
        return (
            CourseStudentPayment.objects.filter(course_student=self)
            .order_by("-start_date")
            .first()
        )
So the lessons_complete method counts the attendances recorded on or after the payment's start date. Both CourseStudentPayment and Presence objects hold a foreign key to CourseStudent.
I want to render a list of payments for the students with lessons_complete in an efficient way. The dumb solution would be:
- get list of payments.
# Queryset of all payments; each lessons_complete() call on a row then adds one more query.
course_payments = CourseStudentPayment.objects.all()
- for each payment I call lessons_complete.
This solution creates an N+1 problem: for each payment I do a separate Presence lookup.
In SQL I would just join two tables (pseudocode):
-- Pseudocode: count, per payment, the Presence rows of the same CourseStudent
-- dated after the payment's start_date.
SELECT csp.*, count(p.id) from CourseStudentPayment csp
JOIN Presence p ON csp.course_student_id = p.course_student_id
WHERE p.date > csp.start_date
GROUP BY csp.id
Is it possible to aggregate Presence table results and use within CourseStudentPayment rows?
Answers:
You can filter with:
from django.db.models import Count, F

# Keep only payment/Presence pairs where the Presence is dated strictly after
# the payment's start_date, then count the remaining Presence rows per payment.
# Payments with no such Presence are dropped by the filter.
# NOTE(review): lessons_complete() uses date__gte (inclusive); use __gte here
# if the two counts must agree - confirm the intended boundary.
course_payments = (
    CourseStudentPayment.objects
    .filter(course_student__presence__date__gt=F("start_date"))
    .annotate(presence_count=Count("course_student__presence"))
)
The CourseStudentPayment objects will have an extra attribute .presence_count with the number of Presence objects dated after the start_date of the CourseStudentPayment object.
This will, however, leave out CourseStudentPayment objects without any matching Presence; you can include these with:
# Q must be imported as well: it is used in the Count(filter=...) argument.
from django.db.models import Count, F, Q

# Conditional count: every payment is kept, and presence_count counts only
# the Presence rows dated strictly after the payment's start_date (0 when
# there are none).
course_payments = CourseStudentPayment.objects.annotate(
    presence_count=Count(
        'course_student__presence',
        filter=Q(course_student__presence__date__gt=F('start_date')),
    )
)
I have 3 tables/classes that are relevant to each other:
- CourseStudent – represents Student signed up to the course
- Presence – represents the attendance list of the CourseStudent
- CourseStudentPayment – represents the payments list for CourseStudent
In the code it looks like this:
class CourseStudentPayment(models.Model):
    """A payment belonging to one CourseStudent.

    Stores the CourseStudent it was made for, a ``start_date`` and a
    ``price``.
    """

    course_student = models.ForeignKey(
        "CourseStudent",
        on_delete=models.CASCADE,
        related_name="course_student_payments",
    )
    start_date = models.DateField(db_index=True)
    # other fields: price, currency, etc
    price = models.DecimalField(default=0, max_digits=10, decimal_places=2)

    def lessons_complete(self) -> int:
        """Return the number of Presence rows dated on or after ``start_date``.

        Only Presence rows of this payment's CourseStudent are counted.
        Every call issues its own COUNT query, so calling it for each row
        of a payment list costs one query per payment.
        """
        # Compare on the raw foreign-key column: reading
        # ``self.course_student`` would first load the related
        # CourseStudent row when it is not already cached, adding a second
        # query to every call.
        return Presence.objects.filter(
            course_student_id=self.course_student_id,
            date__gte=self.start_date,
        ).count()
class Presence(models.Model):
    """One attendance entry (a date) recorded for a CourseStudent."""

    course_student = models.ForeignKey(
        "CourseStudent",
        on_delete=models.CASCADE,
    )
    date = models.DateField()
    # some other fields
class CourseStudent(models.Model):
    """A student signed up to a course."""

    # some course-related information
    student = models.CharField(...)

    def last_payment(self) -> "CourseStudentPayment | None":
        """Return this student's payment with the latest ``start_date``.

        Returns ``None`` when the student has no payments, because
        ``QuerySet.first()`` yields ``None`` on an empty result.
        """
        return (
            CourseStudentPayment.objects.filter(course_student=self)
            .order_by("-start_date")
            .first()
        )
So the lessons_complete method counts the attendances recorded on or after the payment's start date. Both CourseStudentPayment and Presence objects hold a foreign key to CourseStudent.
I want to render a list of payments for the students with lessons_complete in an efficient way. The dumb solution would be:
- get list of payments.
# Queryset of all payments; each lessons_complete() call on a row then adds one more query.
course_payments = CourseStudentPayment.objects.all()
- for each payment I call lessons_complete.
This solution creates an N+1 problem: for each payment I do a separate Presence lookup.
In SQL I would just join two tables (pseudocode):
-- Pseudocode: count, per payment, the Presence rows of the same CourseStudent
-- dated after the payment's start_date.
SELECT csp.*, count(p.id) from CourseStudentPayment csp
JOIN Presence p ON csp.course_student_id = p.course_student_id
WHERE p.date > csp.start_date
GROUP BY csp.id
Is it possible to aggregate Presence table results and use within CourseStudentPayment rows?
You can filter with:
from django.db.models import Count, F

# Keep only payment/Presence pairs where the Presence is dated strictly after
# the payment's start_date, then count the remaining Presence rows per payment.
# Payments with no such Presence are dropped by the filter.
# NOTE(review): lessons_complete() uses date__gte (inclusive); use __gte here
# if the two counts must agree - confirm the intended boundary.
course_payments = (
    CourseStudentPayment.objects
    .filter(course_student__presence__date__gt=F("start_date"))
    .annotate(presence_count=Count("course_student__presence"))
)
The CourseStudentPayment objects will have an extra attribute .presence_count with the number of Presence objects dated after the start_date of the CourseStudentPayment object.
This will, however, leave out CourseStudentPayment objects without any matching Presence; you can include these with:
# Q must be imported as well: it is used in the Count(filter=...) argument.
from django.db.models import Count, F, Q

# Conditional count: every payment is kept, and presence_count counts only
# the Presence rows dated strictly after the payment's start_date (0 when
# there are none).
course_payments = CourseStudentPayment.objects.annotate(
    presence_count=Count(
        'course_student__presence',
        filter=Q(course_student__presence__date__gt=F('start_date')),
    )
)