Is there a way to add custom data into ListAPIView in django rest framework
Question:
So I’ve built an API for a movies dataset, which has the following structure:
Models.py
class Directors(models.Model):
    # A director; rows are stored in the 'directors' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Both name parts are optional (may be blank / NULL).
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'directors'
        # Default queryset order: highest id first.
        ordering = ['-id']
class Movies(models.Model):
    # A movie; rows are stored in the 'movies' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Every descriptive column is optional (may be blank / NULL).
    name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    year = models.IntegerField(
        blank=True,
        null=True,
    )
    rank = models.FloatField(
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'movies'
        # Default queryset order: highest id first.
        ordering = ['-id']
class Actors(models.Model):
    # An actor; rows are stored in the 'actors' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Name parts and gender are optional (may be blank / NULL).
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    gender = models.CharField(
        max_length=20,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'actors'
        # Default queryset order: highest id first.
        ordering = ['-id']
class DirectorsGenres(models.Model):
    # A genre attached to a director; rows are stored in 'directors_genres'.

    # Deleting the director deletes these rows as well (CASCADE).
    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name='directors_genres',
    )
    # Genre is kept as free text.
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    # NOTE(review): presumably a probability/weight of this genre for the
    # director -- confirm against the dataset description.
    prob = models.FloatField(
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'directors_genres'
        # Default queryset order: by director, descending.
        ordering = ['-director']
class MoviesDirectors(models.Model):
    # Link row between a director and a movie, stored in 'movies_directors'.

    # Both sides are required; deleting either side deletes the link (CASCADE).
    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name='movies_directors',
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='movies_directors',
    )

    class Meta:
        db_table = 'movies_directors'
        # Default queryset order: by director, descending.
        ordering = ['-director']
class MoviesGenres(models.Model):
    # A genre attached to a movie; rows are stored in 'movies_genres'.

    # Deleting the movie deletes these rows as well (CASCADE).
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='movies_genres',
    )
    # Genre is kept as free text.
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'movies_genres'
        # Default queryset order: by movie, descending.
        ordering = ['-movie']
class Roles(models.Model):
    # A role an actor has in a movie; rows are stored in the 'roles' table.

    # Both sides are required; deleting either side deletes the role (CASCADE).
    actor = models.ForeignKey(
        Actors,
        on_delete=models.CASCADE,
        related_name='roles',
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='roles',
    )
    # Optional role name.
    role = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'roles'
        # Default queryset order: by actor, descending.
        ordering = ['-actor']
urls.py
from django.urls import path, include
from . import views
from api.views import getMovies, getGenres, getActors
# URL routes of the movies API.
urlpatterns = [
    path('', views.getRoutes),
    path('movies/', getMovies.as_view(), name='movies'),
    path('movies/genres/', getGenres.as_view(), name='genres'),
    # The int converter only matches digits, so a non-numeric id is rejected
    # by the URL resolver (404) instead of reaching the integer `actor_id`
    # filter in getActors.get_queryset().
    path('actor_stats/<int:pk>', getActors.as_view(), name='actor_stats'),
]
serializer.py
from rest_framework import serializers
from movies.models import *
class MoviesSerializer(serializers.ModelSerializer):
    # Exposes every field of the Movies model.

    class Meta:
        model = Movies
        fields = '__all__'
class DirectorsSerializer(serializers.ModelSerializer):
    # Exposes every field of the Directors model.

    class Meta:
        model = Directors
        fields = '__all__'
class ActorsSerializer(serializers.ModelSerializer):
    # Exposes every field of the Actors model.

    class Meta:
        model = Actors
        fields = '__all__'
class DirectorsGenresSerializer(serializers.ModelSerializer):
    # Exposes every field of the DirectorsGenres model.

    class Meta:
        model = DirectorsGenres
        fields = '__all__'
class MoviesDirectorsSerializer(serializers.ModelSerializer):
    # Exposes every field of MoviesDirectors, with the movie and the director
    # rendered as nested objects through their own serializers.
    movie = MoviesSerializer(many=False)
    director = DirectorsSerializer(many=False)

    class Meta:
        model = MoviesDirectors
        fields = '__all__'
class MoviesGenresSerializer(serializers.ModelSerializer):
    # Exposes every field of MoviesGenres, with the movie rendered as a
    # nested object through MoviesSerializer.
    movie = MoviesSerializer(many=False)

    class Meta:
        model = MoviesGenres
        fields = '__all__'
class RolesSerializer(serializers.ModelSerializer):
    # Exposes every field of Roles, with the movie and the actor rendered as
    # nested objects through their own serializers.
    movie = MoviesSerializer(many=False)
    actor = ActorsSerializer(many=False)

    class Meta:
        model = Roles
        fields = '__all__'
views.py
class getMovies(ListAPIView):
    # Paginated list of movie/director links, filterable by the director's
    # first or last name.
    #
    # select_related() pulls the movie and director rows in with the main
    # query: MoviesDirectorsSerializer nests both objects, so without it each
    # serialized row would trigger two additional queries.
    #
    # No `director__in` filter is needed: `director` is a non-nullable
    # foreign key, so every link row already has a director.
    queryset = MoviesDirectors.objects.select_related('director', 'movie')
    serializer_class = MoviesDirectorsSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['director__first_name', 'director__last_name']
class getGenres(ListAPIView):
    # Paginated list of movie/genre rows, sorted by genre in descending order
    # and filterable by genre.
    #
    # select_related() pulls the movie row in with the main query:
    # MoviesGenresSerializer nests the movie, so without it each serialized
    # row would trigger an additional query.
    #
    # No `movie__in` filter is needed: `movie` is a non-nullable foreign key,
    # so every genre row already has a movie.
    queryset = MoviesGenres.objects.select_related('movie').order_by('-genre')
    serializer_class = MoviesGenresSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['genre']
class getActors(ListAPIView):
    # Paginated list of the roles of one actor, selected by the `pk` captured
    # from the URL.
    #
    # select_related() pulls the actor and movie rows in with the main query,
    # because the serializer reads the related objects for every row; without
    # it each serialized row would trigger additional queries.
    queryset = Roles.objects.select_related('actor', 'movie')
    serializer_class = RolesSerializer
    pagination_class = CustomPagination

    def get_queryset(self):
        # Narrow the base queryset to the actor named in the URL.
        actor_pk = self.kwargs['pk']
        return super().get_queryset().filter(actor_id=actor_pk)
Now I want to count the number of movies by genre that the actor with a specific pk played in, inside the `getActors` class, i.e. the number of movies per genre that the actor participated in, e.g. Drama: 2, Horror: 3.
Right now I am only getting the overall count of movies (`count: 2`):
GET /api/actor_stats/17
HTTP 200 OK
Allow: GET, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 2,
"next": null,
"previous": null,
"results": [
{
"id": 800480,
"movie": {
"id": 105231,
"name": "Everybody's Business",
"year": 1993,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Grandfather"
},
{
"id": 800481,
"movie": {
"id": 242453,
"name": "OP Pro 88 - Barra Rio",
"year": 1988,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Himself"
}
]
}
What is the optimized way of achieving the following:
- number_of_movies_by_genre
- Drama: 2
- Horror: 3
UPDATE
class RolesSerializer(serializers.Serializer):
    # Per-actor statistics. Each field below is filled by the get_<field>
    # method of the same name, which receives the Roles row being
    # serialized as `obj`.
    id = serializers.SerializerMethodField()
    name = serializers.SerializerMethodField()
    top_genre = serializers.SerializerMethodField()
    number_of_movies = serializers.SerializerMethodField()
    number_of_movies_by_genre = serializers.SerializerMethodField()
    most_frequent_partner = serializers.SerializerMethodField()

    class Meta:
        # NOTE(review): the base class is a plain Serializer, not a
        # ModelSerializer -- confirm whether this Meta has any effect.
        model = Roles
        fields = '__all__'

    def get_id(self, obj):
        # Id of the actor behind this role row.
        return obj.actor.id

    def get_name(self, obj):
        # "<first_name> <last_name>" of the actor behind this role row.
        return f'{obj.actor.first_name} {obj.actor.last_name}'

    def get_top_genre(self, obj):
        # Group the actor's roles by movie genre and count the rows per genre.
        number_by_genre = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__movies_genres__genre').annotate(
                genre = F('movie__movies_genres__genre'),
                number_of_movies=Count('movie__movies_genres__genre'),
            )
        # Only the counts are kept from here on, so the returned list holds
        # the highest count (repeated once per tied genre), not genre names.
        data = [s['number_of_movies'] for s in number_by_genre]
        # NOTE(review): max() raises ValueError when `data` is empty.
        highest = max(data)
        result = [s for s in data if s == highest]
        return result

    def get_number_of_movies(self, obj):
        # Number of role rows of this actor.
        # NOTE(review): no distinct() here, so presumably two roles in the
        # same movie are counted twice -- confirm.
        number_of_movies = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__name').count()
        return number_of_movies

    def get_number_of_movies_by_genre(self, obj):
        # Group the actor's roles by movie genre and count the rows per
        # genre; the final values() limits each entry to 'genre' and
        # 'number_of_movies'.
        number_of_movies_by_genre = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__movies_genres__genre').annotate(
                genre=F('movie__movies_genres__genre'),
                number_of_movies=Count('movie__movies_genres__genre'),
            ).values('genre', 'number_of_movies')
        return number_of_movies_by_genre

    def get_most_frequent_partner(self, obj):
        # Ids of the movies this actor has a role in.
        partners = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__id')
        # Roles of the other actors in those movies, grouped per actor with
        # the number of matching role rows.
        # NOTE(review): every co-actor is returned with its count; nothing
        # here selects the most frequent one.
        result = Roles.objects.filter(movie__in = partners
            ).values('actor').exclude(actor=obj.actor.id).annotate(
                partner_actor_id = F('actor'),
                partner_actor_name = Concat(F('actor__first_name'), Value(' '), F('actor__last_name')),
                number_of_shared_movies =Count('actor'),
            ).values('partner_actor_id', 'partner_actor_name', 'number_of_shared_movies')
        return result
The problem with that code is that it repeats the results once per movie: for instance, if the actor has 5 movies, the results are repeated 5 times. Another issue: in order to get `top_genre` and `most_frequent_partner` I’m using `max()`, but then I only get the numbers and not the actual genre name (in `top_genre`) or the actor name (in `most_frequent_partner`). I use `max()` in this way so that I can get more than one value. For instance, in `top_genre`, if the actor has 3 Drama, 3 Comedy, 1 Horror and 1 Documentary, I get the max as `[3, 3]`, but how can I get the actual names out of these results? The same goes for `most_frequent_partner`.
The results look like this so far:
{
"next": null,
"previous": null,
"count": 4,
"pagenum": null,
"results": [
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
}
]
}
What I want to see in the end:
{
"next": null,
"previous": null,
"count": 2,
"results": [
{
"id": 18 (actor_id),
"name": Bruce Buffer (actor_name),
"number of movies": 2,
"top genre": Drama, Documentary,
"number of movies by genre": Drama: 1, Documentary: 1,
"most frequent partner": partner_actor_id, partner_actor_name, number_of_shared_movies,
}
]
}
Answers:
There are many ways to implement this route; which one is right depends on many criteria and on how much it will be used.
I think a correct way is to create a dedicated model that stores the actor stats, with a one-to-one relation to the actor, and to recompute the values each time a movie is added. But if you add movies often, it could slow down your database.
You can also accept having somewhat outdated data for a while and update the table regularly using a background job, possibly with a custom SQL query that gives you better performance (bulk update).
I would start from your models: you have genres defined as a `CharField` in two of them. Because the genres are not isolated anywhere, you need to look in both tables to find all the genres. If you do not, you are just assuming that every genre in one table is also present in the other, which may not be true.
Also, querying string fields is not very efficient compared to an integer PK, so from a scaling point of view this is bad. (Of course, I am saying this in general, as a good practice, and not specifically about movie genres.)
Your best option would be to have either a `Genre` model or a choices field where you define all possible genres.
As for the counting, you would do that inside your serializer class, using a `SerializerMethodField`.
If you want the number of movies by genre for a given actor, you can use `annotate()` with a `Count` aggregate:
# Group the given actor's roles by movie genre and count the rows per genre.
return Roles.objects.filter(
    actor_id=self.kwargs['pk']
).values('movie__movies_genres__genre').annotate(
    no_of_movies=Count('movie__movies_genres__genre'),
    genre=F('movie__movies_genres__genre'),
)
Here we first filter the roles for the given actor; then `values()` groups them by genre; then the annotation is computed over all members of each group, which gives the count and the genre. You can use a `SerializerMethodField` to expose these calculated results.
If you have a huge dataset it will not perform well, but you can create indexes accordingly; it will still cost you 2-3 queries.
You can learn more from the Django QuerySet API documentation.
So I’ve built an API for a movies dataset, which has the following structure:
Models.py
class Directors(models.Model):
    # A director; rows are stored in the 'directors' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Both name parts are optional (may be blank / NULL).
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'directors'
        # Default queryset order: highest id first.
        ordering = ['-id']
class Movies(models.Model):
    # A movie; rows are stored in the 'movies' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Every descriptive column is optional (may be blank / NULL).
    name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    year = models.IntegerField(
        blank=True,
        null=True,
    )
    rank = models.FloatField(
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'movies'
        # Default queryset order: highest id first.
        ordering = ['-id']
class Actors(models.Model):
    # An actor; rows are stored in the 'actors' table.

    # Explicitly declared integer primary key.
    id = models.IntegerField(primary_key=True)
    # Name parts and gender are optional (may be blank / NULL).
    first_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    last_name = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    gender = models.CharField(
        max_length=20,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'actors'
        # Default queryset order: highest id first.
        ordering = ['-id']
class DirectorsGenres(models.Model):
    # A genre attached to a director; rows are stored in 'directors_genres'.

    # Deleting the director deletes these rows as well (CASCADE).
    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name='directors_genres',
    )
    # Genre is kept as free text.
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )
    # NOTE(review): presumably a probability/weight of this genre for the
    # director -- confirm against the dataset description.
    prob = models.FloatField(
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'directors_genres'
        # Default queryset order: by director, descending.
        ordering = ['-director']
class MoviesDirectors(models.Model):
    # Link row between a director and a movie, stored in 'movies_directors'.

    # Both sides are required; deleting either side deletes the link (CASCADE).
    director = models.ForeignKey(
        Directors,
        on_delete=models.CASCADE,
        related_name='movies_directors',
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='movies_directors',
    )

    class Meta:
        db_table = 'movies_directors'
        # Default queryset order: by director, descending.
        ordering = ['-director']
class MoviesGenres(models.Model):
    # A genre attached to a movie; rows are stored in 'movies_genres'.

    # Deleting the movie deletes these rows as well (CASCADE).
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='movies_genres',
    )
    # Genre is kept as free text.
    genre = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'movies_genres'
        # Default queryset order: by movie, descending.
        ordering = ['-movie']
class Roles(models.Model):
    # A role an actor has in a movie; rows are stored in the 'roles' table.

    # Both sides are required; deleting either side deletes the role (CASCADE).
    actor = models.ForeignKey(
        Actors,
        on_delete=models.CASCADE,
        related_name='roles',
    )
    movie = models.ForeignKey(
        Movies,
        on_delete=models.CASCADE,
        related_name='roles',
    )
    # Optional role name.
    role = models.CharField(
        max_length=100,
        blank=True,
        null=True,
    )

    class Meta:
        db_table = 'roles'
        # Default queryset order: by actor, descending.
        ordering = ['-actor']
urls.py
from django.urls import path, include
from . import views
from api.views import getMovies, getGenres, getActors
# URL routes of the movies API.
urlpatterns = [
    path('', views.getRoutes),
    path('movies/', getMovies.as_view(), name='movies'),
    path('movies/genres/', getGenres.as_view(), name='genres'),
    # The int converter only matches digits, so a non-numeric id is rejected
    # by the URL resolver (404) instead of reaching the integer `actor_id`
    # filter in getActors.get_queryset().
    path('actor_stats/<int:pk>', getActors.as_view(), name='actor_stats'),
]
serializer.py
from rest_framework import serializers
from movies.models import *
class MoviesSerializer(serializers.ModelSerializer):
    # Exposes every field of the Movies model.

    class Meta:
        model = Movies
        fields = '__all__'
class DirectorsSerializer(serializers.ModelSerializer):
    # Exposes every field of the Directors model.

    class Meta:
        model = Directors
        fields = '__all__'
class ActorsSerializer(serializers.ModelSerializer):
    # Exposes every field of the Actors model.

    class Meta:
        model = Actors
        fields = '__all__'
class DirectorsGenresSerializer(serializers.ModelSerializer):
    # Exposes every field of the DirectorsGenres model.

    class Meta:
        model = DirectorsGenres
        fields = '__all__'
class MoviesDirectorsSerializer(serializers.ModelSerializer):
    # Exposes every field of MoviesDirectors, with the movie and the director
    # rendered as nested objects through their own serializers.
    movie = MoviesSerializer(many=False)
    director = DirectorsSerializer(many=False)

    class Meta:
        model = MoviesDirectors
        fields = '__all__'
class MoviesGenresSerializer(serializers.ModelSerializer):
    # Exposes every field of MoviesGenres, with the movie rendered as a
    # nested object through MoviesSerializer.
    movie = MoviesSerializer(many=False)

    class Meta:
        model = MoviesGenres
        fields = '__all__'
class RolesSerializer(serializers.ModelSerializer):
    # Exposes every field of Roles, with the movie and the actor rendered as
    # nested objects through their own serializers.
    movie = MoviesSerializer(many=False)
    actor = ActorsSerializer(many=False)

    class Meta:
        model = Roles
        fields = '__all__'
views.py
class getMovies(ListAPIView):
    # Paginated list of movie/director links, filterable by the director's
    # first or last name.
    #
    # select_related() pulls the movie and director rows in with the main
    # query: MoviesDirectorsSerializer nests both objects, so without it each
    # serialized row would trigger two additional queries.
    #
    # No `director__in` filter is needed: `director` is a non-nullable
    # foreign key, so every link row already has a director.
    queryset = MoviesDirectors.objects.select_related('director', 'movie')
    serializer_class = MoviesDirectorsSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['director__first_name', 'director__last_name']
class getGenres(ListAPIView):
    # Paginated list of movie/genre rows, sorted by genre in descending order
    # and filterable by genre.
    #
    # select_related() pulls the movie row in with the main query:
    # MoviesGenresSerializer nests the movie, so without it each serialized
    # row would trigger an additional query.
    #
    # No `movie__in` filter is needed: `movie` is a non-nullable foreign key,
    # so every genre row already has a movie.
    queryset = MoviesGenres.objects.select_related('movie').order_by('-genre')
    serializer_class = MoviesGenresSerializer
    pagination_class = CustomPagination
    filter_backends = [DjangoFilterBackend]
    filterset_fields = ['genre']
class getActors(ListAPIView):
    # Paginated list of the roles of one actor, selected by the `pk` captured
    # from the URL.
    #
    # select_related() pulls the actor and movie rows in with the main query,
    # because the serializer reads the related objects for every row; without
    # it each serialized row would trigger additional queries.
    queryset = Roles.objects.select_related('actor', 'movie')
    serializer_class = RolesSerializer
    pagination_class = CustomPagination

    def get_queryset(self):
        # Narrow the base queryset to the actor named in the URL.
        actor_pk = self.kwargs['pk']
        return super().get_queryset().filter(actor_id=actor_pk)
Now I want to count the number of movies by genre that the actor with a specific pk played in, inside the `getActors` class, i.e. the number of movies per genre that the actor participated in, e.g. Drama: 2, Horror: 3.
Right now I am only getting the overall count of movies (`count: 2`):
GET /api/actor_stats/17
HTTP 200 OK
Allow: GET, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept
{
"count": 2,
"next": null,
"previous": null,
"results": [
{
"id": 800480,
"movie": {
"id": 105231,
"name": "Everybody's Business",
"year": 1993,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Grandfather"
},
{
"id": 800481,
"movie": {
"id": 242453,
"name": "OP Pro 88 - Barra Rio",
"year": 1988,
"rank": null
},
"actor": {
"id": 17,
"first_name": "Luis Roberto",
"last_name": "Formiga",
"gender": "M"
},
"role": "Himself"
}
]
}
What is the optimized way of achieving the following:
- number_of_movies_by_genre
- Drama: 2
- Horror: 3
UPDATE
class RolesSerializer(serializers.Serializer):
    # Per-actor statistics. Each field below is filled by the get_<field>
    # method of the same name, which receives the Roles row being
    # serialized as `obj`.
    id = serializers.SerializerMethodField()
    name = serializers.SerializerMethodField()
    top_genre = serializers.SerializerMethodField()
    number_of_movies = serializers.SerializerMethodField()
    number_of_movies_by_genre = serializers.SerializerMethodField()
    most_frequent_partner = serializers.SerializerMethodField()

    class Meta:
        # NOTE(review): the base class is a plain Serializer, not a
        # ModelSerializer -- confirm whether this Meta has any effect.
        model = Roles
        fields = '__all__'

    def get_id(self, obj):
        # Id of the actor behind this role row.
        return obj.actor.id

    def get_name(self, obj):
        # "<first_name> <last_name>" of the actor behind this role row.
        return f'{obj.actor.first_name} {obj.actor.last_name}'

    def get_top_genre(self, obj):
        # Group the actor's roles by movie genre and count the rows per genre.
        number_by_genre = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__movies_genres__genre').annotate(
                genre = F('movie__movies_genres__genre'),
                number_of_movies=Count('movie__movies_genres__genre'),
            )
        # Only the counts are kept from here on, so the returned list holds
        # the highest count (repeated once per tied genre), not genre names.
        data = [s['number_of_movies'] for s in number_by_genre]
        # NOTE(review): max() raises ValueError when `data` is empty.
        highest = max(data)
        result = [s for s in data if s == highest]
        return result

    def get_number_of_movies(self, obj):
        # Number of role rows of this actor.
        # NOTE(review): no distinct() here, so presumably two roles in the
        # same movie are counted twice -- confirm.
        number_of_movies = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__name').count()
        return number_of_movies

    def get_number_of_movies_by_genre(self, obj):
        # Group the actor's roles by movie genre and count the rows per
        # genre; the final values() limits each entry to 'genre' and
        # 'number_of_movies'.
        number_of_movies_by_genre = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__movies_genres__genre').annotate(
                genre=F('movie__movies_genres__genre'),
                number_of_movies=Count('movie__movies_genres__genre'),
            ).values('genre', 'number_of_movies')
        return number_of_movies_by_genre

    def get_most_frequent_partner(self, obj):
        # Ids of the movies this actor has a role in.
        partners = Roles.objects.filter(actor = obj.actor.id
            ).values('movie__id')
        # Roles of the other actors in those movies, grouped per actor with
        # the number of matching role rows.
        # NOTE(review): every co-actor is returned with its count; nothing
        # here selects the most frequent one.
        result = Roles.objects.filter(movie__in = partners
            ).values('actor').exclude(actor=obj.actor.id).annotate(
                partner_actor_id = F('actor'),
                partner_actor_name = Concat(F('actor__first_name'), Value(' '), F('actor__last_name')),
                number_of_shared_movies =Count('actor'),
            ).values('partner_actor_id', 'partner_actor_name', 'number_of_shared_movies')
        return result
The problem with that code is that it repeats the results once per movie: for instance, if the actor has 5 movies, the results are repeated 5 times. Another issue: in order to get `top_genre` and `most_frequent_partner` I’m using `max()`, but then I only get the numbers and not the actual genre name (in `top_genre`) or the actor name (in `most_frequent_partner`). I use `max()` in this way so that I can get more than one value. For instance, in `top_genre`, if the actor has 3 Drama, 3 Comedy, 1 Horror and 1 Documentary, I get the max as `[3, 3]`, but how can I get the actual names out of these results? The same goes for `most_frequent_partner`.
The results look like this so far:
{
"next": null,
"previous": null,
"count": 4,
"pagenum": null,
"results": [
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
},
{
"id": 36,
"name": "Benjamin 2X",
"top_genre": [
2,
2
],
"number_of_movies": 4,
"number_of_movies_by_genre": [
{
"movie__movies_genres__genre": null,
"genre": null,
"number_of_movies": 0
},
{
"movie__movies_genres__genre": "Documentary",
"genre": "Documentary",
"number_of_movies": 2
},
{
"movie__movies_genres__genre": "Music",
"genre": "Music",
"number_of_movies": 2
}
],
"most_frequent_partner": []
}
]
}
What I want to see in the end:
{
"next": null,
"previous": null,
"count": 2,
"results": [
{
"id": 18 (actor_id),
"name": Bruce Buffer (actor_name),
"number of movies": 2,
"top genre": Drama, Documentary,
"number of movies by genre": Drama: 1, Documentary: 1,
"most frequent partner": partner_actor_id, partner_actor_name, number_of_shared_movies,
}
]
}
There are many ways to implement this route; which one is right depends on many criteria and on how much it will be used.
I think a correct way is to create a dedicated model that stores the actor stats, with a one-to-one relation to the actor, and to recompute the values each time a movie is added. But if you add movies often, it could slow down your database.
You can also accept having somewhat outdated data for a while and update the table regularly using a background job, possibly with a custom SQL query that gives you better performance (bulk update).
I would start from your models: you have genres defined as a `CharField` in two of them. Because the genres are not isolated anywhere, you need to look in both tables to find all the genres. If you do not, you are just assuming that every genre in one table is also present in the other, which may not be true.
Also, querying string fields is not very efficient compared to an integer PK, so from a scaling point of view this is bad. (Of course, I am saying this in general, as a good practice, and not specifically about movie genres.)
Your best option would be to have either a `Genre` model or a choices field where you define all possible genres.
As for the counting, you would do that inside your serializer class, using a `SerializerMethodField`.
If you want the number of movies by genre for a given actor, you can use `annotate()` with a `Count` aggregate:
# Group the given actor's roles by movie genre and count the rows per genre.
return Roles.objects.filter(
    actor_id=self.kwargs['pk']
).values('movie__movies_genres__genre').annotate(
    no_of_movies=Count('movie__movies_genres__genre'),
    genre=F('movie__movies_genres__genre'),
)
Here we first filter the roles for the given actor; then `values()` groups them by genre; then the annotation is computed over all members of each group, which gives the count and the genre. You can use a `SerializerMethodField` to expose these calculated results.
If you have a huge dataset it will not perform well, but you can create indexes accordingly; it will still cost you 2-3 queries.
You can learn more from the Django QuerySet API documentation.