How to enforce reads from a read replica in Postgres using Django and/or Flask?
Question:
Is enforcing reads from a read replica an application-layer task?
i.e. I have a Postgres database and I have set up a read replica.
On the application side I have two connections one for the “write” database and one for the “read replica” database.
In my code if I do a “read” action I use the connection to the read replica. But when I go to insert or update I use the connection to the “write” database a.k.a. master.
Is there a better way with Django or Flask, so that this is managed automatically? I.e.
I would prefer to avoid specifying directly in code the connection to use and just have django or flask figure it out on their own.
Answers:
Django
For this purpose django supports so called Database routers.
First create your custom router:
class CustomRouter:
    """Database router that sends reads to the replica and writes to the primary.

    The aliases returned here must be keys of ``settings.DATABASES``
    (``'primary'`` and ``'replica1'`` in the accompanying settings).
    """

    # Aliases that all hold the same data, so objects loaded from any of
    # them may be related to each other.
    _ALIASES = frozenset({'primary', 'replica1'})

    def db_for_read(self, model, **hints):
        """Return the alias of the database to use for read queries."""
        return 'replica1'

    def db_for_write(self, model, **hints):
        """Return the alias of the database to use for write queries."""
        return 'primary'

    def allow_relation(self, obj1, obj2, **hints):
        """Allow relations between objects loaded from the primary or the replica.

        Returns ``True`` when both objects come from one of the known
        aliases, and ``None`` (no opinion) otherwise.
        """
        if obj1._state.db in self._ALIASES and obj2._state.db in self._ALIASES:
            return True
        return None
And configure django orm to use it like that.
# 'default' is intentionally left empty, so every query has to be routed to
# one of the named aliases by the routers listed in DATABASE_ROUTERS.
# USER/PASSWORD values are placeholders; replace them with real credentials.
DATABASES = {
    'default': {},
    # Read-write (primary) database.
    'primary': {
        'NAME': 'master',
        # The question targets Postgres, so use the PostgreSQL backend.
        'ENGINE': 'django.db.backends.postgresql',
        'USER': 'mysql_user',
        'PASSWORD': 'spam',
    },
    # Read-only replica.
    'replica1': {
        'NAME': 'replica',
        'ENGINE': 'django.db.backends.postgresql',
        'USER': 'mysql_user',
        'PASSWORD': 'eggs',
    },
}
# Dotted path(s) of the router classes Django consults for each query.
DATABASE_ROUTERS = ['path.to.CustomRouter']
The sample code was taken from the docs (it is worth reading!) and slightly adjusted.
SQLAlchemy (flask)
I went through the SQLAlchemy docs and found a link to this article, which describes how to implement Django's database router approach with SQLAlchemy.
You can use a custom session here to implement this properly.
The following snippets are picked from the linked article and are slightly adjusted.
Create your engines:
# One engine per role; the keys are the names the routing session looks up.
engines = dict(
    master=create_engine(
        'postgresql://user:***@localhost:5432/master',
        logging_name='master',
    ),
    replica=create_engine(
        'postgresql://user:***@localhost:5432/replica',
        logging_name='replica',
    ),
)
Create a custom session class:
class RoutingSession(Session):
    """Session that binds writes to the master engine and reads to the replica."""

    def get_bind(self, mapper=None, clause=None, **kw):
        """Pick the engine for the current operation.

        Flushes (ORM inserts/updates/deletes) and explicit Core
        ``Insert``/``Update``/``Delete`` statements go to
        ``engines['master']``; everything else goes to
        ``engines['replica']``.

        ``**kw`` absorbs the extra keyword arguments that newer SQLAlchemy
        versions pass to ``get_bind``.
        """
        # Imported here so the snippet needs no extra module-level imports.
        from sqlalchemy.sql.expression import Delete, Insert, Update

        if self._flushing or isinstance(clause, (Insert, Update, Delete)):
            return engines['master']
        return engines['replica']
And create your session like this:
# scoped_session registry whose sessions are RoutingSession instances.
# Note: this rebinds the name ``Session``, which RoutingSession uses as its
# base class, so RoutingSession must already be defined when this line runs.
# NOTE(review): autocommit=True is a legacy SQLAlchemy option (deprecated in
# 1.4 and reportedly removed in 2.0) - confirm the installed version supports it.
Session = scoped_session(sessionmaker(class_=RoutingSession, autocommit=True))
Read the article for details and limitations.
Here is a concept solution for Flask, with some changes:
"""This is not the full code. We do a lot of stuff to clean up connections, particularly for unit testing."""
import random
from collections import defaultdict
from contextlib import contextmanager
from typing import Dict, Generator, List, Union

import sqlalchemy
from sqlalchemy.orm import Query, Session, scoped_session, sessionmaker
# Name of the config entry holding additional read-write binds
# (bind name -> connection URL).
CONFIG_KEY_SQLALCHEMY_BINDS = 'SQLALCHEMY_BINDS'
# Name of the config entry holding read-only (replica) binds
# (bind name -> connection URL).
CONFIG_KEY_SQLALCHEMY_RO_BINDS = 'SQLALCHEMY_READ_ONLY_BINDS'
class Config:
    """Connection settings read by DBSessionFactory."""

    # These default values are for testing. In a deployed environment, they would be three separate instances.
    SQLALCHEMY_DATABASE_URI = 'postgresql://localhost/branded_dev'
    # Additional read-write binds (bind name -> connection URL); none by default.
    SQLALCHEMY_BINDS = {}
    # Read-only binds (bind name -> connection URL).
    SQLALCHEMY_READ_ONLY_BINDS = {
        'replica': 'postgresql://localhost/branded_dev',
        'replica_analytics': 'postgresql://localhost/branded_dev',
    }
    # Engine options forwarded to sqlalchemy.create_engine. The values are
    # meant to match SQLAlchemy's documented defaults - tune per deployment.
    SQLALCHEMY_POOL_SIZE = 5
    SQLALCHEMY_POOL_RECYCLE = -1
    SQLALCHEMY_ECHO = False
    SQLALCHEMY_POOL_PRE_PING = False
class DBSessionFactory:
    """
    A wrapper for getting db sessions from the primary and read replicas.

    Call :meth:`register` with a config object before requesting sessions.
    A read-write connection holds one engine per name; a read-only
    connection may hold several engines per name, and
    :meth:`read_only_session` picks one of their sessions at random.
    """

    def __init__(self) -> None:
        """Start with empty registries so lookups before register() fail cleanly."""
        self._reset_registries()

    def _reset_registries(self) -> None:
        """Replace every registry with a fresh, empty container."""
        self.engines = dict()  # type: Dict[str, Engine]
        self.read_only_engines = defaultdict(list)  # type: Dict[str, List[Engine]]
        # The session factories to be used by scoped_session to connect
        self.session_factories = dict()  # type: Dict[str, sessionmaker]
        # The scoped sessions for each connection.
        self.scoped_sessions = dict()  # type: Dict[str, scoped_session]
        # The scoped sessions for each read only connection.
        self.read_only_scoped_sessions = defaultdict(list)  # type: Dict[str, List[scoped_session]]

    def register(self, config: "Config") -> None:
        """
        (Re)build all engines and sessions from ``config``.

        :param config: object exposing SQLALCHEMY_DATABASE_URI and, optionally,
            the bind mappings named by CONFIG_KEY_SQLALCHEMY_BINDS and
            CONFIG_KEY_SQLALCHEMY_RO_BINDS as attributes
        """
        self._reset_registries()
        # The primary connection
        self.add_engine(
            'primary', config.SQLALCHEMY_DATABASE_URI, config=config
        )
        # Other read-write dbs. Read by attribute like every other setting;
        # a missing or empty mapping simply registers nothing.
        read_write_binds = getattr(config, CONFIG_KEY_SQLALCHEMY_BINDS, None) or {}
        for name, connect_url in read_write_binds.items():
            self.add_engine(name, connect_url, config=config)
        # Read replica binds
        read_only_binds = getattr(config, CONFIG_KEY_SQLALCHEMY_RO_BINDS, None) or {}
        for name, connect_url in read_only_binds.items():
            self.add_engine(name, connect_url, config=config, read_only=True)

    def add_engine(self, name: "DBInstance", uri: "Union[str, List[str]]", config: "Config",
                   read_only=False) -> None:
        """
        Initialize a database connection and register it in the appropriate internal dicts.

        :param name: key under which the connection is registered
        :param uri: a connection URL, or a list of URLs (one engine is created per URL)
        :param config: settings forwarded to :meth:`_create_engine`
        :param read_only: register under the read-only registries instead of the read-write ones
        """
        # Clean up existing engine if present
        if self.engines.get(name) or self.read_only_engines.get(name):
            self.session_factories[name].close_all()
        uris = uri if isinstance(uri, list) else [uri]
        engines = [self._create_engine(u, config) for u in uris]
        for engine in engines:
            # session_factories keeps only the factory of the last engine for this name.
            self.session_factories[name] = sessionmaker(bind=engine, expire_on_commit=False)
            scoped_session_instance = scoped_session(self.session_factories[name])
            if read_only:
                self.read_only_engines[name].append(engine)
                self.read_only_scoped_sessions[name].append(scoped_session_instance)
            else:
                self.engines[name] = engine
                self.scoped_sessions[name] = scoped_session_instance

    def _create_engine(self, url: str, config: "Config"):  # pylint: disable=no-self-use
        """
        Wrapper to set up our connections.

        :param url: connection URL
        :param config: object providing the SQLALCHEMY_POOL_SIZE, SQLALCHEMY_POOL_RECYCLE,
            SQLALCHEMY_ECHO and SQLALCHEMY_POOL_PRE_PING attributes
        :return: the newly created engine
        """
        engine = sqlalchemy.create_engine(
            url,
            pool_size=config.SQLALCHEMY_POOL_SIZE,
            pool_recycle=config.SQLALCHEMY_POOL_RECYCLE,
            echo=config.SQLALCHEMY_ECHO,
            pool_pre_ping=config.SQLALCHEMY_POOL_PRE_PING
        )
        # The caller stores the result, so the engine must be returned.
        return engine

    def raw_scoped_session(self, engine: "DBInstance" = None) -> "scoped_session":
        """
        Return the scoped session registered for a read-write connection.

        NOTE(review): session() calls this helper, but it was not part of the
        published excerpt. This is a minimal reconstruction that treats
        ``None`` as the 'primary' connection - confirm against the full code.

        :param engine: connection to use; ``None`` selects 'primary'
        :return: the scoped_session registered under that name
        :raises DBConfigurationException: if no such connection is registered
        """
        name = 'primary' if engine is None else engine
        if name in self.scoped_sessions:
            return self.scoped_sessions[name]
        raise DBConfigurationException(
            "Requested session for '{}', which is not bound in this app. Try: [{}]".
            format(name, ','.join(str(key) for key in self.scoped_sessions))
        )

    @contextmanager
    def session(self, engine: "DBInstance" = None) -> "Generator[scoped_session, None, None]":
        """
        Generate a session and yield it out.

        After resuming, commit, unless an exception happens,
        in which case we roll back.

        :param engine: connection to use
        :return: a generator for a scoped session
        """
        session = self.raw_scoped_session(engine)
        try:
            yield session
            session.commit()
        except BaseException:
            # Same reach as a bare ``except``: roll back on any exit, then re-raise.
            session.rollback()
            raise
        finally:
            session.remove()

    def read_only_session(self, engine: str = None) -> "scoped_session":
        """
        Return a session for a read-only db.

        :param engine: connection to use
        :return: a Session via scoped_session, chosen at random among the
            sessions registered under that name
        :raises DBConfigurationException: if ``engine`` is not a registered read-only connection
        """
        if engine in self.read_only_engines:
            return random.choice(self.read_only_scoped_sessions[engine])
        raise DBConfigurationException(
            "Requested session for '{}', which is not bound in this app. Try: [{}]".
            format(engine, ','.join(self.read_only_engines.keys()))
        )
# The global db factory instance.
# Its registries are populated by register(); call that with a config before
# requesting sessions.
db = DBSessionFactory()
https://gist.github.com/jasonwalkeryung/5133383d66782461cdc3b4607ae35d98
Is enforcing reads from a read replica an application-layer task?
i.e. I have a Postgres database and I have set up a read replica.
On the application side I have two connections one for the “write” database and one for the “read replica” database.
In my code if I do a “read” action I use the connection to the read replica. But when I go to insert or update I use the connection to the “write” database a.k.a. master.
Is there a better way with Django or Flask, so that this is managed automatically? I.e.
I would prefer to avoid specifying directly in code the connection to use and just have django or flask figure it out on their own.
Django
For this purpose django supports so called Database routers.
First create your custom router:
class CustomRouter:
    """Database router that sends reads to the replica and writes to the primary.

    The aliases returned here must be keys of ``settings.DATABASES``
    (``'primary'`` and ``'replica1'`` in the accompanying settings).
    """

    # Aliases that all hold the same data, so objects loaded from any of
    # them may be related to each other.
    _ALIASES = frozenset({'primary', 'replica1'})

    def db_for_read(self, model, **hints):
        """Return the alias of the database to use for read queries."""
        return 'replica1'

    def db_for_write(self, model, **hints):
        """Return the alias of the database to use for write queries."""
        return 'primary'

    def allow_relation(self, obj1, obj2, **hints):
        """Allow relations between objects loaded from the primary or the replica.

        Returns ``True`` when both objects come from one of the known
        aliases, and ``None`` (no opinion) otherwise.
        """
        if obj1._state.db in self._ALIASES and obj2._state.db in self._ALIASES:
            return True
        return None
And configure django orm to use it like that.
# 'default' is intentionally left empty, so every query has to be routed to
# one of the named aliases by the routers listed in DATABASE_ROUTERS.
# USER/PASSWORD values are placeholders; replace them with real credentials.
DATABASES = {
    'default': {},
    # Read-write (primary) database.
    'primary': {
        'NAME': 'master',
        # The question targets Postgres, so use the PostgreSQL backend.
        'ENGINE': 'django.db.backends.postgresql',
        'USER': 'mysql_user',
        'PASSWORD': 'spam',
    },
    # Read-only replica.
    'replica1': {
        'NAME': 'replica',
        'ENGINE': 'django.db.backends.postgresql',
        'USER': 'mysql_user',
        'PASSWORD': 'eggs',
    },
}
# Dotted path(s) of the router classes Django consults for each query.
DATABASE_ROUTERS = ['path.to.CustomRouter']
The sample code was taken from the docs (it is worth reading!) and slightly adjusted.
SQLAlchemy (flask)
I went through the SQLAlchemy docs and found a link to this article, which describes how to implement Django's database router approach with SQLAlchemy.
You can use a custom session here to implement this properly.
The following snippets are picked from the linked article and are slightly adjusted.
Create your engines:
# One engine per role; the keys are the names the routing session looks up.
engines = dict(
    master=create_engine(
        'postgresql://user:***@localhost:5432/master',
        logging_name='master',
    ),
    replica=create_engine(
        'postgresql://user:***@localhost:5432/replica',
        logging_name='replica',
    ),
)
Create a custom session class:
class RoutingSession(Session):
    """Session that binds writes to the master engine and reads to the replica."""

    def get_bind(self, mapper=None, clause=None, **kw):
        """Pick the engine for the current operation.

        Flushes (ORM inserts/updates/deletes) and explicit Core
        ``Insert``/``Update``/``Delete`` statements go to
        ``engines['master']``; everything else goes to
        ``engines['replica']``.

        ``**kw`` absorbs the extra keyword arguments that newer SQLAlchemy
        versions pass to ``get_bind``.
        """
        # Imported here so the snippet needs no extra module-level imports.
        from sqlalchemy.sql.expression import Delete, Insert, Update

        if self._flushing or isinstance(clause, (Insert, Update, Delete)):
            return engines['master']
        return engines['replica']
And create your session like this:
# scoped_session registry whose sessions are RoutingSession instances.
# Note: this rebinds the name ``Session``, which RoutingSession uses as its
# base class, so RoutingSession must already be defined when this line runs.
# NOTE(review): autocommit=True is a legacy SQLAlchemy option (deprecated in
# 1.4 and reportedly removed in 2.0) - confirm the installed version supports it.
Session = scoped_session(sessionmaker(class_=RoutingSession, autocommit=True))
Read the article for details and limitations.
Here is a concept solution for Flask, with some changes:
"""This is not the full code. We do a lot of stuff to clean up connections, particularly for unit testing."""
import sqlalchemy
from sqlalchemy.orm import Query, Session, scoped_session, sessionmaker
# Name of the config entry holding additional read-write binds
# (bind name -> connection URL).
CONFIG_KEY_SQLALCHEMY_BINDS = 'SQLALCHEMY_BINDS'
# Name of the config entry holding read-only (replica) binds
# (bind name -> connection URL).
CONFIG_KEY_SQLALCHEMY_RO_BINDS = 'SQLALCHEMY_READ_ONLY_BINDS'
class Config:
    """Connection settings read by DBSessionFactory."""

    # These default values are for testing. In a deployed environment, they would be three separate instances.
    SQLALCHEMY_DATABASE_URI = 'postgresql://localhost/branded_dev'
    # Additional read-write binds (bind name -> connection URL); none by default.
    SQLALCHEMY_BINDS = {}
    # Read-only binds (bind name -> connection URL).
    SQLALCHEMY_READ_ONLY_BINDS = {
        'replica': 'postgresql://localhost/branded_dev',
        'replica_analytics': 'postgresql://localhost/branded_dev',
    }
    # Engine options forwarded to sqlalchemy.create_engine. The values are
    # meant to match SQLAlchemy's documented defaults - tune per deployment.
    SQLALCHEMY_POOL_SIZE = 5
    SQLALCHEMY_POOL_RECYCLE = -1
    SQLALCHEMY_ECHO = False
    SQLALCHEMY_POOL_PRE_PING = False
class DBSessionFactory:
    """
    A wrapper for getting db sessions from the primary and read replicas.

    Call :meth:`register` with a config object before requesting sessions.
    A read-write connection holds one engine per name; a read-only
    connection may hold several engines per name, and
    :meth:`read_only_session` picks one of their sessions at random.
    """

    def __init__(self) -> None:
        """Start with empty registries so lookups before register() fail cleanly."""
        self._reset_registries()

    def _reset_registries(self) -> None:
        """Replace every registry with a fresh, empty container."""
        self.engines = dict()  # type: Dict[str, Engine]
        self.read_only_engines = defaultdict(list)  # type: Dict[str, List[Engine]]
        # The session factories to be used by scoped_session to connect
        self.session_factories = dict()  # type: Dict[str, sessionmaker]
        # The scoped sessions for each connection.
        self.scoped_sessions = dict()  # type: Dict[str, scoped_session]
        # The scoped sessions for each read only connection.
        self.read_only_scoped_sessions = defaultdict(list)  # type: Dict[str, List[scoped_session]]

    def register(self, config: "Config") -> None:
        """
        (Re)build all engines and sessions from ``config``.

        :param config: object exposing SQLALCHEMY_DATABASE_URI and, optionally,
            the bind mappings named by CONFIG_KEY_SQLALCHEMY_BINDS and
            CONFIG_KEY_SQLALCHEMY_RO_BINDS as attributes
        """
        self._reset_registries()
        # The primary connection
        self.add_engine(
            'primary', config.SQLALCHEMY_DATABASE_URI, config=config
        )
        # Other read-write dbs. Read by attribute like every other setting;
        # a missing or empty mapping simply registers nothing.
        read_write_binds = getattr(config, CONFIG_KEY_SQLALCHEMY_BINDS, None) or {}
        for name, connect_url in read_write_binds.items():
            self.add_engine(name, connect_url, config=config)
        # Read replica binds
        read_only_binds = getattr(config, CONFIG_KEY_SQLALCHEMY_RO_BINDS, None) or {}
        for name, connect_url in read_only_binds.items():
            self.add_engine(name, connect_url, config=config, read_only=True)

    def add_engine(self, name: "DBInstance", uri: "Union[str, List[str]]", config: "Config",
                   read_only=False) -> None:
        """
        Initialize a database connection and register it in the appropriate internal dicts.

        :param name: key under which the connection is registered
        :param uri: a connection URL, or a list of URLs (one engine is created per URL)
        :param config: settings forwarded to :meth:`_create_engine`
        :param read_only: register under the read-only registries instead of the read-write ones
        """
        # Clean up existing engine if present
        if self.engines.get(name) or self.read_only_engines.get(name):
            self.session_factories[name].close_all()
        uris = uri if isinstance(uri, list) else [uri]
        engines = [self._create_engine(u, config) for u in uris]
        for engine in engines:
            # session_factories keeps only the factory of the last engine for this name.
            self.session_factories[name] = sessionmaker(bind=engine, expire_on_commit=False)
            scoped_session_instance = scoped_session(self.session_factories[name])
            if read_only:
                self.read_only_engines[name].append(engine)
                self.read_only_scoped_sessions[name].append(scoped_session_instance)
            else:
                self.engines[name] = engine
                self.scoped_sessions[name] = scoped_session_instance

    def _create_engine(self, url: str, config: "Config"):  # pylint: disable=no-self-use
        """
        Wrapper to set up our connections.

        :param url: connection URL
        :param config: object providing the SQLALCHEMY_POOL_SIZE, SQLALCHEMY_POOL_RECYCLE,
            SQLALCHEMY_ECHO and SQLALCHEMY_POOL_PRE_PING attributes
        :return: the newly created engine
        """
        engine = sqlalchemy.create_engine(
            url,
            pool_size=config.SQLALCHEMY_POOL_SIZE,
            pool_recycle=config.SQLALCHEMY_POOL_RECYCLE,
            echo=config.SQLALCHEMY_ECHO,
            pool_pre_ping=config.SQLALCHEMY_POOL_PRE_PING
        )
        # The caller stores the result, so the engine must be returned.
        return engine

    def raw_scoped_session(self, engine: "DBInstance" = None) -> "scoped_session":
        """
        Return the scoped session registered for a read-write connection.

        NOTE(review): session() calls this helper, but it was not part of the
        published excerpt. This is a minimal reconstruction that treats
        ``None`` as the 'primary' connection - confirm against the full code.

        :param engine: connection to use; ``None`` selects 'primary'
        :return: the scoped_session registered under that name
        :raises DBConfigurationException: if no such connection is registered
        """
        name = 'primary' if engine is None else engine
        if name in self.scoped_sessions:
            return self.scoped_sessions[name]
        raise DBConfigurationException(
            "Requested session for '{}', which is not bound in this app. Try: [{}]".
            format(name, ','.join(str(key) for key in self.scoped_sessions))
        )

    @contextmanager
    def session(self, engine: "DBInstance" = None) -> "Generator[scoped_session, None, None]":
        """
        Generate a session and yield it out.

        After resuming, commit, unless an exception happens,
        in which case we roll back.

        :param engine: connection to use
        :return: a generator for a scoped session
        """
        session = self.raw_scoped_session(engine)
        try:
            yield session
            session.commit()
        except BaseException:
            # Same reach as a bare ``except``: roll back on any exit, then re-raise.
            session.rollback()
            raise
        finally:
            session.remove()

    def read_only_session(self, engine: str = None) -> "scoped_session":
        """
        Return a session for a read-only db.

        :param engine: connection to use
        :return: a Session via scoped_session, chosen at random among the
            sessions registered under that name
        :raises DBConfigurationException: if ``engine`` is not a registered read-only connection
        """
        if engine in self.read_only_engines:
            return random.choice(self.read_only_scoped_sessions[engine])
        raise DBConfigurationException(
            "Requested session for '{}', which is not bound in this app. Try: [{}]".
            format(engine, ','.join(self.read_only_engines.keys()))
        )
# The global db factory instance.
# Its registries are populated by register(); call that with a config before
# requesting sessions.
db = DBSessionFactory()
https://gist.github.com/jasonwalkeryung/5133383d66782461cdc3b4607ae35d98