How to make PostgreSQL COPY FROM STDIN WITH CSV do ON CONFLICT DO UPDATE?
Question:
I want to do
"on conflict (time) do update set name, description"
but when I load the data with COPY ... FROM STDIN WITH CSV, I don't know what to put on the right-hand side: what should name be set to, and what should description be set to?
table_a:
xxx.csv:
with open('xxx/xxx.csv', 'r', encoding='utf8') as f:
sql = """
COPY table_a FROM STDIN With CSV on conflict (time)
do update set name=??, description=??;
"""
cur.copy_expert(sql, f)
conn.commit()
Answers:
https://www.postgresql.org/docs/current/static/sql-copy.html
There is no COPY ... ON CONFLICT DO statement in Postgres.
https://www.postgresql.org/docs/current/static/sql-insert.html
Only INSERT ... ON CONFLICT DO is supported.
Thanks to everyone for the solutions.
This is my solution:
# Upsert via a staging table: COPY cannot resolve conflicts itself, so the CSV
# is first loaded into temp_h and then moved into table_a with
# INSERT ... ON CONFLICT. In the DO UPDATE clause, EXCLUDED.<column> refers to
# the value of the incoming row that collided on (time).
# NOTE(review): the column definitions in CREATE TABLE list no data types;
# presumably they were omitted for brevity and must match table_a -- confirm
# and fill them in before running.
sql = """
CREATE TABLE temp_h (
time ,
name,
description
);
COPY temp_h FROM STDIN With CSV;
INSERT INTO table_a(time, name, description)
SELECT *
FROM temp_h ON conflict (time)
DO update set name=EXCLUDED.name, description=EXCLUDED.description;
DROP TABLE temp_h;
"""
In this SO post, there are two answers that, combined together, provide a nice solution for successfully using ON CONFLICT. The example below uses ON CONFLICT DO NOTHING:
BEGIN;
CREATE TEMP TABLE tmp_table
(LIKE main_table INCLUDING DEFAULTS)
ON COMMIT DROP;
COPY tmp_table FROM 'full/file/name/here';
INSERT INTO main_table
SELECT *
FROM tmp_table
ON CONFLICT DO NOTHING;
COMMIT;
Replace both instances of main_table with the name of your table.
I’ve managed to accomplish a bulk upsert with the following function (suggestions are welcome):
import io
from sqlalchemy.engine import Engine
from sqlalchemy.ext import declarative_base
BaseModel = declarative_base()
def upsert_bulk(
    engine: Engine,
    model: BaseModel,
    data: io.StringIO,
    sep: str = "\t",
) -> None:
    """
    Fast way to upsert multiple entries at once

    The rows are streamed into a temporary table with ``COPY`` and then moved
    into the model's table with ``INSERT ... ON CONFLICT (id)``, so existing
    rows (matched on ``id``) are updated and new rows are inserted.

    :param `engine`: SQLAlchemy engine; a raw DBAPI connection is taken from it
    :param `model`: declarative model whose table receives the rows
    :param `data`: delimited text in a stream object, one row per line, with
        fields in the same order as the table's columns
    :param `sep`: field delimiter passed to ``copy_from``. Defaults to a tab,
        which is ``copy_from``'s own default; pass ``","`` for simple
        comma-separated data. NOTE: ``copy_from`` uses PostgreSQL's text
        format, so quoted CSV fields are not interpreted.
    """
    table_name = model.__tablename__
    temp_table_name = f"temp_{table_name}"

    columns = [c.key for c in model.__table__.columns]
    column_list = ", ".join(columns)

    # Select only columns to be updated (all non-id columns)
    variable_columns = [c for c in columns if c != "id"]

    # An empty "DO UPDATE SET" is invalid SQL, so a table that has nothing but
    # the id column falls back to skipping conflicting rows.
    if variable_columns:
        update_set = ", ".join(f"{v}=EXCLUDED.{v}" for v in variable_columns)
        conflict_action = f"DO UPDATE SET {update_set}"
    else:
        conflict_action = "DO NOTHING"

    # Rewind data and prepare it for `copy_from`
    data.seek(0)

    # `copy_from` is a DBAPI (psycopg2) cursor feature, so a raw connection is
    # needed instead of a SQLAlchemy Connection.
    conn = engine.raw_connection()
    try:
        with conn.cursor() as cur:
            # Create a temporary empty table with the same columns and types
            # as the final table. Identifiers come from the model metadata,
            # not from user input, so they are interpolated directly.
            cur.execute(
                f"""
                CREATE TEMPORARY TABLE {temp_table_name} (LIKE {table_name})
                ON COMMIT DROP
                """
            )

            # Copy stream data to the created temporary table in DB
            cur.copy_from(data, temp_table_name, sep=sep)

            # Insert copied data from the temporary table into the final
            # table, updating existing values at each conflict. Columns are
            # named explicitly so the mapping never depends on `SELECT *`
            # ordering.
            cur.execute(
                f"""
                INSERT INTO {table_name}({column_list})
                SELECT {column_list} FROM {temp_table_name}
                ON CONFLICT (id) {conflict_action}
                """
            )

            # Explicit drop kept from the original author, who observed table
            # sizes growing without it even though ON COMMIT DROP is set.
            cur.execute(f"DROP TABLE {temp_table_name}")

        conn.commit()
    except Exception:
        # Leave the connection clean before it is released, then re-raise.
        conn.rollback()
        raise
    finally:
        conn.close()
I want to do
"on conflict (time) do update set name, description"
but when I load the data with COPY ... FROM STDIN WITH CSV, I don't know what to put on the right-hand side: what should name be set to, and what should description be set to?
table_a:
xxx.csv:
with open('xxx/xxx.csv', 'r', encoding='utf8') as f:
sql = """
COPY table_a FROM STDIN With CSV on conflict (time)
do update set name=??, description=??;
"""
cur.copy_expert(sql, f)
conn.commit()
https://www.postgresql.org/docs/current/static/sql-copy.html
There is no COPY ... ON CONFLICT DO statement in Postgres.
https://www.postgresql.org/docs/current/static/sql-insert.html
Only INSERT ... ON CONFLICT DO is supported.
Thanks to everyone for the solutions.
This is my solution:
# Upsert via a staging table: COPY cannot resolve conflicts itself, so the CSV
# is first loaded into temp_h and then moved into table_a with
# INSERT ... ON CONFLICT. In the DO UPDATE clause, EXCLUDED.<column> refers to
# the value of the incoming row that collided on (time).
# NOTE(review): the column definitions in CREATE TABLE list no data types;
# presumably they were omitted for brevity and must match table_a -- confirm
# and fill them in before running.
sql = """
CREATE TABLE temp_h (
time ,
name,
description
);
COPY temp_h FROM STDIN With CSV;
INSERT INTO table_a(time, name, description)
SELECT *
FROM temp_h ON conflict (time)
DO update set name=EXCLUDED.name, description=EXCLUDED.description;
DROP TABLE temp_h;
"""
In this SO post, there are two answers that, combined together, provide a nice solution for successfully using ON CONFLICT. The example below uses ON CONFLICT DO NOTHING:
BEGIN;
CREATE TEMP TABLE tmp_table
(LIKE main_table INCLUDING DEFAULTS)
ON COMMIT DROP;
COPY tmp_table FROM 'full/file/name/here';
INSERT INTO main_table
SELECT *
FROM tmp_table
ON CONFLICT DO NOTHING;
COMMIT;
Replace both instances of main_table with the name of your table.
I’ve managed to accomplish a bulk upsert with the following function (suggestions are welcome):
import io
from sqlalchemy.engine import Engine
from sqlalchemy.ext import declarative_base
BaseModel = declarative_base()
def upsert_bulk(
    engine: Engine,
    model: BaseModel,
    data: io.StringIO,
    sep: str = "\t",
) -> None:
    """
    Fast way to upsert multiple entries at once

    The rows are streamed into a temporary table with ``COPY`` and then moved
    into the model's table with ``INSERT ... ON CONFLICT (id)``, so existing
    rows (matched on ``id``) are updated and new rows are inserted.

    :param `engine`: SQLAlchemy engine; a raw DBAPI connection is taken from it
    :param `model`: declarative model whose table receives the rows
    :param `data`: delimited text in a stream object, one row per line, with
        fields in the same order as the table's columns
    :param `sep`: field delimiter passed to ``copy_from``. Defaults to a tab,
        which is ``copy_from``'s own default; pass ``","`` for simple
        comma-separated data. NOTE: ``copy_from`` uses PostgreSQL's text
        format, so quoted CSV fields are not interpreted.
    """
    table_name = model.__tablename__
    temp_table_name = f"temp_{table_name}"

    columns = [c.key for c in model.__table__.columns]
    column_list = ", ".join(columns)

    # Select only columns to be updated (all non-id columns)
    variable_columns = [c for c in columns if c != "id"]

    # An empty "DO UPDATE SET" is invalid SQL, so a table that has nothing but
    # the id column falls back to skipping conflicting rows.
    if variable_columns:
        update_set = ", ".join(f"{v}=EXCLUDED.{v}" for v in variable_columns)
        conflict_action = f"DO UPDATE SET {update_set}"
    else:
        conflict_action = "DO NOTHING"

    # Rewind data and prepare it for `copy_from`
    data.seek(0)

    # `copy_from` is a DBAPI (psycopg2) cursor feature, so a raw connection is
    # needed instead of a SQLAlchemy Connection.
    conn = engine.raw_connection()
    try:
        with conn.cursor() as cur:
            # Create a temporary empty table with the same columns and types
            # as the final table. Identifiers come from the model metadata,
            # not from user input, so they are interpolated directly.
            cur.execute(
                f"""
                CREATE TEMPORARY TABLE {temp_table_name} (LIKE {table_name})
                ON COMMIT DROP
                """
            )

            # Copy stream data to the created temporary table in DB
            cur.copy_from(data, temp_table_name, sep=sep)

            # Insert copied data from the temporary table into the final
            # table, updating existing values at each conflict. Columns are
            # named explicitly so the mapping never depends on `SELECT *`
            # ordering.
            cur.execute(
                f"""
                INSERT INTO {table_name}({column_list})
                SELECT {column_list} FROM {temp_table_name}
                ON CONFLICT (id) {conflict_action}
                """
            )

            # Explicit drop kept from the original author, who observed table
            # sizes growing without it even though ON COMMIT DROP is set.
            cur.execute(f"DROP TABLE {temp_table_name}")

        conn.commit()
    except Exception:
        # Leave the connection clean before it is released, then re-raise.
        conn.rollback()
        raise
    finally:
        conn.close()