当前位置: 代码迷 >> 综合 >> pgsql:分表+去重+大数据量
  详细解决方案

pgsql:分表+去重+大数据量

热度:81   发布时间:2024-01-25 21:55:37.0

最近搞了一个项目,需求是将客户导入的csv文件里的数据存入数据库,并且根据id和packed_on去重。
表结构:
在这里插入图片描述
1 分表
– 给时间分表加上id packed_on唯一性约束
drop trigger if exists tri_ins_tb_package on tb_package;
drop function if exists func_tri_tb_package();
create or replace function func_tri_tb_package() returns trigger as
$$
declare my_tbname varchar(64);
declare my_start_time varchar(64);
declare my_end_time varchar(64);
declare sql_str text;
begin
my_tbname = TG_TABLE_NAME || ‘_’ || to_char(NEW.packed_on,‘YYYYMM’);
sql_str = 'INSERT INTO ‘||my_tbname ||’ SELECT $1.* ’ || ‘on CONFLICT(id,packed_on) do nothing’;
EXECUTE sql_str USING NEW;
return null;

exception  when  undefined_table then
beginmy_start_time=to_char(NEW.packed_on,'YYYY-MM')||'-01 00:00:00';my_end_time=(my_start_time::timestamp(0)  + interval '1 month')::text;execute 'create table ' || my_tbname || '(check (packed_on >= ' || '''' || my_start_time || '''' || ' and  packed_on < ' || '''' || my_end_time || '''' || ')) INHERITS (' || TG_TABLE_NAME || ')';execute 'alter table ' || my_tbname || ' add constraint pk_' || my_tbname || ' primary key (package_id)';execute 'alter table ' || my_tbname || ' add constraint unique_' || my_tbname || ' unique (id,packed_on)';execute 'create index idx_' || my_tbname ||'_sttm_elid  on ' || my_tbname || ' (id,packed_on)'; EXECUTE sql_str USING NEW;return null;exception when others thenexecute sql_str using NEW;return null;
end;

end;
$$ language plpgsql;
create trigger tri_ins_tb_package BEFORE insert on tb_package for each row EXECUTE PROCEDURE func_tri_tb_package();

2 删除70天以前的记录,防止数据量太大。使用语句级别触发器,在导入数据的时候执行一次
CREATE OR REPLACE FUNCTION auto_del_package ()
RETURNS TRIGGER AS r e s res
BEGIN
delete from tb_package where packed_on < (now() - interval ‘70 day’);
return old;
END;
r e s res LANGUAGE plpgsql;

CREATE TRIGGER auto_del_package_trigger AFTER INSERT ON tb_package
FOR statement EXECUTE PROCEDURE auto_del_package();