- SQL code
create table ttt ( id number(4) primary key, name varchar2(16) , sex varchar2(2), age number(4) ) insert into ttt values (1, 'a','男', 2); insert into ttt values (6, 'a','男', 4); insert into ttt values (3, 'a','男', 3); insert into ttt values (8, 'a','女', 4); insert into ttt values (7, 'b','女', 4); insert into ttt values (2, 'b','女', 2); insert into ttt values (9, 'b','女', 4); insert into ttt values (4, 'c','女', 4); insert into ttt values (5, 'e','女', 5);
要求:统计数据库中重复数据的条数(有可能不止重复两条)
重复条件:(name相同,sex相同) 或者 (name相同,age相同),去掉统计时的重复记录(如name相同,sex相同,age相同的记录,不能重复统计)
正确答案:7条
求用sql查询实现
已知算法:
1.
- SQL code
--这个算法最简单,但是真实数据量很大,这个效率太差select count(distinct t.id) from ttt t, (select name,0 age,sex from ttt group by name,sex having count(1)>1 union select name,age,'' sex from ttt group by name,age having count(1)>1) m where t.name=m.name and (t.age=m.age or t.sex=m.sex);
2.
- SQL code
--此种方式不能去除重复记录,查出来为10条select sum(count(1)) coun from ttt group by grouping sets((name,sex),(name,age)) having count(1)>1;
等sql牛人来解决。。。。。。
------解决方案--------------------
--查出所有重复记录
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
------解决方案--------------------
--查询重复条数
select count(*) from (
select name,sex,age,id from(
select name,sex,age,id from ttt where exists (select name,sex from
(select name,sex,count(name) from ttt group by name,sex having count(name)>=2)a
where ttt.name=a.name and ttt.sex=a.sex))a
union
( select name,sex,age,id from ttt where exists (select name,age from
(select name,age,count(name) from ttt group by name,age having count(name)>=2)a
where ttt.name=a.name and ttt.age=a.age))
)
------解决方案--------------------
Have a try.
- SQL code
with a as (select id, count(*) over(partition by t.name, t.sex) count from ttt t), b as (select id, count(*) over(partition by t.name, t.age) count from ttt t) select count(id) from (select a.id from a where a.count >= 2 union select b.id from b where b.count >= 2)
------解决方案--------------------
在原来基础上还可以继续优化,代码如下:
- SQL code
select count(*) from (select id, count(*) over(partition by t.name, t.sex) count1, count(*) over(partition by t.name, t.age) count2 from ttt t) a where a.count1 > 1 or a.count2 > 1
------解决方案--------------------