需求:删除table_name表中的重复数据;判定重复的标准为:item_1,item_2,item_3的值一样,保留最新的一条
BEGIN
LOOP
--手动开启本次会话的并行
ALTER SESSION ENABLE PARALLEL DML;
--删除分组后重复数据中ROWID最小的数据
DELETE /*+ PARALLEL(8) */ FROM TABLE_NAME WHERE ROWID IN(
SELECT /*+ PARALLEL(8) */ MIN(ROWID) FROM TABLE_NAME A
GROUP BY A.ITEM_1,A.ITEM_2,A.ITEM_3
HAVING COUNT(*)>1
);
COMMIT;
--手动关闭本次会话的并行
ALTER SESSION DIABLE PARALLEL DML;
--如受影响行数等于0,则结束循环
EXIT WHEN SQL%ROWCOUNT=0;
END LOOP;
END;
以上逻辑是重复数据中留下ROWID最大的那条,如需要指定留下哪一条可参考如下:留下CREATE_TIME最新的一条
DELETE FROM TABLE_NAME A WHERE EXISTS(
SELECT 1 FROM (
SELECT C.ID,(ROW_NUMBER() OVER(PARTITION BY C.ITEM_1,C.ITEM_2,C.ITEM_3 ORDER BY
C.CREATE_TIME DESC)) ROW_NUMBER FROM TABLE_NAME C
) B WHERE B.ROW_NUMBER!=1 AND A.ID=B.ID
)
- 无论逻辑怎么写,最后是否可以执行都要参考执行计划,必要时一定要建索引
对于数据量较大(或逻辑比较复杂)的表,大到通过上述写法无明显优化效果的表,可尝试以下方式:
--1.对操作表开并行
ALTER TABLE TABLE_NAME PARALLEL 32;
--2.查并行是否开启
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME='TABLE_NAME';
--3.抽取重复数据到临时表
CREATE TABLE DEL_TEMP AS
SELECT C.ID,(ROW_NUMBER() OVER(PARTITION BY C.ITEM_1,C.ITEM_2,C.ITEM_3 ORDER BY
C.CREATE_TIME DESC)) ROW_NUMBER FROM TABLE_NAME C;
--4.给临时表加主键
ALTER TABLE DEL_TEMP ADD CONSTRAINT PK_DEL_TEMP PRIMARY KEY (ID);
--5.对临时表开并行
ALTRT TABLE DEL_TEMP PARALLEL 32;
--6.查并行是否开启
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME='DEL_TEMP';
--7.过滤临时表
DELETE FROM DEL_TEMP WHERE ROW_NUNMBER=1;
COMMIT;
--8.关联临时表进行删除
BEGIN
LOOP
DELETE /*+INDEX(C PK_TABLE_NAME)*/ FROM TABLE_NAME C WHERE ID IN(
SELECT ID
FROM(SELECT A.ID,ROWNUM RN FROM(SELECT ID FROM DEL_TEMP ORDER BY ID) A
WHERE ROWNUM<=100000) WHERE RN>=1
);
EXIT WHEN SQL%ROWCOUNT=0;
COMMIT;
DELETE FROM(SELECT A.ID,ROWNUM RN FROM(SELECT ID FROM DEL_TEMP ORDER BY ID) A
WHERE ROWNUM<=100000) WHERE RN>=1;
COMMIT;
END LOOP;
COMMIT;
END;
--9.确认重复数据是否完全删除
SELECT SUM(COUNT(1)-1) FROM TABLE_NAME C
GROUP BY C.ITEM_1,C.ITEM_2,C.ITEM_3 HAVING COUNT(1)>1;
--10.关闭并行
ALTER TABLE TABLE_NAME NOPARALLEL;
ALTER TABLE DEL_TEMP NOPARALLEL;
--11.检查并行是否关闭
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME IN('DEL_TEMP','TABLE_NAME');
--12.删除临时表
DROP TABLE DEL_TEMP;