Oracle删除重复数据

需求:删除table_name表中的重复数据;判定重复的标准为:item_1,item_2,item_3的值一样,保留最新的一条

BEGIN
  LOOP
    --手动开启本次会话的并行
    ALTER SESSION ENABLE PARALLEL DML;
    --删除分组后重复数据中ROWID最小的数据
    DELETE /*+ PARALLEL(8) */ FROM TABLE_NAME WHERE ROWID IN(
    SELECT /*+ PARALLEL(8) */ MIN(ROWID) FROM TABLE_NAME A
    GROUP BY A.ITEM_1,A.ITEM_2,A.ITEM_3
    HAVING COUNT(*)>1
   );
   COMMIT;
   --手动关闭本次会话的并行
   ALTER SESSION DIABLE PARALLEL DML;
   --如受影响行数等于0,则结束循环
   EXIT WHEN SQL%ROWCOUNT=0;
  END LOOP;
END;

以上逻辑是重复数据中留下ROWID最大的那条,如需要指定留下哪一条可参考如下:留下CREATE_TIME最新的一条

DELETE FROM TABLE_NAME A WHERE EXISTS(
  SELECT 1 FROM (
    SELECT C.ID,(ROW_NUMBER() OVER(PARTITION BY C.ITEM_1,C.ITEM_2,C.ITEM_3 ORDER BY 
    C.CREATE_TIME DESC)) ROW_NUMBER FROM TABLE_NAME C
  ) B WHERE B.ROW_NUMBER!=1 AND A.ID=B.ID
)
  • 无论逻辑怎么写,最后是否可以执行都要参考执行计划,必要时一定要建索引

对于数据量较大(或逻辑比较复杂)的表,大到通过上述写法无明显优化效果的表,可尝试以下方式:

--1.对操作表开并行
ALTER TABLE TABLE_NAME PARALLEL 32;
--2.查并行是否开启
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME='TABLE_NAME';
--3.抽取重复数据到临时表
CREATE TABLE DEL_TEMP AS
SELECT C.ID,(ROW_NUMBER() OVER(PARTITION BY C.ITEM_1,C.ITEM_2,C.ITEM_3 ORDER BY 
    C.CREATE_TIME DESC)) ROW_NUMBER FROM TABLE_NAME C;
--4.给临时表加主键
ALTER TABLE DEL_TEMP ADD CONSTRAINT PK_DEL_TEMP PRIMARY KEY (ID);
--5.对临时表开并行
ALTRT TABLE DEL_TEMP PARALLEL 32;
--6.查并行是否开启
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME='DEL_TEMP';
--7.过滤临时表
DELETE FROM DEL_TEMP WHERE ROW_NUNMBER=1;
COMMIT;
--8.关联临时表进行删除
BEGIN
 LOOP
  DELETE /*+INDEX(C PK_TABLE_NAME)*/ FROM TABLE_NAME C WHERE ID IN(
  SELECT ID
  FROM(SELECT A.ID,ROWNUM RN FROM(SELECT ID FROM DEL_TEMP ORDER BY ID) A
   WHERE ROWNUM<=100000) WHERE RN>=1
  );
  EXIT WHEN SQL%ROWCOUNT=0;
  COMMIT;
  DELETE FROM(SELECT A.ID,ROWNUM RN FROM(SELECT ID FROM DEL_TEMP ORDER BY ID) A
  WHERE ROWNUM<=100000) WHERE RN>=1;
  COMMIT;
 END LOOP;
 COMMIT;
END;
--9.确认重复数据是否完全删除
SELECT SUM(COUNT(1)-1) FROM TABLE_NAME C
GROUP BY C.ITEM_1,C.ITEM_2,C.ITEM_3 HAVING COUNT(1)>1;
--10.关闭并行
ALTER TABLE TABLE_NAME NOPARALLEL;
ALTER TABLE DEL_TEMP NOPARALLEL;
--11.检查并行是否关闭
SELECT TABLE_NAME,DEGREE FROM USER_TABLES WHERE TABLE_NAME IN('DEL_TEMP','TABLE_NAME');
--12.删除临时表
DROP TABLE DEL_TEMP;

发表评论

电子邮件地址不会被公开。 必填项已用*标注

20 − 2 =