Monday, June 9, 2008

SQL Server 2005 Remove Dups – CTE

No more selecting DISTINCT into a temp table and then inserting back into the cleaned out table. This does it all for you in two statements.
Deleting from the CTE actually changes the underlying table. Be careful how you setup your CTE. You could have some unintended deletes without the right logic
CREATE TABLE #prod(
    Product_Code varchar(10),
    Product_Name varchar(100)
)

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('123','Product_1')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('234','Product_2')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('345','Product_3')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('345','Product_3')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('456','Product_4')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('567','Product_5')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('678','Product_6')

INSERT INTO #prod(Product_Code, Product_Name)
VALUES ('789','Product_7')

SELECT *
FROM #prod;


With Dups as
(
    select *, row_number() over (partition by Product_Code order by Product_Code) as RowNum
    from #prod
)


Delete from Dups where rownum > 1;

--Note duplicate record 345 Product_3 has been removed.
SELECT *
FROM #prod;

No comments: