There are two tables T1 and T2. I want to delete those in T1 if they are already in T2.
This is the first statement:
delete T1.* from T1 inner join T2 on T1.col1 = T2.col1;
The execution takes 2 seconds.
However, if I rewrite the statement in a different way as below:
delete from T1 where col1 in (select col1 from T2);
Guess, how much time does it take to execute? 8 minutes!
-- Size of T1: 200K records.
-- Size of T2: 10K records.
-- 6m39s
select count(T1.col1) from T1
left join T2 on T1.col1 = T2.col1
where T2.col1 is null;
-- 11m42s
select count(T1.col1) from T1
where T1.col1 not in (select col1 from T2);
-- 11m33s
select count(T1.col1) from T1
where not exists (select 1 from T2 where T2.col1 = T1.col1);
SELECT count(T1.col1) FROM T1
EXCEPT
SELECT T1.col1 FROM T1 a JOIN T2 b ON a.col1 = b.col1
select group_concat(concat('"',id-to-purge,'"')) into @batch_id_purge FROM t1;
select IFNULL(@batch_id_purge , '"DONE"') into @batch_id_purge ;
set @delete_stmt = concat ("DELETE FROM t2 WHERE id IN (", @batch_id_purge ,")");
prepare STMT FROM @delete_stmt;
execute STMT;
SELECT count(T1.col1) FROM T1
EXCEPT
SELECT T1.col1 FROM T1 a JOIN T2 b ON a.col1 = b.col1