update users set email=concat(name, '@gmail.com') where percentage>.20; UPDATE 3060977And some from another known domain.
update users set email=concat(name, '@yahoo.com') where email not like '%gmail.com'; UPDATE 763903Finally, a small handful of email addresses from our target domain.
update users set email=concat(name, '@aol.com') where percentage<.0005; UPDATE 1841
postgres=# explain analyze verbose select * from users where email like '%aol.com'; QUERY PLAN ---------------------------------------------------- Gather (cost=1000.00..53368.45 rows=382 width=35) (actual time=2.058..453.503 rows=1841 loops=1) Output: name, email, percentage Workers Planned: 2 Workers Launched: 2 -> Parallel Seq Scan on public.users (cost=0.00..52330.25 rows=159 width=35) (actual time=0.478..339.496 rows=614 loops=3) Output: name, email, percentage Filter: ((users.email)::text ~~ '%aol.com'::text) Rows Removed by Filter: 1274346 Worker 0: actual time=0.151..279.486 rows=349 loops=1 Worker 1: actual time=0.040..289.677 rows=737 loops=1 Planning Time: 0.301 ms Execution Time: 453.742 ms (12 rows)
create index idx_user_email on users(email); CREATE INDEX postgres=# explain analyze verbose select * from users where email like '%aol.com'; QUERY PLAN ------------------------------------------------------------- Gather (cost=1000.00..53368.45 rows=382 width=35) (actual time=2.775..455.128 rows=1841 loops=1) Output: name, email, percentage Workers Planned: 2 Workers Launched: 2 -> Parallel Seq Scan on public.users (cost=0.00..52330.25 rows=159 width=35) (actual time=0.714..343.353 rows=614 loops=3) Output: name, email, percentage Filter: ((users.email)::text ~~ '%aol.com'::text) Rows Removed by Filter: 1274346 Worker 0: actual time=0.140..279.955 rows=311 loops=1 Worker 1: actual time=0.123..299.223 rows=823 loops=1 Planning Time: 0.306 ms Execution Time: 455.387 ms (12 rows)
create index idx_user_domain on users(email) where email like '%aol.com'; CREATE INDEX explain analyze verbose select * from users where email like '%aol.com'; QUERY PLAN ---------------------------------------------------------------- Bitmap Heap Scan on public.users (cost=14.28..1415.62 rows=382 width=35) (actual time=1.917..14.224 rows=1841 loops=1) Output: name, email, percentage Recheck Cond: ((users.email)::text ~~ '%aol.com'::text) Heap Blocks: exact=1602 -> Bitmap Index Scan on idx_user_domain (cost=0.00..14.19 rows=382 width=0) (actual time=0.988..0.989 rows=1841 loops=1) Planning Time: 1.849 ms Execution Time: 14.331 ms (7 rows)
postgres=# explain analyze verbose select email from users where left(reverse(email::text),8)='moc.loa';
QUERY PLAN
-------------------------------------------------------------
Gather (cost=1000.00..63211.15 rows=19124 width=17) (actual time=593.064..596.289 rows=0 loops=1)
Output: email
Workers Planned: 2
Workers Launched: 2
-> Parallel Seq Scan on public.users (cost=0.00..60298.75 rows=7968 width=17) (actual time=547.845..547.846 rows=0 loops=3)
Output: email
Filter: ("left"(reverse((users.email)::text), 8) = 'moc.loa'::text)
Rows Removed by Filter: 1274960
Worker 0: actual time=526.251..526.251 rows=0 loops=1
Worker 1: actual time=526.283..526.283 rows=0 loops=1
Planning Time: 0.548 ms
Execution Time: 596.420 ms
(12 rows)
What if we combined a partial index with this idea?
create index idx_user_reverse on users(email) where left(reverse(email::text),8)='moc.loa';
explain analyze verbose select email from users where left(reverse(email::text),8)='moc.loa';
QUERY PLAN
------------------------------------------------------------
Bitmap Heap Scan on public.users (cost=8.91..29508.71 rows=19124 width=17) (actual time=0.004..0.005 rows=0 loops=1)
Output: email
Recheck Cond: ("left"(reverse((users.email)::text), 8) = 'moc.loa'::text)
-> Bitmap Index Scan on idx_user_reverse (cost=0.00..4.13 rows=19124 width=0) (actual time=0.002..0.002 rows=0 loops=1)
Planning Time: 0.534 ms
Execution Time: 0.025 ms
(6 rows)
Oh, yes. Very nice.
Looking to innovate through PostgreSQL?