[Solved] How does a string operation on a column in a filter condition of a Postgresql query have on the plan it chooses

EverSQL Database Performance Knowledge Base

How does a string operation on a column in a filter condition of a Postgresql query have on the plan it chooses

Database type:

I was working on optimising a query, with dumb luck I tried something and it improved the query but I am unable to explain why. Below is the query with poor performance

with ctedata1 as(
select
    sum(total_visit_count) as total_visit_count,
    sum(sh_visit_count) as sh_visit_count,
    sum(ec_visit_count) as ec_visit_count,
    sum(total_like_count) as total_like_count,
    sum(sh_like_count) as sh_like_count,
    sum(ec_like_count) as ec_like_count,
    sum(total_order_count) as total_order_count,
    sum(sh_order_count) as sh_order_count,
    sum(ec_order_count) as ec_order_count,
    sum(total_sales_amount) as total_sales_amount,
    sum(sh_sales_amount) as sh_sales_amount,
    sum(ec_sales_amount) as ec_sales_amount,
    sum(ec_order_online_count) as ec_order_online_count,
    sum(ec_sales_online_amount) as ec_sales_online_amount,
    sum(ec_order_in_store_count) as ec_order_in_store_count,
    sum(ec_sales_in_store_amount) as ec_sales_in_store_amount,
    table2.im_name,
    table2.brand as kpibrand,
    table2.id_region as kpiregion
from
    table2
where
    deleted_at is null
    and id_region = any('{1}')
group by
    im_name,
    kpiregion,
    kpibrand ),
ctedata2 as (
select
    ctedata1.*,
    rank() over (partition by (kpiregion,
    kpibrand)
order by
    coalesce(ctedata1.total_sales_amount, 0) desc) rank,
    count(*) over (partition by (kpiregion,
    kpibrand)) as total_count
from
    ctedata1 )
select
    table1.id_pf_item,
    table1.product_id,
    table1.color_code,
    table1.l1_code,
    table1.local_title as product_name,
    table1.id_region,
    table1.gender,
    case
        when table1.created_at is null then '1970/01/01 00:00:00'
        else table1.created_at
    end as created_at,
    (
    select
        count(distinct id_outfit)
    from
        table3
    left join table4 on
        table3.id_item = table4.id_item
        and table4.deleted_at is null
    where
        table3.deleted_at is null
        and table3.id_pf_item = table1.id_pf_item) as outfit_count,
    count(*) over() as total_matched,
    case
        when table1.v8_im_name = '' then table1.im_name
        else table1.v8_im_name
    end as im_name,
    case
        when table1.id_region != 1 then null
        else
        case
            when table1.sales_start_at is null then '1970/01/01 00:00:00'
            else table1.sales_start_at
        end
    end as sales_start_date,
    table1.category_ids,
    array_to_string(table1.intermediate_category_ids, ','),
    table1.image_url,
    table1.brand,
    table1.pdp_url,
    coalesce(ctedata2.total_visit_count, 0) as total_visit_count,
    coalesce(ctedata2.sh_visit_count, 0) as sh_visit_count,
    coalesce(ctedata2.ec_visit_count, 0) as ec_visit_count,
    coalesce(ctedata2.total_like_count, 0) as total_like_count,
    coalesce(ctedata2.sh_like_count, 0) as sh_like_count,
    coalesce(ctedata2.ec_like_count, 0) as ec_like_count,
    coalesce(ctedata2.total_order_count, 0) as total_order_count,
    coalesce(ctedata2.sh_order_count, 0) as sh_order_count,
    coalesce(ctedata2.ec_order_count, 0) as ec_order_count,
    coalesce(ctedata2.total_sales_amount, 0) as total_sales_amount,
    coalesce(ctedata2.sh_sales_amount, 0) as sh_sales_amount,
    coalesce(ctedata2.ec_sales_amount, 0) as ec_sales_amount,
    coalesce(ctedata2.ec_order_online_count, 0) as ec_order_online_count,
    coalesce(ctedata2.ec_sales_online_amount, 0) as ec_sales_online_amount,
    coalesce(ctedata2.ec_order_in_store_count, 0) as ec_order_in_store_count,
    coalesce(ctedata2.ec_sales_in_store_amount, 0) as ec_sales_in_store_amount,
    ctedata2.rank,
    ctedata2.total_count,
    table1.department,
    table1.seasons
from
    table1
left join ctedata2 on
    table1.im_name = ctedata2.im_name
    and table1.brand = ctedata2.kpibrand
where
    table1.deleted_at is null
    and table1.id_region = any('{1}')
    and lower(table1.brand) = any('{"brand1","brand2"}')
    and 'season1' = any(lower(seasons::text)::text[])
    and table1.department = 'Department1'
order by
    total_sales_amount desc offset 0
limit 100

The explain output for above query is

QUERY PLAN                                                                                                                                                                                                                                     
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit  (cost=172326.55..173435.38 rows=1 width=952) (actual time=85664.201..85665.970 rows=100 loops=1)                                                                                                                                        
  CTE ctedata1                                                                                                                                                                                                                                  
    ->  GroupAggregate  (cost=0.42..80478.71 rows=43468 width=530) (actual time=0.063..708.069 rows=73121 loops=1)                                                                                                                             
          Group Key: table2.im_name, table2.id_region, table2.brand                                                                                                                                                    
          ->  Index Scan using udx_table2_im_name_id_region_brand_target_date_key on table2  (cost=0.42..59699.18 rows=391708 width=146) (actual time=0.029..308.582 rows=391779 loops=1)                                      
                Filter: ((deleted_at IS NULL) AND (id_region = ANY ('{1}'::integer[])))                                                                                                                                                        
                Rows Removed by Filter: 20415                                                                                                                                                                                                  
  CTE ctedata2                                                                                                                                                                                                                             
    ->  WindowAgg  (cost=16104.06..17842.78 rows=43468 width=628) (actual time=1012.994..1082.057 rows=73121 loops=1)                                                                                                                          
          ->  WindowAgg  (cost=16104.06..17082.09 rows=43468 width=620) (actual time=945.755..1014.656 rows=73121 loops=1)                                                                                                                     
                ->  Sort  (cost=16104.06..16212.73 rows=43468 width=612) (actual time=945.747..963.254 rows=73121 loops=1)                                                                                                                     
                      Sort Key: ctedata1.kpiregion, ctedata1.kpibrand, (COALESCE(ctedata1.total_sales_amount, '0'::numeric)) DESC                                                                                                                 
                      Sort Method: external merge  Disk: 6536kB                                                                                                                                                                                
                      ->  CTE Scan on ctedata1  (cost=0.00..869.36 rows=43468 width=612) (actual time=0.069..824.841 rows=73121 loops=1)                                                                                                        
  ->  Result  (cost=74005.05..75113.88 rows=1 width=952) (actual time=85664.199..85665.950 rows=100 loops=1)                                                                                                                                   
        ->  Sort  (cost=74005.05..74005.05 rows=1 width=944) (actual time=85664.072..85664.089 rows=100 loops=1)                                                                                                                               
              Sort Key: (COALESCE(ctedata2.total_sales_amount, '0'::numeric)) DESC                                                                                                                                                         
              Sort Method: top-N heapsort  Memory: 76kB                                                                                                                                                                                        
              ->  WindowAgg  (cost=10960.95..74005.04 rows=1 width=944) (actual time=85658.049..85661.393 rows=3151 loops=1)                                                                                                                   
                    ->  Nested Loop Left Join  (cost=10960.95..74005.02 rows=1 width=927) (actual time=1075.219..85643.595 rows=3151 loops=1)                                                                                                  
                          Join Filter: (((table1.im_name)::text = ctedata2.im_name) AND ((table1.brand)::text = ctedata2.kpibrand))                                                                                                  
                          Rows Removed by Join Filter: 230402986                                                                                                                                                                               
                          ->  Bitmap Heap Scan on table1  (cost=10960.95..72483.64 rows=1 width=399) (actual time=45.466..278.376 rows=3151 loops=1)                                                                                          
                                Recheck Cond: (id_region = ANY ('{1}'::integer[]))                                                                                                                                                             
                                Filter: ((deleted_at IS NULL) AND (department = 'Department1'::text) AND (lower((brand)::text) = ANY ('{brand1, brand2}'::text[])) AND ('season1'::text = ANY ((lower((seasons)::text))::text[])))
                                Rows Removed by Filter: 106335                                                                                                                                                                                 
                                Heap Blocks: exact=42899                                                                                                                                                                                       
                                ->  Bitmap Index Scan on table1_im_name_id_region_key  (cost=0.00..10960.94 rows=110619 width=0) (actual time=38.307..38.307 rows=109486 loops=1)                                                             
                                      Index Cond: (id_region = ANY ('{1}'::integer[]))                                                                                                                                                         
                          ->  CTE Scan on ctedata2  (cost=0.00..869.36 rows=43468 width=592) (actual time=0.325..21.721 rows=73121 loops=3151)                                                                                             
        SubPlan 3                                                                                                                                                                                                                              
          ->  Aggregate  (cost=1108.80..1108.81 rows=1 width=8) (actual time=0.018..0.018 rows=1 loops=100)                                                                                                                                    
                ->  Nested Loop Left Join  (cost=5.57..1108.57 rows=93 width=4) (actual time=0.007..0.016 rows=3 loops=100)                                                                                                                    
                      ->  Bitmap Heap Scan on table3  (cost=5.15..350.95 rows=93 width=4) (actual time=0.005..0.008 rows=3 loops=100)                                                                                                            
                            Recheck Cond: (id_pf_item = table1.id_pf_item)                                                                                                                                                                    
                            Filter: (deleted_at IS NULL)                                                                                                                                                                                       
                            Heap Blocks: exact=107                                                                                                                                                                                             
                            ->  Bitmap Index Scan on idx_id_pf_item  (cost=0.00..5.12 rows=93 width=0) (actual time=0.003..0.003 rows=3 loops=100)                                                                                             
                                  Index Cond: (id_pf_item = table1.id_pf_item)                                                                                                                                                                
                      ->  Index Scan using index_table4_id_item on table4  (cost=0.42..8.14 rows=1 width=8) (actual time=0.002..0.002 rows=0 loops=303)                                                                        
                            Index Cond: (table3.id_item = id_item)                                                                                                                                                                               
                            Filter: (deleted_at IS NULL)                                                                                                                                                                                       
                            Rows Removed by Filter: 0                                                                                                                                                                                          
Planning time: 1.023 ms                                                                                                                                                                                                                        
Execution time: 85669.512 ms    

I changed

    and lower(table1.brand) = any('{"brand1","brand2"}')

in the query to

    and table1.brand = any('{"Brand1","Brand2"}')

and the plan changed to

QUERY PLAN                                                                                                                                                                                                                                    
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit  (cost=173137.44..188661.06 rows=14 width=952) (actual time=1444.123..1445.653 rows=100 loops=1)                                                                                                                                        
  CTE ctedata1                                                                                                                                                                                                                                 
    ->  GroupAggregate  (cost=0.42..80478.71 rows=43468 width=530) (actual time=0.040..769.982 rows=73121 loops=1)                                                                                                                            
          Group Key: table2.im_name, table2.id_region, table2.brand                                                                                                                                                   
          ->  Index Scan using udx_table2_item_im_name_id_region_brand_target_date_key on table2  (cost=0.42..59699.18 rows=391708 width=146) (actual time=0.021..350.774 rows=391779 loops=1)                                     
                Filter: ((deleted_at IS NULL) AND (id_region = ANY ('{1}'::integer[])))                                                                                                                                                       
                Rows Removed by Filter: 20415                                                                                                                                                                                                 
  CTE ctedata2                                                                                                                                                                                                                            
    ->  WindowAgg  (cost=16104.06..17842.78 rows=43468 width=628) (actual time=1088.905..1153.749 rows=73121 loops=1)                                                                                                                         
          ->  WindowAgg  (cost=16104.06..17082.09 rows=43468 width=620) (actual time=1020.017..1089.117 rows=73121 loops=1)                                                                                                                   
                ->  Sort  (cost=16104.06..16212.73 rows=43468 width=612) (actual time=1020.011..1037.170 rows=73121 loops=1)                                                                                                                  
                      Sort Key: ctedata1.kpiregion, ctedata1.kpibrand, (COALESCE(ctedata1.total_sales_amount, '0'::numeric)) DESC                                                                                                                
                      Sort Method: external merge  Disk: 6536kB                                                                                                                                                                               
                      ->  CTE Scan on ctedata1  (cost=0.00..869.36 rows=43468 width=612) (actual time=0.044..891.653 rows=73121 loops=1)                                                                                                       
  ->  Result  (cost=74815.94..90339.56 rows=14 width=952) (actual time=1444.121..1445.635 rows=100 loops=1)                                                                                                                                   
        ->  Sort  (cost=74815.94..74815.98 rows=14 width=944) (actual time=1444.053..1444.065 rows=100 loops=1)                                                                                                                               
              Sort Key: (COALESCE(ctedata2.total_sales_amount, '0'::numeric)) DESC                                                                                                                                                        
              Sort Method: top-N heapsort  Memory: 76kB                                                                                                                                                                                       
              ->  WindowAgg  (cost=72207.31..74815.68 rows=14 width=944) (actual time=1439.128..1441.885 rows=3151 loops=1)                                                                                                                   
                    ->  Hash Right Join  (cost=72207.31..74815.40 rows=14 width=927) (actual time=1307.531..1437.246 rows=3151 loops=1)                                                                                                       
                          Hash Cond: ((ctedata2.im_name = (table1.im_name)::text) AND (ctedata2.kpibrand = (table1.brand)::text))                                                                                                   
                          ->  CTE Scan on ctedata2  (cost=0.00..869.36 rows=43468 width=592) (actual time=1088.911..1209.646 rows=73121 loops=1)                                                                                          
                          ->  Hash  (cost=72207.10..72207.10 rows=14 width=399) (actual time=216.850..216.850 rows=3151 loops=1)                                                                                                              
                                Buckets: 4096 (originally 1024)  Batches: 1 (originally 1)  Memory Usage: 1249kB                                                                                                                              
                                ->  Bitmap Heap Scan on table1  (cost=10960.95..72207.10 rows=14 width=399) (actual time=46.434..214.246 rows=3151 loops=1)                                                                                  
                                      Recheck Cond: (id_region = ANY ('{1}'::integer[]))                                                                                                                                                      
                                      Filter: ((deleted_at IS NULL) AND (department = 'Department1'::text) AND ((brand)::text = ANY ('{Brand1, Brand2}'::text[])) AND ('season1'::text = ANY ((lower((seasons)::text))::text[])))
                                      Rows Removed by Filter: 106335                                                                                                                                                                          
                                      Heap Blocks: exact=42899                                                                                                                                                                                
                                      ->  Bitmap Index Scan on table1_im_name_id_region_key  (cost=0.00..10960.94 rows=110619 width=0) (actual time=34.849..34.849 rows=109486 loops=1)                                                      
                                            Index Cond: (id_region = ANY ('{1}'::integer[]))                                                                                                                                                  
        SubPlan 3                                                                                                                                                                                                                             
          ->  Aggregate  (cost=1108.80..1108.81 rows=1 width=8) (actual time=0.015..0.015 rows=1 loops=100)                                                                                                                                   
                ->  Nested Loop Left Join  (cost=5.57..1108.57 rows=93 width=4) (actual time=0.006..0.014 rows=3 loops=100)                                                                                                                   
                      ->  Bitmap Heap Scan on table3  (cost=5.15..350.95 rows=93 width=4) (actual time=0.004..0.006 rows=3 loops=100)                                                                                                           
                            Recheck Cond: (id_pf_item = table1.id_pf_item)                                                                                                                                                                   
                            Filter: (deleted_at IS NULL)                                                                                                                                                                                      
                            Heap Blocks: exact=107                                                                                                                                                                                            
                            ->  Bitmap Index Scan on idx_id_pf_item  (cost=0.00..5.12 rows=93 width=0) (actual time=0.003..0.003 rows=3 loops=100)                                                                                            
                                  Index Cond: (id_pf_item = table1.id_pf_item)                                                                                                                                                               
                      ->  Index Scan using index_table4_id_item on table4  (cost=0.42..8.14 rows=1 width=8) (actual time=0.002..0.002 rows=0 loops=303)                                                                       
                            Index Cond: (table3.id_item = id_item)                                                                                                                                                                              
                            Filter: (deleted_at IS NULL)                                                                                                                                                                                      
                            Rows Removed by Filter: 0                                                                                                                                                                                         
Planning time: 0.760 ms                                                                                                                                                                                                                       
Execution time: 1448.848 ms                                                                                                                                                                                                                                                                                                                                                                                                                                  

My Observation

The join strategy for table1 left join ctedata2 changes after the lower() function is avoided. The strategy changes from nested loop left join to hash right join.

The CTE Scan node on ctedata2 is executed only once in the better performing query.

Postgres Version 9.6

Please help me to understand this behaviour. I will supply additional info if required.

How to optimize this SQL query?

The following recommendations will help you in your SQL tuning process.
You'll find 3 sections below:

  1. Description of the steps you can take to speed up the query.
  2. The optimal indexes for this query, which you can copy and create in your database.
  3. An automatically re-written query you can copy and execute in your database.
The optimization process and recommendations:
  1. Avoid Calling Functions With Indexed Columns (query line: 127): When a function is used directly on an indexed column, the database's optimizer won’t be able to use the index. For example, if the column `brand` is indexed, the index won’t be used as it’s wrapped with the function `lower`. If you can’t find an alternative condition that won’t use a function call, a possible solution is to store the required value in a new indexed column.
  2. Avoid Calling Functions With Indexed Columns (query line: 130): When a function is used directly on an indexed column, the database's optimizer won’t be able to use the index. For example, if the column `seasons` is indexed, the index won’t be used as it’s wrapped with the function `any`. If you can’t find an alternative condition that won’t use a function call, a possible solution is to store the required value in a new indexed column.
  3. Avoid OFFSET In LIMIT Clause (query line: 135): OFFSET clauses can be very slow when used with high offsets (e.g. with high page numbers when implementing paging). Instead, use the following \u003ca target\u003d"_blank" href\u003d"http://www.eversql.com/faster-pagination-in-mysql-why-order-by-with-limit-and-offset-is-slow/"\u003eseek method\u003c/a\u003e, which provides better and more stable response rates.
  4. Avoid Selecting Unnecessary Columns (query line: 30): Avoid selecting all columns with the '*' wildcard, unless you intend to use them all. Selecting redundant columns may result in unnecessary performance degradation.
  5. Create Optimal Indexes (modified query below): The recommended indexes are an integral part of this optimization effort and should be created before testing the execution duration of the optimized query.
Optimal indexes for this query:
CREATE INDEX table1_idx_deleted_at_id_region_department ON "table1" ("deleted_at","id_region","department");
CREATE INDEX table2_idx_deleted_at_id_region ON "table2" ("deleted_at","id_region");
CREATE INDEX table3_idx_deleted_at_id_item ON "table3" ("deleted_at","id_pf_item");
CREATE INDEX table4_idx_deleted_at_id_item ON "table4" ("deleted_at","id_item");
The optimized query:
WITH ctedata1 AS (SELECT
        sum(total_visit_count) AS total_visit_count,
        sum(sh_visit_count) AS sh_visit_count,
        sum(ec_visit_count) AS ec_visit_count,
        sum(total_like_count) AS total_like_count,
        sum(sh_like_count) AS sh_like_count,
        sum(ec_like_count) AS ec_like_count,
        sum(total_order_count) AS total_order_count,
        sum(sh_order_count) AS sh_order_count,
        sum(ec_order_count) AS ec_order_count,
        sum(total_sales_amount) AS total_sales_amount,
        sum(sh_sales_amount) AS sh_sales_amount,
        sum(ec_sales_amount) AS ec_sales_amount,
        sum(ec_order_online_count) AS ec_order_online_count,
        sum(ec_sales_online_amount) AS ec_sales_online_amount,
        sum(ec_order_in_store_count) AS ec_order_in_store_count,
        sum(ec_sales_in_store_amount) AS ec_sales_in_store_amount,
        table2.im_name,
        table2.brand AS kpibrand,
        table2.id_region AS kpiregion 
    FROM
        table2 
    WHERE
        table2.deleted_at IS NULL 
        AND table2.id_region = any('{1}') 
    GROUP BY
        table2.im_name,
        kpiregion,
        kpibrand), ctedata2 AS (SELECT
        ctedata1.*,
        rank() OVER (PARTITION 
    BY
        (kpiregion,
        kpibrand) 
    ORDER BY
        coalesce(ctedata1.total_sales_amount,
        0) DESC) rank,
        count(*) OVER (PARTITION 
    BY
        (kpiregion,
        kpibrand) ) AS total_count 
    FROM
        ctedata1) SELECT
        table1.id_pf_item,
        table1.product_id,
        table1.color_code,
        table1.l1_code,
        table1.local_title AS product_name,
        table1.id_region,
        table1.gender,
        CASE 
            WHEN table1.created_at IS NULL THEN '1970/01/01 00:00:00' 
            ELSE table1.created_at END AS created_at,
(SELECT
    count(DISTINCT id_outfit) 
FROM
    table3 
LEFT JOIN
    table4 
        ON table3.id_item = table4.id_item 
        AND table4.deleted_at IS NULL 
WHERE
    table3.deleted_at IS NULL 
    AND table3.id_pf_item = table1.id_pf_item) AS outfit_count,
count(*) OVER () AS total_matched,
CASE 
    WHEN table1.v8_im_name = '' THEN table1.im_name 
    ELSE table1.v8_im_name END AS im_name,
CASE 
    WHEN table1.id_region != 1 THEN NULL 
    ELSE CASE 
        WHEN table1.sales_start_at IS NULL THEN '1970/01/01 00:00:00' 
        ELSE table1.sales_start_at END END AS sales_start_date,
table1.category_ids,
array_to_string(table1.intermediate_category_ids,
','),
table1.image_url,
table1.brand,
table1.pdp_url,
coalesce(ctedata2.total_visit_count,
0) AS total_visit_count,
coalesce(ctedata2.sh_visit_count,
0) AS sh_visit_count,
coalesce(ctedata2.ec_visit_count,
0) AS ec_visit_count,
coalesce(ctedata2.total_like_count,
0) AS total_like_count,
coalesce(ctedata2.sh_like_count,
0) AS sh_like_count,
coalesce(ctedata2.ec_like_count,
0) AS ec_like_count,
coalesce(ctedata2.total_order_count,
0) AS total_order_count,
coalesce(ctedata2.sh_order_count,
0) AS sh_order_count,
coalesce(ctedata2.ec_order_count,
0) AS ec_order_count,
coalesce(ctedata2.total_sales_amount,
0) AS total_sales_amount,
coalesce(ctedata2.sh_sales_amount,
0) AS sh_sales_amount,
coalesce(ctedata2.ec_sales_amount,
0) AS ec_sales_amount,
coalesce(ctedata2.ec_order_online_count,
0) AS ec_order_online_count,
coalesce(ctedata2.ec_sales_online_amount,
0) AS ec_sales_online_amount,
coalesce(ctedata2.ec_order_in_store_count,
0) AS ec_order_in_store_count,
coalesce(ctedata2.ec_sales_in_store_amount,
0) AS ec_sales_in_store_amount,
ctedata2.rank,
ctedata2.total_count,
table1.department,
table1.seasons 
FROM
table1 
LEFT JOIN
ctedata2 
    ON table1.im_name = ctedata2.im_name 
    AND table1.brand = ctedata2.kpibrand 
WHERE
table1.deleted_at IS NULL 
AND table1.id_region = any(
    '{1}'
) 
AND lower(table1.brand) = any(
    '{"brand1","brand2"}'
) 
AND 'season1' = any(
    lower(table1.seasons::text)::text[]
) 
AND table1.department = 'Department1' 
ORDER BY
total_sales_amount DESC LIMIT 100 OFFSET 0

Related Articles



* original question posted on StackOverflow here.