mirror of
https://github.com/postgres/postgres.git
synced 2026-03-23 02:43:22 -04:00
Consider startup cost as a figure of merit for partial paths.
Previously, the comments stated that there was no purpose to considering
startup cost for partial paths, but this is not the case: it's perfectly
reasonable to want a fast-start path for a plan that involves a LIMIT
(perhaps over an aggregate, so that there is enough data being processed
to justify parallel query, yet we don't want all the result rows).
Accordingly, rewrite add_partial_path and add_partial_path_precheck to
consider startup costs. This also fixes an independent bug in
add_partial_path_precheck: commit e222534679
failed to update it to do anything with the new disabled_nodes field.
That bug fix is formally separate from the rest of this patch and could
be committed separately, but I think it makes more sense to fix both
issues together, because then we can (as this commit does) just make
add_partial_path_precheck do the cost comparisons in the same way as
compare_path_costs_fuzzily, which hopefully reduces the chances of
ending up with something that's still incorrect.
This patch is based on earlier work on this topic by Tomas Vondra,
but I have rewritten a great deal of it.
Co-authored-by: Robert Haas <rhaas@postgresql.org>
Co-authored-by: Tomas Vondra <tomas@vondra.me>
Discussion: http://postgr.es/m/CA+TgmobRufbUSksBoxytGJS1P+mQY4rWctCk-d0iAUO6-k9Wrg@mail.gmail.com
This commit is contained in:
parent
ffc226ab64
commit
8300d3ad4a
6 changed files with 135 additions and 90 deletions
|
|
@ -1048,6 +1048,7 @@ try_partial_nestloop_path(PlannerInfo *root,
|
|||
initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
|
||||
outer_path, inner_path, extra);
|
||||
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
|
||||
workspace.startup_cost,
|
||||
workspace.total_cost, pathkeys))
|
||||
return;
|
||||
|
||||
|
|
@ -1237,6 +1238,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
|
|||
extra);
|
||||
|
||||
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
|
||||
workspace.startup_cost,
|
||||
workspace.total_cost, pathkeys))
|
||||
return;
|
||||
|
||||
|
|
@ -1369,6 +1371,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
|
|||
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
|
||||
outer_path, inner_path, extra, parallel_hash);
|
||||
if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
|
||||
workspace.startup_cost,
|
||||
workspace.total_cost, NIL))
|
||||
return;
|
||||
|
||||
|
|
|
|||
|
|
@ -778,10 +778,9 @@ add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
|
|||
*
|
||||
* Because we don't consider parameterized paths here, we also don't
|
||||
* need to consider the row counts as a measure of quality: every path will
|
||||
* produce the same number of rows. Neither do we need to consider startup
|
||||
* costs: parallelism is only used for plans that will be run to completion.
|
||||
* Therefore, this routine is much simpler than add_path: it needs to
|
||||
* consider only disabled nodes, pathkeys and total cost.
|
||||
* produce the same number of rows. However, we do need to consider the
|
||||
* startup costs: this partial path could be used beneath a Limit node,
|
||||
* so a fast-start plan could be correct.
|
||||
*
|
||||
* As with add_path, we pfree paths that are found to be dominated by
|
||||
* another partial path; this requires that there be no other references to
|
||||
|
|
@ -819,52 +818,41 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
|||
/* Compare pathkeys. */
|
||||
keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
|
||||
|
||||
/* Unless pathkeys are incompatible, keep just one of the two paths. */
|
||||
/*
|
||||
* Unless pathkeys are incompatible, see if one of the paths dominates
|
||||
* the other (both in startup and total cost). It may happen that one
|
||||
* path has lower startup cost, the other has lower total cost.
|
||||
*/
|
||||
if (keyscmp != PATHKEYS_DIFFERENT)
|
||||
{
|
||||
if (unlikely(new_path->disabled_nodes != old_path->disabled_nodes))
|
||||
PathCostComparison costcmp;
|
||||
|
||||
/*
|
||||
* Do a fuzzy cost comparison with standard fuzziness limit.
|
||||
*/
|
||||
costcmp = compare_path_costs_fuzzily(new_path, old_path,
|
||||
STD_FUZZ_FACTOR);
|
||||
if (costcmp == COSTS_BETTER1)
|
||||
{
|
||||
if (new_path->disabled_nodes > old_path->disabled_nodes)
|
||||
accept_new = false;
|
||||
else
|
||||
remove_old = true;
|
||||
}
|
||||
else if (new_path->total_cost > old_path->total_cost
|
||||
* STD_FUZZ_FACTOR)
|
||||
{
|
||||
/* New path costs more; keep it only if pathkeys are better. */
|
||||
if (keyscmp != PATHKEYS_BETTER1)
|
||||
accept_new = false;
|
||||
}
|
||||
else if (old_path->total_cost > new_path->total_cost
|
||||
* STD_FUZZ_FACTOR)
|
||||
{
|
||||
/* Old path costs more; keep it only if pathkeys are better. */
|
||||
if (keyscmp != PATHKEYS_BETTER2)
|
||||
remove_old = true;
|
||||
}
|
||||
else if (keyscmp == PATHKEYS_BETTER1)
|
||||
else if (costcmp == COSTS_BETTER2)
|
||||
{
|
||||
/* Costs are about the same, new path has better pathkeys. */
|
||||
remove_old = true;
|
||||
if (keyscmp != PATHKEYS_BETTER1)
|
||||
accept_new = false;
|
||||
}
|
||||
else if (keyscmp == PATHKEYS_BETTER2)
|
||||
else if (costcmp == COSTS_EQUAL)
|
||||
{
|
||||
/* Costs are about the same, old path has better pathkeys. */
|
||||
accept_new = false;
|
||||
}
|
||||
else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
|
||||
{
|
||||
/* Pathkeys are the same, and the old path costs more. */
|
||||
remove_old = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Pathkeys are the same, and new path isn't materially
|
||||
* cheaper.
|
||||
*/
|
||||
accept_new = false;
|
||||
if (keyscmp == PATHKEYS_BETTER1)
|
||||
remove_old = true;
|
||||
else if (keyscmp == PATHKEYS_BETTER2)
|
||||
accept_new = false;
|
||||
else if (compare_path_costs_fuzzily(new_path, old_path,
|
||||
1.0000000001) == COSTS_BETTER1)
|
||||
remove_old = true;
|
||||
else
|
||||
accept_new = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -915,16 +903,16 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
|
|||
* add_partial_path_precheck
|
||||
* Check whether a proposed new partial path could possibly get accepted.
|
||||
*
|
||||
* Unlike add_path_precheck, we can ignore startup cost and parameterization,
|
||||
* since they don't matter for partial paths (see add_partial_path). But
|
||||
* we do want to make sure we don't add a partial path if there's already
|
||||
* a complete path that dominates it, since in that case the proposed path
|
||||
* is surely a loser.
|
||||
* Unlike add_path_precheck, we can ignore parameterization, since it doesn't
|
||||
* matter for partial paths (see add_partial_path). But we do want to make
|
||||
* sure we don't add a partial path if there's already a complete path that
|
||||
* dominates it, since in that case the proposed path is surely a loser.
|
||||
*/
|
||||
bool
|
||||
add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
|
||||
Cost total_cost, List *pathkeys)
|
||||
Cost startup_cost, Cost total_cost, List *pathkeys)
|
||||
{
|
||||
bool consider_startup = parent_rel->consider_startup;
|
||||
ListCell *p1;
|
||||
|
||||
/*
|
||||
|
|
@ -934,25 +922,81 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
|
|||
* is clearly superior to some existing partial path -- at least, modulo
|
||||
* final cost computations. If so, we definitely want to consider it.
|
||||
*
|
||||
* Unlike add_path(), we always compare pathkeys here. This is because we
|
||||
* expect partial_pathlist to be very short, and getting a definitive
|
||||
* answer at this stage avoids the need to call add_path_precheck.
|
||||
* Unlike add_path(), we never try to exit this loop early. This is
|
||||
* because we expect partial_pathlist to be very short, and getting a
|
||||
* definitive answer at this stage avoids the need to call
|
||||
* add_path_precheck.
|
||||
*/
|
||||
foreach(p1, parent_rel->partial_pathlist)
|
||||
{
|
||||
Path *old_path = (Path *) lfirst(p1);
|
||||
PathCostComparison costcmp;
|
||||
PathKeysComparison keyscmp;
|
||||
|
||||
keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
|
||||
if (keyscmp != PATHKEYS_DIFFERENT)
|
||||
/*
|
||||
* First, compare costs and disabled nodes. This logic should be
|
||||
* identical to compare_path_costs_fuzzily, except that one of the
|
||||
* paths hasn't been created yet, and the fuzz factor is always
|
||||
* STD_FUZZ_FACTOR.
|
||||
*/
|
||||
if (unlikely(old_path->disabled_nodes != disabled_nodes))
|
||||
{
|
||||
if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
|
||||
keyscmp != PATHKEYS_BETTER1)
|
||||
return false;
|
||||
if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
|
||||
keyscmp != PATHKEYS_BETTER2)
|
||||
return true;
|
||||
if (disabled_nodes < old_path->disabled_nodes)
|
||||
costcmp = COSTS_BETTER1;
|
||||
else
|
||||
costcmp = COSTS_BETTER2;
|
||||
}
|
||||
else if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
|
||||
{
|
||||
if (consider_startup &&
|
||||
old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
|
||||
costcmp = COSTS_DIFFERENT;
|
||||
else
|
||||
costcmp = COSTS_BETTER2;
|
||||
}
|
||||
else if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR)
|
||||
{
|
||||
if (consider_startup &&
|
||||
startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
|
||||
costcmp = COSTS_DIFFERENT;
|
||||
else
|
||||
costcmp = COSTS_BETTER1;
|
||||
}
|
||||
else if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
|
||||
costcmp = COSTS_BETTER2;
|
||||
else if (old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
|
||||
costcmp = COSTS_BETTER1;
|
||||
else
|
||||
costcmp = COSTS_EQUAL;
|
||||
|
||||
/*
|
||||
* If one path wins on startup cost and the other on total cost, we
|
||||
* can't say for sure which is better.
|
||||
*/
|
||||
if (costcmp == COSTS_DIFFERENT)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the two paths have different pathkeys, we can't say for sure
|
||||
* which is better.
|
||||
*/
|
||||
keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
|
||||
if (keyscmp == PATHKEYS_DIFFERENT)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the existing path is cheaper and the pathkeys are equal or
|
||||
* worse, the new path is not interesting.
|
||||
*/
|
||||
if (costcmp == COSTS_BETTER2 && keyscmp != PATHKEYS_BETTER1)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If the new path is cheaper and the pathkeys are equal or better, it
|
||||
* is definitely interesting.
|
||||
*/
|
||||
if (costcmp == COSTS_BETTER1 && keyscmp != PATHKEYS_BETTER2)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -960,14 +1004,9 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
|
|||
* clearly good enough that it might replace one. Compare it to
|
||||
* non-parallel plans. If it loses even before accounting for the cost of
|
||||
* the Gather node, we should definitely reject it.
|
||||
*
|
||||
* Note that we pass the total_cost to add_path_precheck twice. This is
|
||||
* because it's never advantageous to consider the startup cost of a
|
||||
* partial path; the resulting plans, if run in parallel, will be run to
|
||||
* completion.
|
||||
*/
|
||||
if (!add_path_precheck(parent_rel, disabled_nodes, total_cost, total_cost,
|
||||
pathkeys, NULL))
|
||||
if (!add_path_precheck(parent_rel, disabled_nodes, startup_cost,
|
||||
total_cost, pathkeys, NULL))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ extern bool add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
|
|||
List *pathkeys, Relids required_outer);
|
||||
extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
|
||||
extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
|
||||
int disabled_nodes,
|
||||
int disabled_nodes, Cost startup_cost,
|
||||
Cost total_cost, List *pathkeys);
|
||||
|
||||
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
|
||||
|
|
|
|||
|
|
@ -1450,21 +1450,23 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
|
|||
|
||||
set enable_incremental_sort = on;
|
||||
explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------
|
||||
Limit
|
||||
-> Incremental Sort
|
||||
Sort Key: a, b, (sum(c))
|
||||
Presorted Key: a, b
|
||||
-> GroupAggregate
|
||||
-> Finalize GroupAggregate
|
||||
Group Key: a, b
|
||||
-> Gather Merge
|
||||
Workers Planned: 2
|
||||
-> Incremental Sort
|
||||
Sort Key: a, b
|
||||
Presorted Key: a
|
||||
-> Parallel Index Scan using t_a_idx on t
|
||||
(12 rows)
|
||||
-> Partial GroupAggregate
|
||||
Group Key: a, b
|
||||
-> Incremental Sort
|
||||
Sort Key: a, b
|
||||
Presorted Key: a
|
||||
-> Parallel Index Scan using t_a_idx on t
|
||||
(14 rows)
|
||||
|
||||
-- Incremental sort vs. set operations with varno 0
|
||||
set enable_hashagg to off;
|
||||
|
|
@ -1580,8 +1582,8 @@ from tenk1 t1
|
|||
join tenk1 t2 on t1.unique1 = t2.unique2
|
||||
join tenk1 t3 on t2.unique1 = t3.unique1
|
||||
order by count(*);
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------------
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------------------------------
|
||||
Sort
|
||||
Sort Key: (count(*))
|
||||
-> Finalize Aggregate
|
||||
|
|
@ -1591,10 +1593,10 @@ order by count(*);
|
|||
-> Parallel Hash Join
|
||||
Hash Cond: (t2.unique1 = t3.unique1)
|
||||
-> Parallel Hash Join
|
||||
Hash Cond: (t1.unique1 = t2.unique2)
|
||||
-> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
|
||||
Hash Cond: (t2.unique2 = t1.unique1)
|
||||
-> Parallel Index Scan using tenk1_unique2 on tenk1 t2
|
||||
-> Parallel Hash
|
||||
-> Parallel Index Scan using tenk1_unique2 on tenk1 t2
|
||||
-> Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
|
||||
-> Parallel Hash
|
||||
-> Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
|
||||
(15 rows)
|
||||
|
|
|
|||
|
|
@ -76,8 +76,8 @@ insert into extremely_skewed
|
|||
update pg_class
|
||||
set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
|
||||
where relname = 'extremely_skewed';
|
||||
-- Make a relation with a couple of enormous tuples.
|
||||
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
|
||||
-- Make a relation with several enormous tuples.
|
||||
create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
|
||||
alter table wide set (parallel_workers = 2);
|
||||
-- The "optimal" case: the hash table fits in memory; we plan for 1
|
||||
-- batch, we stick to that number, and peak memory usage stays within
|
||||
|
|
@ -922,7 +922,7 @@ set work_mem = '128kB';
|
|||
set hash_mem_multiplier = 1.0;
|
||||
explain (costs off)
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------
|
||||
Finalize Aggregate
|
||||
|
|
@ -934,10 +934,11 @@ explain (costs off)
|
|||
-> Parallel Seq Scan on wide
|
||||
-> Parallel Hash
|
||||
-> Parallel Seq Scan on wide wide_1
|
||||
(9 rows)
|
||||
Filter: (id < 3)
|
||||
(10 rows)
|
||||
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
length
|
||||
--------
|
||||
320000
|
||||
|
|
@ -947,7 +948,7 @@ select final > 1 as multibatch
|
|||
from hash_join_batches(
|
||||
$$
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
$$);
|
||||
multibatch
|
||||
------------
|
||||
|
|
|
|||
|
|
@ -83,8 +83,8 @@ update pg_class
|
|||
set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
|
||||
where relname = 'extremely_skewed';
|
||||
|
||||
-- Make a relation with a couple of enormous tuples.
|
||||
create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
|
||||
-- Make a relation with several enormous tuples.
|
||||
create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
|
||||
alter table wide set (parallel_workers = 2);
|
||||
|
||||
-- The "optimal" case: the hash table fits in memory; we plan for 1
|
||||
|
|
@ -496,14 +496,14 @@ set work_mem = '128kB';
|
|||
set hash_mem_multiplier = 1.0;
|
||||
explain (costs off)
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
select final > 1 as multibatch
|
||||
from hash_join_batches(
|
||||
$$
|
||||
select length(max(s.t))
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
|
||||
from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
|
||||
$$);
|
||||
rollback to settings;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue