Database version: PostgreSQL 12.6
I have a table with 600,000 records.
The table has the columns:
- name (varchar)
- location_type (int) enum values: (1,2,3)
- ancestry (varchar)
Indexes:
- ancestry (btree)
The ancestry column is a way to build a tree where every row has an ancestry containing all parent ids separated by '/'.
Consider the following example:
| id | name | ancestry |
|---|---|---|
| 1 | root | null |
| 5 | node | '1' |
| 12 | node | '1/5' |
| 22 | leaf | '1/5/12' |
The following query takes 686 ms to execute:
SELECT * FROM geolocations
WHERE EXISTS (
SELECT 1 FROM geolocations g2
WHERE g2.ancestry =
CONCAT(geolocations.ancestry, '/', geolocations.id)
)
This query runs in 808 ms:
SELECT * FROM geolocations
WHERE location_type = 2
When combining both queried with an OR, it takes around 4475 ms to finish if it ever finishes.
SELECT * FROM geolocations
WHERE EXISTS (
SELECT 1 FROM geolocations g2
WHERE g2.ancestry =
CONCAT(geolocations.ancestry, '/', geolocations.id)
) OR location_type = 2
Explain:
[
{
"Plan": {
"Node Type": "Seq Scan",
"Parallel Aware": false,
"Relation Name": "geolocations",
"Alias": "geolocations",
"Startup Cost": 0,
"Total Cost": 2760473.54,
"Plan Rows": 582910,
"Plan Width": 68,
"Filter": "((SubPlan 1) OR (location_type = 2))",
"Plans": [
{
"Node Type": "Index Only Scan",
"Parent Relationship": "SubPlan",
"Subplan Name": "SubPlan 1",
"Parallel Aware": false,
"Scan Direction": "Forward",
"Index Name": "index_geolocations_on_ancestry",
"Relation Name": "geolocations",
"Alias": "g2",
"Startup Cost": 0.43,
"Total Cost": 124.91,
"Plan Rows": 30,
"Plan Width": 0,
"Index Cond": "(ancestry = concat(geolocations.ancestry, '/', geolocations.id))"
}
]
},
"JIT": {
"Worker Number": -1,
"Functions": 8,
"Options": {
"Inlining": true,
"Optimization": true,
"Expressions": true,
"Deforming": true
}
}
}
]
While combining them with a union takes 1916 ms:
SELECT * FROM geolocations
WHERE EXISTS (
SELECT 1 FROM geolocations g2
WHERE g2.ancestry =
CONCAT(geolocations.ancestry, '/', geolocations.id)
) UNION SELECT * FROM geolocations WHERE location_type = 2
Explain
[
{
"Plan": {
"Node Type": "Unique",
"Parallel Aware": false,
"Startup Cost": 308693.44,
"Total Cost": 332506.74,
"Plan Rows": 865938,
"Plan Width": 188,
"Plans": [
{
"Node Type": "Sort",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 308693.44,
"Total Cost": 310858.29,
"Plan Rows": 865938,
"Plan Width": 188,
"Sort Key": [
"geolocations.id",
"geolocations.name",
"geolocations.location_type",
"geolocations.pricing",
"geolocations.ancestry",
"geolocations.geolocationable_id",
"geolocations.geolocationable_type",
"geolocations.created_at",
"geolocations.updated_at",
"geolocations.info"
],
"Plans": [
{
"Node Type": "Append",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 15851.41,
"Total Cost": 63464.05,
"Plan Rows": 865938,
"Plan Width": 188,
"Subplans Removed": 0,
"Plans": [
{
"Node Type": "Hash Join",
"Parent Relationship": "Member",
"Parallel Aware": false,
"Join Type": "Inner",
"Startup Cost": 15851.41,
"Total Cost": 35074.94,
"Plan Rows": 299882,
"Plan Width": 68,
"Inner Unique": true,
"Hash Cond": "(concat(geolocations.ancestry, '/', geolocations.id) = (g2.ancestry)::text)",
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Relation Name": "geolocations",
"Alias": "geolocations",
"Startup Cost": 0,
"Total Cost": 13900.63,
"Plan Rows": 599763,
"Plan Width": 68
},
{
"Node Type": "Hash",
"Parent Relationship": "Inner",
"Parallel Aware": false,
"Startup Cost": 15600.65,
"Total Cost": 15600.65,
"Plan Rows": 20061,
"Plan Width": 12,
"Plans": [
{
"Node Type": "Aggregate",
"Strategy": "Hashed",
"Partial Mode": "Simple",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Startup Cost": 15400.04,
"Total Cost": 15600.65,
"Plan Rows": 20061,
"Plan Width": 12,
"Group Key": [
"(g2.ancestry)::text"
],
"Plans": [
{
"Node Type": "Seq Scan",
"Parent Relationship": "Outer",
"Parallel Aware": false,
"Relation Name": "geolocations",
"Alias": "g2",
"Startup Cost": 0,
"Total Cost": 13900.63,
"Plan Rows": 599763,
"Plan Width": 12
}
]
}
]
}
]
},
{
"Node Type": "Seq Scan",
"Parent Relationship": "Member",
"Parallel Aware": false,
"Relation Name": "geolocations",
"Alias": "geolocations_1",
"Startup Cost": 0,
"Total Cost": 15400.04,
"Plan Rows": 566056,
"Plan Width": 68,
"Filter": "(location_type = 2)"
}
]
}
]
}
]
},
"JIT": {
"Worker Number": -1,
"Functions": 15,
"Options": {
"Inlining": false,
"Optimization": false,
"Expressions": true,
"Deforming": true
}
}
}
]
Why does PostgreSQL execute the OR query much slower?