摘要:
2024-05-06 monetdb-空值拒绝-记录.
相关上下文:2022-10-12 mysql-外连接转内连接-空值拒绝-分析_mysql优化器什么时候会把外连接变成内连接-CSDN博客
参考:
Mysql 优化器之外连接消除笔记 - 简书
SQL优化--逻辑优化--外连接、嵌套连接与连接消除_数据库前沿资讯 | 和通数据库
MySQL之基于规则的优化特性(二) - 墨天轮
逻辑优化-空值拒绝
- 在外连接中,如果在连接条件里,存在将NULL值过滤掉的条件,则 将外连接转换为内连接。
- 所谓空值,指代的是特殊值NULL。
- 理解这个转换规则,需要明白连接的语义,以及外连接和内连接的区别。
- 从结果和关系代数上,都是非常清晰的逻辑。
- 那么为什么要转换为内连接呢? 答案自然是利用内连接的一些优化规则,例如只有在内连接时,才可以交换不同表的连接顺序。
- 本文以monetdb为例,再次展示空值拒绝的语义,以及以及空值拒绝对外连接转为内连接的具体的分析。
DDL:
代码文件: monetdb-dev/trunk/profill/proifill_null_reject.sql at main · adofsauron/monetdb-dev (github.com)
表结构和插入数据:
create table t1( a int, b int );
create table t2( a int, b int );
insert into t1(a,b) values(3,4);
insert into t1(a,b) values(5,6);
insert into t2(a,b) values(3,1);
insert into t2(a,b) values(7,2);
外连接-不加空值拒绝:
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
外连接-加空值拒绝:
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;
等价的内连接:
SELECT * FROM t1 INNER JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;
外连接-不加空值拒绝-分析:
查看执行计划:
PLAN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
sql>PLAN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
more>+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| rel |
+==============================================================================================================================================================================================+
| project ( |
| | left outer join ( |
| | | table("sys"."t1") [ "t1"."a" UNIQUE MIN "3" MAX "5" NUNIQUES 2.000000, "t1"."b" UNIQUE MIN "4" MAX "6" NUNIQUES 2.000000 ] COUNT 2, |
| | | table("sys"."t2") [ "t2"."a" UNIQUE MIN "3" MAX "7" NUNIQUES 2.000000, "t2"."b" UNIQUE MIN "1" MAX "2" NUNIQUES 2.000000 ] COUNT 2 |
| | ) [ ("t2"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "7") = ("t1"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5") ] COUNT 2 |
| ) [ "t1"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t1"."b" NUNIQUES 2.000000 MIN "4" MAX "6", "t2"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t2"."b" NUNIQUES 2.000000 MIN "1" MAX |
: "2" ] COUNT 2 :
| push_project_down 0 actions 1 usec |
| merge_projects 0 actions 0 usec |
| push_project_up 0 actions 0 usec |
| split_project 0 actions 1 usec |
| remove_redundant_join 0 actions 0 usec |
| simplify_math 0 actions 0 usec |
| optimize_exps 0 actions 1 usec |
| optimize_select_and_joins_bottomup 0 actions 2 usec |
| project_reduce_casts 0 actions 0 usec |
| optimize_projections 0 actions 1 usec |
| optimize_joins 0 actions 2 usec |
| join_order 0 actions 6 usec |
| optimize_semi_and_anti 0 actions 1 usec |
| optimize_select_and_joins_topdown 0 actions 3 usec |
| dce 0 actions 5 usec |
| push_func_and_select_down 0 actions 1 usec |
| get_statistics 0 actions 55 usec |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
23 tuples
查看列的物理执行:
EXPLAIN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
sql>EXPLAIN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
more>+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| mal |
+================================================================================================================================================================================+
| function user.main():void; |
| X_1:void := querylog.define("explain\nselect * from t1 left join t2 on t2.a = t1.a;":str, "default_pipe":str, 52:int); |
| barrier X_140:bit := language.dataflow(); |
| X_4:int := sql.mvc(); |
| C_5:bat[:oid] := sql.tid(X_4:int, "sys":str, "t1":str); |
| X_8:bat[:int] := sql.bind(X_4:int, "sys":str, "t1":str, "a":str, 0:int); |
| X_15:bat[:int] := sql.bind(X_4:int, "sys":str, "t1":str, "b":str, 0:int); |
| C_20:bat[:oid] := sql.tid(X_4:int, "sys":str, "t2":str); |
| X_22:bat[:int] := sql.bind(X_4:int, "sys":str, "t2":str, "a":str, 0:int); |
| X_28:bat[:int] := sql.bind(X_4:int, "sys":str, "t2":str, "b":str, 0:int); |
| X_34:bat[:int] := algebra.projection(C_5:bat[:oid], X_8:bat[:int]); |
| X_35:bat[:int] := algebra.projection(C_5:bat[:oid], X_15:bat[:int]); |
| X_142:void := language.pass(C_5:bat[:oid]); |
| X_36:bat[:int] := algebra.projection(C_20:bat[:oid], X_22:bat[:int]); |
| (X_38:bat[:oid], X_39:bat[:oid]) := algebra.join(X_34:bat[:int], X_36:bat[:int], nil:BAT, nil:BAT, false:bit, nil:lng); |
| C_44:bat[:oid] := bat.mirror(X_34:bat[:int]); |
| C_45:bat[:oid] := algebra.difference(C_44:bat[:oid], X_38:bat[:oid], nil:BAT, nil:BAT, false:bit, false:bit, nil:lng); |
| X_46:bat[:int] := algebra.projection(X_38:bat[:oid], X_34:bat[:int]); |
| X_47:bat[:int] := bat.new(nil:int); |
| X_49:bat[:int] := bat.append(X_47:bat[:int], X_46:bat[:int], true:bit); |
| X_51:bat[:int] := algebra.projection(C_45:bat[:oid], X_34:bat[:int]); |
| X_143:void := language.pass(X_34:bat[:int]); |
| X_52:bat[:int] := bat.append(X_49:bat[:int], X_51:bat[:int], true:bit); |
| X_53:bat[:int] := algebra.projection(X_38:bat[:oid], X_35:bat[:int]); |
| X_144:void := language.pass(X_38:bat[:oid]); |
| X_54:bat[:int] := bat.new(nil:int); |
| X_55:bat[:int] := bat.append(X_54:bat[:int], X_53:bat[:int], true:bit); |
| X_56:bat[:int] := algebra.projection(C_45:bat[:oid], X_35:bat[:int]); |
| X_145:void := language.pass(X_35:bat[:int]); |
| X_57:bat[:int] := bat.append(X_55:bat[:int], X_56:bat[:int], true:bit); |
| X_58:bat[:int] := algebra.projection(X_39:bat[:oid], X_36:bat[:int]); |
| X_146:void := language.pass(X_36:bat[:int]); |
| X_59:bat[:int] := bat.new(nil:int); |
| X_60:bat[:int] := bat.append(X_59:bat[:int], X_58:bat[:int], true:bit); |
| X_62:bat[:int] := algebra.project(C_45:bat[:oid], nil:int); |
| X_63:bat[:int] := bat.append(X_60:bat[:int], X_62:bat[:int], true:bit); |
| X_64:bat[:int] := algebra.projectionpath(X_39:bat[:oid], C_20:bat[:oid], X_28:bat[:int]); |
| X_147:void := language.pass(X_39:bat[:oid]); |
| X_148:void := language.pass(C_20:bat[:oid]); |
| X_65:bat[:int] := bat.new(nil:int); |
| X_67:bat[:int] := bat.append(X_65:bat[:int], X_64:bat[:int], true:bit); |
| X_70:bat[:int] := algebra.project(C_45:bat[:oid], nil:int); |
| X_149:void := language.pass(C_45:bat[:oid]); |
| X_71:bat[:int] := bat.append(X_67:bat[:int], X_70:bat[:int], true:bit); |
| X_73:bat[:str] := bat.pack(".t1":str, ".t1":str, ".t2":str, ".t2":str); |
| X_74:bat[:str] := bat.pack("a":str, "b":str, "a":str, "b":str); |
| X_75:bat[:str] := bat.pack("int":str, "int":str, "int":str, "int":str); |
| X_76:bat[:int] := bat.pack(32:int, 32:int, 32:int, 32:int); |
| X_77:bat[:int] := bat.pack(0:int, 0:int, 0:int, 0:int); |
| exit X_140:bit; |
| X_72:int := sql.resultSet(X_73:bat[:str], X_74:bat[:str], X_75:bat[:str], X_76:bat[:int], X_77:bat[:int], X_52:bat[:int], X_57:bat[:int], X_63:bat[:int], X_71:bat[:int]); |
| end user.main; |
| # optimizer.inline(0:int, 1:lng) |
| # optimizer.remap(0:int, 2:lng) |
| # optimizer.costModel(1:int, 2:lng) |
| # optimizer.coercions(0:int, 3:lng) |
| # optimizer.aliases(2:int, 6:lng) |
| # optimizer.evaluate(0:int, 5:lng) |
| # optimizer.emptybind(4:int, 8:lng) |
| # optimizer.deadcode(4:int, 9:lng) |
| # optimizer.pushselect(0:int, 3:lng) |
| # optimizer.aliases(4:int, 5:lng) |
| # optimizer.for(0:int, 0:lng) |
| # optimizer.dict(0:int, 3:lng) |
| # optimizer.mitosis(0:int, 10:lng) |
| # optimizer.mergetable(0:int, 11:lng) |
| # optimizer.bincopyfrom(0:int, 1:lng) |
| # optimizer.aliases(0:int, 0:lng) |
| # optimizer.constants(3:int, 7:lng) |
| # optimizer.commonTerms(0:int, 7:lng) |
| # optimizer.projectionpath(1:int, 24:lng) |
| # optimizer.deadcode(1:int, 6:lng) |
| # optimizer.matpack(0:int, 1:lng) |
| # optimizer.reorder(1:int, 7:lng) |
| # optimizer.dataflow(1:int, 15:lng) |
| # optimizer.querylog(0:int, 1:lng) |
| # optimizer.multiplex(0:int, 0:lng) |
| # optimizer.generator(0:int, 2:lng) |
| # optimizer.candidates(1:int, 1:lng) |
| # optimizer.deadcode(0:int, 4:lng) |
| # optimizer.postfix(0:int, 5:lng) |
| # optimizer.wlc(0:int, 1:lng) |
| # optimizer.profiler(0:int, 0:lng) |
| # optimizer.garbageCollector(1:int, 14:lng) |
| # optimizer.total(32:int, 212:lng) |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
85 tuples
执行结果:
sql>SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a;
+------+------+------+------+
| a | b | a | b |
+======+======+======+======+
| 3 | 4 | 3 | 1 |
| 5 | 6 | null | null |
+------+------+------+------+
2 tuples
分析:
- 结果中存在空值,无法使用空值拒绝的逻辑转换规则
- 执行计划是使用的 left outer join
- 条件是:
- [ "t1"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t1"."b" NUNIQUES 2.000000 MIN "4" MAX "6", "t2"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t2"."b" NUNIQUES 2.000000 MIN "1" MAX |
: "2" ]
外连接-加空值拒绝:
查看执行计划:
sql>PLAN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;more>
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| rel |
+==============================================================================================================================================================================================+
| project ( |
| | join ( |
| | | select ( |
| | | | table("sys"."t2") [ "t2"."a" UNIQUE MIN "3" MAX "7" NUNIQUES 2.000000, "t2"."b" UNIQUE MIN "1" MAX "2" NUNIQUES 2.000000 ] COUNT 2 |
| | | ) [ ("t2"."b" UNIQUE NUNIQUES 2.000000 MIN "1" MAX "2") < (int(32) "5") ] COUNT 2, |
| | | table("sys"."t1") [ "t1"."a" UNIQUE MIN "3" MAX "5" NUNIQUES 2.000000, "t1"."b" UNIQUE MIN "4" MAX "6" NUNIQUES 2.000000 ] COUNT 2 |
| | ) [ ("t2"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "7") = ("t1"."a" UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5") ] COUNT 2 |
| ) [ "t1"."a" NOT NULL UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t1"."b" NUNIQUES 2.000000 MIN "4" MAX "6", "t2"."a" NOT NULL UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t2"."b" NOT NULL NU |
: NIQUES 2.000000 MIN "1" MAX "2" ] COUNT 2 :
| split_select 0 actions 1 usec |
| push_project_down 0 actions 2 usec |
| merge_projects 0 actions 1 usec |
| push_project_up 2 actions 15 usec |
| split_project 0 actions 2 usec |
| remove_redundant_join 0 actions 1 usec |
| simplify_math 0 actions 1 usec |
| optimize_exps 0 actions 4 usec |
| optimize_select_and_joins_bottomup 0 actions 6 usec |
| project_reduce_casts 0 actions 0 usec |
| optimize_projections 0 actions 2 usec |
| optimize_joins 1 actions 10 usec |
| join_order 0 actions 22 usec |
| optimize_semi_and_anti 0 actions 3 usec |
| optimize_select_and_joins_topdown 2 actions 7 usec |
| dce 0 actions 9 usec |
| push_func_and_select_down 0 actions 4 usec |
| get_statistics 0 actions 75 usec |
| final_optimization_loop 0 actions 2 usec |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
27 tuples
查看列的物理执行:
sql>
EXPLAIN
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;sql>more>
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| mal |
+================================================================================================================================================================================+
| function user.main():void; |
| X_1:void := querylog.define("explain\nselect * from t1 left join t2 on t2.a = t1.a where t2.b < 5;":str, "default_pipe":str, 34:int); |
| barrier X_122:bit := language.dataflow(); |
| X_4:int := sql.mvc(); |
| C_5:bat[:oid] := sql.tid(X_4:int, "sys":str, "t2":str); |
| X_8:bat[:int] := sql.bind(X_4:int, "sys":str, "t2":str, "a":str, 0:int); |
| X_15:bat[:int] := sql.bind(X_4:int, "sys":str, "t2":str, "b":str, 0:int); |
| C_22:bat[:oid] := algebra.thetaselect(X_15:bat[:int], C_5:bat[:oid], 5:int, "<":str); |
| C_24:bat[:oid] := sql.tid(X_4:int, "sys":str, "t1":str); |
| X_27:bat[:int] := sql.bind(X_4:int, "sys":str, "t1":str, "a":str, 0:int); |
| X_34:bat[:int] := sql.bind(X_4:int, "sys":str, "t1":str, "b":str, 0:int); |
| X_39:bat[:int] := algebra.projection(C_22:bat[:oid], X_8:bat[:int]); |
| X_41:bat[:int] := algebra.projection(C_24:bat[:oid], X_27:bat[:int]); |
| (X_43:bat[:oid], X_44:bat[:oid]) := algebra.join(X_39:bat[:int], X_41:bat[:int], nil:BAT, nil:BAT, false:bit, nil:lng); |
| X_49:bat[:int] := algebra.projection(X_43:bat[:oid], X_39:bat[:int]); |
| X_124:void := language.pass(X_39:bat[:int]); |
| X_50:bat[:int] := algebra.projectionpath(X_43:bat[:oid], C_22:bat[:oid], X_15:bat[:int]); |
| X_125:void := language.pass(X_43:bat[:oid]); |
| X_126:void := language.pass(C_22:bat[:oid]); |
| X_127:void := language.pass(X_15:bat[:int]); |
| X_51:bat[:int] := algebra.projection(X_44:bat[:oid], X_41:bat[:int]); |
| X_128:void := language.pass(X_41:bat[:int]); |
| X_52:bat[:int] := algebra.projectionpath(X_44:bat[:oid], C_24:bat[:oid], X_34:bat[:int]); |
| X_129:void := language.pass(X_44:bat[:oid]); |
| X_130:void := language.pass(C_24:bat[:oid]); |
| X_54:bat[:str] := bat.pack("sys.t1":str, "sys.t1":str, "sys.t2":str, "sys.t2":str); |
| X_55:bat[:str] := bat.pack("a":str, "b":str, "a":str, "b":str); |
| X_56:bat[:str] := bat.pack("int":str, "int":str, "int":str, "int":str); |
| X_57:bat[:int] := bat.pack(32:int, 32:int, 32:int, 32:int); |
| X_58:bat[:int] := bat.pack(0:int, 0:int, 0:int, 0:int); |
| exit X_122:bit; |
| X_53:int := sql.resultSet(X_54:bat[:str], X_55:bat[:str], X_56:bat[:str], X_57:bat[:int], X_58:bat[:int], X_51:bat[:int], X_52:bat[:int], X_49:bat[:int], X_50:bat[:int]); |
| end user.main; |
| # optimizer.inline(0:int, 2:lng) |
| # optimizer.remap(0:int, 2:lng) |
| # optimizer.costModel(1:int, 2:lng) |
| # optimizer.coercions(0:int, 2:lng) |
| # optimizer.aliases(1:int, 4:lng) |
| # optimizer.evaluate(0:int, 4:lng) |
| # optimizer.emptybind(4:int, 6:lng) |
| # optimizer.deadcode(4:int, 5:lng) |
| # optimizer.pushselect(0:int, 8:lng) |
| # optimizer.aliases(4:int, 3:lng) |
| # optimizer.for(0:int, 1:lng) |
| # optimizer.dict(0:int, 2:lng) |
| # optimizer.mitosis(0:int, 7:lng) |
| # optimizer.mergetable(0:int, 7:lng) |
| # optimizer.bincopyfrom(0:int, 1:lng) |
| # optimizer.aliases(0:int, 0:lng) |
| # optimizer.constants(3:int, 9:lng) |
| # optimizer.commonTerms(0:int, 5:lng) |
| # optimizer.projectionpath(2:int, 7:lng) |
| # optimizer.deadcode(2:int, 3:lng) |
| # optimizer.matpack(0:int, 1:lng) |
| # optimizer.reorder(1:int, 4:lng) |
| # optimizer.dataflow(1:int, 12:lng) |
| # optimizer.querylog(0:int, 0:lng) |
| # optimizer.multiplex(0:int, 1:lng) |
| # optimizer.generator(0:int, 1:lng) |
| # optimizer.candidates(1:int, 1:lng) |
| # optimizer.deadcode(0:int, 2:lng) |
| # optimizer.postfix(0:int, 3:lng) |
| # optimizer.wlc(0:int, 1:lng) |
| # optimizer.profiler(0:int, 0:lng) |
| # optimizer.garbageCollector(1:int, 7:lng) |
| # optimizer.total(32:int, 154:lng) |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
66 tuples
执行结果:
sql>SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;
+------+------+------+------+
| a | b | a | b |
+======+======+======+======+
| 3 | 4 | 3 | 1 |
+------+------+------+------+
1 tuple
分析:
- 添加了条件 WHERE t2.b < 5,连接的结果中,不可能再存在NULL, 也就是隐含了NOT NULL的条件,对外连接进行转换成内连接
- 连接方式直接变成了 join, 也就是inner join:
- 连接条件变为如下,注意额外添加的NOT NULL的条件
- [ "t1"."a" NOT NULL UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t1"."b" NUNIQUES 2.000000 MIN "4" MAX "6", "t2"."a" NOT NULL UNIQUE NUNIQUES 2.000000 MIN "3" MAX "5", "t2"."b" NOT NULL NU |
: NIQUES 2.000000 MIN "1" MAX "2" ]
等价于内连接:
执行结果:
sql>SELECT * FROM t1 INNER JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;
+------+------+------+------+
| a | b | a | b |
+======+======+======+======+
| 3 | 4 | 3 | 1 |
+------+------+------+------+
1 tuple
对比mysql8中的空值拒绝:
查询计划-查询树结构:
explain format=tree
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5;
-> Inner hash join (t1.a = t2.a) (cost=0.90 rows=1)
-> Table scan on t1 (cost=0.35 rows=2)
-> Hash
-> Filter: (t2.b < 5) (cost=0.45 rows=1)
-> Table scan on t2 (cost=0.45 rows=2)
查询代价-json结构:
explain format=json
SELECT * FROM t1 LEFT JOIN t2 ON t2.a = t1.a WHERE t2.b < 5 \G
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "0.90"
},
"nested_loop": [
{
"table": {
"table_name": "t2",
"access_type": "ALL",
"rows_examined_per_scan": 2,
"rows_produced_per_join": 1,
"filtered": "50.00",
"cost_info": {
"read_cost": "0.35",
"eval_cost": "0.10",
"prefix_cost": "0.45",
"data_read_per_join": "16"
},
"used_columns": [
"a",
"b"
],
"attached_condition": "(`d3`.`t2`.`b` < 5)"
}
},
{
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows_examined_per_scan": 2,
"rows_produced_per_join": 1,
"filtered": "50.00",
"using_join_buffer": "hash join",
"cost_info": {
"read_cost": "0.25",
"eval_cost": "0.10",
"prefix_cost": "0.90",
"data_read_per_join": "16"
},
"used_columns": [
"a",
"b"
],
"attached_condition": "(`d3`.`t1`.`a` = `d3`.`t2`.`a`)"
}
}
]
}
}