跳到主要内容

计算字段

正如列属性关系中提到的,模型的列有一种变体类型:计算字段。Wren Engine 提供了计算字段来在模型中定义计算。计算可以使用同一模型中已定义的列,或者通过关系使用另一模型中的相关列。通常,一个通用指标与许多不同的表相关。通过计算字段,可以轻松定义一个在不同模型之间交互的通用指标。

定义计算字段

在对模型及其列进行建模后,我们可以考虑基于它们创造更多价值。我们来看一个快速示例

{
"name": "orders",
"tableReference": {
"catalog": "memory",
"schema": "main",
"table": "orders"
},
"columns": [
{
"name": "orderkey",
"type": "INTEGER"
},
{
"name": "custkey",
"type": "INTEGER"
}
{
"name": "price",
"type": "INTEGER"
},
{
"name": "purchasetimestamp",
"type": "TIMESTAMP"
}
]
}

这是一个定义好的名为 orders 的模型,包含 3 列。为了增强模型,我们可能想添加一个名为 customer_last_month_orders_price 的列来了解每个客户的增长情况。我们可以像这样定义一个计算字段

"columns": [
{
"name": "orderkey",
"type": "INTEGER"
},
{
"name": "custkey",
"type": "INTEGER"
}
{
"name": "price",
"type": "INTEGER"
},
{
"name": "purchasetimestamp",
"type": "TIMESTAMP"
},
{
"name": "customer_last_month_orders_price",
"type": "INTEGER",
"isCalculated": "true",
"expression": "lag(price) over (partition by custkey order by date_trunc('YEAR', purchasetimestamp), 0, 0)"
}
]

您可以看到新列有一个名为 isCalculated 的属性。这意味着该列是一个计算字段,可以访问此模型中的其他列,或使用关系字段访问另一模型中的列。关于关系字段,您可以参阅关系了解更多详情。

然后,您可以像这样查询此模型

SELECT customer_last_month_orders_price FROM orders

计算字段工作原理

概念非常简单:只需为模型包裹另一个子查询。让我们看看 Wren Engine 从上面的 orders 模型生成的内容。

WITH
"orders" AS (
-- finaly projection
SELECT
"orders"."orderkey" "orderkey"
, "orders"."custkey" "custkey"
, "orders"."price" "price"
, "orders"."purchasetimestamp" "purchasetimestamp"
, "orders"."customer_last_month_orders_price" "customer_last_month_orders_price"
FROM
(
-- calculated field scope
SELECT
"orders"."orderkey" "orderkey"
, "orders"."custkey" "custkey"
, "orders"."price" "price"
, "orders"."purchasetimestamp" "purchasetimestamp"
-- build customer_last_month_orders_price field
, lag(price) OVER (PARTITION BY custkey ORDER BY date_trunc('YEAR', purchasetimestamp) ASC, 0 ASC, 0 ASC) "customer_last_month_orders_price"
FROM
(
-- normal column scope
SELECT
"orderkey" "orderkey"
, "custkey" "custkey"
, "price" "price"
, "purchasetimestamp" "purchasetimestamp"
FROM
"memory"."main"."orders" "orders"
) "orders"
) "orders"
)
SELECT customer_last_month_orders_price
FROM
orders

我们可以看到这个查询的结构包含多个子查询,每个子查询用于不同的目的。计算字段构建在第二层,即计算字段作用域。

它还提到了计算字段的一个限制:它只能访问同一模型中的普通列。它不能访问同一模型中的另一个计算字段。

我们来看一个更复杂的关系示例。

带关系计算

只需修改上面的 mdl

 "models": [
{
"name": "orders",
"tableReference": {
"catalog": "memory",
"schema": "main",
"table": "orders"
},
"columns": [
{
"name": "orderkey",
"type": "INTEGER"
},
{
"name": "custkey",
"type": "INTEGER"
},
{
"name": "price",
"type": "INTEGER"
},
{
"name": "purchasetimestamp",
"type": "TIMESTAMP"
},
{
"name": "cusotmer",
"type": "customer",
"relationship": "customer_orders"
},
{
"name": "customer_last_month_orders_price",
"type": "INTEGER",
"isCalculated": "true",
"expression": "lag(price) over (partition by custkey order by date_trunc('YEAR', purchasetimestamp), 0, 0)"
},
{
"name": "customer_name_last_month_orders_price",
"type": "INTEGER",
"isCalculated": "true",
"expression": "lag(price) over (partition by cusotmer.name order by date_trunc('YEAR', purchasetimestamp), 0, 0)"
}
],
"primaryKey": "orderkey"
},
{
"name": "customer",
"tableReference": {
"catalog": "memory",
"schema": "main",
"table": "customer"
},
"columns": [
{
"name": "custkey",
"type": "INTEGER"
},
{
"name": "name",
"type": "STRING"
}
]
}
],
"relationships": [
{
"name": "customer_orders",
"models": ["customer", "orders"],
"joinType": "ONE_TO_MANY",
"condition": "customer.custkey = orders.custkey"
}
]

我们创建另一个名为 customer 的模型,并建立关系 customer_orders。我们添加另一个类似的计算字段 customer_name_last_month_orders_price,使用 customer.name 作为分区键。

访问 customer_last_month_orders_pricecustomer_name_last_month_orders_price 的 SQL 如下

SELECT customer_last_month_orders_price, customer_name_last_month_orders_price FROM orders

生成的 SQL 如下

WITH
"customer" AS (
SELECT
"customer"."custkey" "custkey"
, "customer"."name" "name"
FROM
(
SELECT
"customer"."custkey" "custkey"
, "customer"."name" "name"
FROM
(
SELECT
"custkey" "custkey"
, "name" "name"
FROM
"memory"."main"."customer" "customer"
) "customer"
) "customer"
)
, "orders" AS (
SELECT
"orders"."orderkey" "orderkey"
, "orders"."custkey" "custkey"
, "orders"."price" "price"
, "orders"."purchasetimestamp" "purchasetimestamp"
, "orders"."customer_last_month_orders_price" "customer_last_month_orders_price"
, "orders_relationsub"."customer_name_last_month_orders_price" "customer_name_last_month_orders_price"
FROM
((
SELECT
"orders"."orderkey" "orderkey"
, "orders"."custkey" "custkey"
, "orders"."price" "price"
, "orders"."purchasetimestamp" "purchasetimestamp"
-- calcualted field no used relationship
, lag(price) OVER (PARTITION BY custkey ORDER BY date_trunc('YEAR', purchasetimestamp) ASC, 0 ASC, 0 ASC) "customer_last_month_orders_price"
FROM
(
-- normal column scope
SELECT
"orderkey" "orderkey"
, "custkey" "custkey"
, "price" "price"
, "purchasetimestamp" "purchasetimestamp"
FROM
"memory"."main"."orders" "orders"
) "orders"
) "orders"
LEFT JOIN (
SELECT
"orders"."orderkey"
-- calcualted field used relationship
, lag(price) OVER (PARTITION BY "customer"."name" ORDER BY date_trunc('YEAR', purchasetimestamp) ASC, 0 ASC, 0 ASC) "customer_name_last_month_orders_price"
FROM
((
-- normal column scope
SELECT
"orderkey" "orderkey"
, "custkey" "custkey"
, "price" "price"
, "purchasetimestamp" "purchasetimestamp"
FROM
"memory"."main"."orders" "orders"
) "orders"
LEFT JOIN "customer" ON ("customer"."custkey" = "orders"."custkey"))
) "orders_relationsub" ON ("orders"."orderkey" = "orders_relationsub"."orderkey"))
)
SELECT
customer_last_month_orders_price
, customer_name_last_month_orders_price
FROM
orders

很容易看出,调用关系的字段将构建在另一个作用域中,因为它需要额外的连接表。

然而,它们有相同的限制:它们只能使用同一模型中的普通列或另一个相关模型中的列。