-- ClickHouse array indexes start at 1, so index 0 addresses no element here;
-- for this constant String array the lookup yields the empty-string default.
WITH
    ['hello', 'world', 'arrays are great aren\'t they?'] AS string_array
SELECT
    string_array[0]
┌─arrayElement⋯g_array, 0)─┐
│ │
└──────────────────────────┘
-- For a fixed list of ten origin airports on 2024-01-01, gather the Dest value
-- of every matching row into one array per (FlightDate, Origin) group.
-- Groups with the shortest arrays are listed first.
SELECT
    FlightDate,
    Origin,
    groupArray(toStringCutToZero(Dest)) AS Destinations
FROM ontime.ontime
WHERE
    FlightDate = '2024-01-01'
    AND Origin IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'JFK', 'LAS', 'CLT', 'SFO', 'SEA')
GROUP BY
    FlightDate,
    Origin
ORDER BY length(Destinations)
WITH
    -- Day under analysis; referenced by the CTE's FlightDate filter.
    '2024-01-01' AS date,
    -- One row per (FlightDate, Origin) for the ten listed airports, carrying an
    -- array that holds the Dest value of every matching row.
    busy_airports AS (
        SELECT
            FlightDate,
            Origin,
            --highlight-next-line
            groupArray(toStringCutToZero(Dest)) AS Destinations
        FROM ontime.ontime
        WHERE
            Origin IN ('ATL', 'ORD', 'DFW', 'DEN', 'LAX', 'JFK', 'LAS', 'CLT', 'SFO', 'SEA')
            AND FlightDate = date
        GROUP BY
            FlightDate,
            Origin
        -- No ORDER BY inside the CTE: the outer query imposes the final ordering,
        -- so sorting here would only be redundant work.
    )
-- The array length is the number of rows (flights) collected for each origin.
SELECT
    Origin,
    length(Destinations) AS outward_flights
FROM busy_airports
ORDER BY outward_flights DESC
-- For DEN departures on 2024-01-01, map every departure delay in a group to a
-- status label (>= 30, >= 15, otherwise) and pair each label with the tail
-- number collected from the same group.
WITH
    arrayMap(
        d -> if(d >= 30, '延误', if(d >= 15, '预警', '准点')),
        groupArray(DepDelayMinutes)
    ) AS statuses
SELECT
    Origin,
    toStringCutToZero(Dest) AS Destination,
    arrayZip(groupArray(Tail_Number), statuses) AS tailNumberStatuses
FROM ontime.ontime
WHERE
    Origin = 'DEN'
    AND FlightDate = '2024-01-01'
    AND DepTime IS NOT NULL
    AND DepDelayMinutes IS NOT NULL
-- GROUP BY ALL groups by every selected expression that is not an aggregate,
-- i.e. Origin and Destination.
GROUP BY ALL
在上述查询中,`arrayMap` 函数接收单元素数组 `[DepDelayMinutes]`,并应用 lambda 函数 `d -> if(d >= 30, '延误', if(d >= 15, '预警', '准点'))` 对其进行分类。
然后通过 `[DepDelayMinutes][1]` 提取结果数组的第一个元素。
[`arrayZip`](/sql-reference/functions/array-functions#arrayZip) 函数将 `Tail_Number` 数组和 `statuses` 数组合并为单个数组。
### arrayFilter \{#arrayfilter}
接下来,我们将仅查看机场 `DEN`、`ATL` 和 `DFW` 中延误 30 分钟或更长时间的航班数量:
```sql runnable
-- Per origin airport (DEN, ATL, DFW) on 2024-01-01: keep only the
-- ArrDelayMinutes values that are 30 or more and count how many remain.
SELECT
    Origin,
    OriginCityName,
    --highlight-next-line
    length(arrayFilter(d -> d >= 30, groupArray(ArrDelayMinutes))) AS num_delays_30_min_or_more
FROM ontime.ontime
WHERE
    FlightDate = '2024-01-01'
    AND Origin IN ('DEN', 'ATL', 'DFW')
GROUP BY
    Origin,
    OriginCityName
ORDER BY num_delays_30_min_or_more DESC
WITH
    -- Sorted array of distinct Dest values for every origin on 2024-01-01.
    airport_routes AS (
        SELECT
            Origin,
            --highlight-next-line
            arraySort(groupArray(DISTINCT toStringCutToZero(Dest))) AS destinations
        FROM ontime.ontime
        WHERE FlightDate = '2024-01-01'
        GROUP BY Origin
    )
-- Pair up the five listed airports and count the destinations each pair shares.
SELECT
    a1.Origin AS airport1,
    a2.Origin AS airport2,
    --highlight-next-line
    length(arrayIntersect(a1.destinations, a2.destinations)) AS common_destinations
FROM airport_routes AS a1
CROSS JOIN airport_routes AS a2
WHERE
    -- Strict "<" keeps each unordered pair once and excludes self-pairs.
    a1.Origin < a2.Origin
    AND a1.Origin IN ('DEN', 'ATL', 'DFW', 'ORD', 'LAS')
    AND a2.Origin IN ('DEN', 'ATL', 'DFW', 'ORD', 'LAS')
ORDER BY common_destinations DESC
LIMIT 10
-- For DEN departures on 2024-01-01, grouped by destination: list the raw
-- departure delays and reduce the same array with 'avg' and 'max'.
SELECT
    Origin,
    toStringCutToZero(Dest) AS Destination,
    groupArray(DepDelayMinutes) AS delays,
    --highlight-start
    round(arrayReduce('avg', groupArray(DepDelayMinutes)), 2) AS avg_delay,
    round(arrayReduce('max', groupArray(DepDelayMinutes)), 2) AS worst_delay
    --highlight-end
FROM ontime.ontime
WHERE
    Origin = 'DEN'
    AND FlightDate = '2024-01-01'
    AND DepDelayMinutes IS NOT NULL
GROUP BY
    Origin,
    Destination
ORDER BY avg_delay DESC
在上面的示例中,我们使用 arrayReduce 来计算从 DEN 出发的各个航班的平均和最大延误时间。
arrayReduce 会将一个聚合函数(作为函数的第一个参数指定)应用到提供的数组(作为函数的第二个参数指定)的各个元素上。
-- arrayJoin() function form: each value of `delay` (0, 10, ..., 90) becomes its
-- own row, and countIf tallies DEN -> MIA flights on 2024-01-01 whose
-- DepDelayMinutes is at least that value.
WITH
    range(0, 100, 10) AS delay,
    toStringCutToZero(Dest) AS Destination
SELECT
    '最多 ' || arrayJoin(delay) || ' 分钟' AS delayTime,
    countIf(DepDelayMinutes >= arrayJoin(delay)) AS flightsDelayed
FROM ontime.ontime
WHERE
    Origin = 'DEN'
    AND Destination = 'MIA'
    AND FlightDate = '2024-01-01'
GROUP BY delayTime
ORDER BY flightsDelayed DESC
-- ARRAY JOIN clause form: `delay` (0, 10, ..., 90) is unfolded into the row
-- alias `del`, and countIf tallies DEN -> MIA flights on 2024-01-01 whose
-- DepDelayMinutes is at least `del`.
WITH
    range(0, 100, 10) AS delay,
    toStringCutToZero(Dest) AS Destination
SELECT
    '最多 ' || del || ' 分钟' AS delayTime,
    countIf(DepDelayMinutes >= del) AS flightsDelayed
FROM ontime.ontime
ARRAY JOIN delay AS del
WHERE
    Origin = 'DEN'
    AND Destination = 'MIA'
    AND FlightDate = '2024-01-01'
-- GROUP BY ALL groups by the only non-aggregate selected expression, delayTime.
GROUP BY ALL
ORDER BY flightsDelayed DESC