# standardSQL
SELECT domain, names_count,
samples[OFFSET(0)] AS sample_name_1,
samples[SAFE_OFFSET(1)] AS sample_name_2,
samples[SAFE_OFFSET(2)] AS sample_name_3,
samples[SAFE_OFFSET(3)] AS sample_name_4,
samples[SAFE_OFFSET(4)] AS sample_name_5
FROM (
SELECT domain,
COUNT(name) names_count,
ARRAY_AGG(name ORDER BY RAND() LIMIT 5) samples
FROM `project.dataset.table`
GROUP BY domain
)
select
domain,
count(*) names_count,
max(case when rn = 1 then name end) sample_name_1,
max(case when rn = 2 then name end) sample_name_2,
max(case when rn = 3 then name end) sample_name_3,
max(case when rn = 4 then name end) sample_name_4,
max(case when rn = 5 then name end) sample_name_5
from (
select t.*, row_number() over(partition by domaine order by rand()) rn
from mytable t
) t
group by domain
子查询列组 name 他也一样 domain 随机。然后,外部查询按域聚合,并使用条件聚合为每个组带来前5个随机排列的名称。
2条答案
按热度按时间31moq8wy1#
下面是bigquery标准sql
n9vozmp42#
可以使用窗口函数和聚合:
子查询列组
name
他也一样domain
随机。然后,外部查询按域聚合,并使用条件聚合为每个组带来前5个随机排列的名称。