如何在clickhouse中将不同的值分组到(value,count)数组中?

ryoqjall  于 2021-07-15  发布在  ClickHouse
关注(0)|答案(3)|浏览(656)

例如,我有一张table

create table A (
    id Int64,
    discrete1 String
    discrete2 String
) engine=Log;

还有一些数据

insert into A values
(1,'A','a')
(1,'B','b')
(1,'A','c')
(2,'C','a')
(1,'A','a');

如何选择这个结果,元组是(value,count)

1,[(A,3),(B,1)],[(a,2),(b,1),(c,1)]
2,[(C,1)],[(a,1)]

我的表可能有许多离散值列,有没有办法在一个select中做到这一点,而不逐个按离散值列分组。

62o28rlo

62o28rlo1#

SELECT
    id,
    arrayMap((x, y) -> (x, y),
      (arrayReduce('sumMap', [(groupArrayArray([discrete1]) as arrdiscrete1)], 
              [arrayResize(CAST([], 'Array(UInt64)'), length(arrdiscrete1), toUInt64(1))]) as sdiscrete1).1,
      sdiscrete1.2) rdiscrete1,
    arrayMap((x, y) -> (x, y),
      (arrayReduce('sumMap', [(groupArrayArray([discrete2]) as arrdiscrete2)], 
              [arrayResize(CAST([], 'Array(UInt64)'), length(arrdiscrete2), toUInt64(1))]) as sdiscrete2).1,
      sdiscrete2.2) rdiscrete2
FROM A
GROUP BY id

┌─id─┬─rdiscrete1────────┬─rdiscrete2────────────────┐
│  2 │ [('C',1)]         │ [('a',1)]                 │
│  1 │ [('A',3),('B',1)] │ [('a',2),('b',1),('c',1)] │
└────┴───────────────────┴───────────────────────────┘
v7pvogib

v7pvogib2#

尝试此查询(只需定义所需的“离散”列及其计数):

SELECT id, groupArray(result_per_id_column) result_per_id
FROM (
    SELECT id, groupArray(count_result) result_per_id_column
    FROM 
    (
        SELECT id, index_discrete.1 as index, (index_discrete.2,  count()) AS count_result
        FROM 
        (   
            SELECT id, arrayJoin(arrayMap((x, index) -> (index, x),
                            [discrete1, discrete2, discrete3 /* so on for other 'discrete'-columns */],
                            range(3 /* count of 'discrete'-columns */))) index_discrete
            FROM (
                /* test data */
                SELECT 
                    data.1 AS id, 
                    data.2 AS discrete1,
                    data.3 AS discrete2,
                    data.4 AS discrete3
                FROM 
                (
                    SELECT arrayJoin([(1, 'A', 'a', 'aa'), (1, 'B', 'b', 'aa'), (1, 'A', 'c', 'bb'), (2, 'C', 'a', 'bb'), (1, 'A', 'a', 'cc')]) AS data
                )))
        GROUP BY id, index_discrete.1, index_discrete.2
    )
    GROUP BY id, index
    ORDER BY id, index)
GROUP BY id
/* result
┌─id─┬─result_per_id──────────────────────────────────────────────────────────────┐
│  1 │ [[('A',3),('B',1)],[('a',2),('b',1),('c',1)],[('cc',1),('bb',1),('aa',2)]] │
│  2 │ [[('C',1)],[('a',1)],[('bb',1)]]                                           │
└────┴────────────────────────────────────────────────────────────────────────────┘

* /
ecr0jaav

ecr0jaav3#

SELECT
    id,
    sumMap(arr, arrayResize(CAST([], 'Array(UInt64)'), length(arr), toUInt64(1))) AS s
FROM
(
    SELECT
        id,
        groupArrayArray([discrete1, discrete2]) AS arr
    FROM A
    GROUP BY id
)
GROUP BY id

┌─id─┬─s───────────────────────────────────┐
│  2 │ (['C','a'],[1,1])                   │
│  1 │ (['A','B','a','b','c'],[3,1,2,1,1]) │
└────┴─────────────────────────────────────┘

SELECT
    id,
    arrayMap((x, y) -> (x, y), (sumMap(arr, arrayResize(CAST([], 'Array(UInt64)'), length(arr), toUInt64(1))) AS s).1, s.2) AS r
FROM
(
    SELECT
        id,
        groupArrayArray([discrete1, discrete2]) AS arr
    FROM A
    GROUP BY id
)
GROUP BY id

┌─id─┬─r─────────────────────────────────────────┐
│  2 │ [('C',1),('a',1)]                         │
│  1 │ [('A',3),('B',1),('a',2),('b',1),('c',1)] │
└────┴───────────────────────────────────────────┘

SELECT
    id,
    arrayMap((x, y) -> (x, y),
      (arrayReduce('sumMap', [(groupArrayArray([discrete1, discrete2]) as arr)], 
              [arrayResize(CAST([], 'Array(UInt64)'), length(arr), toUInt64(1))]) as s).1,
      s.2) r
FROM A
GROUP BY id
┌─id─┬─r─────────────────────────────────────────┐
│  2 │ [('C',1),('a',1)]                         │
│  1 │ [('A',3),('B',1),('a',2),('b',1),('c',1)] │
└────┴───────────────────────────────────────────┘

相关问题