SQL Server 如何获得一个外部参照列与另一个外部参照列中任意值的不同组合

vyswwuz2  于 2022-11-21  发布在  其他
关注(0)|答案(1)|浏览(149)

我需要在按列A分组的外部参照表中选择列B的唯一值组合的计数。
请考虑以下方案和数据,它表示一个简单的家庭结构。每个孩子都有一个父亲和母亲:

表父级

| 父ID|名称名称名称|
| - -|- -|
| 一个|亚历克斯|
| 2个|鲍勃|

表母亲

| 母亲ID|名称名称名称|
| - -|- -|
| 一个|爱丽斯|
| 2个|芭芭拉|

表格子项

| 子项ID|父ID|母亲ID|名称名称名称|
| - -|- -|- -|- -|
| 一个|1(亚历克斯)|1(爱丽丝)|阿当|
| 2个|1(亚历克斯)|1(爱丽丝)|比利|
| 三个|1(亚历克斯)|2(芭芭拉)|赛琳|
| 四个|2(鲍勃)|2(芭芭拉)|德瑞克|
每个父亲的母亲的不同组合是:

  • 亚历克斯(爱丽丝、芭芭拉)
  • 鲍勃(芭芭拉)

在所有有两个不同的母亲组合
1.爱丽丝芭芭拉
1.芭芭拉
我要编写的查询将返回mother,* 的不同组合的计数,而不管它们与哪个father * 相关联:
| 唯一母组|
| - -|
| 2个|
我可以使用STRING_AGG函数成功地做到这一点,但是感觉很笨拙。它还需要对数百万行进行操作,目前速度相当慢。有没有更常用的方法来代替集合操作?
下面是我的工作示例:

-- Drop pre-existing tables

DROP TABLE IF EXISTS dbo.Child;

DROP TABLE IF EXISTS dbo.Father;

DROP TABLE IF EXISTS dbo.Mother;

-- Create family tables.

CREATE TABLE dbo.Father
(
    FatherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Father
ADD CONSTRAINT PK_Father
    PRIMARY KEY CLUSTERED (FatherID);

ALTER TABLE dbo.Father SET (LOCK_ESCALATION = TABLE);

CREATE TABLE dbo.Mother
(
    MotherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Mother
ADD CONSTRAINT PK_Mother
    PRIMARY KEY CLUSTERED (MotherID);

ALTER TABLE dbo.Mother SET (LOCK_ESCALATION = TABLE);

CREATE TABLE dbo.Child
(
    ChildID INT NOT NULL
  , FatherID INT NOT NULL
  , MotherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Child
ADD CONSTRAINT PK_Child
    PRIMARY KEY CLUSTERED (ChildID);

CREATE NONCLUSTERED INDEX IX_Parents ON dbo.Child (FatherID, MotherID);

ALTER TABLE dbo.Child
ADD CONSTRAINT FK_Child_Father
    FOREIGN KEY (FatherID)
    REFERENCES dbo.Father (FatherID);

ALTER TABLE dbo.Child
ADD CONSTRAINT FK_Child_Mother
    FOREIGN KEY (MotherID)
    REFERENCES dbo.Mother (MotherID);

-- Insert two children with the same parents

INSERT INTO dbo.Father
(
    FatherID
  , Name
)
VALUES
(1, 'Alex')
, (2, 'Bob')
, (3, 'Charlie')

INSERT INTO dbo.Mother
(
    MotherID
  , Name
)
VALUES
(1, 'Alice')
, (2, 'Barbara');

INSERT INTO dbo.Child
(
    ChildID
  , FatherID
  , MotherID
  , Name
)
VALUES
(1, 1, 1, 'Adam')
, (2, 1, 1, 'Billy')
, (3, 1, 2, 'Celine')
, (4, 2, 2, 'Derek')
, (5, 3, 1, 'Eric');

-- CTE Gets distinct combinations of parents
WITH distinctParentCombinations (FatherID, MotherID)
AS (SELECT children.FatherID
         , children.MotherID
    FROM dbo.Child as children
    GROUP BY children.FatherID
           , children.MotherID
   )
   -- CTE Gets uses STRING_AGG to get unique combinations of mothers.
   , motherGroups (Mothers)
AS (SELECT STRING_AGG(CONVERT(VARCHAR(MAX), distinctParentCombinations.MotherID), '-') WITHIN GROUP (ORDER BY distinctParentCombinations.MotherID) AS Mothers
    FROM distinctParentCombinations
    GROUP BY distinctParentCombinations.FatherID
   )

-- Remove the COUNT function to see the actual combinations
SELECT COUNT(motherGroups.Mothers) AS UniqueMotherGroups
FROM motherGroups

-- Clean up the example

DROP TABLE IF EXISTS dbo.Child;

DROP TABLE IF EXISTS dbo.Father;

DROP TABLE IF EXISTS dbo.Mother;
k7fdbhmy

k7fdbhmy1#

感谢您发布如此全面的测试数据设置。但是,我没有对我的DB运行任何CREATE/DROP语句,所以我将这些表转换为表变量。使用您的数据,我提出了下面的查询。只需将表名改回dbo. names,您就可以在自己的环境中进行测试了。用FOR XML PATH将mother combo转换成文本字符串。然后我统计所有不同的combo。如果你发现我的逻辑有错误,请告诉我。我只是把这个放在可能的解决方案中。

WITH distinctCombos AS (
    SELECT DISTINCT
        c.FatherID, c.MotherID
    FROM @Child as c
) , motherComboCount AS (
    SELECT
        f.FatherID
        , f.[Name]
        , STUFF((
            SELECT
                ',' + CAST(dc.MotherID as nvarchar)
            FROM distinctCombos as dc
            WHERE dc.FatherID = f.FatherID
            ORDER BY dc.MotherID ASC
            FOR XML PATH('')
        ),1,1,'') as motherList
    FROM @Father as f
)
SELECT
    COUNT(DISTINCT motherList) as UniqueMotherGroups
FROM motherComboCount as mcc

为了保存一点计算能力,删除了STUFF函数,因为它对于比较来说是不必要的...它只是让列表在显示时看起来更漂亮...我已经习惯了使用它。
看起来我们的方法之间的主要区别是FOR XML PATHSTRING_AGG的使用(我仍然使用旧的SQL)。我使用了DISTINCT两次而不是GROUP BY。如果您有一个更大的数据集来测试,请告诉我这两种方法的比较。我正在尝试考虑一个完全基于集合的方法,但目前还看不到它。
更新:方法2。这是我使用递归CTE构建不同母亲组合的想法。在您的示例数据中,每个父亲只有2个母亲。因此,总共需要执行4个基于集合的查询(第一个CTE,递归CTE中的2个查询和最后的SELECT)。

WITH uniqueCombo as (
    SELECT DISTINCT
        c.FatherID
        , c.MotherID
        , ROW_NUMBER() OVER(PARTITION BY c.FatherID ORDER BY c.MotherID) as row_num
    FROM @Child as c
), combos as (
    SELECT
        uc.FatherID
        , uc.MotherID
        , CAST(uc.MotherID as nvarchar(max)) as [path]
        , row_num
        , 0 as hierarchy_num
    FROM uniqueCombo as uc
    WHERE uc.row_num = 1

    UNION ALL

    SELECT
        uc.FatherID
        , uc.MotherID
        , co.[path] + ',' + CAST(uc.MotherID as nvarchar(max))
        , uc.row_num
        , co.hierarchy_num + 1 as heirarchy_num
    FROM uniqueCombo as uc
        INNER JOIN combos as co
            ON co.FatherID = uc.FatherID
            --AND co.MotherID <> uc.MotherID
            AND co.row_num + 1 = uc.row_num
), rankedCombos as (
    SELECT 
        c.[path]
        , ROW_NUMBER() OVER(PARTITION BY c.FatherID ORDER BY c.hierarchy_num DESC) as row_num
    FROM combos as c
)
SELECT COUNT(DISTINCT rc.[path]) as UniqueMotherGroups
FROM rankedCombos as rc
WHERE rc.row_num = 1

更新二:我还有另一个想法,即使用PIVOT来转置记录,使FatherID位于最左边的列中,而MotherID作为列标题。您必须使用a dynamic PIVOT/dynamic SQL。(在PIVOT中并不真正需要FatherID,因此它不包含在PIVOT查询中。我只需要描述目标是什么。)在透视之后,你可以用SELECT DISTINCT来得到唯一的母亲组合。然后最后一个SELECT得到COUNT。我在SQL Fiddle中运行了这个:
SQL小提琴

MS SQL Server 2017架构设置

-- Create family tables.

CREATE TABLE dbo.Father
(
    FatherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Father
ADD CONSTRAINT PK_Father
    PRIMARY KEY CLUSTERED (FatherID);

ALTER TABLE dbo.Father SET (LOCK_ESCALATION = TABLE);

CREATE TABLE dbo.Mother
(
    MotherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Mother
ADD CONSTRAINT PK_Mother
    PRIMARY KEY CLUSTERED (MotherID);

ALTER TABLE dbo.Mother SET (LOCK_ESCALATION = TABLE);

CREATE TABLE dbo.Child
(
    ChildID INT NOT NULL
  , FatherID INT NOT NULL
  , MotherID INT NOT NULL
  , Name VARCHAR(50) NOT NULL
);

ALTER TABLE dbo.Child
ADD CONSTRAINT PK_Child
    PRIMARY KEY CLUSTERED (ChildID);

CREATE NONCLUSTERED INDEX IX_Parents ON dbo.Child (FatherID, MotherID);

ALTER TABLE dbo.Child
ADD CONSTRAINT FK_Child_Father
    FOREIGN KEY (FatherID)
    REFERENCES dbo.Father (FatherID);

ALTER TABLE dbo.Child
ADD CONSTRAINT FK_Child_Mother
    FOREIGN KEY (MotherID)
    REFERENCES dbo.Mother (MotherID);

-- Insert two children with the same parents

INSERT INTO dbo.Father
(
    FatherID
  , Name
)
VALUES
(1, 'Alex')
, (2, 'Bob')
, (3, 'Charlie')

INSERT INTO dbo.Mother
(
    MotherID
  , Name
)
VALUES
(1, 'Alice')
, (2, 'Barbara');

INSERT INTO dbo.Child
(
    ChildID
  , FatherID
  , MotherID
  , Name
)
VALUES
(1, 1, 1, 'Adam')
, (2, 1, 1, 'Billy')
, (3, 1, 2, 'Celine')
, (4, 2, 2, 'Derek')
, (5, 3, 1, 'Eric');

查询1

DECLARE @cols AS nvarchar(MAX)
DECLARE @query AS nvarchar(MAX)

SET @cols = STUFF((
    SELECT DISTINCT ',' + QUOTENAME(m.MotherID) 
    FROM Mother as m
    FOR XML PATH('')) 
,1,1,'')
    
SET @query = 'SELECT COUNT(mCount) as UniqueMotherGroups FROM (
    SELECT DISTINCT ' + @cols + ', 1 as mCount FROM (
        SELECT  ' + @cols + ' 
        FROM (
            SELECT
                c.FatherID
                , c.MotherID
                , 1 as mID
            FROM child as c
        ) x
        PIVOT 
        (
            MAX(mID)
            FOR MotherID in (' + @cols + ')
        ) p
    ) as m
) as mg'

--SELECT @query
Exec(@query)

结果

| UniqueMotherGroups |
|--------------------|
|                  3 |

相关问题