基于产品持有量的排名

zbsbpyhn  于 2021-07-24  发布在  Java
关注(0)|答案(2)|浏览(371)

我有一个表,其中包含客户id和产品id的组合。我想找出收益率最高的产品持有组合。i、 e.哪些产品组合和产品子组合或单个产品的产量最大。
例如,如果我有3个产品的组合,我想根据这些产品的其他组合或单独出现来对其进行排名。试着这样做,这样我就可以确定组合,将有最大的收益。
请分享你的想法。db-sql server。

CREATE TABLE [dbo].[spk_bkup_cust_prod](
    [cust_id] [varchar](100) NULL,
    [prod_id] [varchar](100) NULL
) ON [PRIMARY]

GO
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust1', N'prod1')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust1', N'prod2')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust2', N'prod1')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust3', N'prod2')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust3', N'prod3')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust4', N'prod1')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust5', N'prod1')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust5', N'prod2')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust5', N'prod3')
INSERT [dbo].[spk_bkup_cust_prod] ([cust_id], [prod_id]) VALUES (N'cust6', N'prod1')

prodset             prodrnk_max
prod1               3
prod1-prod2         4
prod1-prod2-prod3   6
prod2-prod3         1

在上面的例子中,如果我研究prod1和prod2,我会碰到4个客户。我会打一个有prod1-prod2的客户和3个只有prod1的客户。
如果我研究prod1、prod2和prod3,那么我就会击中所有客户。
尝试使用字符串分割和xml路径的组合。它有助于整合单一产品子控股,而不是多产品子控股。

with custprod as (
    SELECT
        distinct cust_id , prodset = STUFF((
              SELECT '-' + cp_grp.prod_id
              FROM dbo.spk_bkup_cust_prod cp_grp
              WHERE cp.cust_id = cp_grp.cust_id
              FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '')
    FROM dbo.spk_bkup_cust_prod cp
), prodcnt as (
    select prodset, count(*) rcnt from custprod group by prodset
), proddis as (
    select 
        prodcnt.prodset
        , prodcnt.rcnt
        , value indivprod
        --, convert(varchar,value)
    from 
    prodcnt
    cross apply string_split(prodset,'-')
), prodrnk as (
    select proddis.*
    , case when proddis.indivprod != proddis.prodset and prodcnt.prodset is not null then  proddis.rcnt + prodcnt.rcnt  else proddis.rcnt end prodrnk 
    from proddis 
    left join prodcnt on prodcnt.prodset = proddis.indivprod
)
select
    prodset, max(prodrnk) prodrnk_max
from 
prodrnk
group by prodset

我认为这是一个错误的方法开始,所以没有附加原来。

svdrlsy4

svdrlsy41#

如果我理解正确的话,事情会是这样的。

with custprod as (
    SELECT
        distinct cust_id , prodset = STUFF((
              SELECT '-' + cp_grp.prod_id
              FROM dbo.spk_bkup_cust_prod cp_grp
              WHERE cp.cust_id = cp_grp.cust_id
              FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 1, '')
    FROM dbo.spk_bkup_cust_prod cp
), prodcnt as (
    select
        cust_id,
        prod_id,
        count(*) over(partition by cust_id) as cnt -- number of products each customer has
    from dbo.spk_bkup_cust_prod
), custcnt as (
    select distinct
        p1.cust_id,
        count(*) over(partition by p1.cust_id) as cnt -- number of customers contains the same projects
    from prodcnt p1
    inner join prodcnt p2 on p1.prod_id = p2.prod_id
    group by p1.cust_id, p2.cust_id
    having max(p2.cnt) = count(*)
)

select
    prodset,
    max(c.cnt) as prodrnk_max
from custprod p 
inner join custcnt c on c.cust_id = p.cust_id
group by prodset
eufgjt7s

eufgjt7s2#

这似乎适用于我,至少适用于小样本集。

drop table if exists dbo.#CustProdTag
SELECT distinct top 100 percent
    cust_id,
    prodset =   STUFF(
                    (
                        SELECT '%' + cp_grp.prod_id
                        FROM dbo.spk_bkup_cust_prod cp_grp
                        WHERE cp.cust_id = cp_grp.cust_id
                        FOR XML PATH(''), TYPE
                    --  order by cp_grp.prod_id
                        ).value('.', 'NVARCHAR(MAX)'), 1, 1, ''

                )
INTO    dbo.#CustProdTag
FROM    dbo.spk_bkup_cust_prod cp
ORDER BY prodset

------------------------------------------------------------------------

drop table if exists dbo.#ProdCombo
select distinct prodset
into dbo.#ProdCombo
from dbo..#CustProdTag

------------------------------------------------------------------------

SELECT * FROM dbo.#ProdCombo
SELECT * FROM dbo.#CustProdTag  ORDER BY prodset

------------------------------------------------------------------------

SELECT
    pc.prodset,
    COUNT(*)        [RecCnt]
FROM
    dbo.#ProdCombo                  pc
    LEFT JOIN   dbo.#CustProdTag    cpt     ON  pc.prodset like '%' + cpt.prodset + '%'
GROUP BY
    pc.prodset
ORDER BY
    pc.prodset

相关问题