postgresql Postgres：用一组多个条件连接两个表

tjvv9vkg 于 2023-02-04 发布在 PostgreSQL

关注(0)|答案(1)|浏览(229)

我有一个类似

的表
创建相同表的查询：

with t as (
    select *
    from (
        (
            values ('james', '801xxxxxxx', 'james@gmail.com', 'ca', 'a'),
            ('robert', '714xxxxxxx', '', 'ca', 'a'),
            ('william', '', 'william@gmail.com', '', 'a'),
            ('maria', '1234567890', 'maria@gmail.com', '', 'a'),
            ('richard', '', 'richard@gmail.com', '', 'a'),
            ('', '', 'james@gmail.com', '', 'b'),
            ('maria', '1234567890', '', '', 'b'),
            ('robert', '', '', 'ca', 'b')
        )
    ) t (first_name, phone, email, state, "type")
), a_t as (
    select *
    from t
    where "type" = 'a'
), b_t as (
    select *
    from t
    where "type" = 'b'
)
select *
from t

如何使用三组规则删除不同type之间的任何重复项。规则是在以下情况下删除重复项：

email匹配
phone和first_name同时匹配
state和first_name同时匹配。
结果应类似于

我尝试过不同的方法，最新的方法是创建两个表a_t和b_t，其中a_t是type等于a的表，b_t是type等于b的表，然后尝试使用不同类型的连接来合并这两个表。
[EDIT 1]
为了听从@topsail的建议，我试了下面这些网站和其他一些网站，没有一个不符合我的要求。

with t as (
select *
    from (
        (
            values ('james', '801xxxxxxx', 'james@gmail.com', 'ca', 'a'),
            ('robert', '714xxxxxxx', '', 'ca', 'a'),
            ('william', '', 'william@gmail.com', '', 'a'),
            ('maria', '1234567890', 'maria@gmail.com', '', 'a'),
            ('richard', '', 'richard@gmail.com', '', 'a'),
            ('', '', 'james@gmail.com', '', 'b'),
            ('maria', '1234567890', '', '', 'b'),
            ('robert', '', '', 'ca', 'b')
        )
    ) t (first_name, phone, email, state, "type")
),
dedupe_one as (
    select distinct on (email)
        first_name, phone, email, state, "type"
    from t
),
dedupe_two as (
    select distinct on (phone, first_name)
        first_name, phone, email, state, "type"
    from t
),
dedupe_three as (
    select distinct on (state, first_name)
        first_name, phone, email, state, "type"
    from t
),
dedupe_four as (
    select distinct on (email) *
    from t
    union
    select distinct on (phone, first_name) *
    from t
    union
    select distinct on (state, first_name) *
    from t
),
dedupe_five as (
    select distinct on (email) *
    from (
        select distinct on (phone, first_name) *
        from (
            select distinct on (state, first_name) *
            from t
        ) foo2
    ) foo
)
select *
from dedupe_five

postgresql

来源：https://stackoverflow.com/questions/75304467/postgres-join-two-tables-with-a-set-of-multiple-conditions

1条答案

按热度按时间

rjjhvcjd1#

我终于想出了解决办法。
我首先给每一行指定id，然后按照@topsail的建议，分别根据email、phones和first_names以及states和first_names删除重复项。此时，我有三个表。之后，如果在三个删除重复项的表中找到了原始表的行，我就在原始表中做一个标记。然后，如果原始表中有任何类型的标记，我就输出原始表中的任何行。
如果您看到每一步的输出，这将是有意义的。
下面是整个查询：

with t as (
select *
    from (
        (
            values ('james', '801xxxxxxx', 'james@gmail.com', 'ca', 'a', 1),
            ('robert', '714xxxxxxx', '', 'ca', 'a', 2),
            ('william', '', 'william@gmail.com', '', 'a', 3),
            ('maria', '1234567890', 'maria@gmail.com', '', 'a', 4),
            ('richard', '', 'richard@gmail.com', '', 'a', 5),
            ('', '', 'james@gmail.com', '', 'b', 1),
            ('maria', '1234567890', '', '', 'b', 2),
            ('robert', '', '', 'ca', 'b', 3)
        )
    ) t (first_name, phone, email, state, "type", id)
),
dedupe_one as (
    select first_name, phone, email, state, type, id
    from (
        select *, ROW_NUMBER() OVER (PARTITION BY email order by "type" asc) rn
        from t
        where email is not null and email != ''
    ) foo
    where rn = 1
),
dedupe_two as (
    select first_name, phone, email, state, type, id
    from (
        select *, ROW_NUMBER() OVER (PARTITION BY phone, first_name order by "type" asc) rn
        from t
        where phone is not null and phone != '' and first_name is not null and first_name != ''
    ) foo
    where rn = 1
),
dedupe_three as (
    select first_name, phone, email, state, type, id
    from (
        select *, ROW_NUMBER() OVER (PARTITION BY state, first_name order by "type" asc) rn
        from t
        where state is not null and state != '' and first_name is not null and first_name != ''
    ) foo
    where rn = 1
),
marked_table as (
    select t.*,
        exists (
            select 1
            from dedupe_one
            where t.id=dedupe_one.id and t."type"=dedupe_one."type"
        ) as mark_one,
        exists (
            select 1
            from dedupe_two
            where t.id=dedupe_two.id and t."type"=dedupe_two."type"
        ) as mark_two,
        exists (
            select 1
            from dedupe_three
            where t.id=dedupe_three.id and t."type"=dedupe_three."type"
        ) as mark_three
    from t
),
final_table as (
    select *
    from marked_table
    where mark_one = true or mark_two = true or mark_three = True
)
select *
from final_table

dedupe_one：