postgresql PLPGSQL中的函数定义

k0pti3hp  于 2023-08-04  发布在  PostgreSQL
关注(0)|答案(1)|浏览(139)

我尝试使用POSTGIS实现内置的kmeans()函数,如下所示:

create or replace FUNCTION kmeans (number_of_clusters INTEGER) RETURNS TEXT AS
$$
begin
    
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1) 
  OVER(ORDER BY data."ID") AS cluster_id from data)
,

filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
) 

,

accepted_dbscan as
(
select * from dbscan 
where cluster_id in (select cluster_id from filter)

)
,

rejected_dbscan as 
(
select * from dbscan 
where cluster_id not in (select cluster_id from filter)
)

    
RETURN 'SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan';
END;
$$ LANGUAGE PLPGSQL;

字符串
这会产生一个错误:
SQL错误[42601]:错误:在“RETURN”处或附近出现语法错误
整个查询是否需要在RETURN子句中?我还关心是否返回文本或具有不同指定列的表。

**更新:**我也尝试了以下内容:

create or replace FUNCTION kmeans (number_of_clusters INTEGER) 

RETURNS TABLE (ID varchar ,
cluster_id int)

LANGUAGE plpgsql AS  
$func$
DECLARE
number_of_clusters int;
BEGIN

RETURN QUERY    
    
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1) 
  OVER(ORDER BY data."ID") AS cluster_id from data)
,

filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
) 

,

accepted_dbscan as
(
select * from dbscan 
where cluster_id in (select cluster_id from filter)

)
,

rejected_dbscan as 
(
select * from dbscan 
where cluster_id not in (select cluster_id from filter)
)

SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
    
END
$func$;


但是,这再次给出以下错误:

SQL Error [42601]: ERROR: syntax error at or near "END"
  Position: 1114

**第二次更新:**基于Luuk的评论,我将代码更新为:

create or replace FUNCTION kmeans (number_of_clusters INTEGER) 

RETURNS TABLE (ID varchar ,
cluster_id int)

LANGUAGE plpgsql AS  
$$
DECLARE
number_of_clusters int;
BEGIN

RETURN QUERY    
    
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1) 
  OVER(ORDER BY data."ID") AS cluster_id from data)
,

filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
) 

,

accepted_dbscan as
(
select * from dbscan 
where cluster_id in (select cluster_id from filter)

)
,

rejected_dbscan as 
(
select * from dbscan 
where cluster_id not in (select cluster_id from filter)
)

SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
    
end;
$$;


但我却犯了个错误
SQL错误[42601]:错误:语法错误在或接近“结束”

ca1c2owp

ca1c2owp1#

错误消息的直接原因是SELECT语句末尾缺少分号(;),正如注解中指出的那样。
但你的大部分功能都是噪音或废话。归结为:

CREATE OR REPLACE FUNCTION kmeans(_number_of_clusters integer)
  RETURNS TABLE (id varchar, cluster_id int)
  LANGUAGE plpgsql AS
$func$
BEGIN
   RETURN QUERY
   WITH data AS (
      SELECT bf."ID", bf."geoJson"
           , st_clusterdbscan(st_transform(bf."geoJson", 24313), eps => 50, minpoints => 1) OVER (ORDER BY bf."ID") AS cluster_id
      FROM   building_footprints bf
      JOIN   area      a   ON a."ID" = bf."areaID"               -- LEFT JOIN was nonsense
      JOIN   area_type at2 ON at2."ID" = a."areaTypeID"
      WHERE  a."ID" IN (SELECT "areaID" FROM campaign_scope)
      AND    at2.name = 'Uc'
      )
   SELECT "ID"
        , st_clusterkmeans(d."geoJson", ' || _number_of_clusters || ') OVER ()  -- AS cluster_id  -- noise
   FROM  (
      SELECT d1.cluster_id
      FROM   data d1
      GROUP  BY d1.cluster_id
      HAVING count(*) >= 10
      ) filter
   JOIN   data d USING (cluster_id);
END
$func$;

字符串
或者用一个简单的SQL函数更简单:

CREATE OR REPLACE FUNCTION kmeans(_number_of_clusters integer)
  RETURNS TABLE (id varchar, cluster_id int)
  LANGUAGE sql AS
$func$
WITH data AS (
   SELECT bf."ID", bf."geoJson"
        , st_clusterdbscan(st_transform(bf."geoJson", 24313), eps => 50, minpoints => 1) OVER (ORDER BY bf."ID") AS cluster_id
   FROM   building_footprints bf
   JOIN   area      a   ON a."ID" = bf."areaID"
   JOIN   area_type at2 ON at2."ID" = a."areaTypeID"
   WHERE  a."ID" IN (SELECT "areaID" FROM campaign_scope)
   AND    at2.name = 'Uc'
   )
SELECT d."ID", st_clusterkmeans(d."geoJson", ' || _number_of_clusters || ') OVER()  -- AS cluster_id   -- noise
FROM  (
   SELECT d1.cluster_id
   FROM   data d1
   GROUP  BY 1
   HAVING count(*) >= 10
   ) filter
JOIN   data d USING (cluster_id);
$func$;


假设building_footprints."ID"定义为NOT NULL
进一步阅读:

  • 带WHERE条件的Postgres LEFT JOIN
  • 被遗忘的赋值运算符“=”和常见的“:=”

相关问题