我尝试使用POSTGIS实现内置的kmeans()
函数,如下所示:
create or replace FUNCTION kmeans (number_of_clusters INTEGER) RETURNS TEXT AS
$$
begin
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
RETURN 'SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan';
END;
$$ LANGUAGE PLPGSQL;
字符串
这会产生一个错误:
SQL错误[42601]:错误:在“RETURN”处或附近出现语法错误
整个查询是否需要在RETURN
子句中?我还关心是否返回文本或具有不同指定列的表。
**更新:**我也尝试了以下内容:
create or replace FUNCTION kmeans (number_of_clusters INTEGER)
RETURNS TABLE (ID varchar ,
cluster_id int)
LANGUAGE plpgsql AS
$func$
DECLARE
number_of_clusters int;
BEGIN
RETURN QUERY
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
END
$func$;
型
但是,这再次给出以下错误:
SQL Error [42601]: ERROR: syntax error at or near "END"
Position: 1114
型
**第二次更新:**基于Luuk的评论,我将代码更新为:
create or replace FUNCTION kmeans (number_of_clusters INTEGER)
RETURNS TABLE (ID varchar ,
cluster_id int)
LANGUAGE plpgsql AS
$$
DECLARE
number_of_clusters int;
BEGIN
RETURN QUERY
with data as(
select at2."name" as area_type, a.name,bf.* from building_footprints bf
left join area a on bf."areaID" = a."ID"
left join area_type at2 on at2."ID" = a."areaTypeID"
where a."ID" in (select "areaID" from campaign_scope)
and at2."name"='Uc'
)
,
dbscan as(
select data.*,ST_ClusterDBSCAN(st_transform(data."geoJson",24313),eps := 50, minPoints := 1)
OVER(ORDER BY data."ID") AS cluster_id from data)
,
filter as(
select cluster_id,count("ID") from dbscan
group by cluster_id
having count("ID") < 10
)
,
accepted_dbscan as
(
select * from dbscan
where cluster_id in (select cluster_id from filter)
)
,
rejected_dbscan as
(
select * from dbscan
where cluster_id not in (select cluster_id from filter)
)
SELECT "ID", ST_ClusterKMeans(rejected_dbscan."geoJson", ' || number_of_clusters || ') OVER() AS cluster_id FROM rejected_dbscan
end;
$$;
型
但我却犯了个错误
SQL错误[42601]:错误:语法错误在或接近“结束”
1条答案
按热度按时间ca1c2owp1#
错误消息的直接原因是
SELECT
语句末尾缺少分号(;
),正如注解中指出的那样。但你的大部分功能都是噪音或废话。归结为:
字符串
或者用一个简单的SQL函数更简单:
型
假设
building_footprints."ID"
定义为NOT NULL
。进一步阅读: