R:创建矩阵以存储连接结果

ca1c2owp  于 2023-04-03  发布在  其他
关注(0)|答案(1)|浏览(107)

bounty将在2天后过期。回答此问题可获得+50的声誉奖励。stats_noob正在寻找来自声誉良好来源的答案

我在R中有两个shapefile:

我将这些文件导入R:

file_1 <- sf::st_read("C:/Users/me/OneDrive/Documents/folder1/lada000b21a_e.shp", options = "ENCODING=WINDOWS-1252")
file_2 <- sf::st_read("C:/Users/me/OneDrive/Documents/folder2/lpc_000b21a_e.shp", options = "ENCODING=WINDOWS-1252")

file_1 <- st_transform(file_1, crs = "+proj=longlat +datum=WGS84")
file_2 <- st_transform(file_2, crs = "+proj=longlat +datum=WGS84")

> file_1

Simple feature collection with 1507 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -95.15386 ymin: 41.68132 xmax: -74.34347 ymax: 56.05945
CRS:           +proj=longlat +datum=WGS84
First 10 features:
       ADAUID             DGUID LANDAREA PRUID                       geometry
1567 35010001 2021S051635010001 643.4007    35 MULTIPOLYGON (((-74.48809 4...
1568 35010002 2021S051635010002 605.0164    35 MULTIPOLYGON (((-74.55843 4...
1569 35010003 2021S051635010003 515.4641    35 MULTIPOLYGON (((-74.90049 4...

>file_2
> pop_mini
Simple feature collection with 310 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -116.9893 ymin: 41.98072 xmax: -64.09933 ymax: 53.53916
CRS:           +proj=longlat +datum=WGS84
First 10 features:
   PCUID PCPUID         DGUID DGUIDP                 PCNAME PCTYPE PCCLASS LANDAREA PRUID                       geometry
1   0001 350001 2021S05100001   <NA>                  Acton      2       2   7.8376    35 MULTIPOLYGON (((-80.00597 4...
5   0006 350006 2021S05100006   <NA>             Alexandria      4       2   2.0689    35 MULTIPOLYGON (((-74.63831 4...
6   0007 350007 2021S05100007   <NA>                 Alfred      4       2   0.8654    35 MULTIPOLYGON (((-74.87997 4...

**我的问题:**我试图构造一个矩阵,该矩阵显示file_1中的每个多边形被file_2中的多边形覆盖的百分比,以及file_2中的每个多边形被file_1中的多边形覆盖的百分比。

根据我做的一些研究(例如R: Calculating Polygon Overlaps and Intersectionshttps://github.com/r-lib/isoband/issues/6),我首先用这两个文件修复了几何形状:

library(lwgeom)
library(sf)
library(dplyr)

# Repair invalid geometries in file_1
file_1$geometry <- st_make_valid(file_1$geometry)

# Repair invalid geometries in file_2
file_2$geometry <- st_make_valid(file_2$geometry)

然后,我尝试编写一个矩阵循环过程来计算两个文件中成对多边形的覆盖百分比:

# Calculate the number of polygons in each file
n_file_1 <- nrow(file_1)
n_file_2 <- nrow(file_2)
# Use st_intersection to find overlapping polygons
intersections <- st_intersection(file_1, file_2)

# Create a matrix to store coverage percentages
coverage_matrix <- matrix(0, n_file_1, n_file_2)

# Calculate the number of polygons in each file
n_ada <- nrow(file_1)
n_pop <- nrow(file_2)

# Create a matrix to store coverage percentages
coverage_matrix <- matrix(0, n_file_1, n_file_2)

# Calculate coverage percentages for each pair of polygons
for (i in seq_len(n_file_1)) {
    for (j in seq_len(n_file_2)) {
        intersection_area <- st_area(st_intersection(file_1[i,], file_2[j,]))
        if (length(intersection_area) > 0) {
            file_1_area <- st_area(file_1[i,])
            coverage_matrix[i,j] <- 100 * intersection_area / file_1_area
        }
        # Print intermediate results
        cat(paste0("i: ", i, ", j: ", j, ", coverage: ", coverage_matrix[i,j], "\n"))
    }
}

# Set row and column names for the coverage matrix
rownames(coverage_matrix) <- paste0("file_1 ", seq_len(n_file_1))
colnames(coverage_matrix) <- paste0("file_2 ", seq_len(n_file_2))

# Print the coverage matrix
print(coverage_matrix)

代码似乎正在运行:

i: 1, j: 1, coverage: 0
i: 1, j: 2, coverage: 0.349480438105992
i: 1, j: 3, coverage: 0
i: 1, j: 4, coverage: 0

但我不确定我做得是否正确

有人能告诉我怎么做吗?有没有更有效/更快的方法来做这件事?
谢谢!

h5qlskok

h5qlskok1#

我想我已经知道该怎么做了。

# Repair invalid geometries in file_1
file_1$geometry <- st_make_valid(file_1$geometry)

# Repair invalid geometries in file_2
file_2$geometry <- st_make_valid(file_2$geometry)

# Reproject if necessary
file_1 <- st_transform(file_1, "+proj=longlat +datum=WGS84")
file_2 <- st_transform(file_2, "+proj=longlat +datum=WGS84")

# Use st_intersection to find overlapping polygons
intersections <- st_intersection(file_1, file_2)

# Check for invalid features
invalid_features <- st_is_valid(intersections)

# Remove invalid features
intersections <- intersections[invalid_features, ]

# Calculate area of overlapping polygons
area_intersections <- st_area(intersections)

# Calculate area of all polygons in file_1 and file_2
area_file_1 <- st_area(file_1)
area_file_2 <- st_area(file_2)

# Calculate percentage coverage for each pair of polygons
coverage_matrix <- matrix(0, nrow(file_1), nrow(file_2))
for (i in seq_len(nrow(file_1))) {
    for (j in seq_len(nrow(file_2))) {
        # Calculate area of intersection between polygons i and j
        intersection_area <- sum(area_intersections[which(intersections$ADAUID == file_1$ADAUID[i] & 
                                                              intersections$PCUID == file_2$PCUID[j])])
        # Calculate percentage coverage
        coverage_matrix[i, j] <- intersection_area / (area_file_1[i] + area_file_2[j] - intersection_area)

    }
}

# View coverage matrix
coverage_matrix

# Set row and column names for the coverage matrix
rownames(coverage_matrix) <- file_1$ADAUID
colnames(coverage_matrix) <- file_2$PCUID

从这里,我然后处理结果(每个ADAUID有多行,所以我需要对它们进行分组求和):

# Convert matrix to data frame
coverage_df <- as.data.frame.table(coverage_matrix)

# Rename columns
names(coverage_df) <- c("adauid", "pcuid", "coverage_value")

# Filter non-zero coverage values
coverage_df <- coverage_df[coverage_df$coverage_value != 0, c("adauid", "coverage_value")]

coverage_df$coverage_value = coverage_df$coverage_value * 100

这段代码似乎运行得更快-但它是否正确?
**注:**打印选项

# Calculate percentage coverage for each pair of polygons
coverage_matrix <- matrix(0, nrow(file_1), nrow(file_2))
for (i in seq_len(nrow(file_1))) {
    for (j in seq_len(nrow(file_2))) {
        # Calculate area of intersection between polygons i and j
        intersection_area <- sum(area_intersections[which(intersections$ADAUID == file_1$ADAUID[i] & 
                                                              intersections$PCUID == file_2$PCUID[j])])
        # Calculate percentage coverage
        coverage_matrix[i, j] <- intersection_area / (area_file_1[i] + area_file_2[j] - intersection_area)
        # Print the percentage coverage
        print(paste0("Coverage for polygons ", i, " and ", j, ": ", coverage_matrix[i, j]))
    }
}

相关问题