在ggplot2(R)中创建具有多个“结”的分段回归

velaa5lx  于 2023-07-31  发布在  其他
关注(0)|答案(1)|浏览(87)

我试图使用ggplot2包(R)中的非线性数据进行分段回归。我已经编写了代码,成功地创建了一个,但似乎不能得到我想要的。下图是一个CDF,有两个不同的增长,我试图客观地分配这些增长开始和结束的日期。不幸的是,我的代码只能识别其中一个增量,并且忽略了发送增量,除非我在创建新的 Dataframe 后单独绘制它。下面是代码和图表:

library(segmented)
attach(dai15E_flux_split)
y<- dai15E_flux_split$CDF15E
x<- dai15E_flux_split$Dates
pw_reg_15E<- data.frame(x = x, y = y)
out.lm <- lm(y ~ x, data = pw_reg_15E)
o <- segmented(out.lm, seg.Z = ~x, psi = list(x = c(as.POSIXct('2020-12-20 18:00:00'),as.POSIXct('2021-01-23 18:00:00'))),
               control = seg.control(display = FALSE)
)
dat2 = data.frame(x = x, y = broken.line(o)$fit)
w<- ggplot(pw_reg_15E, aes(x = x, y = y)) +
  geom_point() +
  geom_line(data = dat2, color = 'blue', linewidth = 2) +
  #geom_line(data = dat3, color = 'blue') +
  scale_x_datetime(date_breaks = '5 days', date_labels = '%b-%d') +
  scale_y_continuous(breaks = ~ seq(0, max(.x), .1)) +
  annotate('segment', x= as.POSIXct("2021-01-29"), xend= as.POSIXct("2021-01-29"), yend=-Inf, y=Inf, colour = "black", alpha = 1) +#creates the continuous density function plot for Dai15
  geom_vline(xintercept = (dai15E_flux_split$Dates[dai15E_flux_split$peak_status == "Peak"]), color = "black") +
  geom_point(size = 4) + theme_bw() + #ylab(~paste('CDF', Phi)) +
  labs(x = "Date")
plot(w)

字符串
它产生:

的数据
如果我在第二个断点后的某个日期切断数据,我能够检测到第二次增加的开始,但不能检测到结束(假设这可能是由于数据的性质所致):

我确实试过为一个图创建两条独立的线段,但没有任何结果,我做错了什么吗?最终我想做的是这样的(请原谅这张图的粗糙,我用mspaint画了这条线):

复制数据:

structure(list(x = structure(c(1606820400, 1606906800, 1606993200, 
1607079600, 1607166000, 1607252400, 1607338800, 1607425200, 1607511600, 
1607598000, 1607684400, 1607770800, 1607857200, 1607943600, 1608030000, 
1608116400, 1608202800, 1608289200, 1608375600, 1608462000, 1608548400, 
1608634800, 1608721200, 1608807600, 1608894000, 1608980400, 1609066800, 
1609153200, 1609239600, 1609326000, 1609412400, 1609498800, 1609585200, 
1609671600, 1609758000, 1609844400, 1609930800, 1610017200, 1610103600, 
1610190000, 1610276400, 1610362800, 1610449200, 1610535600, 1610622000, 
1610708400, 1610794800, 1610881200, 1610967600, 1611054000, 1611140400, 
1611226800, 1611313200, 1611399600, 1611486000, 1611572400, 1611658800, 
1611745200, 1611831600), class = c("POSIXct", "POSIXt"), tzone = "Asia/Bangkok"), 
    y = c(0.0127654103696743, 0.0250904882020871, 0.0370714866508862, 
    0.0468027880724444, 0.0536991190985604, 0.0626795693063142, 
    0.0708962963854844, 0.0772446662188717, 0.0842076373362264, 
    0.090949381870277, 0.0970952248012049, 0.104371307628584, 
    0.110358027715881, 0.115450773819791, 0.122580977690793, 
    0.134406168897197, 0.146368284831593, 0.158716595955345, 
    0.170651213164138, 0.190479315670677, 0.224094027748295, 
    0.280359267223023, 0.337800104800282, 0.406506532914733, 
    0.491550665708546, 0.548897965427954, 0.577827044961975, 
    0.596475967835595, 0.605583211717681, 0.611728728447968, 
    0.616878285109383, 0.623334778422036, 0.631319388073111, 
    0.641552889847983, 0.650051719012484, 0.659696487319351, 
    0.666246012002399, 0.672683290607584, 0.677061608197219, 
    0.68231817736804, 0.687772229223841, 0.696619586579348, 0.710702270394893, 
    0.718733992834477, 0.728995458593268, 0.744779674223926, 
    0.763218116035974, 0.790875495532422, 0.825075587533513, 
    0.846944735886294, 0.869832760922987, 0.892032203445386, 
    0.919806428133041, 0.943454538344951, 0.963248306979268, 
    0.977788349576517, 0.986129869261138, 0.993916256029506, 
    1)), row.names = c(NA, 59L), class = "data.frame")

vltsax25

vltsax251#

如果你想抓住两个增长,你至少需要三个断点。这里有一个4的尝试:

library(segmented)
out.lm <- lm(y ~ x, data = pw_reg_15E)
br <- as.POSIXct(c('2020-12-20 18:00:00','2020-12-28 18:00:00', '2021-01-15 18:00:00', '2021-01-23 18:00:00'))
o <- segmented(out.lm, seg.Z = ~x, psi = list(x = br))
dat2 = data.frame(x = pw_reg_15E$x, y = predict(o))

ggplot(pw_reg_15E, aes(x = x, y = y)) +
  geom_point() +
  geom_line(data = dat2, col = 'firebrick') +
  geom_vline(xintercept = o$psi[, 'Est.'])

字符串


的数据

> as.POSIXct(o$psi[, 'Est.'])
                   psi1.x                    psi2.x                    psi3.x                    psi4.x 
"2020-12-20 13:14:11 PST" "2020-12-26 15:35:47 PST" "2021-01-14 12:22:15 PST" "2021-01-23 21:34:05 PST"

相关问题