Settings

library(tidyverse)
library(multidplyr)
library(zoo)
library(slider)
library(pROC)

plot_theme <- theme_bw() 

cores <- parallel::detectCores() - 4
cluster <- new_cluster(cores)

window <- 7 ## Window of days to calculate rolling values
vars <- c("RD", "VPD", "T", "SOIL", "AWSSM", "RAIN", "AET", "D", "GDD")

region_name <- "Southern Rockies"  ## Set region name, should be same as L3 Ecoregion name

settings <- tribble(
    ~region, ~params,
    "Southern Rockies", list(data_dir =  "/home/steve/OneDrive/nothern-rockies-dryness",
                           out_dir =  "/media/steve/storage/waterbalance/fire/out/ap23/",
                           master_csv =  "SR_fire_points_extracted.csv"
                           ),
    "Middle Rockies", list(data_dir =  "/home/steve/OneDrive/nothern-rockies-dryness",
                           out_dir =  "/media/steve/storage/waterbalance/fire/out/",
                           master_csv =  "MR_fire_points_extracted.csv"
                           ),
)


settings <- settings %>% filter(region == region_name) 

## Directory containing metadata file
data_dir <- settings$params[[1]]$data_dir

## Directory with csv files from merge-csv.R
out_dir <- settings$params[[1]]$out_dir

master_csv <- settings$params[[1]]$master_csv

Southern Rockies

Preprocessing

  1. Get MTBS database
  • Extract fire polys by location of interest (Ecoregion)
  • Create centroids for each poly (Pole of inaccessability in QGIS)
  1. Get LANDFIRE EVT (Existing Vegetation Type data)
  • Use statistics tool in QGIS to get mean, median, and mode EVT by pixel count. Only mode is used going further.
  1. Create master csv file with points of interest. Needs columns ‘Event_ID’, ‘Incid_Type’, ‘Ig_Date’, ‘x’ (=Longitude), ‘y’ (=Latitiude), and ‘LF20_EVT_5’ (Landfire 2020 EVT type). Further analysis can look at finer resolution LANDFIRE vegetation types offered.
  2. Get Historical WB data for each centroid from Mike
  3. Get Historical gridmet climate data for Each centroid
  • gridmet-historical.R - download gridmet data for each point
    • pr, tmmx, tmmn, rmax, rmin, vpd
    • creates one dir ‘gridmet-$var’ for each variable
  • merge-csv.R
    • join Mike’s data and historical gridmet data into one csv for each Event_ID
    • use these csvs for input to this script

Master dataframe

Read in file containing meta data for fires of interest.

setwd(data_dir)

master <- read_csv(master_csv) %>%
    filter(Incid_Type == "Wildfire") %>%
##    filter(year(Ig_Date) <= 2014) %>%
    rename("maj_veg_cl" = LF20_EVT_5,
           "Acres" = BurnBndAc) %>%
    ##  maj_class is the majority
    ##    of pixels classification from Landfire 30m pixels in burned
    ##    areas, this data was extracted in QGIS.
    mutate(maj_veg_cl = case_match(maj_veg_cl,
                                   c("Herb", "Shrub", "Sparse") ~ "non-forest",
                                   "Tree" ~ "forest",
                                   c("Agriculture", "Water") ~ "other"),
           filename = paste0(Event_ID, ".csv"),
           doy = yday(Ig_Date))
## Rows: 290 Columns: 46
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (18): Event_ID, irwinID, Incid_Name, Incid_Type, Map_Prog, Asmnt_Type, Pre_ID, Post_ID, Comment, LF20_EVT_2, LF20_EVT_4, LF20_EVT_...
## dbl  (27): Map_ID, BurnBndAc, BurnBndLat, BurnBndLon, Perim_ID, dNBR_offst, dNBR_stdDv, NoData_T, IncGreen_T, Low_T, Mod_T, High_T, _EV...
## date  (1): Ig_Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Map

library(sp)
library(sf)

coords <- data.frame(
    x = master$x,
    y = master$y
)
spdf <- SpatialPointsDataFrame(coords = coords, data = master)

ecoregions <- st_transform(read_sf("/home/steve/OneDrive/nothern-rockies-dryness/data/us_eco_l3/us_eco_l3.shp"), crs = 4326)
ecoregion <- ecoregions %>% filter(US_L3NAME == region_name)

ggplot(ecoregion) +
    geom_sf() +
    geom_point(data = master, aes(x, y, color = maj_veg_cl))

summary_tbl <- master %>% group_by(maj_veg_cl) %>% summarise(n = n())
knitr::kable(summary_tbl)
maj_veg_cl n
forest 153
non-forest 86

Determine Fire Season

forest<-subset(master, maj_veg_cl=="forest")
forest_q<-quantile(forest$doy,c(0,1))#90% of data fall within this range 0.05-0.95# 0.27 to 0.75 or 0,1 to determine min max

nonforest<-subset(master, maj_veg_cl=="non-forest")
nonforest_q<-quantile(nonforest$doy,c(0,1))#90% of data fall within this range 0.05-0.95# 0.27 to 0.75 or 0,1 to determine min

#start and end dates of fire season by vegetation type derived from day of year in min_max table
doy_min_max<-as.data.frame(rbind(forest_q, nonforest_q))
forest_start<-doy_min_max[1,1]
nonforest_start<-doy_min_max[2,1]
forest_end<-doy_min_max[1,2]
nonforest_end<-doy_min_max[2,2]

knitr::kable(doy_min_max)
0% 100%
forest_q 79 303
nonforest_q 7 301

Compute Percentiles

cluster_copy(cluster, "master")
cluster_copy(cluster, "window")
cluster_copy(cluster, "vars")
cluster_copy(cluster, "forest_start")
cluster_copy(cluster, "nonforest_start")
cluster_copy(cluster, "forest_end")
cluster_copy(cluster, "nonforest_end")
cluster_library(cluster, 'tidyverse')
cluster_library(cluster, 'slider')
cluster_library(cluster, 'zoo')


files <- paste0(out_dir, master$filename)
cluster_assign_partition(cluster, files = files)
cluster_send(cluster, wbdata <- vroom::vroom(files, id = "PATH"))
wbdata <- party_df(cluster, "wbdata")
master <- party_df(cluster, "master")

wbdata <- wbdata %>%
    mutate(Event_ID = str_extract(PATH, '(?<=/)[^/]*(?=\\.[^./]+$)')) %>% ## Extract file name without extension, use as Event_ID
    group_by(Event_ID) %>%
    mutate(RD = 100 - RH,
           WHC = max(SOIL), ## Guestimate soil WHC from max soil moisture observed
           AWSSM = WHC - SOIL) %>%
    left_join(select(master,
                     Event_ID, Ig_Date, maj_veg_cl, Acres),
              by = "Event_ID") %>%
    mutate(fire = if_else(Date == Ig_Date, 1, 0)) 

start <- proc.time() # Start clock
wbdata_smoothed <- select(wbdata, c(Event_ID, Ig_Date, maj_veg_cl, Acres, fire, Date, vars)) %>%
    ## mutate_at(vars[1:5],
    ##           ~ slide_dbl(.x, mean, .before = window, .after = 0, .complete = TRUE)) %>%
    ## mutate_at(vars[6:9],
    ##           ~ slide_dbl(.x, sum, .before = window, .after = 0, .complete = TRUE)) %>%
    mutate_at(vars[1:5],
              ~ rollapply(.x, window, mean, by = 1, partial = FALSE, fill = NA, align = "right")) %>%
    mutate_at(vars[6:9],
              ~ rollapply(.x, window, sum, by = 1, partial = FALSE, fill = NA, align = "right")) 
time_elapsed_parallel <- proc.time() - start # End clock

wbdata_percentiles_forest <- select(wbdata_smoothed, c(Event_ID, Ig_Date, maj_veg_cl, Acres, fire, Date, vars)) %>%
    filter(maj_veg_cl == "forest") %>%
    filter(Date <= Ig_Date,
           yday(Date) >= forest_start, ## Filter to fire season
           yday(Date) <= forest_end) %>%  ## To get this to match results from original dryness_script2, we only filter beginning of season.  Original script also didn't use doy, instead it filtered by absolute date, so would only chop off first pre-fireseason from first year in record.
##    slice(window + 1):n()) %>%
    collect() %>%
    drop_na(vars) %>% ### Drop NAs from periods not included in rolling calcs
    mutate(across(vars,
                  percent_rank))

wbdata_percentiles_nonforest <- select(wbdata_smoothed, c(Event_ID, Ig_Date, maj_veg_cl, Acres, fire, Date, vars)) %>%
    filter(maj_veg_cl == "non-forest") %>%
    filter(Date <= Ig_Date,
           yday(Date) >= nonforest_start,  ## Filter to fire season
           yday(Date) <= nonforest_end) %>%
    ##        slice((window + 1):n()) %>%
    collect() %>%
    drop_na(vars) %>% ### Drop NAs from periods not included in rolling calcs
    mutate(across(vars,
                  percent_rank)) 

ignitions_forest <- wbdata_percentiles_forest %>% filter(fire == 1)
nonignitions_forest <- wbdata_percentiles_forest %>% filter(fire == 0)
ignitions_forest_long <- ignitions_forest %>% pivot_longer(cols = vars)
ignitions_nonforest <- wbdata_percentiles_nonforest %>% filter(fire == 1)
nonignitions_nonforest <- wbdata_percentiles_nonforest %>% filter(fire == 0)
ignitions_nonforest_long <- ignitions_nonforest %>% pivot_longer(cols = vars)
ignitions_long <- rbind(ignitions_forest_long, ignitions_nonforest_long)

Corrplot

library(GGally)

ggcorr(wbdata_percentiles_forest, method = c("everything", "pearson")) 
## Warning in ggcorr(wbdata_percentiles_forest, method = c("everything", "pearson")): data in column(s) 'Event_ID', 'Ig_Date', 'maj_veg_cl',
## 'Date' are not numeric and were ignored

ROC Analysis

demonstrate histogram of conditions associated with fire or no fire to see how much overlap there is. This helps interpret the ROC

Determine contribution of various parameters to burned area

## $gfs
##   [1] 0.000000000 0.004206400 0.004097772 0.003898594 0.004268763 0.017098843 0.017131174 0.025847891 0.016510817 0.004853614 0.004685425
##  [12] 0.004962250 0.017098907 0.036611296 0.025895228 0.056579715 0.004688774 0.004688236 0.019834070 0.023288471 0.025853199 0.018152024
##  [23] 0.004806504 0.017106640 0.035727935 0.025888029 0.052821543 0.019847674 0.022849704 0.025847999 0.017962671 0.024040915 0.041937305
##  [34] 0.022929151 0.027977260 0.017299912 0.027200537 0.005312426 0.005512104 0.023891302 0.048884294 0.025903205 0.056848766 0.005417832
##  [45] 0.017620372 0.036617671 0.025918550 0.056579781 0.023711364 0.048578836 0.025924040 0.056812370 0.053900204 0.050067539 0.071697248
##  [56] 0.041168870 0.058709729 0.058131580 0.005341880 0.024249534 0.047146302 0.025893462 0.053173720 0.019848146 0.023824108 0.026784996
##  [67] 0.018871023 0.025810496 0.042123669 0.023022703 0.032025454 0.023665352 0.028372318 0.024058093 0.046841305 0.025911732 0.053129754
##  [78] 0.052335146 0.049701930 0.067015217 0.040005827 0.054481489 0.053894768 0.025659182 0.042106262 0.023002510 0.031614372 0.023196298
##  [89] 0.028194641 0.042355054 0.024075098 0.043881638 0.028221041 0.005952113 0.024352625 0.048903638 0.025926268 0.056848844 0.023967560
## [100] 0.048934552 0.026819362 0.057014283 0.056898744 0.055400132 0.073342409 0.050807335 0.062697743 0.058368782 0.024168131 0.048596437
## [111] 0.025946972 0.056812457 0.053905300 0.050186033 0.071697912 0.041225649 0.058716585 0.058180190 0.057040727 0.055199173 0.073191823
## [122] 0.050457868 0.062538374 0.058345462 0.064147129 0.075013546 0.071701809 0.060373349 0.024335764 0.047207860 0.026810490 0.053355755
## [133] 0.054951914 0.054534581 0.068283490 0.048959975 0.058287492 0.054221642 0.025985390 0.042219075 0.023166982 0.032811864 0.024141849
## [144] 0.029302946 0.042422837 0.026252140 0.044622565 0.033805450 0.055073945 0.054356713 0.068167085 0.048613843 0.058117618 0.054190034
## [155] 0.061894674 0.069626290 0.067093583 0.055580619 0.042438512 0.026087289 0.044626865 0.033310501 0.045817924 0.024435841 0.048954311
## [166] 0.026849782 0.057014298 0.056901219 0.055529985 0.073343118 0.050918251 0.062734030 0.058416754 0.057100958 0.055553869 0.073713244
## [177] 0.050930139 0.062755834 0.058442185 0.064414022 0.075013880 0.073405212 0.064322284 0.057043107 0.055330876 0.073192452 0.050563061
## [188] 0.062573025 0.058394010 0.064325072 0.075021185 0.071702006 0.060391247 0.064472494 0.075016871 0.073251460 0.064228620 0.075018334
## [199] 0.055124566 0.054669226 0.068572416 0.049100147 0.058363603 0.054319382 0.062088124 0.069626349 0.068488427 0.059307905 0.042482322
## [210] 0.026386883 0.044627017 0.034458193 0.045847422 0.062133427 0.069631796 0.068366561 0.059190168 0.069725042 0.045843940 0.057102832
## [221] 0.055679677 0.073714470 0.051045634 0.062792736 0.058486518 0.064596821 0.075021353 0.073405550 0.064322806 0.064624775 0.075380260
## [232] 0.073768574 0.064331795 0.075019385 0.064654886 0.075025147 0.073251815 0.064229559 0.075028588 0.075020351 0.062260401 0.069907925
## [243] 0.068765359 0.059334334 0.069727777 0.045852348 0.069725265 0.064802141 0.075386334 0.073768626 0.064332207 0.075029465 0.075383734
## [254] 0.075030841 0.070001217 0.075391690
## 
## $IJ
##                 I            J       Total
## T     0.010718973 -0.006512573 0.004206400
## SOIL  0.001643852  0.002453921 0.004097772
## GDD   0.007878400 -0.003979806 0.003898594
## AWSSM 0.001604702  0.002664061 0.004268763
## AET   0.013901699  0.003197144 0.017098843
## D     0.011076152  0.006055023 0.017131174
## RD    0.010955235  0.014892656 0.025847891
## VPD   0.017612677 -0.001101861 0.016510817
## 
## $I.perc
##       ind.exp.var
## T       14.217711
## SOIL     2.180415
## GDD     10.449958
## AWSSM    2.128487
## AET     18.439299
## D       14.691476
## RD      14.531092
## VPD     23.361563
## 
## $params
## $params$full.model
## [1] "y ~ T + SOIL + GDD + AWSSM + AET + D + RD + VPD"
## 
## $params$family
## [1] "gaussian"
## 
## $params$link
## [1] "default"
## 
## $params$gof
## [1] "Rsqu"

## $gfs
##   [1] 0.0000000000 0.0002694873 0.0283085039 0.0001250685 0.0282655752 0.0412409115 0.0121510727 0.0326333061 0.0053217738 0.0538618776
##  [11] 0.0199517001 0.0538952885 0.0419936644 0.0470348030 0.0346215093 0.0261263118 0.0520821724 0.0284085769 0.0533905990 0.0311014077
##  [21] 0.0473405182 0.0300294210 0.0521167137 0.0423102853 0.0436982878 0.0341853954 0.0233172920 0.0534784400 0.0310910408 0.0472239026
##  [31] 0.0300134369 0.0501277230 0.0571239323 0.0489917699 0.0333833211 0.0163069949 0.0340859244 0.0668872145 0.0539047594 0.0607316391
##  [41] 0.0596463582 0.0711491994 0.0666647220 0.0668379989 0.0612491188 0.0651795151 0.0534045388 0.0461934214 0.0608691272 0.0596170663
##  [51] 0.0710119364 0.0665811954 0.0611413833 0.0571272587 0.0569292124 0.0512578094 0.0471891210 0.0347163424 0.0521277819 0.0595564194
##  [61] 0.0569477925 0.0694102094 0.0633539998 0.0545785816 0.0311015804 0.0489279800 0.0300347801 0.0533915856 0.0643551135 0.0536869216
##  [71] 0.0635966236 0.0312586613 0.0655896728 0.0596934106 0.0569249297 0.0692759014 0.0632829104 0.0590721519 0.0571364201 0.0552055336
##  [81] 0.0486103585 0.0437582218 0.0342002281 0.0534784952 0.0643625443 0.0537585874 0.0633873233 0.0312431074 0.0654392217 0.0586423088
##  [91] 0.0501408299 0.0572548620 0.0506453135 0.0669225714 0.0753459443 0.0741472742 0.0845404883 0.0806223338 0.0615821792 0.0596725407
## [101] 0.0716466631 0.0668127054 0.0656963731 0.0738569018 0.0719695717 0.0712800830 0.0671287987 0.0712004780 0.0754188364 0.0740644795
## [111] 0.0843424817 0.0804676005 0.0794440738 0.0758448893 0.0765333845 0.0693712385 0.0654859659 0.0536480332 0.0657448040 0.0738282478
## [121] 0.0720023823 0.0711352183 0.0670381765 0.0710644304 0.0641993954 0.0623932233 0.0583672156 0.0537367986 0.0604583677 0.0569607175
## [131] 0.0699022862 0.0634575172 0.0636467445 0.0725102619 0.0692757652 0.0697836458 0.0638949024 0.0694357203 0.0545843805 0.0643734073
## [141] 0.0547724240 0.0643329546 0.0312598809 0.0663579122 0.0694507134 0.0545510837 0.0696013337 0.0660458567 0.0637026093 0.0724880495
## [151] 0.0693203786 0.0696359615 0.0638178121 0.0693003245 0.0626547932 0.0600876466 0.0576075690 0.0523446790 0.0694360814 0.0546617438
## [161] 0.0695764645 0.0658761388 0.0618018378 0.0756720692 0.0745498354 0.0858210019 0.0813040201 0.0816644087 0.0882714703 0.0873216462
## [171] 0.0845411654 0.0807634746 0.0846925137 0.0659833984 0.0738854868 0.0720439261 0.0717684314 0.0672575598 0.0716895792 0.0738837217
## [181] 0.0724201280 0.0741991829 0.0715825851 0.0816704167 0.0882032779 0.0873042209 0.0843425702 0.0805990077 0.0844962558 0.0824691646
## [191] 0.0811026293 0.0775372829 0.0711594281 0.0738532895 0.0724621306 0.0741796969 0.0714332349 0.0642902914 0.0640207120 0.0725272420
## [201] 0.0694099501 0.0702653884 0.0639815752 0.0699369253 0.0726712117 0.0697909934 0.0725531651 0.0698521258 0.0694539645 0.0556903041
## [211] 0.0696357455 0.0667634099 0.0700632754 0.0726447428 0.0698477585 0.0725343557 0.0697026640 0.0630421159 0.0700369315 0.0816758892
## [221] 0.0885532892 0.0873359163 0.0858210494 0.0814169970 0.0859538685 0.0883221284 0.0874430889 0.0889485730 0.0847952965 0.0739136322
## [231] 0.0725094470 0.0742108630 0.0720359726 0.0745922169 0.0882586862 0.0874254446 0.0888924292 0.0845874662 0.0824704304 0.0745734172
## [241] 0.0726914157 0.0699454857 0.0725647128 0.0703151437 0.0729790495 0.0700865710 0.0729609385 0.0886007020 0.0874537140 0.0891518027
## [251] 0.0860223163 0.0890485867 0.0745983327 0.0889888569 0.0729848621 0.0892369058
## 
## $IJ
##                 I             J        Total
## T     0.014085541 -0.0138160536 0.0002694873
## SOIL  0.011407490  0.0169010142 0.0283085039
## GDD   0.012513550 -0.0123884817 0.0001250685
## AWSSM 0.011362898  0.0169026775 0.0282655752
## AET   0.015800311  0.0254406007 0.0412409115
## D     0.006086557  0.0060645159 0.0121510727
## RD    0.013541676  0.0190916300 0.0326333061
## VPD   0.004438884  0.0008828901 0.0053217738
## 
## $I.perc
##       ind.exp.var
## T       15.784434
## SOIL    12.783377
## GDD     14.022842
## AWSSM   12.733406
## AET     17.706027
## D        6.820672
## RD      15.174973
## VPD      4.974269
## 
## $params
## $params$full.model
## [1] "y ~ T + SOIL + GDD + AWSSM + AET + D + RD + VPD"
## 
## $params$family
## [1] "gaussian"
## 
## $params$link
## [1] "default"
## 
## $params$gof
## [1] "Rsqu"

Plots of wb percentile vs. area burned show weak relationships

ECDF

Fire Danger Model

regression_formulas <- tribble(
    ~cover, ~var, ~order, ~formula, ~r2
)

precision <- 7

regression1_formula <- function(a, b) {
    ## Regression formula in string format
    a <- round(a, precision)
    b <- round(b, precision)
    return (paste0("y = ", a, "e^(", b, "x)"))
}

regression2_formula <- function(b0, b1, b2) {
    ## Regression formula in string format
    b0 <- round(b0, precision)
    b1 <- round(b1, precision)
    b2 <- round(b2, precision)
    return (paste0("y = e^(", b0, " + ", b1, "x + ", b2, "x^2)"))
}

regression3_formula <- function(b0, b1, b2, b3) {
    ## Regression formula in string format
    b0 <- round(b0, precision)
    b1 <- round(b1, precision)
    b2 <- round(b2, precision)
    b3 <- round(b3, precision)
    return (paste0("y = e^(", b0, " + ", b1, "x + ", b2, "x^2 + ", b3, "x^3)"))
}

First Order

ecdf_regression1 <- function(ecdf_fn) {
    estimated <- data.frame(
        dec = seq(from = 0.01, to = 0.99, by = 0.01)
    )

    estimated$quants <- quantile(ecdf_fn, estimated$dec)
    estimated$proportions <- ecdf_fn(estimated$quants)

    lm <- lm(log(proportions) ~ quants, data = estimated)
    estimated$predicted <- exp(predict(lm, data = estimated$quants))
    
    ##plot(lm)

    a <- exp(lm$coefficients[[1]])
    b <- lm$coefficients[[2]]

    return (list(lm = lm,
                 estimated = estimated,
                 a = a,
                 b = b))
}
ecdf_regression1_plot <- function(ecdf_fn, estimated, a, b) {
    high_thresh <- .4
    low_thresh <- .1
    
    regression_fn <- function(x) {
        a <- a
        b <- b
        return (a*exp(b*x))
    }

    plot(ecdf_fn)
    points(y = estimated$proportions, x = estimated$quants, col = "blue")
    points(y = estimated$predicted, x = estimated$quants, col = "red")
    curve(regression_fn, add = TRUE, col = "red")
    x_low <- round(log(low_thresh/a)/b,2)
    segments(0, low_thresh, x1 = x_low, y1 = low_thresh) ## Low Fire Danger horiz
    text(x = .3, y = low_thresh + 0.02, label = "Low Fire Danger")
    segments(x_low, 0, x1 = x_low, y1 = low_thresh) ## Low Fire Danger vert
    text(x = x_low, y = -.02, label = x_low)
    x_high <- round(log(high_thresh/a)/b,2)
    segments(0, high_thresh, x1 = x_high, y1 = high_thresh) ## High Fire Danger horiz
    text(x = .3, y = high_thresh + 0.02, label = "High Fire Danger")
    segments(x_high, 0, x1 = x_high, y1 = high_thresh) ## High Fire Danger vert
    text(x = x_high, y = -.02, label = x_high)
}

CWD Forest

ecdf_fn <- ecdf(ignitions_forest$D)
summary(ecdf_fn)
## Empirical CDF:     151 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7487  0.8914  0.8201  0.9779  1.0000
regression_list <- ecdf_regression1(ecdf_fn)
estimated <- regression_list$estimated
lm <- regression_list$lm
r2 <- summary(lm)$adj.r.squared
a <- regression_list$a
b <- regression_list$b

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ quants, data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.42422 -0.06176 -0.02902  0.05981  0.26027 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -4.6532     0.0440 -105.75   <2e-16 ***
## quants        4.4557     0.0517   86.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1022 on 97 degrees of freedom
## Multiple R-squared:  0.9871, Adjusted R-squared:  0.987 
## F-statistic:  7428 on 1 and 97 DF,  p-value: < 2.2e-16
ecdf_regression1_plot(ecdf_fn, estimated, a, b)

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "1", cover = "forest", formula = regression1_formula(a, b), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of climatic water deficit on day of ignition:

\(\begin{align*}f(x) = 0.0095308 e^{4.4556479 x}\end{align*}\)

VPD Forest

ecdf_fn <- ecdf(ignitions_forest$VPD)
summary(ecdf_fn)
## Empirical CDF:     152 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01662 0.74359 0.90232 0.81565 0.97279 1.00000
regression_list <- ecdf_regression1(ecdf_fn)
estimated <- regression_list$estimated
lm <- regression_list$lm
r2 <- summary(lm)$adj.r.squared
a <- regression_list$a
b <- regression_list$b

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ quants, data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47371 -0.07345 -0.02498  0.06836  0.26426 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.55612    0.04441 -102.58   <2e-16 ***
## quants       4.36876    0.05247   83.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1057 on 97 degrees of freedom
## Multiple R-squared:  0.9862, Adjusted R-squared:  0.9861 
## F-statistic:  6932 on 1 and 97 DF,  p-value: < 2.2e-16
ecdf_regression1_plot(ecdf_fn, estimated, a, b)

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "1", cover = "forest", formula = regression1_formula(a, b), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of vapor pressure deficit on day of ignition:

\(\begin{align*}f(x) = 0.0105027 e^{4.3687578 x}\end{align*}\)

CWD Non-Forest

ecdf_fn <- ecdf(ignitions_nonforest$D)
summary(ecdf_fn)
## Empirical CDF:     86 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7748  0.9015  0.8195  0.9772  1.0000
regression_list <- ecdf_regression1(ecdf_fn)
estimated <- regression_list$estimated
lm <- regression_list$lm
r2 <- summary(lm)$adj.r.squared
a <- regression_list$a
b <- regression_list$b

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ quants, data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40603 -0.08401 -0.03195  0.10096  0.28000 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.42899    0.05216  -84.91   <2e-16 ***
## quants       4.19292    0.06117   68.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1281 on 97 degrees of freedom
## Multiple R-squared:  0.9798, Adjusted R-squared:  0.9796 
## F-statistic:  4699 on 1 and 97 DF,  p-value: < 2.2e-16
ecdf_regression1_plot(ecdf_fn, estimated, a, b)

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "1", cover = "nonforest", formula = regression1_formula(a, b), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of climatic water deficit on day of ignition:

\(\begin{align*}f(x) = 0.0119265 e^{4.192916 x}\end{align*}\)

VPD Non-Forest

ecdf_fn <- ecdf(ignitions_nonforest$VPD)
summary(ecdf_fn)
## Empirical CDF:     84 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04752 0.70940 0.88828 0.80706 0.98530 1.00000
regression_list <- ecdf_regression1(ecdf_fn)
estimated <- regression_list$estimated
lm <- regression_list$lm
r2 <- summary(lm)$adj.r.squared
a <- regression_list$a
b <- regression_list$b

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ quants, data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75777 -0.09800 -0.01799  0.10066  0.34878 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.28308    0.06210  -68.97   <2e-16 ***
## quants       4.05960    0.07342   55.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.158 on 97 degrees of freedom
## Multiple R-squared:  0.9692, Adjusted R-squared:  0.9689 
## F-statistic:  3057 on 1 and 97 DF,  p-value: < 2.2e-16
ecdf_regression1_plot(ecdf_fn, estimated, a, b)

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "1", cover = "nonforest", formula = regression1_formula(a, b), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of vapor pressure deficit on day of ignition:

\(\begin{align*}f(x) = 0.0138001 e^{4.0595998 x}\end{align*}\)

Second Order

Fits to ECDF curves may be improved by including a quadtratic term in regression

CWD Forest

ecdf_fn <- ecdf(ignitions_forest$D)
summary(ecdf_fn)
## Empirical CDF:     151 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7487  0.8914  0.8201  0.9779  1.0000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 2, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 2, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54728 -0.04542 -0.02587  0.04746  0.27498 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -4.4075     0.0956 -46.104  < 2e-16 ***
## poly(quants, 2, raw = TRUE)1   3.6140     0.2976  12.142  < 2e-16 ***
## poly(quants, 2, raw = TRUE)2   0.6224     0.2170   2.868  0.00507 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09859 on 96 degrees of freedom
## Multiple R-squared:  0.9881, Adjusted R-squared:  0.9879 
## F-statistic:  3995 on 2 and 96 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "2", cover = "forest", formula = regression2_formula(b0, b1, b2), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of CWD on day of ignition:

\(\begin{align*}f(x) = e^{-4.4075261 + 3.6139803x + 0.6223984x^2}\end{align*}\)

VPD Forest

ecdf_fn <- ecdf(ignitions_forest$VPD)
summary(ecdf_fn)
## Empirical CDF:     152 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01662 0.74359 0.90232 0.81565 0.97279 1.00000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 2, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 2, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47226 -0.07360 -0.02422  0.06860  0.26431 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -4.558896   0.105344 -43.276   <2e-16 ***
## poly(quants, 2, raw = TRUE)1  4.378196   0.328886  13.312   <2e-16 ***
## poly(quants, 2, raw = TRUE)2 -0.006955   0.239211  -0.029    0.977    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1063 on 96 degrees of freedom
## Multiple R-squared:  0.9862, Adjusted R-squared:  0.9859 
## F-statistic:  3430 on 2 and 96 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "2", cover = "forest", formula = regression2_formula(b0, b1, b2), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of VPD on day of ignition:

\(\begin{align*}f(x) = e^{-4.5588956 + 4.3781964x + -0.006955x^2}\end{align*}\)

CWD Non-forest

ecdf_fn <- ecdf(ignitions_nonforest$D)
summary(ecdf_fn)
## Empirical CDF:     86 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7748  0.9015  0.8195  0.9772  1.0000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 2, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 2, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62331 -0.06699 -0.02555  0.06489  0.26364 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -4.11131    0.09317 -44.125  < 2e-16 ***
## poly(quants, 2, raw = TRUE)1  3.00403    0.30300   9.914 2.27e-16 ***
## poly(quants, 2, raw = TRUE)2  0.91417    0.22884   3.995 0.000127 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1192 on 96 degrees of freedom
## Multiple R-squared:  0.9827, Adjusted R-squared:  0.9823 
## F-statistic:  2720 on 2 and 96 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "2", cover = "nonforest", formula = regression2_formula(b0, b1, b2), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of CWD on day of ignition:

\(\begin{align*}f(x) = e^{-4.1113085 + 3.004031x + 0.9141718x^2}\end{align*}\)

VPD Non-forest

ecdf_fn <- ecdf(ignitions_nonforest$VPD)
summary(ecdf_fn)
## Empirical CDF:     84 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04752 0.70940 0.88828 0.80706 0.98530 1.00000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 2, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 2, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59232 -0.09299 -0.01700  0.11712  0.32551 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -4.5826     0.1413 -32.424   <2e-16 ***
## poly(quants, 2, raw = TRUE)1   5.0988     0.4486  11.367   <2e-16 ***
## poly(quants, 2, raw = TRUE)2  -0.7691     0.3277  -2.347    0.021 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1544 on 96 degrees of freedom
## Multiple R-squared:  0.9709, Adjusted R-squared:  0.9703 
## F-statistic:  1602 on 2 and 96 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "2", cover = "nonforest", formula = regression2_formula(b0, b1, b2), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of VPD on day of ignition:

\(\begin{align*}f(x) = e^{-4.5826099 + 5.0987721x + -0.7690767x^2}\end{align*}\)

Third Order

CWD Forest

ecdf_fn <- ecdf(ignitions_forest$D)
summary(ecdf_fn)
## Empirical CDF:     151 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7487  0.8914  0.8201  0.9779  1.0000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 3, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 3, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.142013 -0.036181  0.000955  0.041161  0.145872 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -5.9987     0.1103  -54.37   <2e-16 ***
## poly(quants, 3, raw = TRUE)1  13.2616     0.6171   21.49   <2e-16 ***
## poly(quants, 3, raw = TRUE)2 -15.8955     1.0290  -15.45   <2e-16 ***
## poly(quants, 3, raw = TRUE)3   8.5349     0.5285   16.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05121 on 95 degrees of freedom
## Multiple R-squared:  0.9968, Adjusted R-squared:  0.9967 
## F-statistic:  9957 on 3 and 95 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]
b3 <- lm$coefficients[[4]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2) + b3*(x^3)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "3", cover = "forest", formula = regression3_formula(b0, b1, b2, b3), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of CWD on day of ignition:

\(\begin{align*}f(x) = e^{-5.998747 + 13.2616296x + -15.8955453x^2 + 8.5349086x^3}\end{align*}\)

VPD Forest

ecdf_fn <- ecdf(ignitions_forest$VPD)
summary(ecdf_fn)
## Empirical CDF:     152 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01662 0.74359 0.90232 0.81565 0.97279 1.00000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 3, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 3, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.192425 -0.022484 -0.003376  0.026962  0.088180 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -6.28608    0.09062  -69.37   <2e-16 ***
## poly(quants, 3, raw = TRUE)1  14.82632    0.49997   29.66   <2e-16 ***
## poly(quants, 3, raw = TRUE)2 -18.03981    0.83648  -21.57   <2e-16 ***
## poly(quants, 3, raw = TRUE)3   9.40315    0.43315   21.71   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04375 on 95 degrees of freedom
## Multiple R-squared:  0.9977, Adjusted R-squared:  0.9976 
## F-statistic: 1.365e+04 on 3 and 95 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]
b3 <- lm$coefficients[[4]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2) + b3*(x^3)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "3", cover = "forest", formula = regression3_formula(b0, b1, b2, b3), r2 = r2)

Estimated function predicting proportion of historic fires burning in forests at or below given percentile of VPD on day of ignition:

\(\begin{align*}f(x) = e^{-6.2860803 + 14.8263175x + -18.0398113x^2 + 9.4031475x^3}\end{align*}\)

CWD Non-forest

ecdf_fn <- ecdf(ignitions_nonforest$D)
summary(ecdf_fn)
## Empirical CDF:     86 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.7748  0.9015  0.8195  0.9772  1.0000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 3, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 3, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.154762 -0.029127 -0.000977  0.033943  0.215099 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -5.1959     0.0698  -74.44   <2e-16 ***
## poly(quants, 3, raw = TRUE)1  10.9446     0.4305   25.42   <2e-16 ***
## poly(quants, 3, raw = TRUE)2 -13.9862     0.7731  -18.09   <2e-16 ***
## poly(quants, 3, raw = TRUE)3   8.1364     0.4184   19.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05369 on 95 degrees of freedom
## Multiple R-squared:  0.9965, Adjusted R-squared:  0.9964 
## F-statistic:  9062 on 3 and 95 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]
b3 <- lm$coefficients[[4]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2) + b3*(x^3)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "D", order = "3", cover = "nonforest", formula = regression3_formula(b0, b1, b2, b3), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of CWD on day of ignition:

\(\begin{align*}f(x) = e^{-5.1959413 + 10.9446545x + -13.9861859x^2 + 8.1364017x^3}\end{align*}\)

VPD Non-forest

ecdf_fn <- ecdf(ignitions_nonforest$VPD)
summary(ecdf_fn)
## Empirical CDF:     84 unique values with summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.04752 0.70940 0.88828 0.80706 0.98530 1.00000
estimated <- data.frame(
    dec = seq(from = 0.01, to = 0.99, by = 0.01)
)

estimated$quants <- quantile(ecdf_fn, estimated$dec)
estimated$proportions <- ecdf_fn(estimated$quants)

lm <- lm(log(proportions) ~ poly(quants, 3, raw = TRUE), data = estimated)
r2 <- summary(lm)$adj.r.squared
estimated$predicted <- exp(predict(lm, data = estimated$quants))

##plot(lm)

summary(lm)
## 
## Call:
## lm(formula = log(proportions) ~ poly(quants, 3, raw = TRUE), 
##     data = estimated)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41643 -0.06774 -0.00257  0.06069  0.39026 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   -6.1516     0.1705  -36.08   <2e-16 ***
## poly(quants, 3, raw = TRUE)1  15.1109     0.9554   15.82   <2e-16 ***
## poly(quants, 3, raw = TRUE)2 -18.6871     1.6387  -11.40   <2e-16 ***
## poly(quants, 3, raw = TRUE)3   9.5663     0.8671   11.03   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1028 on 95 degrees of freedom
## Multiple R-squared:  0.9873, Adjusted R-squared:  0.9868 
## F-statistic:  2452 on 3 and 95 DF,  p-value: < 2.2e-16
b0 <- lm$coefficients[[1]]
b1 <- lm$coefficients[[2]]
b2 <- lm$coefficients[[3]]
b3 <- lm$coefficients[[4]]

plot(ecdf_fn)
points(y = estimated$proportions, x = estimated$quants, col = "blue")
points(y = estimated$predicted, x = estimated$quants, col = "red")
curve(exp(b0 + b1*x + b2*(x^2) + b3*(x^3)), add = TRUE, col = "red")

##curve(exp(predict(lm, newdata = data.frame(quants = x))), add = TRUE, col = "red")

regression_formulas <- regression_formulas %>%
    add_row(var = "VPD", order = "3", cover = "nonforest", formula = regression3_formula(b0, b1, b2, b3), r2 = r2)

Estimated function predicting proportion of historic fires burning in nonforests at or below given percentile of VPD on day of ignition:

\(\begin{align*}f(x) = e^{-6.151629 + 15.1109429x + -18.6870738x^2 + 9.566279x^3}\end{align*}\)

Estimated Regression Formulas

cover var order formula r2
forest D 1 y = 0.0095308e^(4.4556479x) 0.9869762
forest VPD 1 y = 0.0105027e^(4.3687578x) 0.9860576
nonforest D 1 y = 0.0119265e^(4.192916x) 0.9795658
nonforest VPD 1 y = 0.0138001e^(4.0595998x) 0.9689301
forest D 2 y = e^(-4.4075261 + 3.6139803x + 0.6223984x^2) 0.9878793
forest VPD 2 y = e^(-4.5588956 + 4.3781964x + -0.006955x^2) 0.9859124
nonforest D 2 y = e^(-4.1113085 + 3.004031x + 0.9141718x^2) 0.9822960
nonforest VPD 2 y = e^(-4.5826099 + 5.0987721x + -0.7690767x^2) 0.9703098
forest D 3 y = e^(-5.998747 + 13.2616296x + -15.8955453x^2 + 8.5349086x^3) 0.9967297
forest VPD 3 y = e^(-6.2860803 + 14.8263175x + -18.0398113x^2 + 9.4031475x^3) 0.9976118
nonforest D 3 y = e^(-5.1959413 + 10.9446545x + -13.9861859x^2 + 8.1364017x^3) 0.9964079
nonforest VPD 3 y = e^(-6.151629 + 15.1109429x + -18.6870738x^2 + 9.566279x^3) 0.9868478