Processing the 4FGL Catalog

Download Raw/Processed R Data and CSV Files in This Script

Data Source

“FGL4.rData” are downloaded from Fermi LAT data products. “FGL4_tidy_for_FGL3.rData” are processed 4FGL data catalog. (https://fermi.gsfc.nasa.gov/ssc/data/access/)

Step 0a: Processing 4FGL catalog

library(fermicatsR)
library(dplyr)
library(tidyverse)
library(devtools)
library(data.table)

load("fermicatsR/data/FGL4.rData")

sedflux <- function(photon_flux, alpha, Elo, Ehi) { 
        R <- Elo/Ehi
        GeV2erg <- 0.00160217657
        (GeV2erg*Elo)*(alpha-1)*(photon_flux)*(R^(alpha/2 - 1))/(1-(R^(alpha-1)))
}

# Subselect small number of variables from those available in 4FGL
FGL4_01 <- select(FGL4,
                  Source_Name, RAJ2000, DEJ2000, GLON, GLAT, SpectrumType,
                  PL_Index, LP_Index, PLEC_Index, #Spectral_Index, 
                  Variability_Index, Frac_Variability,
                  Conf_68_SemiMajor, Conf_68_SemiMinor, Conf_68_PosAng, 
                  Conf_95_SemiMajor, Conf_95_SemiMinor, Conf_95_PosAng,
                  Signif_Avg,   Pivot_Energy, 
                  PL_Flux_Density,     LP_Flux_Density,     PLEC_Flux_Density, #Flux_Density, 
                  Unc_PL_Flux_Density, Unc_LP_Flux_Density, Unc_PLEC_Flux_Density, #Unc_Flux_Density, 
                  LP_beta, PLEC_Expfactor, PLEC_Exp_Index,
                  LP_SigCurv,  PLEC_SigCurv,  #Signif_Curve, 
                  Signif_Avg,  Flux_Peak, Signif_Peak,
                  Npred,
                  Flux1000, Unc_Flux1000, 
                  Energy_Flux100, Unc_Energy_Flux100,
                  Flux_Band.2, #Flux100_300, 
                  Flux_Band.3, #Flux300_1000, 
                  Flux_Band.4, #Flux1000_3000, 
                  Flux_Band.5, #Flux3000_10000, 
                  Flux_Band.6, # 10 to 30GeV;   #Flux10000_100000, 
                  Flux_Band.7, # 30 to 300 GeV
                  ASSOC_FGL, CLASS1, CLASS2, ASSOC1, ASSOC2, Flags) # CLASS2 = agn/unk

# Add new variables (SED points and hardness ratios) computed using sedflux function above
FGL4_02 <- mutate(FGL4_01,
                  SED100_300 = sedflux(Flux_Band.2, PL_Index, 0.1, 0.3),
                  SED300_1000 = sedflux(Flux_Band.3, PL_Index, 0.3, 1.0), 
                  SED1000_3000 = sedflux(Flux_Band.4, PL_Index, 1.0, 3.0), 
                  SED3000_10000 = sedflux(Flux_Band.5, PL_Index, 3.0, 10.0), 
                  SED10000_30000 = sedflux(Flux_Band.6, PL_Index, 10.0, 30.0), 
                  SED30000_300000 = sedflux(Flux_Band.7, PL_Index, 30.0, 300.0), 
 
                  hr12 = (SED300_1000-SED100_300)/(SED300_1000+SED100_300),
                  hr23 = (SED1000_3000-SED300_1000)/(SED1000_3000+SED300_1000),
                  hr34 = (SED3000_10000-SED1000_3000)/(SED3000_10000+SED1000_3000),
                  hr45 = (SED10000_30000-SED3000_10000)/(SED10000_30000+SED3000_10000), 
                  hr56 = (SED30000_300000-SED10000_30000)/(SED30000_300000+SED10000_30000), 
                  CLASS1 = gsub(" ", "", CLASS1),
                  CLASS2 = gsub(" ", "", CLASS2))

# Get the most correlated variable with respect to each variable
FGL4_02_numeric <- FGL4_02 %>% 
  select_if(is.numeric) %>%  # select numeric columns
  drop_na()   # 5065 -> 4989

FGL4_02_cor <- cor(FGL4_02_numeric)
setDT(melt(FGL4_02_cor))[Var1 != Var2, .SD[which.max(value)], keyby=Var1]
##                      Var1                  Var2      value
##  1:               RAJ2000                  GLAT 0.17889792
##  2:               DEJ2000                  GLAT 0.51099476
##  3:                  GLON               LP_beta 0.04110554
##  4:                  GLAT               DEJ2000 0.51099476
##  5:              PL_Index              LP_Index 0.93639884
##  6:              LP_Index              PL_Index 0.93639884
##  7:            PLEC_Index              LP_Index 0.69388532
##  8:     Variability_Index             Flux_Peak 0.87694671
##  9:      Frac_Variability              PL_Index 0.24547949
## 10:     Conf_68_SemiMajor     Conf_95_SemiMajor 1.00000000
## 11:     Conf_68_SemiMinor     Conf_95_SemiMinor 1.00000000
## 12:        Conf_68_PosAng        Conf_95_PosAng 1.00000000
## 13:     Conf_95_SemiMajor     Conf_68_SemiMajor 1.00000000
## 14:     Conf_95_SemiMinor     Conf_68_SemiMinor 1.00000000
## 15:        Conf_95_PosAng        Conf_68_PosAng 1.00000000
## 16:            Signif_Avg           Signif_Peak 0.93286733
## 17:          Pivot_Energy                  hr34 0.57296206
## 18:       PL_Flux_Density       LP_Flux_Density 0.99940260
## 19:       LP_Flux_Density     PLEC_Flux_Density 0.99995431
## 20:     PLEC_Flux_Density       LP_Flux_Density 0.99995431
## 21:   Unc_PL_Flux_Density   Unc_LP_Flux_Density 0.99382456
## 22:   Unc_LP_Flux_Density Unc_PLEC_Flux_Density 0.99441611
## 23: Unc_PLEC_Flux_Density   Unc_LP_Flux_Density 0.99441611
## 24:               LP_beta        PLEC_Expfactor 0.79394103
## 25:        PLEC_Expfactor               LP_beta 0.79394103
## 26:        PLEC_Exp_Index     Conf_68_SemiMinor 0.02950397
## 27:            LP_SigCurv          PLEC_SigCurv 0.99287168
## 28:          PLEC_SigCurv            LP_SigCurv 0.99287168
## 29:             Flux_Peak           Flux_Band.2 0.94852689
## 30:           Signif_Peak            Signif_Avg 0.93286733
## 31:                 Npred        Energy_Flux100 0.94319580
## 32:              Flux1000          SED1000_3000 0.99758752
## 33:          Unc_Flux1000    Unc_Energy_Flux100 0.90394137
## 34:        Energy_Flux100              Flux1000 0.98784449
## 35:    Unc_Energy_Flux100          Unc_Flux1000 0.90394137
## 36:           Flux_Band.2            SED100_300 0.99991575
## 37:           Flux_Band.3           SED300_1000 0.99990862
## 38:           Flux_Band.4          SED1000_3000 0.99991734
## 39:           Flux_Band.5         SED3000_10000 0.99965009
## 40:           Flux_Band.6        SED10000_30000 0.99971065
## 41:           Flux_Band.7       SED30000_300000 0.99937079
## 42:                 Flags               LP_beta 0.25938862
## 43:            SED100_300           Flux_Band.2 0.99991575
## 44:           SED300_1000           Flux_Band.3 0.99990862
## 45:          SED1000_3000           Flux_Band.4 0.99991734
## 46:         SED3000_10000           Flux_Band.5 0.99965009
## 47:        SED10000_30000           Flux_Band.6 0.99971065
## 48:       SED30000_300000           Flux_Band.7 0.99937079
## 49:                  hr12          Pivot_Energy 0.30559523
## 50:                  hr23          Pivot_Energy 0.56217753
## 51:                  hr34          Pivot_Energy 0.57296206
## 52:                  hr45          Pivot_Energy 0.46957400
## 53:                  hr56          Pivot_Energy 0.22448756
##                      Var1                  Var2      value
# Drop highly correlated and some unused variables
FGL4_03 <- select(FGL4_02,
                  -Flux1000, -Unc_Flux1000, -Energy_Flux100, 
                  -Conf_95_SemiMajor, -Conf_95_SemiMinor, 
                  -Conf_68_SemiMajor, -Conf_68_SemiMinor, -Conf_68_PosAng, 
                  
                  -SED100_300, -SED300_1000, -SED1000_3000, 
                  -SED3000_10000, -SED10000_30000, -SED30000_300000,
                  -Flux_Band.2, -Flux_Band.3, -Flux_Band.4, -Flux_Band.5,-Flux_Band.6, -Flux_Band.7
                  )

# Take log of variables with highly skewed distributions (removing zeroes to avoid -Inf) 
FGL4_04 <- filter(FGL4_03, FGL4_03$LP_SigCurv != 0 & FGL4_03$PLEC_SigCurv != 0) %>%   # 5065->5010
  mutate(Variability_Index = log(Variability_Index), Pivot_Energy = log(Pivot_Energy),
         PL_Flux_Density = log(PL_Flux_Density),      
         LP_Flux_Density = log(LP_Flux_Density),     
         PLEC_Flux_Density = log(PLEC_Flux_Density),      
         Unc_PL_Flux_Density = log(Unc_PL_Flux_Density), 
         Unc_LP_Flux_Density = log(Unc_LP_Flux_Density), 
         Unc_PLEC_Flux_Density = log(Unc_PLEC_Flux_Density), 
         Unc_Energy_Flux100 = log(Unc_Energy_Flux100), 
         LP_SigCurv = log(LP_SigCurv), 
         PLEC_SigCurv = log(PLEC_SigCurv), 
  )
# Drop a few more unused variables
FGL4_05 <- select(FGL4_04, #-Unc_PL_Flux_Density ,
                  -Unc_LP_Flux_Density, -Unc_PLEC_Flux_Density
                  )

# Create a tidy data set including "pulsarness" and "agnness" factors

FGL4_tidy <- select(FGL4_05, -RAJ2000, -DEJ2000, -GLON, -Flags, -Signif_Peak, -Flux_Peak) %>% 
  mutate(Signif = Signif_Avg) %>% #  , 
  mutate(agnness = factor(CLASS1 == "BCU" | CLASS1 == "bcu" 
                          |CLASS1 == "BLL" | CLASS1 == "bll"
                          |CLASS1 == "FSRQ"| CLASS1 == "fsrq" 
                          |CLASS1 == "rdg" | CLASS1 == "RDG" 
                          |CLASS1 == "nlsy1" | CLASS1 == "NLSY1" 
                          |CLASS1 == "agn" | CLASS1 == "ssrq" |CLASS1 == "sey" 
                          |CLASS2 == "agn" | CLASS2 == "sey",
                          labels = c("Non-AGN", "AGN")), 
         pulsarness = factor(CLASS1 == "PSR" | CLASS1 == "psr" | CLASS2 == "psr",
                          labels = c("Non-Pulsar", "Pulsar")))

FGL4_tidy<- select(FGL4_tidy, -Signif_Avg) 

save(FGL4_tidy, file = "FGL4_tidy.rData", compress = "xz")

FGL4_04_numeric <- FGL4_tidy %>% 
  select_if(is.numeric) %>%  # select numeric columns
  drop_na()   # 5010 -> 4935

FGL4_04_cor <- cor(FGL4_04_numeric)
setDT(melt(FGL4_04_cor))[Var1 != Var2, .SD[which.max(value)], keyby=Var1]
##                    Var1              Var2      value
##  1:                GLAT              hr23 0.02128922
##  2:            PL_Index          LP_Index 0.86965001
##  3:            LP_Index          PL_Index 0.86965001
##  4:          PLEC_Index          LP_Index 0.45570188
##  5:   Variability_Index  Frac_Variability 0.77286964
##  6:    Frac_Variability Variability_Index 0.77286964
##  7:      Conf_95_PosAng            Signif 0.02625141
##  8:        Pivot_Energy              hr34 0.59518921
##  9:     PL_Flux_Density PLEC_Flux_Density 0.99675632
## 10:     LP_Flux_Density PLEC_Flux_Density 0.99968233
## 11:   PLEC_Flux_Density   LP_Flux_Density 0.99968233
## 12: Unc_PL_Flux_Density PLEC_Flux_Density 0.96799303
## 13:             LP_beta    PLEC_Expfactor 0.79090037
## 14:      PLEC_Expfactor           LP_beta 0.79090037
## 15:      PLEC_Exp_Index    Conf_95_PosAng 0.01459451
## 16:          LP_SigCurv      PLEC_SigCurv 0.84386346
## 17:        PLEC_SigCurv        LP_SigCurv 0.84386346
## 18:               Npred            Signif 0.81226463
## 19:  Unc_Energy_Flux100 PLEC_Flux_Density 0.47436561
## 20:                hr12           LP_beta 0.18122101
## 21:                hr23      Pivot_Energy 0.48204925
## 22:                hr34      Pivot_Energy 0.59518921
## 23:                hr45      Pivot_Energy 0.45291472
## 24:                hr56      Pivot_Energy 0.19825096
## 25:              Signif             Npred 0.81226463
##                    Var1              Var2      value
FGL4_tidy_for_FGL3 <- select(FGL4_03, -Unc_LP_Flux_Density, -Unc_PLEC_Flux_Density, -Signif_Peak, -Flux_Peak) %>%
    mutate(agnness = factor(CLASS1 == "BCU" | CLASS1 == "bcu" 
                          |CLASS1 == "BLL" | CLASS1 == "bll"
                          |CLASS1 == "FSRQ"| CLASS1 == "fsrq" 
                          |CLASS1 == "rdg" | CLASS1 == "RDG" 
                          |CLASS1 == "nlsy1" | CLASS1 == "NLSY1" 
                          |CLASS1 == "agn" | CLASS1 == "ssrq" |CLASS1 == "sey" 
                          |CLASS2 == "agn" | CLASS2 == "sey",
                          labels = c("Non-AGN", "AGN")), 
         pulsarness = factor(CLASS1 == "PSR" | CLASS1 == "psr" | CLASS2 == "psr",
                          labels = c("Non-Pulsar", "Pulsar"))) %>%
  mutate(Source_Name = substr(Source_Name, 6, 18), 
         FGL3_Source_name = trimws(substr(ASSOC_FGL,7,18)),
         class = case_when(agnness == "AGN"~"AGN", pulsarness == "Pulsar"~"PSR", 
                           (agnness == "Non-AGN")&(pulsarness == "Non-Pulsar")~"Other"),
         class = factor(class, levels = c("AGN", "PSR", "Other"))
  )

save(FGL4_tidy_for_FGL3, file = "FGL4_tidy_for_FGL3.rData", compress = "xz")


# Create the FGL4_results data frame that will contain final results 
FGL4_results <- select(FGL4_05, Source_Name, ASSOC_FGL,
                       Signif = Signif_Avg,
                       #Flux = Flux_Density,
                       PL_Flux_Density,     
                       LP_Flux_Density,     
                       PLEC_Flux_Density, 
                       RA = RAJ2000,
                       DEC = DEJ2000,
                       GLON, GLAT, ASSOC1, CLASS1,
                       ASSOC2, CLASS2, Flags
                      ) %>%
  mutate(Source_Name = substr(Source_Name, 6, 18), FGL3_Source_name = trimws(substr(ASSOC_FGL,7,18)),
         RA = format(RA, digits = 2),
         DEC = format(DEC, digits = 2),
         GLON = format(GLON, digits = 2),
         GLAT = format(GLAT, digits = 2), Signif = round(Signif, digits = 3)) %>%
  mutate(PL_Flux_Density = format(exp(PL_Flux_Density), scientific = TRUE, digits = 3))%>%
  mutate(LP_Flux_Density = format(exp(LP_Flux_Density), scientific = TRUE, digits = 3))%>%
  mutate(PLEC_Flux_Density = format(exp(PLEC_Flux_Density), scientific = TRUE, digits = 3)) 

save(FGL4_results, file = "FGL4_results.rData", compress = "xz")

Step 0b: Processing 4FGL catalog and pulsar data sets for YNG vs MSP classification

load("fermicatsR/data/pulsars.rData")
pulsars_long <- mutate(pulsars, PSR_coords = substr(PSR, 6, 12)) 
FGL4_pulsars <- FGL4_tidy %>%
        filter (pulsarness == "Pulsar") %>% mutate(ASSOC1 = as.character(ASSOC1))
a <- strsplit(FGL4_pulsars$ASSOC1, "PSR J") 
b <- character(nrow(FGL4_pulsars))

for (i in 1:nrow(FGL4_pulsars)) { 
        b[i] <- gsub(" ", "", a[[i]][2])
}

FGL4_pulsars$ASSOC1_code <- b
FGL4_pulsars <- mutate(FGL4_pulsars, ASSOC1_code = substr(ASSOC1_code, 1, 7)) 
bothFGL4andpulsars <- inner_join(pulsars_long, FGL4_pulsars,
                                 by = c("PSR_coords" = "ASSOC1_code"))  # 230 obs

bothFGL4andpulsars <- bothFGL4andpulsars[ , !(names(bothFGL4andpulsars) %in% c("date_public"))]

FGL4_Pulsars_full <- mutate(bothFGL4andpulsars,
                       pulsarness = factor(P_ms >= 10, labels = c("MSP", "YNG"))) 
save(FGL4_Pulsars_full, file = "FGL4_Pulsars_full.rData", compress = "xz")

FGL4_Pulsars <- select (FGL4_Pulsars_full, -P_ms, -Codes, -RAJ_deg, -DECJ_deg, -Refs,
                               -CLASS1, -ASSOC1, -CLASS2, -ASSOC2,
                -Source_Name, -ASSOC_FGL, -PSR, -PSR_coords, -Edot, -agnness)

save(FGL4_Pulsars_full, file = "FGL4_Pulsars_full.rData", compress = "xz")

FGL4_Pulsars <- select (FGL4_Pulsars_full, -P_ms, -Codes, -RAJ_deg, -DECJ_deg, -Refs,
                               -CLASS1, -ASSOC1, -CLASS2, -ASSOC2,
                -Source_Name, -ASSOC_FGL, -PSR, -PSR_coords, -Edot, -agnness)
save(FGL4_Pulsars, file = "FGL4_Pulsars.rData", compress = "xz")
library(rmarkdown)
paged_table(FGL4_results, options = list(rows.print = 20))