Processing the 4FGL Catalog
sh
3/29/2020
Download Raw/Processed R Data and CSV Files in This Script
- ‘FGL4.rData’ Download here.
- ‘pulsars.rData’ Download here. (This and the above are included in the updated fermicatsR package).
- ‘FGL4_Pulsars_full.rData’ Download here.
- ‘FGL4_tidy.rData’ Download here.
- ‘FGL4_results.rData’ Download here.
- ‘FGL4_results.csv’ Download here.
Data Source
“FGL4.rData” are downloaded from Fermi LAT data products. “FGL4_tidy_for_FGL3.rData” are processed 4FGL data catalog. (https://fermi.gsfc.nasa.gov/ssc/data/access/)
Step 0a: Processing 4FGL catalog
library(fermicatsR)
library(dplyr)
library(tidyverse)
library(devtools)
library(data.table)
load("fermicatsR/data/FGL4.rData")
sedflux <- function(photon_flux, alpha, Elo, Ehi) {
R <- Elo/Ehi
GeV2erg <- 0.00160217657
(GeV2erg*Elo)*(alpha-1)*(photon_flux)*(R^(alpha/2 - 1))/(1-(R^(alpha-1)))
}
# Subselect small number of variables from those available in 4FGL
FGL4_01 <- select(FGL4,
Source_Name, RAJ2000, DEJ2000, GLON, GLAT, SpectrumType,
PL_Index, LP_Index, PLEC_Index, #Spectral_Index,
Variability_Index, Frac_Variability,
Conf_68_SemiMajor, Conf_68_SemiMinor, Conf_68_PosAng,
Conf_95_SemiMajor, Conf_95_SemiMinor, Conf_95_PosAng,
Signif_Avg, Pivot_Energy,
PL_Flux_Density, LP_Flux_Density, PLEC_Flux_Density, #Flux_Density,
Unc_PL_Flux_Density, Unc_LP_Flux_Density, Unc_PLEC_Flux_Density, #Unc_Flux_Density,
LP_beta, PLEC_Expfactor, PLEC_Exp_Index,
LP_SigCurv, PLEC_SigCurv, #Signif_Curve,
Signif_Avg, Flux_Peak, Signif_Peak,
Npred,
Flux1000, Unc_Flux1000,
Energy_Flux100, Unc_Energy_Flux100,
Flux_Band.2, #Flux100_300,
Flux_Band.3, #Flux300_1000,
Flux_Band.4, #Flux1000_3000,
Flux_Band.5, #Flux3000_10000,
Flux_Band.6, # 10 to 30GeV; #Flux10000_100000,
Flux_Band.7, # 30 to 300 GeV
ASSOC_FGL, CLASS1, CLASS2, ASSOC1, ASSOC2, Flags) # CLASS2 = agn/unk
# Add new variables (SED points and hardness ratios) computed using sedflux function above
FGL4_02 <- mutate(FGL4_01,
SED100_300 = sedflux(Flux_Band.2, PL_Index, 0.1, 0.3),
SED300_1000 = sedflux(Flux_Band.3, PL_Index, 0.3, 1.0),
SED1000_3000 = sedflux(Flux_Band.4, PL_Index, 1.0, 3.0),
SED3000_10000 = sedflux(Flux_Band.5, PL_Index, 3.0, 10.0),
SED10000_30000 = sedflux(Flux_Band.6, PL_Index, 10.0, 30.0),
SED30000_300000 = sedflux(Flux_Band.7, PL_Index, 30.0, 300.0),
hr12 = (SED300_1000-SED100_300)/(SED300_1000+SED100_300),
hr23 = (SED1000_3000-SED300_1000)/(SED1000_3000+SED300_1000),
hr34 = (SED3000_10000-SED1000_3000)/(SED3000_10000+SED1000_3000),
hr45 = (SED10000_30000-SED3000_10000)/(SED10000_30000+SED3000_10000),
hr56 = (SED30000_300000-SED10000_30000)/(SED30000_300000+SED10000_30000),
CLASS1 = gsub(" ", "", CLASS1),
CLASS2 = gsub(" ", "", CLASS2))
# Get the most correlated variable with respect to each variable
FGL4_02_numeric <- FGL4_02 %>%
select_if(is.numeric) %>% # select numeric columns
drop_na() # 5065 -> 4989
FGL4_02_cor <- cor(FGL4_02_numeric)
setDT(melt(FGL4_02_cor))[Var1 != Var2, .SD[which.max(value)], keyby=Var1]
## Var1 Var2 value
## 1: RAJ2000 GLAT 0.17889792
## 2: DEJ2000 GLAT 0.51099476
## 3: GLON LP_beta 0.04110554
## 4: GLAT DEJ2000 0.51099476
## 5: PL_Index LP_Index 0.93639884
## 6: LP_Index PL_Index 0.93639884
## 7: PLEC_Index LP_Index 0.69388532
## 8: Variability_Index Flux_Peak 0.87694671
## 9: Frac_Variability PL_Index 0.24547949
## 10: Conf_68_SemiMajor Conf_95_SemiMajor 1.00000000
## 11: Conf_68_SemiMinor Conf_95_SemiMinor 1.00000000
## 12: Conf_68_PosAng Conf_95_PosAng 1.00000000
## 13: Conf_95_SemiMajor Conf_68_SemiMajor 1.00000000
## 14: Conf_95_SemiMinor Conf_68_SemiMinor 1.00000000
## 15: Conf_95_PosAng Conf_68_PosAng 1.00000000
## 16: Signif_Avg Signif_Peak 0.93286733
## 17: Pivot_Energy hr34 0.57296206
## 18: PL_Flux_Density LP_Flux_Density 0.99940260
## 19: LP_Flux_Density PLEC_Flux_Density 0.99995431
## 20: PLEC_Flux_Density LP_Flux_Density 0.99995431
## 21: Unc_PL_Flux_Density Unc_LP_Flux_Density 0.99382456
## 22: Unc_LP_Flux_Density Unc_PLEC_Flux_Density 0.99441611
## 23: Unc_PLEC_Flux_Density Unc_LP_Flux_Density 0.99441611
## 24: LP_beta PLEC_Expfactor 0.79394103
## 25: PLEC_Expfactor LP_beta 0.79394103
## 26: PLEC_Exp_Index Conf_68_SemiMinor 0.02950397
## 27: LP_SigCurv PLEC_SigCurv 0.99287168
## 28: PLEC_SigCurv LP_SigCurv 0.99287168
## 29: Flux_Peak Flux_Band.2 0.94852689
## 30: Signif_Peak Signif_Avg 0.93286733
## 31: Npred Energy_Flux100 0.94319580
## 32: Flux1000 SED1000_3000 0.99758752
## 33: Unc_Flux1000 Unc_Energy_Flux100 0.90394137
## 34: Energy_Flux100 Flux1000 0.98784449
## 35: Unc_Energy_Flux100 Unc_Flux1000 0.90394137
## 36: Flux_Band.2 SED100_300 0.99991575
## 37: Flux_Band.3 SED300_1000 0.99990862
## 38: Flux_Band.4 SED1000_3000 0.99991734
## 39: Flux_Band.5 SED3000_10000 0.99965009
## 40: Flux_Band.6 SED10000_30000 0.99971065
## 41: Flux_Band.7 SED30000_300000 0.99937079
## 42: Flags LP_beta 0.25938862
## 43: SED100_300 Flux_Band.2 0.99991575
## 44: SED300_1000 Flux_Band.3 0.99990862
## 45: SED1000_3000 Flux_Band.4 0.99991734
## 46: SED3000_10000 Flux_Band.5 0.99965009
## 47: SED10000_30000 Flux_Band.6 0.99971065
## 48: SED30000_300000 Flux_Band.7 0.99937079
## 49: hr12 Pivot_Energy 0.30559523
## 50: hr23 Pivot_Energy 0.56217753
## 51: hr34 Pivot_Energy 0.57296206
## 52: hr45 Pivot_Energy 0.46957400
## 53: hr56 Pivot_Energy 0.22448756
## Var1 Var2 value
# Drop highly correlated and some unused variables
FGL4_03 <- select(FGL4_02,
-Flux1000, -Unc_Flux1000, -Energy_Flux100,
-Conf_95_SemiMajor, -Conf_95_SemiMinor,
-Conf_68_SemiMajor, -Conf_68_SemiMinor, -Conf_68_PosAng,
-SED100_300, -SED300_1000, -SED1000_3000,
-SED3000_10000, -SED10000_30000, -SED30000_300000,
-Flux_Band.2, -Flux_Band.3, -Flux_Band.4, -Flux_Band.5,-Flux_Band.6, -Flux_Band.7
)
# Take log of variables with highly skewed distributions (removing zeroes to avoid -Inf)
FGL4_04 <- filter(FGL4_03, FGL4_03$LP_SigCurv != 0 & FGL4_03$PLEC_SigCurv != 0) %>% # 5065->5010
mutate(Variability_Index = log(Variability_Index), Pivot_Energy = log(Pivot_Energy),
PL_Flux_Density = log(PL_Flux_Density),
LP_Flux_Density = log(LP_Flux_Density),
PLEC_Flux_Density = log(PLEC_Flux_Density),
Unc_PL_Flux_Density = log(Unc_PL_Flux_Density),
Unc_LP_Flux_Density = log(Unc_LP_Flux_Density),
Unc_PLEC_Flux_Density = log(Unc_PLEC_Flux_Density),
Unc_Energy_Flux100 = log(Unc_Energy_Flux100),
LP_SigCurv = log(LP_SigCurv),
PLEC_SigCurv = log(PLEC_SigCurv),
)
# Drop a few more unused variables
FGL4_05 <- select(FGL4_04, #-Unc_PL_Flux_Density ,
-Unc_LP_Flux_Density, -Unc_PLEC_Flux_Density
)
# Create a tidy data set including "pulsarness" and "agnness" factors
FGL4_tidy <- select(FGL4_05, -RAJ2000, -DEJ2000, -GLON, -Flags, -Signif_Peak, -Flux_Peak) %>%
mutate(Signif = Signif_Avg) %>% # ,
mutate(agnness = factor(CLASS1 == "BCU" | CLASS1 == "bcu"
|CLASS1 == "BLL" | CLASS1 == "bll"
|CLASS1 == "FSRQ"| CLASS1 == "fsrq"
|CLASS1 == "rdg" | CLASS1 == "RDG"
|CLASS1 == "nlsy1" | CLASS1 == "NLSY1"
|CLASS1 == "agn" | CLASS1 == "ssrq" |CLASS1 == "sey"
|CLASS2 == "agn" | CLASS2 == "sey",
labels = c("Non-AGN", "AGN")),
pulsarness = factor(CLASS1 == "PSR" | CLASS1 == "psr" | CLASS2 == "psr",
labels = c("Non-Pulsar", "Pulsar")))
FGL4_tidy<- select(FGL4_tidy, -Signif_Avg)
save(FGL4_tidy, file = "FGL4_tidy.rData", compress = "xz")
FGL4_04_numeric <- FGL4_tidy %>%
select_if(is.numeric) %>% # select numeric columns
drop_na() # 5010 -> 4935
FGL4_04_cor <- cor(FGL4_04_numeric)
setDT(melt(FGL4_04_cor))[Var1 != Var2, .SD[which.max(value)], keyby=Var1]
## Var1 Var2 value
## 1: GLAT hr23 0.02128922
## 2: PL_Index LP_Index 0.86965001
## 3: LP_Index PL_Index 0.86965001
## 4: PLEC_Index LP_Index 0.45570188
## 5: Variability_Index Frac_Variability 0.77286964
## 6: Frac_Variability Variability_Index 0.77286964
## 7: Conf_95_PosAng Signif 0.02625141
## 8: Pivot_Energy hr34 0.59518921
## 9: PL_Flux_Density PLEC_Flux_Density 0.99675632
## 10: LP_Flux_Density PLEC_Flux_Density 0.99968233
## 11: PLEC_Flux_Density LP_Flux_Density 0.99968233
## 12: Unc_PL_Flux_Density PLEC_Flux_Density 0.96799303
## 13: LP_beta PLEC_Expfactor 0.79090037
## 14: PLEC_Expfactor LP_beta 0.79090037
## 15: PLEC_Exp_Index Conf_95_PosAng 0.01459451
## 16: LP_SigCurv PLEC_SigCurv 0.84386346
## 17: PLEC_SigCurv LP_SigCurv 0.84386346
## 18: Npred Signif 0.81226463
## 19: Unc_Energy_Flux100 PLEC_Flux_Density 0.47436561
## 20: hr12 LP_beta 0.18122101
## 21: hr23 Pivot_Energy 0.48204925
## 22: hr34 Pivot_Energy 0.59518921
## 23: hr45 Pivot_Energy 0.45291472
## 24: hr56 Pivot_Energy 0.19825096
## 25: Signif Npred 0.81226463
## Var1 Var2 value
FGL4_tidy_for_FGL3 <- select(FGL4_03, -Unc_LP_Flux_Density, -Unc_PLEC_Flux_Density, -Signif_Peak, -Flux_Peak) %>%
mutate(agnness = factor(CLASS1 == "BCU" | CLASS1 == "bcu"
|CLASS1 == "BLL" | CLASS1 == "bll"
|CLASS1 == "FSRQ"| CLASS1 == "fsrq"
|CLASS1 == "rdg" | CLASS1 == "RDG"
|CLASS1 == "nlsy1" | CLASS1 == "NLSY1"
|CLASS1 == "agn" | CLASS1 == "ssrq" |CLASS1 == "sey"
|CLASS2 == "agn" | CLASS2 == "sey",
labels = c("Non-AGN", "AGN")),
pulsarness = factor(CLASS1 == "PSR" | CLASS1 == "psr" | CLASS2 == "psr",
labels = c("Non-Pulsar", "Pulsar"))) %>%
mutate(Source_Name = substr(Source_Name, 6, 18),
FGL3_Source_name = trimws(substr(ASSOC_FGL,7,18)),
class = case_when(agnness == "AGN"~"AGN", pulsarness == "Pulsar"~"PSR",
(agnness == "Non-AGN")&(pulsarness == "Non-Pulsar")~"Other"),
class = factor(class, levels = c("AGN", "PSR", "Other"))
)
save(FGL4_tidy_for_FGL3, file = "FGL4_tidy_for_FGL3.rData", compress = "xz")
# Create the FGL4_results data frame that will contain final results
FGL4_results <- select(FGL4_05, Source_Name, ASSOC_FGL,
Signif = Signif_Avg,
#Flux = Flux_Density,
PL_Flux_Density,
LP_Flux_Density,
PLEC_Flux_Density,
RA = RAJ2000,
DEC = DEJ2000,
GLON, GLAT, ASSOC1, CLASS1,
ASSOC2, CLASS2, Flags
) %>%
mutate(Source_Name = substr(Source_Name, 6, 18), FGL3_Source_name = trimws(substr(ASSOC_FGL,7,18)),
RA = format(RA, digits = 2),
DEC = format(DEC, digits = 2),
GLON = format(GLON, digits = 2),
GLAT = format(GLAT, digits = 2), Signif = round(Signif, digits = 3)) %>%
mutate(PL_Flux_Density = format(exp(PL_Flux_Density), scientific = TRUE, digits = 3))%>%
mutate(LP_Flux_Density = format(exp(LP_Flux_Density), scientific = TRUE, digits = 3))%>%
mutate(PLEC_Flux_Density = format(exp(PLEC_Flux_Density), scientific = TRUE, digits = 3))
save(FGL4_results, file = "FGL4_results.rData", compress = "xz")
Step 0b: Processing 4FGL catalog and pulsar data sets for YNG vs MSP classification
load("fermicatsR/data/pulsars.rData")
pulsars_long <- mutate(pulsars, PSR_coords = substr(PSR, 6, 12))
FGL4_pulsars <- FGL4_tidy %>%
filter (pulsarness == "Pulsar") %>% mutate(ASSOC1 = as.character(ASSOC1))
a <- strsplit(FGL4_pulsars$ASSOC1, "PSR J")
b <- character(nrow(FGL4_pulsars))
for (i in 1:nrow(FGL4_pulsars)) {
b[i] <- gsub(" ", "", a[[i]][2])
}
FGL4_pulsars$ASSOC1_code <- b
FGL4_pulsars <- mutate(FGL4_pulsars, ASSOC1_code = substr(ASSOC1_code, 1, 7))
bothFGL4andpulsars <- inner_join(pulsars_long, FGL4_pulsars,
by = c("PSR_coords" = "ASSOC1_code")) # 230 obs
bothFGL4andpulsars <- bothFGL4andpulsars[ , !(names(bothFGL4andpulsars) %in% c("date_public"))]
FGL4_Pulsars_full <- mutate(bothFGL4andpulsars,
pulsarness = factor(P_ms >= 10, labels = c("MSP", "YNG")))
save(FGL4_Pulsars_full, file = "FGL4_Pulsars_full.rData", compress = "xz")
FGL4_Pulsars <- select (FGL4_Pulsars_full, -P_ms, -Codes, -RAJ_deg, -DECJ_deg, -Refs,
-CLASS1, -ASSOC1, -CLASS2, -ASSOC2,
-Source_Name, -ASSOC_FGL, -PSR, -PSR_coords, -Edot, -agnness)
save(FGL4_Pulsars_full, file = "FGL4_Pulsars_full.rData", compress = "xz")
FGL4_Pulsars <- select (FGL4_Pulsars_full, -P_ms, -Codes, -RAJ_deg, -DECJ_deg, -Refs,
-CLASS1, -ASSOC1, -CLASS2, -ASSOC2,
-Source_Name, -ASSOC_FGL, -PSR, -PSR_coords, -Edot, -agnness)
save(FGL4_Pulsars, file = "FGL4_Pulsars.rData", compress = "xz")
library(rmarkdown)
paged_table(FGL4_results, options = list(rows.print = 20))