3FGL Source Classification vs. 4FGL Discovery
sh
Data Source
“FGL4.rData” is downloaded from the Fermi LAT data products. “FGL4_tidy_for_FGL3.rData” is the processed 4FGL data catalog (https://fermi.gsfc.nasa.gov/ssc/data/access/). See the processing steps here.
“FGL3_results.rData” and related scripts are associated with the following publication:
“Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”,
Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016 (http://arxiv.org/abs/1602.00385).
library(dplyr)
library(devtools)
library(ggplot2)
# Restore the saved catalog objects into the current environment.
# All paths are relative to the working directory.
# Raw 4FGL catalog.
load("fermicatsR/data/FGL4.rData")
# 3FGL classification results; presumably provides `FGL3_results` used below.
# NOTE(review): the extension here is lower-case ".rdata" while the other
# files use ".rData" -- confirm the file name on case-sensitive filesystems.
load("fermicatsR/FGL3_results.rdata")
# 4FGL catalog tidied for matching against 3FGL; presumably provides
# `FGL4_tidy_for_FGL3` used below.
load("fermicatsR/FGL4_tidy_for_FGL3.rData")
Check 3FGL classification of Pulsars vs. AGNs
Out of 3033 3FGL sources, 469 are missing in 4FGL for various reasons.
Important variables:
- Variability Index
- Curvature
- Hardness Ratio [Ackermann et al. 2012]:
\(hr_{ij} = \frac{EnergyFlux_j - EnergyFlux_i} {EnergyFlux_j + EnergyFlux_i}\), where
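\(EnergyFlux_{i}\) is the log photon flux in energy band \(i\), and band \(j\) is the higher-energy band of the pair. hr = -1 for a very soft source (no flux in the higher band, \(EnergyFlux_j = 0\)) and hr = +1 for a very hard source (no flux in the lower band, \(EnergyFlux_i = 0\)).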
In the plot of variability vs. curvature, pulsars (\(\bigtriangleup\)) are in the bottom-right corner, with curved spectra and low variability. Misclassifications (red) are low-variability sources.
In the plot of variability vs. hr45, pulsars (\(\bigtriangleup\)) are in the bottom-left corner, soft in hr45 (energy band 1–3 GeV vs. 3–10 GeV).
1. 3FGL Unassociated sources with significance > 10
# Unassociated 3FGL sources (empty CLASS1) with significance above 10,
# most significant first. Author-recorded count: 162 sources.
# Extra cuts the author tried and left disabled: LR_Pred == RF_Pred (149 left)
# and LR_Pred == "PSR".
FGL3_unassoc_signif_res <- arrange(
  filter(FGL3_results, CLASS1 == "", Signif > 10),
  desc(Signif)
)

# Match against the tidied 4FGL catalog on the 3FGL source name and keep only
# sources for which 4FGL reports a class (non-empty CLASS1.y or CLASS2).
# Author-recorded counts: 116 matched (108 with consistent predictions),
# 48 with a 4FGL class.
FGL3_unassoc_res_FGL4 <- FGL3_unassoc_signif_res %>%
  inner_join(FGL4_tidy_for_FGL3, by = c("Source_Name" = "FGL3_Source_name")) %>%
  filter(CLASS1.y != "" | CLASS2 != "")

# Drop columns 8 and 9 by position ("ASSOC1.x" and "CLASS1.x").
FGL3_unassoc_res_FGL4 <- FGL3_unassoc_res_FGL4[-(8:9)]

# Flag whether each 3FGL prediction agrees with `class`:
# per model, and for both models together.
FGL3_accuracy_pred <- FGL3_unassoc_res_FGL4 %>%
  mutate(
    accurate = (LR_Pred == class) & (LR_Pred == RF_Pred),
    accurate_LR = LR_Pred == class,
    accurate_RF = RF_Pred == class
  )

# Move the accuracy flags and the key 4FGL columns to the front.
ncol <- ncol(FGL3_accuracy_pred)
new_colseq <- c(
  ncol:(ncol - 3),
  (ncol - 13):(ncol - 15),
  1:(ncol - 16),
  (ncol - 12):(ncol - 4)
)
FGL3_accuracy_pred <- FGL3_accuracy_pred[new_colseq]

# Class vs. combined (LR and RF both correct) accuracy.
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate"]])
##
## FALSE TRUE
## AGN 6 17
## PSR 2 19
## Other 8 0
# Class vs. accuracy of the LR prediction alone.
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate_LR"]])
##
## FALSE TRUE
## AGN 5 18
## PSR 2 19
## Other 8 0
# Class vs. accuracy of the RF prediction alone.
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate_RF"]])
##
## FALSE TRUE
## AGN 6 17
## PSR 1 20
## Other 8 0
# Contingency table of combined accuracy by 4FGL subclass (CLASS1.y).
table(FGL3_accuracy_pred[["CLASS1.y"]], FGL3_accuracy_pred[["accurate"]])
##
## FALSE TRUE
## 1 0
## bcu 5 11
## bll 1 5
## fsrq 0 1
## psr 1 0
## PSR 1 19
## snr 2 0
## spp 1 0
## unk 4 0
# Combined accuracy by the secondary 4FGL class column (CLASS2).
table(FGL3_accuracy_pred[["CLASS2"]], FGL3_accuracy_pred[["accurate"]])
##
## FALSE TRUE
## 15 36
## unk 1 0
# Misclassified pulsars or AGNs with significance > 10: rows where the
# combined `accurate` flag is FALSE and the class is not "Other".
# Columns are selected by name rather than by position, so the listing does
# not silently change if the column order above is modified.
# which() drops rows whose comparison is NA instead of emitting all-NA rows.
# NOTE(review): the leading flag shown is accurate_RF, not the combined
# `accurate` used for the row filter -- confirm this is intended.
FGL3_accuracy_pred[
  which(!FGL3_accuracy_pred$accurate & FGL3_accuracy_pred$class != "Other"),
  c(
    "accurate_RF", "class", "ASSOC1.y", "CLASS1.y",
    "Source_Name", "Signif.x", "Flux"
  )
]
## # A tibble: 8 x 7
## accurate_RF class ASSOC1.y CLASS1.y Source_Name Signif.x Flux
## <lgl> <fct> <fct> <chr> <chr> <dbl> <chr>
## 1 TRUE PSR "PSR J1901-0125 … PSR 1901.5-0126 14.0 1.28e…
## 2 FALSE AGN "1RXS J112041.6+071335… bcu 1120.6+0713 13.2 8.78e…
## 3 FALSE AGN "2MASS J15034786-58005… bcu 1503.5-5801 12.8 2.75e…
## 4 FALSE PSR "PSR J1417-4402 … psr 1417.5-4402 12.6 1.24e…
## 5 FALSE AGN "1RXS J210346.8-111335… bcu 2103.7-1113 12.3 5.95e…
## 6 FALSE AGN "SSTSL2 J202336.19+412… bcu 2023.5+4126 11.6 2.78e…
## 7 FALSE AGN "1RXS J154439.4-112820… bcu 1544.6-1125 10.8 2.67e…
## 8 FALSE AGN "NVSS J112903+375655 … bll 1129.0+3758 10.3 3.00e…
Summary of 3FGL Unassociated sources vs. 4FGL Discovery, with significance > 10
library(rmarkdown)
# Show the significance > 10 comparison as a paged table, 16 rows per page.
FGL3_accuracy_pred %>%
  paged_table(options = list(rows.print = 16))
2. 3FGL Unassociated sources with significance > 4 (Whole Catalog)
# Unassociated 3FGL sources (empty CLASS1) with significance above 4,
# most significant first. Author-recorded count: 1008 sources.
# Extra cuts the author tried and left disabled: LR_Pred == RF_Pred (921 left)
# and LR_Pred == "PSR".
FGL3_unassoc_results <- arrange(
  filter(FGL3_results, CLASS1 == "", Signif > 4),
  desc(Signif)
)

# Match against the tidied 4FGL catalog on the 3FGL source name and keep only
# sources for which 4FGL reports a class (non-empty CLASS1.y or CLASS2).
# Author-recorded counts: 613 matched (566 with consistent predictions),
# 329 with a 4FGL class (315 with consistent predictions).
FGL3_unassoc_res_FGL4_all <- FGL3_unassoc_results %>%
  inner_join(FGL4_tidy_for_FGL3, by = c("Source_Name" = "FGL3_Source_name")) %>%
  filter(CLASS1.y != "" | CLASS2 != "")

# Drop columns 8 and 9 by position.
FGL3_unassoc_res_FGL4_all <- FGL3_unassoc_res_FGL4_all[-(8:9)]

# Flag whether each 3FGL prediction agrees with `class`:
# per model, and for both models together.
FGL3_accuracy_pred_all <- FGL3_unassoc_res_FGL4_all %>%
  mutate(
    accurate_LR = LR_Pred == class,
    accurate_RF = RF_Pred == class,
    accurate = (RF_Pred == LR_Pred) & (RF_Pred == class)
  )

# Move the accuracy flags and the key 4FGL columns to the front.
ncol <- ncol(FGL3_accuracy_pred_all)
new_colseq <- c(
  ncol:(ncol - 3),
  (ncol - 13):(ncol - 15),
  1:(ncol - 16),
  (ncol - 12):(ncol - 4)
)
FGL3_accuracy_pred_all <- FGL3_accuracy_pred_all[new_colseq]

# Class vs. combined (LR and RF both correct) accuracy.
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate"]])
##
## FALSE TRUE
## AGN 22 233
## PSR 6 24
## Other 39 0
# Class vs. accuracy of the LR prediction alone (whole catalog).
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate_LR"]])
##
## FALSE TRUE
## AGN 18 237
## PSR 5 25
## Other 39 0
# Class vs. accuracy of the RF prediction alone (whole catalog).
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate_RF"]])
##
## FALSE TRUE
## AGN 16 239
## PSR 5 25
## Other 39 0
# Contingency table of combined accuracy by 4FGL subclass (CLASS1.y).
table(FGL3_accuracy_pred_all[["CLASS1.y"]], FGL3_accuracy_pred_all[["accurate"]])
##
## FALSE TRUE
## 16 7
## agn 0 1
## bcu 16 178
## bll 1 34
## fsrq 1 10
## NLSY1 0 1
## psr 1 0
## PSR 5 24
## pwn 1 0
## rdg 0 1
## RDG 0 1
## SFR 1 0
## snr 3 0
## SNR 1 0
## spp 3 0
## unk 18 0
# Combined accuracy by the secondary 4FGL class column (CLASS2).
table(FGL3_accuracy_pred_all[["CLASS2"]], FGL3_accuracy_pred_all[["accurate"]])
##
## FALSE TRUE
## 51 250
## agn 4 6
## glc 1 0
## sbg 1 0
## sey 0 1
## unk 10 0
3FGL Unassociated sources vs. 4FGL Discovery, with significance > 4 (Whole Catalog)
library(rmarkdown)
# Show the significance > 4 comparison as a paged table, 16 rows per page.
FGL3_accuracy_pred_all %>%
  paged_table(options = list(rows.print = 16))
Check 3FGL classification of YNG vs. MSP
# Restore the 4FGL pulsar table; presumably provides `FGL4_Pulsars_full`.
load("fermicatsR/FGL4_Pulsars_full.rData")

# Trim the source names to the key format used for the join below:
# characters 6-18 of the 4FGL name, and characters 7-18 of the associated
# FGL name with surrounding whitespace removed.
# Author-recorded count: 230 rows.
FGL4_Pulsars_FGL3 <- mutate(
  FGL4_Pulsars_full,
  Source_Name = substr(Source_Name, 6, 18),
  FGL3_Source_name = trimws(substr(ASSOC_FGL, 7, 18))
)

# Unassociated 3FGL sources (significance > 4) that match a 4FGL pulsar.
# Author-recorded count: 27 rows.
FGL3_unassoc_pulsar_FGL4 <- FGL3_unassoc_results %>%
  inner_join(FGL4_Pulsars_FGL3, by = c("Source_Name" = "FGL3_Source_name"))

# Flag whether the YNG/MSP predictions agree with `pulsarness`:
# per model, and for both models together.
FGL3_psr_accuracy_pred <- FGL3_unassoc_pulsar_FGL4 %>%
  mutate(
    accurate = (Blogistic_YNG_Pred == pulsarness) & (RF_YNG_Pred == pulsarness),
    accurate_BLR = Blogistic_YNG_Pred == pulsarness,
    accurate_RF = RF_YNG_Pred == pulsarness
  )

# Move the accuracy flags to the front; columns 8 and 9 are left out.
ncol <- ncol(FGL3_psr_accuracy_pred)
col_seq <- c(
  (ncol - 2):ncol,
  ncol - 3,
  ncol - 12,
  1:7,
  10:(ncol - 13),
  (ncol - 11):(ncol - 4)
)
FGL3_psr_accuracy_pred <- FGL3_psr_accuracy_pred[col_seq]

# Pulsar type vs. combined (BLR and RF both correct) accuracy.
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate"]])
##
## FALSE TRUE
## MSP 2 8
## YNG 8 9
# Pulsar type vs. accuracy of the BLR prediction alone.
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate_BLR"]])
##
## FALSE TRUE
## MSP 2 8
## YNG 8 9
# Pulsar type vs. accuracy of the RF prediction alone.
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate_RF"]])
##
## FALSE TRUE
## MSP 2 8
## YNG 6 11
library(rmarkdown)
# Show the YNG vs. MSP comparison as a paged table, 16 rows per page.
FGL3_psr_accuracy_pred %>%
  paged_table(options = list(rows.print = 16))