3FGL Source Classification vs. 4FGL Discovery

Data Source

“FGL4.rData” is downloaded from the Fermi LAT data products. “FGL4_tidy_for_FGL3.rData” is the processed 4FGL data catalog (https://fermi.gsfc.nasa.gov/ssc/data/access/). See the processing steps here.

“FGL3_results.rData” and related scripts are associated with the following publication:

“Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”,

Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016 (http://arxiv.org/abs/1602.00385).

library(dplyr)
library(devtools)
library(ggplot2)

# Load the saved R data files used by the analysis below.
# NOTE(review): presumably each file restores an object named after the file
# (FGL3_results and FGL4_tidy_for_FGL3 are referenced later on) -- confirm.
load("fermicatsR/data/FGL4.rData")
# NOTE(review): this path ends in ".rdata" while the other files use ".rData";
# confirm the on-disk name, since case matters on case-sensitive file systems.
load("fermicatsR/FGL3_results.rdata")
load("fermicatsR/FGL4_tidy_for_FGL3.rData")

Check 3FGL classification of Pulsars vs. AGNs

Out of 3033 3FGL sources, 469 are missing in 4FGL for various reasons.

Important variables:

  • Variability Index
  • Curvature
  • Hardness Ratio [Ackermann et al. 2012]:

\(hr_{ij} = \frac{EnergyFlux_j - EnergyFlux_i} {EnergyFlux_j + EnergyFlux_i}\), where

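\(EnergyFlux_{i}\) is the log photon flux in energy band \(i\), and band \(j\) is the higher-energy band of the pair. hr = -1 for a very soft source (no flux in the higher band, \(EnergyFlux_j = 0\)) and hr = +1 for a very hard source (no flux in the lower band, \(EnergyFlux_i = 0\)).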

In the plot of variability vs. curvature, pulsars (\(\bigtriangleup\)) are in the bottom-right corner, with curved spectra and low variability. Misclassifications (red) are low-variability sources.

In the plot of variability vs. hr45, pulsars (\(\bigtriangleup\)) are in the bottom-left corner, i.e. soft in hr45 (energy band 1–3 GeV vs. 3–10 GeV).

1. 3FGL Unassociated sources with significance > 10

# Unassociated 3FGL sources (empty CLASS1) with Signif above 10, ordered from
# the most to the least significant.
# Author-recorded counts (not re-verified): 162 rows pass Signif > 10; 149 is
# noted on the final step (possibly with the agreement filter below -- confirm).
# Optional filters the author left disabled:
#   LR_Pred == RF_Pred   keep only sources where both predictions agree
#   LR_Pred == "PSR"     keep only sources predicted as PSR
FGL3_unassoc_signif_res <- arrange(
  filter(FGL3_results, CLASS1 == "", Signif > 10),
  desc(Signif)
)

# Match the selected 3FGL sources to the tidied 4FGL table on the 3FGL source
# name, then keep only rows with a non-empty CLASS1.y or CLASS2 (CLASS1.y is
# the CLASS1 column coming from the right-hand table of the join).
# Author-recorded counts (not re-verified): 116 rows after the join (108 with
# consistent predictions), 48 after the class filter.
FGL3_unassoc_res_FGL4 <- FGL3_unassoc_signif_res %>%
  inner_join(
    FGL4_tidy_for_FGL3,
    by = c("Source_Name" = "FGL3_Source_name")
  ) %>%
  filter(CLASS1.y != "" | CLASS2 != "")

# Drop columns 8 and 9 by position; the author notes these are "ASSOC1.x" and
# "CLASS1.x".
FGL3_unassoc_res_FGL4 <- FGL3_unassoc_res_FGL4[-c(8, 9)]

# Add three logical flags comparing the 3FGL predictions with `class`:
#   accurate     LR_Pred equals class AND LR_Pred equals RF_Pred
#   accurate_LR  LR_Pred equals class
#   accurate_RF  RF_Pred equals class
# The flags are appended in this order; the column rearrangement that follows
# relies on their positions.
FGL3_accuracy_pred <- FGL3_unassoc_res_FGL4 %>%
  mutate(
    accurate = (LR_Pred == class) & (LR_Pred == RF_Pred),
    accurate_LR = (LR_Pred == class),
    accurate_RF = (RF_Pred == class)
  )

# Reorder the columns purely by position: the last four columns come first
# (in reverse order), followed by the columns 13-15 places from the end, then
# the leading columns, and finally the remaining block near the end.
ncol <- ncol(FGL3_accuracy_pred)
new_colseq <- c(
  ncol - 0:3,               # ncol, ncol - 1, ncol - 2, ncol - 3
  ncol - 13:15,             # ncol - 13, ncol - 14, ncol - 15
  1:(ncol - 16),
  (ncol - 12):(ncol - 4)
)
FGL3_accuracy_pred <- FGL3_accuracy_pred[new_colseq]

# `class` vs. the combined accuracy flag
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate"]])
##        
##         FALSE TRUE
##   AGN       6   17
##   PSR       2   19
##   Other     8    0
# `class` vs. the LR-only accuracy flag
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate_LR"]])
##        
##         FALSE TRUE
##   AGN       5   18
##   PSR       2   19
##   Other     8    0
# `class` vs. the RF-only accuracy flag
table(FGL3_accuracy_pred[["class"]], FGL3_accuracy_pred[["accurate_RF"]])
##        
##         FALSE TRUE
##   AGN       6   17
##   PSR       1   20
##   Other     8    0
# Same comparison broken down by the CLASS1.y and CLASS2 labels
table(FGL3_accuracy_pred[["CLASS1.y"]], FGL3_accuracy_pred[["accurate"]])
##       
##        FALSE TRUE
##            1    0
##   bcu      5   11
##   bll      1    5
##   fsrq     0    1
##   psr      1    0
##   PSR      1   19
##   snr      2    0
##   spp      1    0
##   unk      4    0
table(FGL3_accuracy_pred[["CLASS2"]], FGL3_accuracy_pred[["accurate"]])
##      
##       FALSE TRUE
##          15   36
##   unk     1    0
# List the rows whose combined flag `accurate` is FALSE and whose class is not
# "Other" (mis-classified pulsars or AGNs, significance > 10), showing columns
# 1, 4, 5 and 7 to 10 by position.
FGL3_accuracy_pred[
  !FGL3_accuracy_pred[["accurate"]] &
    FGL3_accuracy_pred[["class"]] != "Other",
  c(1, 4, 5, 7:10)
]
## # A tibble: 8 x 7
##   accurate_RF class ASSOC1.y                CLASS1.y Source_Name Signif.x Flux  
##   <lgl>       <fct> <fct>                   <chr>    <chr>          <dbl> <chr> 
## 1 TRUE        PSR   "PSR J1901-0125       … PSR      1901.5-0126     14.0 1.28e…
## 2 FALSE       AGN   "1RXS J112041.6+071335… bcu      1120.6+0713     13.2 8.78e…
## 3 FALSE       AGN   "2MASS J15034786-58005… bcu      1503.5-5801     12.8 2.75e…
## 4 FALSE       PSR   "PSR J1417-4402       … psr      1417.5-4402     12.6 1.24e…
## 5 FALSE       AGN   "1RXS J210346.8-111335… bcu      2103.7-1113     12.3 5.95e…
## 6 FALSE       AGN   "SSTSL2 J202336.19+412… bcu      2023.5+4126     11.6 2.78e…
## 7 FALSE       AGN   "1RXS J154439.4-112820… bcu      1544.6-1125     10.8 2.67e…
## 8 FALSE       AGN   "NVSS J112903+375655  … bll      1129.0+3758     10.3 3.00e…

Summary of 3FGL Unassociated sources vs. 4FGL Discovery, with significance > 10

# Render the significance > 10 comparison as a paged table, 16 rows per page.
library(rmarkdown)
FGL3_accuracy_pred %>%
  paged_table(options = list(rows.print = 16))

2. 3FGL Unassociated sources with significance > 4 (Whole Catalog)

# Unassociated 3FGL sources (empty CLASS1) with Signif above 4, ordered from
# the most to the least significant.
# Author-recorded counts (not re-verified): 1008 rows pass Signif > 4; 921 when
# the agreement filter below is enabled.
# Optional filters the author left disabled:
#   LR_Pred == RF_Pred   keep only sources where both predictions agree
#   LR_Pred == "PSR"     keep only sources predicted as PSR
FGL3_unassoc_results <- arrange(
  filter(FGL3_results, CLASS1 == "", Signif > 4),
  desc(Signif)
)

# Match all selected 3FGL sources to the tidied 4FGL table on the 3FGL source
# name, then keep only rows with a non-empty CLASS1.y or CLASS2.
# Author-recorded counts (not re-verified): 613 rows after the join (566 with
# consistent predictions), 329 after the class filter (315 consistent).
FGL3_unassoc_res_FGL4_all <- FGL3_unassoc_results %>%
  inner_join(
    FGL4_tidy_for_FGL3,
    by = c("Source_Name" = "FGL3_Source_name")
  ) %>%
  filter(CLASS1.y != "" | CLASS2 != "")

# Drop columns 8 and 9 by position.
FGL3_unassoc_res_FGL4_all <- FGL3_unassoc_res_FGL4_all[-c(8, 9)]

# Add three logical flags comparing the 3FGL predictions with `class`:
#   accurate_LR  LR_Pred equals class
#   accurate_RF  RF_Pred equals class
#   accurate     RF_Pred equals LR_Pred AND RF_Pred equals class
# The flags are appended in this order; the column rearrangement that follows
# relies on their positions.
FGL3_accuracy_pred_all <- FGL3_unassoc_res_FGL4_all %>%
  mutate(
    accurate_LR = (LR_Pred == class),
    accurate_RF = (RF_Pred == class),
    accurate = (RF_Pred == LR_Pred) & (RF_Pred == class)
  )

# Reorder the columns purely by position: the last four columns come first
# (in reverse order), followed by the columns 13-15 places from the end, then
# the leading columns, and finally the remaining block near the end.
ncol <- ncol(FGL3_accuracy_pred_all)
new_colseq <- c(
  ncol - 0:3,               # ncol, ncol - 1, ncol - 2, ncol - 3
  ncol - 13:15,             # ncol - 13, ncol - 14, ncol - 15
  1:(ncol - 16),
  (ncol - 12):(ncol - 4)
)
FGL3_accuracy_pred_all <- FGL3_accuracy_pred_all[new_colseq]

# `class` vs. the combined accuracy flag
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate"]])
##        
##         FALSE TRUE
##   AGN      22  233
##   PSR       6   24
##   Other    39    0
# `class` vs. the LR-only accuracy flag
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate_LR"]])
##        
##         FALSE TRUE
##   AGN      18  237
##   PSR       5   25
##   Other    39    0
# `class` vs. the RF-only accuracy flag
table(FGL3_accuracy_pred_all[["class"]], FGL3_accuracy_pred_all[["accurate_RF"]])
##        
##         FALSE TRUE
##   AGN      16  239
##   PSR       5   25
##   Other    39    0
# Same comparison broken down by the CLASS1.y and CLASS2 labels
table(FGL3_accuracy_pred_all[["CLASS1.y"]], FGL3_accuracy_pred_all[["accurate"]])
##        
##         FALSE TRUE
##            16    7
##   agn       0    1
##   bcu      16  178
##   bll       1   34
##   fsrq      1   10
##   NLSY1     0    1
##   psr       1    0
##   PSR       5   24
##   pwn       1    0
##   rdg       0    1
##   RDG       0    1
##   SFR       1    0
##   snr       3    0
##   SNR       1    0
##   spp       3    0
##   unk      18    0
table(FGL3_accuracy_pred_all[["CLASS2"]], FGL3_accuracy_pred_all[["accurate"]])
##      
##       FALSE TRUE
##          51  250
##   agn     4    6
##   glc     1    0
##   sbg     1    0
##   sey     0    1
##   unk    10    0

3FGL Unassociated sources vs. 4FGL Discovery, with significance > 4 (Whole Catalog)

# Render the significance > 4 comparison as a paged table, 16 rows per page.
library(rmarkdown)
FGL3_accuracy_pred_all %>%
  paged_table(options = list(rows.print = 16))

Check 3FGL classification of YNG vs. MSP

# Load the saved pulsar table.
# NOTE(review): presumably this restores FGL4_Pulsars_full, used just below --
# confirm.
load("fermicatsR/FGL4_Pulsars_full.rData")

# Shorten the names so they can be joined against the 3FGL results:
#   Source_Name       characters 6-18 of the existing Source_Name
#   FGL3_Source_name  characters 7-18 of ASSOC_FGL, whitespace-trimmed
# Author-recorded row count (not re-verified): 230.
FGL4_Pulsars_FGL3 <- mutate(
  FGL4_Pulsars_full,
  Source_Name = substr(Source_Name, 6, 18),
  FGL3_Source_name = trimws(substr(ASSOC_FGL, 7, 18))
)

# Keep the unassociated 3FGL sources (Signif > 4) that match a row of the
# pulsar table by 3FGL name. Author-recorded row count (not re-verified): 27.
FGL3_unassoc_pulsar_FGL4 <- FGL3_unassoc_results %>%
  inner_join(
    FGL4_Pulsars_FGL3,
    by = c("Source_Name" = "FGL3_Source_name")
  )

# Add three logical flags comparing the 3FGL YNG/MSP predictions with
# `pulsarness`:
#   accurate      both Blogistic_YNG_Pred and RF_YNG_Pred equal pulsarness
#   accurate_BLR  Blogistic_YNG_Pred equals pulsarness
#   accurate_RF   RF_YNG_Pred equals pulsarness
# Columns are referenced through mutate()'s data mask rather than as
# `FGL3_unassoc_pulsar_FGL4$col`, so the expressions always act on the rows
# mutate() is actually working on. The two comparisons in `accurate` are
# parenthesised to make the `==` / `&` precedence explicit.
# The flags are appended in this order; the column rearrangement that follows
# relies on their positions.
FGL3_psr_accuracy_pred <- mutate(
  FGL3_unassoc_pulsar_FGL4,
  accurate = (Blogistic_YNG_Pred == pulsarness) & (RF_YNG_Pred == pulsarness),
  accurate_BLR = (Blogistic_YNG_Pred == pulsarness),
  accurate_RF = (RF_YNG_Pred == pulsarness)
)

# Reorder the columns purely by position: the three accuracy flags first, then
# the columns 3 and 12 places from the end, then columns 1-7, then the rest.
# Columns 8 and 9 are not listed and are therefore dropped.
ncol <- ncol(FGL3_psr_accuracy_pred)
col_seq <- c(
  ncol - 2:0,               # ncol - 2, ncol - 1, ncol
  ncol - 3,
  ncol - 12,
  1:7,
  10:(ncol - 13),
  (ncol - 11):(ncol - 4)
)
FGL3_psr_accuracy_pred <- FGL3_psr_accuracy_pred[, col_seq]
# `pulsarness` vs. the combined accuracy flag
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate"]])
##      
##       FALSE TRUE
##   MSP     2    8
##   YNG     8    9
# `pulsarness` vs. the BLR-only accuracy flag
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate_BLR"]])
##      
##       FALSE TRUE
##   MSP     2    8
##   YNG     8    9
# `pulsarness` vs. the RF-only accuracy flag
table(FGL3_psr_accuracy_pred[["pulsarness"]], FGL3_psr_accuracy_pred[["accurate_RF"]])
##      
##       FALSE TRUE
##   MSP     2    8
##   YNG     6   11

# Render the YNG vs. MSP comparison as a paged table, 16 rows per page.
library(rmarkdown)
FGL3_psr_accuracy_pred %>%
  paged_table(options = list(rows.print = 16))