import sys
sys.path.insert(0, '../')
import warnings
warnings.simplefilter("ignore")
# warnings.filterwarnings("error", message="overflow encountered in cast")
# warnings.filterwarnings("ignore", message="`alltrue` is deprecated as of NumPy")
import os
import numpy as np
import matplotlib.pyplot as plt
import tables
from astropy.table import Table, join
from astropy.coordinates import SkyCoord
from astropy import units as u
from pathlib import Path
from IPython.display import display, Markdown
from agasc_gaia.datasets import get_agasc_summary
import agasc_gaia.cross_match as xm
from agasc_gaia.gaia_model import get_gaia_model
from agasc_gaia.star_report import Report
from agasc_gaia import star_report
import seaborn as sns
SKA = Path(os.environ['SKA'])
AGASC_FILE = SKA / 'data' / 'agasc' / 'agasc1p7.h5'
generate_reports = True
# @utils.cached(name="comparison_table")
def get_comparison_table():
print("loading tables")
print(" - summary")
agasc_summary = get_agasc_summary()
print(" - indirect x-matches")
agasc_indirect = xm.get_agasc_tycho_gsc_gaia_x_match()
print(" - direct x-matches")
agasc_direct = xm.get_agasc_gaia_x_match_difficult_fixed()
agasc_direct = agasc_direct[agasc_direct["best_match"] & (agasc_direct['p_value'] > 0.001)]
assert len(agasc_indirect), len(np.unique(agasc_indirect['agasc_id']))
assert len(agasc_direct), len(np.unique(agasc_direct['agasc_id']))
print(" - AGASC 1.7")
with tables.open_file(SKA / 'data' / 'agasc' / 'agasc1p7.h5') as h5:
agasc_1p7 = Table(h5.root.data[:])
# this removes a star that is duplicated in the AGASC catalog
agasc_1p7 = agasc_1p7[
~(
(agasc_1p7["AGASC_ID"] == 154534513)
& (agasc_1p7["MAG_CATID"] == 1)
)
]
agasc_ids = np.unique(np.concatenate([agasc_indirect['agasc_id'].data, agasc_direct['agasc_id'].data]))
agasc_1p7 = agasc_1p7[np.in1d(agasc_1p7['AGASC_ID'], agasc_ids)]
cols = [
# 'pm_ra',
# 'pm_dec',
'mag_aca',
'mag_aca_err',
'mag_aca_obs',
'mag_aca_err_obs',
'guide',
'acq',
'tycho_id',
'tycho_tdsc_id',
'mag_catid',
'mag_band',
]
i = np.searchsorted(agasc_summary['agasc_id'], agasc_indirect['agasc_id'])
for col in cols:
agasc_indirect[col] = agasc_summary[col][i]
agasc_indirect['mag_1p7'] = agasc_summary['mag'][i]
i = np.searchsorted(agasc_summary['agasc_id'], agasc_direct['agasc_id'])
for col in cols:
agasc_direct[col] = agasc_summary[col][i]
agasc_direct['mag_1p7'] = agasc_summary['mag'][i]
gaia_model = get_gaia_model()
cols = ['g_mag', 'rp_mag', 'bp_mag', 'mag_catid', 'mag_band', 'has_mag', "phot_variable_flag", "mad_mag_g_fov"]
agasc_indirect['mag_aca_pred'] = np.ma.masked_all(len(agasc_indirect), dtype=np.float16)
agasc_indirect['mag_aca_pred'][agasc_indirect['has_mag'] != 0] = gaia_model.predict(agasc_indirect[agasc_indirect['has_mag'] != 0][cols].to_pandas())
agasc_direct['mag_aca_pred'] = np.ma.masked_all(len(agasc_direct), dtype=np.float16)
agasc_direct['mag_aca_pred'][agasc_direct['has_mag'] != 0] = gaia_model.predict(agasc_direct[agasc_direct['has_mag'] != 0][cols].to_pandas())
print("joining indirect table")
data = join(
agasc_1p7,
agasc_indirect,
keys_left='AGASC_ID',
keys_right='agasc_id',
table_names=['1', '2'],
uniq_col_name='{col_name}_{table_name}',
metadata_conflicts='silent',
join_type='left',
)
assert not np.any([col[-2:] == '_1' for col in data.colnames])
print("joining direct table")
data = join(
data,
agasc_direct,
keys_left='AGASC_ID',
keys_right='agasc_id',
table_names=['indirect', 'direct'],
uniq_col_name='{col_name}_{table_name}',
metadata_conflicts='silent',
join_type='left',
)
direct = ~data['agasc_id_direct'].mask
indirect = ~data['agasc_id_indirect'].mask
print("removing redundant columns")
cols = [
'agasc_id',
'acq',
'guide',
'mag_aca',
'mag_aca_err',
'mag_aca_err_obs',
'mag_aca_obs',
'tycho_id',
'tycho_tdsc_id',
]
for col in cols:
if f'{col}_direct' in data.colnames:
data.remove_column(f'{col}_direct')
if f'{col}_indirect' in data.colnames:
data.remove_column(f'{col}_indirect')
cols = [
'acq',
'gsc2.3',
'guide',
'mag',
'mag_aca',
'mag_aca_err',
'mag_aca_err_obs',
'mag_aca_obs',
'tycho_id',
'tycho_tdsc_id',
]
i = np.searchsorted(agasc_summary['agasc_id'], data['AGASC_ID'])
for col in cols:
data[col] = agasc_summary[col][i]
# data['mag_1p7'] = agasc_summary['mag'][i]
cols = ['agasc_id', 'ra', 'dec', 'epoch', 'pm_ra', 'pm_dec']
for col in cols:
data[col] = agasc_summary[col][i]
print("setting positions")
data['has_gsc'] = data["XREF_ID1"] != -9999
data['has_tyc'] = data["XREF_ID3"] != -9999
data['PM_RA'][data['PM_RA'] == -9999] = 0
data['PM_DEC'][data['PM_DEC'] == -9999] = 0
# these are "corrected" 1p7 positions,
# same as 1p7 except Tycho2 positions were shifted to the actual epoch instead of 2000.
# This was to avoid errors arising from the 1p7 proper motions
data["coord"] = SkyCoord(
ra=data["ra"],
dec=data["dec"],
unit="deg"
)
data["coord_direct_today"] = SkyCoord(
ra=data["ra_direct"],
dec=data["dec_direct"],
# ra=(
# data["ra_direct"]
# + data["pm_ra_direct"]
# * (2023 - data["epoch_direct"])
# / 1000
# / 3600
# / np.cos(np.deg2rad(data["dec_direct"]))
# ),
# dec=(
# data["dec_direct"]
# + data["pm_dec_direct"]
# * (2023 - data["epoch_direct"])
# / 1000
# / 3600
# ),
unit="deg"
)
data["coord_indirect_today"] = SkyCoord(
ra=data["ra_indirect"],
dec=data["dec_indirect"],
# ra=(
# data["ra_indirect"]
# + data["pm_ra_indirect"]
# * (2023 - data["epoch_indirect"])
# / 1000
# / 3600
# / np.cos(np.deg2rad(data["dec_indirect"]))
# ),
# dec=(
# data["dec_indirect"]
# + data["pm_dec_indirect"]
# * (2023 - data["epoch_indirect"])
# / 1000
# / 3600
# ),
unit="deg"
)
data["coord_direct"] = SkyCoord(
ra=(
data["ra_direct"]
+ data["pm_ra_direct"]
* (data["epoch"] - data["epoch_direct"])
/ 1000
/ 3600
/ np.cos(np.deg2rad(data["dec_direct"]))
),
dec=(
data["dec_direct"]
+ data["pm_dec_direct"]
* (data["epoch"] - data["epoch_direct"])
/ 1000
/ 3600
),
unit="deg"
)
data["coord_indirect"] = SkyCoord(
ra=(
data["ra_indirect"]
+ data["pm_ra_indirect"]
* (data["epoch"] - data["epoch_indirect"])
/ 1000
/ 3600
/ np.cos(np.deg2rad(data["dec_indirect"]))
),
dec=(
data["dec_indirect"]
+ data["pm_dec_indirect"]
* (data["epoch"] - data["epoch_indirect"])
/ 1000
/ 3600
),
unit="deg"
)
# this is to compare the 1p7 positions to what they would be with the 1p8 proper motion
# our reference are the "corrected" positions above, and these are the 1p7 positions
# shifted to their original epoch using the 1p8 proper motion
data["coord_1p7"] = SkyCoord(
ra=(
data["RA"]
+ data["pm_ra_direct"]
* (data["epoch"] - data["EPOCH"])
/ 1000
/ 3600
/ np.cos(np.deg2rad(data["DEC"]))
),
dec=(
data["DEC"]
+ data["pm_dec_direct"]
* (data["epoch"] - data["EPOCH"])
/ 1000
/ 3600
),
unit="deg"
)
# the difference between the two methods
data["d2d"] = data["coord_direct"].separation(data["coord_indirect"]).to(u.arcsec)
data["d2d_direct"] = data["coord"].separation(data["coord_direct"]).to(u.arcsec)
data["d2d_indirect"] = data["coord"].separation(data["coord_indirect"]).to(u.arcsec)
# differences introduced by removing 1p7 proper motion
data["d2d_1p7"] = data["coord"].separation(data["coord_1p7"]).to(u.arcsec)
# the difference between the two methods propagated to today
data["d2d_today"] = data["coord_direct_today"].separation(data["coord_indirect_today"]).to(u.arcsec)
return data
data = get_comparison_table()
loading tables - summary - indirect x-matches - direct x-matches - AGASC 1.7 joining indirect table joining direct table removing redundant columns setting positions
direct = ~data['gaia_id_direct'].mask
indirect = ~data['gaia_id_indirect'].mask
data_intersection = data[indirect & direct]
observed = ~data_intersection['mag_aca_obs'].mask
agree = (data_intersection['gaia_id_direct'] == data_intersection['gaia_id_indirect'])
disagree = data_intersection[~agree]
High Level Plots¶
This first plot is the difference in magnitude and radial offset between 1p7 and Gaia. The lines correspond to the match probability used.
fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))
plt.sca(axes["mag"])
plt.title("$\Delta$ mag")
plt.hist(
data_intersection['d_mag_direct'],
bins=np.linspace(-10, 10, 100),
histtype='step',
density=True,
label="direct"
)
plt.hist(
data_intersection['d_mag_indirect'],
bins=np.linspace(-10, 10, 100),
histtype='step',
density=True,
label="indirect"
)
plt.yscale('log')
plt.xlabel("mag$_{pred}$ - mag$_{Gaia}$")
N = len(data_intersection)
x = np.linspace(-3, 3, 100)
plt.plot(x, xm.d_mag_probability(x))
plt.sca(axes["d2d"])
plt.title("Radial offset")
plt.hist(
data_intersection['d2d_direct'],
# bins=np.logspace(-5, 1, 100),
bins=np.linspace(0, 10, 100),
histtype='step',
density=True,
label="direct",
)
plt.hist(
data_intersection['d2d_indirect'],
# bins=np.logspace(-5, 1, 100),
bins=np.linspace(0, 10, 100),
histtype='step',
density=True,
label="indirect",
)
plt.yscale('log')
plt.xlabel("d2d (arcsec)")
N = len(data_intersection['d2d_direct'])
# x = np.logspace(-3, 1, 101)
x = np.linspace(0, 10, 101)
dx = np.diff(x)
x = x[:-1] + dx / 2
# p_d2d = (
# 1*d2d_probability(x, sigma_d2d=.2)
# + 0.05*d2d_probability(x, sigma_d2d=.5)
# # + 0.5*d2d_probability(x, sigma_d2d=1.5)
# )
p_d2d = xm.d2d_probability(x)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
p_d2d = xm.gaussian_d2d_probability_(x, sigma_d2d=1.5)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
# p_d2d = d2d_probability(x, sigma_d2d=0.5)
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p_d2d = (
# (1-p)*d2d_probability(x, sigma_d2d=0.25) +
# p*d2d_probability(x, sigma_d2d=.5)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p2 = 0.008
# p_d2d = (
# (1-p)*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) +
# p*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
# p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p, p1, p2, p3 = 0.89020772, 0.09891197, 0.00791296, 0.00296736
# p_d2d = (
# p*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) +
# p1*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
# p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8) +
# p3*xm.gaussian_d2d_probability_(x, sigma_d2d=1.2)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
plt.ylim(ymin=1e-7, ymax=4)
# plt.ylim(ymin=1e-1, ymax=1e6)
plt.xlim((0, 5))
plt.legend()
plt.tight_layout()
fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))
plt.sca(axes["mag"])
plt.hist(
data_intersection['d_mag_direct'],
bins=np.linspace(-10, 10, 100),
histtype='step',
label="direct",
)
plt.hist(
data_intersection['d_mag_indirect'],
bins=np.linspace(-10, 10, 100),
histtype='step',
label="indirect",
)
plt.xlabel("mag_1p8 - mag_1p7")
plt.legend()
plt.title("Magnitude")
plt.yscale('log')
plt.sca(axes["d2d"])
bins = np.linspace(0, 4, 100)
plt.hist(
data_intersection["d2d_direct"][~agree],
bins=bins,
histtype='step',
label='direct',
)
plt.hist(
data_intersection["d2d_indirect"][~agree],
bins=bins,
histtype='step',
label='indirect',
)
plt.title("Radial offset")
plt.legend()
plt.xlabel("d2d$_{1p7}$ (arcsec)")
plt.tight_layout()
Some numbers¶
n_both, n_indirect_no_direct, n_direct_no_indirect = np.count_nonzero(indirect & direct), np.count_nonzero(indirect & ~direct), np.count_nonzero(direct & ~indirect)
n_both_candidates = np.count_nonzero(indirect & direct & (data['guide'] | data['acq']))
n_indirect_no_direct_candidates = np.count_nonzero(indirect & ~direct & (data['guide'] | data['acq']))
n_direct_no_indirect_candidates = np.count_nonzero(direct & ~indirect & (data['guide'] | data['acq']))
n_both_observed = np.count_nonzero(indirect & direct & ~data['mag_aca_obs'].mask)
n_indirect_no_direct_observed = np.count_nonzero(indirect & ~direct & ~data['mag_aca_obs'].mask)
n_direct_no_indirect_observed = np.count_nonzero(direct & ~indirect & ~data['mag_aca_obs'].mask)
mag_out_indirect_2_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 2.5
mag_out_direct_2_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 2.5
mag_out_indirect_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 5
mag_out_direct_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 5
radial_outlier_direct_3 = data["d2d_direct"] > 3
radial_outlier_indirect_3 = data["d2d_indirect"] > 3
radial_outlier_direct_5 = data["d2d_direct"] > 5
radial_outlier_indirect_5 = data["d2d_indirect"] > 5
n_both_mag_outliers_2_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_2_5 | mag_out_direct_2_5))
n_indirect_mag_outliers_2_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_2_5)
n_direct_mag_outliers_2_5 = np.count_nonzero(~indirect & direct & mag_out_direct_2_5)
n_both_mag_outliers_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_5 | mag_out_direct_5))
n_indirect_mag_outliers_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_5)
n_direct_mag_outliers_5 = np.count_nonzero(~indirect & direct & mag_out_direct_5)
n_direct_radial_outliers_3 = np.count_nonzero(~indirect & direct & radial_outlier_direct_3)
n_indirect_radial_outliers_3 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_3)
n_both_radial_outliers_3 = np.count_nonzero(indirect & direct & radial_outlier_direct_3)
n_direct_radial_outliers_5 = np.count_nonzero(~indirect & direct & radial_outlier_direct_5)
n_indirect_radial_outliers_5 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_5)
n_both_radial_outliers_5 = np.count_nonzero(indirect & direct & radial_outlier_direct_5)
# "agree" is defined only in the intersection, which implies (indirect & direct)
n_disagree, f_agree = np.count_nonzero(~agree), np.count_nonzero(agree)/n_both
n_disagree_observed = np.count_nonzero(~agree & observed)
n_disagree_candidates = np.count_nonzero(~agree & (data_intersection['guide'] | data_intersection['acq']))
disagree_mag_out_indirect_2_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_direct_2_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_indirect_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5
disagree_mag_out_direct_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5
disagree_radial_outlier_direct_3 = data_intersection["d2d_direct"] > 3
disagree_radial_outlier_indirect_3 = data_intersection["d2d_indirect"] > 3
disagree_radial_outlier_direct_5 = data_intersection["d2d_direct"] > 5
disagree_radial_outlier_indirect_5 = data_intersection["d2d_indirect"] > 5
n_disagree_both_mag_outliers_2_5 = np.count_nonzero(agree & disagree_mag_out_indirect_2_5)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_2_5)
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_direct_2_5)
n_disagree_both_mag_outliers_5 = np.count_nonzero(agree & disagree_mag_out_indirect_5)
n_disagree_indirect_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_5)
n_disagree_direct_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_direct_5)
n_disagree_direct_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_direct_3)
n_disagree_indirect_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_3)
n_disagree_both_radial_outliers_3 = np.count_nonzero(agree & disagree_radial_outlier_direct_3)
n_disagree_direct_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_direct_5)
n_disagree_indirect_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_5)
n_disagree_both_radial_outliers_5 = np.count_nonzero(agree & disagree_radial_outlier_direct_5)
n_large_d2d_today = np.count_nonzero(data_intersection["d2d_today"] > 5)
n_large_d2d_direct = np.count_nonzero(data_intersection["d2d_direct"] > 3)
n_large_d2d_indirect = np.count_nonzero(data_intersection["d2d_indirect"] > 3)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5 )
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5 )
n_disagree_indirect_p_relative = np.count_nonzero(data_intersection["p_relative_indirect"] < 0.5)
n_disagree_direct_p_relative = np.count_nonzero(data_intersection["p_relative_direct"] < 0.5)
txt = f"""
Disagreements in terms of which stars are matched to a Gaia counterpart:
| condition | both | direct | indirect |
| --- | --- | --- | --- |
| any | {n_both} | {n_direct_no_indirect} ({n_direct_no_indirect/n_both:.1%}) | {n_indirect_no_direct} ({n_indirect_no_direct/n_both:.1%})|
| guide or acq | {n_both_candidates} | {n_direct_no_indirect_candidates} ({n_direct_no_indirect_candidates/n_both_candidates:.1%}) | {n_indirect_no_direct_candidates} ({n_indirect_no_direct_candidates/n_both_candidates:.1%}) |
| observed | {n_both_observed} | {n_direct_no_indirect_observed} ({n_direct_no_indirect_observed/n_both_observed:.1%}) | {n_indirect_no_direct_observed} ({n_indirect_no_direct_observed/n_both_observed:.1%}) |
| d2d > 3 | {n_both_radial_outliers_3} | {n_direct_radial_outliers_3} | {n_indirect_radial_outliers_3} |
| d2d > 5 | {n_both_radial_outliers_5} | {n_direct_radial_outliers_5} | {n_indirect_radial_outliers_5} |
| abs(mag_pred - MAG) > 2.5 | {n_both_mag_outliers_2_5} | {n_direct_mag_outliers_2_5} | {n_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 | {n_both_mag_outliers_5} | {n_direct_mag_outliers_5} | {n_indirect_mag_outliers_5} |
Disagreements among the stars that are matched to a Gaia counterpart in both methods:
| | in both | disagree |
| --- | --- | --- |
| any | {n_both} | {n_disagree} ({n_disagree/n_both:.3%}) |
| candidates | {n_both_candidates} | {n_disagree_candidates} ({n_disagree_candidates/n_both_candidates:.3%}) |
| observed | {n_both_observed} | {n_disagree_observed} ({n_disagree_observed/n_both_observed:.3%}) |
| d2d_direct > 3 arcsec | | {n_large_d2d_direct} |
| d2d_indirect > 3 arcsec | | {n_large_d2d_indirect} |
| d2d_today > 5 | | {n_large_d2d_today} |
| abs(mag_pred - MAG) > 5 (indirect)| | {n_disagree_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 (direct)| | {n_disagree_direct_mag_outliers_2_5} |
| p_relative_direct < 0.5 | | {n_disagree_direct_p_relative} |
| p_relative_indirect < 0.5 | | {n_disagree_indirect_p_relative} |
"""
display(Markdown(txt))
Disagreements in terms of which stars are matched to a Gaia counterpart:
condition | both | direct | indirect |
---|---|---|---|
any | 8467466 | 489879 (5.8%) | 13169 (0.2%) |
guide or acq | 436738 | 6991 (1.6%) | 593 (0.1%) |
observed | 92432 | 2194 (2.4%) | 1 (0.0%) |
d2d > 3 | 7 | 418 | 1174 |
d2d > 5 | 2 | 187 | 7 |
abs(mag_pred - MAG) > 2.5 | 4528 | 643 | 1351 |
abs(mag_pred - MAG) > 5 | 642 | 111 | 503 |
Disagreements among the stars that are matched to a Gaia counterpart in both methods:
in both | disagree | |
---|---|---|
any | 8467466 | 4896 (0.058%) |
candidates | 436738 | 379 (0.087%) |
observed | 92432 | 43 (0.047%) |
d2d_direct > 3 arcsec | 7 | |
d2d_indirect > 3 arcsec | 259 | |
d2d_today > 5 | 52 | |
abs(mag_pred - MAG) > 5 (indirect) | 632 | |
abs(mag_pred - MAG) > 5 (direct) | 74 | |
p_relative_direct < 0.5 | 3 | |
p_relative_indirect < 0.5 | 102 |
Proper Motion¶
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$")
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")
plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-1000, 1000))
plt.xlim((-1000, 1000))
plt.title("DEC")
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$ (zoomed in)")
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")
plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-100, 100))
plt.xlim((-100, 100))
plt.title("DEC")
Text(0.5, 1.0, 'DEC')
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))
plt.suptitle("1p7-Direct")
plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_indirect'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA indirect")
plt.title("RA")
plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_indirect'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC indirect")
plt.title("DEC")
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))
plt.suptitle("1p7-Direct")
plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_direct'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA direct")
plt.title("RA")
plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_direct'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC direct")
plt.title("DEC")
Text(0.5, 1.0, 'DEC')
Radial Offset¶
This section includes plots looking at radial separations. Not much came of it except to realize that the differences are cases where two Gaia stars are near a single AGASC star.
This can be seen in the plot below, which shows the histogram of radial offsets between 1p7 and 1p8 with each method. In log-scale, there is a clear bimodal distribution.
Because the direct method uses magnitudes in the matching probability, I expected a trade-off, where the radial offsets are larger. That is not the case. The radial offsets are smaller with the direct method. That could be because proper motions are better handled?
bins = np.logspace(-3, 2, 100)
plt.hist(
data_intersection["d2d_direct"][~agree],
bins=bins,
histtype='step',
label='direct-1p7',
)
plt.hist(
data_intersection["d2d_indirect"][~agree],
bins=bins,
histtype='step',
label='indirect-1p7',
)
# plt.hist(
# data_intersection["d2d"][~agree],
# bins=bins,
# histtype='step',
# label='indirect-direct',
# )
plt.xscale('log')
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()
this bimodal distribution in log-scale actually corresponds to a long-tailed distribution
bins = np.linspace(0, 4, 100)
plt.hist(
data_intersection["d2d_direct"][~agree],
bins=bins,
histtype='step',
label='direct-1p7',
)
plt.hist(
data_intersection["d2d_indirect"][~agree],
bins=bins,
histtype='step',
label='indirect-1p7',
)
# plt.hist(
# data_intersection["d2d"][~agree],
# bins=bins,
# histtype='step',
# label='indirect-direct',
# )
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()
d2d_today
is the angular separation between the Gaia matches from both methods as it would be observed today.
plt.hist(
data_intersection["d2d_today"][~agree],
bins=np.logspace(-3, 2, 100),
histtype="step",
)
plt.xscale('log')
plt.yscale('log')
data_intersection[data_intersection['d2d_today'] > 10][['AGASC_ID', 'd2d_today']]
AGASC_ID | d2d_today |
---|---|
arcsec | |
int32 | float64 |
13770888 | 37.63962906801418 |
44828928 | 17.38145521107779 |
56629648 | 24.798016439675187 |
64100472 | 25.144329520220083 |
295182256 | 46.28792664226764 |
328992728 | 28.51402100385447 |
400826496 | 26.660939304221365 |
586811608 | 39.33190385339832 |
685910296 | 14.346379809274778 |
1082400928 | 44.33634978852832 |
1092224544 | 12.218642797037425 |
plt.hist(
disagree["d2d_direct"] - disagree["d2d_indirect"],
bins=np.linspace(-10, 10, 100),
histtype='step',
);
plt.yscale('log')
fig, axes = plt.subplot_mosaic(
[
["direct-indirect", "1p7-indirect"],
["1p7-direct", "."]
],
figsize=(9, 8)
)
plt.sca(axes["direct-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
x=disagree["d2d_direct"],
y=disagree["d2d_indirect"],
bins=[bins, bins],
)
plt.sca(axes["1p7-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
x=disagree["d2d"],
y=disagree["d2d_indirect"],
bins=[bins, bins],
)
plt.sca(axes["1p7-direct"])
bins = np.linspace(0, 5, 50)
sns.histplot(
x=disagree["d2d"],
y=disagree["d2d_direct"],
bins=[bins, bins],
)
<Axes: label='1p7-direct', xlabel='d2d', ylabel='d2d_direct'>
Magnitude Changes¶
bins = np.linspace(-10, 10, 100)
plt.hist(
(data_intersection["mag_pred_direct"] - data_intersection["MAG"])[~agree],
bins=bins,
histtype='step',
label='direct',
)
plt.hist(
(data_intersection["mag_pred_indirect"] - data_intersection["MAG"])[~agree],
bins=bins,
histtype='step',
label='Tycho2-GSC2.3',
)
plt.xlabel('mag_1p8 - mag_1p7')
plt.yscale('log')
plt.legend()
plt.tight_layout()
Reports¶
agasc_id = 966533624
# agasc_id = 680536288
report = star_report.Report(agasc_id)
report = star_report.Report(agasc_id, alternates={'direct': 5974018828337490944, "indirect": 5974018828340131456})
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()
Checking p_value¶
disagree[disagree['p_value_indirect'] > 0.99][['agasc_id', 'p_value_direct', 'p_value_indirect', 'gaia_id_direct', 'gaia_id_indirect']]
agasc_id | p_value_direct | p_value_indirect | gaia_id_direct | gaia_id_indirect |
---|---|---|---|---|
int32 | float32 | float32 | int64 | int64 |
agasc_id = 530408
report = star_report.Report(agasc_id, alternates=dict( disagree[disagree['agasc_id'] == agasc_id][['gaia_id_direct', 'gaia_id_indirect']][0]))
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()
plt.hist(
disagree['p_value_direct'],
bins=np.linspace(0, 1, 100),
histtype='step',
);
plt.hist(
disagree['p_value_indirect'],
bins=np.linspace(0, 1, 100),
histtype='step',
);
plt.yscale('log')
plt.hist(
data['p_value_direct'],
bins=np.linspace(0, 1, 100),
histtype='step',
label="direct",
)
plt.hist(
data['p_value_indirect'],
bins=np.linspace(0, 1, 100),
histtype='step',
label="indirect",
)
plt.yscale('log')
plt.xlabel("p-value")
plt.legend()
plt.tight_layout()
Observed¶
for col in ['d2d_direct', 'd2d_indirect', 'd_mag_direct', 'd_mag_indirect']:
data_intersection[col].format = "{:.2f}"
for col in ['p_value_direct', 'p_value_indirect']:
data_intersection[col].format = "{:.4f}"
data_intersection[~agree & observed][[
'agasc_id',
'gaia_id_direct',
'gaia_id_indirect',
'd_mag_direct',
'd2d_direct',
'd_mag_indirect',
'd2d_indirect',
'p_value_direct',
'p_value_indirect',
]].pprint(max_width=-1, max_lines=-1)
agasc_id gaia_id_direct gaia_id_indirect d_mag_direct d2d_direct d_mag_indirect d2d_indirect p_value_direct p_value_indirect arcsec arcsec ---------- ------------------- ------------------- ------------ ---------- -------------- ------------ -------------- ---------------- 6687480 4760816628892160 4760820923834112 0.19 1.25 0.23 0.69 0.3632 0.3498 9446616 3273969976794445056 3273969972497767552 0.14 0.21 0.16 0.17 0.5646 0.5625 32243760 3831419417137291904 3831419412842627456 0.18 0.71 0.18 0.88 0.5498 0.0414 36444624 3897649565189277952 3897649565189298688 0.02 0.27 8.11 0.67 0.6729 0.0001 52053176 4392691618897267200 4392691618894632064 0.05 0.24 0.17 1.27 0.7053 0.6406 77604992 2756819802270040064 2756819802269026688 0.27 1.33 0.27 0.40 0.3410 0.2008 81535712 2568639108730134016 2568639108730133888 0.69 0.46 5.03 3.08 0.0424 0.0000 87428640 3299310421279505152 3299310421279819776 -0.06 0.71 0.03 0.59 0.9111 0.1986 170537688 3397818657309068288 3397818657308098048 0.10 0.15 0.14 0.27 0.7330 0.4884 176951584 3360388906382410752 3360388910675923584 -0.00 0.04 nan 0.67 0.9928 nan 183638376 607916787837169152 607916783542120448 0.17 0.44 0.25 0.36 0.5880 0.3473 210767352 4516726426141479808 4516726426144371456 0.10 0.17 7.27 1.33 0.7069 0.0000 265030112 1282430921954322048 1282430921955436032 0.05 0.07 nan 0.32 0.9218 nan 295583712 2852925360578759424 2852925364875262080 0.00 0.07 0.18 0.74 0.9758 0.0907 296757336 2863668486831283840 2863668486830442368 0.18 0.11 0.19 0.19 0.5751 0.4824 382992664 207915763322753408 207915763325485696 -0.00 0.04 nan 0.35 0.9933 nan 417858744 1954085707369157888 1954085703068556800 0.14 0.18 0.16 0.35 0.6058 0.5821 420499768 1957445058987177856 1957445058984715648 0.06 0.04 nan 1.40 0.9176 nan 496248896 985405277415464448 985405277413204480 0.11 1.15 0.16 0.82 0.6878 0.5409 541984456 1039008251679233792 1039008255973683840 0.14 0.10 nan 0.30 0.6885 nan 576595136 1690468547538461184 1690468551833654144 0.04 0.03 nan 0.87 0.9661 nan 593245120 1115458948720469632 1115458948720469888 0.07 0.04 nan 0.87 0.8892 nan 623249168 3214697297806159616 3214697293514063616 0.13 0.15 0.31 1.08 0.6591 0.2472 643965560 3806051759739888256 3806051755444147584 0.42 0.19 0.44 0.24 0.2080 0.1721 731121880 6333200204490021888 6333200208784059648 0.07 0.07 3.21 1.21 0.8724 0.0001 766651552 2362388594423841792 2362388594422948480 0.08 0.58 0.15 0.22 0.8048 0.5443 908462360 6801811888448490624 6801811892745605632 0.08 0.11 0.46 1.00 0.8360 0.0689 939393312 5440213797128328064 5440213801426139008 0.13 0.20 0.31 0.64 0.6198 0.2590 966525880 5975153215090484224 5975153210796430336 0.05 0.49 0.16 0.27 0.7706 0.5602 966533624 5974018828337490944 5974018828340131456 0.06 0.23 11.73 0.83 0.6970 0.0000 977410600 6793999961413449216 6793999965707654656 0.01 0.19 0.19 0.70 0.8201 0.0780 997071584 4799566167336113536 4799566171630256000 0.14 0.05 0.62 0.33 0.7106 0.0822 1038767952 6714057120955575424 6714057120952340864 0.12 0.04 6.62 0.96 0.7689 0.0000 1054738896 4913415652886181504 4913415652884481792 0.13 0.10 6.40 0.60 0.7053 0.0003 1057103368 4737095837951376128 4737095837950650624 0.23 1.15 0.23 0.95 0.4613 0.4151 1089212088 5985256936705385344 5985256936695918720 0.14 0.06 12.23 1.08 0.7094 0.0000 1089217664 5984852694390760960 5984852694382630656 -0.01 0.06 2.65 0.93 0.9816 0.0005 1091193912 5942780839701934208 5942780844009984512 -0.16 0.13 1.32 0.56 0.6246 0.0084 1106379424 6561257337207141120 6561257337207686912 0.07 0.06 nan 1.05 0.8844 nan 1141794848 5932241509752072576 5932241509774006656 0.07 0.05 12.80 0.90 0.9015 0.0000 1142837608 5832158120199627392 5832158120187577344 0.05 0.03 nan 0.82 0.9498 nan 1186733048 5911787191757998208 5911787191762479872 0.00 0.05 nan 1.06 0.9865 nan 1201407648 4652097129542392448 4652097125197823232 0.20 0.57 0.23 0.29 0.5154 0.3239
observed_disagree = data_intersection[~agree & observed][[
'agasc_id',
'gaia_id_direct',
'gaia_id_indirect',
'p_value_direct',
'p_value_indirect',
'd_mag_direct',
'd2d_direct',
'd_mag_indirect',
'd2d_indirect'
]]
np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
print(f"Making report with {len(observed_disagree)} stars")
description = """
<p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>
<p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
<ol>
<li> It does not take magnitude into account. </li>
<li> Some stars with proper motion seem to be wrongly matched. </li>
</ol>
<p>
The purpose of this report is to check whether outliers can be caused by misidentification.
When looking at the report for a given outlier, consider that if the true match is a star with no
proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
</p>
<p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
<h2> Notable examples:</h2>
<ul>
<li>
<a href="report_36444624.html"> 36444624 </a>. The indirect method does not use magnitude as a criterion, and matches a star with the wrong magnitude. The correct match is actually closer if one considers proper motion.
</li>
<li>
<a href="report_52053176.html"> 52053176 </a>. Maybe the indirect method did not consider proper motion?
</li>
<li>
<a href="report_170537688.html"> 170537688 </a>. This one should be a blend of two stars. The direct method chooses one, and the indirect method the other. Still, the indirect method does not choose the closest one.
</li>
<li>
<a href="report_176951584.html"> 176951584 </a>. The indirect method gives the star with the wrong magnitude even though the right one is closer.
</li>
</ul>
<h2> Stars </h2>
"""
reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/observed-disagree"
star_report.make_report_list(
data=observed_disagree,
path=reports_dir,
title='Observed Stars with Different direct/indirect matches',
description=description,
alternates=['direct', 'indirect'],
overwrite=True
)
Candidates¶
candidates_disagree = data_intersection[~agree & (data_intersection['guide'] | data_intersection['acq'])]
candidates_disagree.sort('mag_aca')
# candidates.pprint(max_width=-1, max_lines=-1)
candidates_disagree = candidates_disagree[[
'agasc_id',
'gaia_id_direct',
'gaia_id_indirect',
'p_value_direct',
'p_value_indirect',
'd_mag_direct',
'd2d_direct',
'd_mag_indirect',
'd2d_indirect'
]][:100]
np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
print(f"Making report with {len(candidates_disagree)} stars")
description = """
<p>This list includes stars that are candidates, and the direct and indirect methods disagree. </p>
<p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
<ol>
<li> It does not take magnitude into account. </li>
<li> Some stars with proper motion seem to be wrongly matched. </li>
</ol>
<p>
The purpose of this report is to check whether outliers can be caused by misidentification.
When looking at the report for a given outlier, consider that if the true match is a star with no
proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
</p>
<p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
<h2> Notable examples:</h2>
<ul>
</ul>
<h2> Stars </h2>
"""
reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/candidates-disagree"
star_report.make_report_list(
data=candidates_disagree,
path=reports_dir,
title='Candidate Guide/Acq Stars with Different direct/indirect matches',
description=description,
alternates=['direct', 'indirect'],
overwrite=True
)
stars with d2d > 3 arcsec¶
disagree = data_intersection[~agree]
disagree[disagree['d2d'] > 3][['AGASC_ID', 'gaia_id_direct', 'gaia_id_indirect', 'd2d_direct', 'd2d_indirect']]
AGASC_ID | gaia_id_direct | gaia_id_indirect | d2d_direct | d2d_indirect |
---|---|---|---|---|
arcsec | arcsec | |||
int32 | int64 | int64 | float64 | float64 |
1185880 | 2555582442510174208 | 2555582438215436672 | 0.95 | 2.87 |
10881736 | 3230906126423182336 | 3230906126423182592 | 0.56 | 3.38 |
13770888 | 3235885299189856384 | 3235885402267895936 | 1.19 | 3.21 |
15990856 | 3236565892590502656 | 3236565892595740800 | 1.30 | 1.76 |
16255456 | 3320471072312202880 | 3320471072312203136 | 2.38 | 1.10 |
20201336 | 3131384170742362112 | 3131384170736025344 | 0.19 | 3.66 |
21893408 | 3115206132743613824 | 3115206132738760448 | 0.82 | 2.66 |
26616568 | 3094626126928169600 | 3094626126928170240 | 1.45 | 1.83 |
32637200 | 3856621907273557376 | 3856621907273557504 | 1.22 | 2.28 |
... | ... | ... | ... | ... |
1181491160 | 5849591907847389440 | 5849591907847641984 | 0.46 | 4.05 |
1182531872 | 5875721183218108160 | 5875721183218108672 | 1.31 | 2.17 |
1183204944 | 5832534702911047040 | 5832534702911047680 | 1.06 | 3.69 |
1183975024 | 5824353752266031232 | 5824353752266031488 | 1.38 | 1.99 |
1183980544 | 5824702537975197440 | 5824702537975197184 | 0.86 | 3.65 |
1207468040 | 5238263820712678144 | 5238264022569299712 | 0.67 | 2.98 |
1209151256 | 5235047508682934144 | 5235047474323195904 | 1.17 | 3.21 |
1210454568 | 5842593653714409984 | 5842593653714410368 | 1.14 | 1.97 |
1214128920 | 5799016331422764288 | 5799016331422763264 | 0.17 | 3.15 |
1214648312 | 5795492357996370048 | 5795492362281899520 | 0.22 | 3.38 |
agasc_ids = np.unique(np.concatenate(
[
[
1185232,
1185880,
2754344,
2892808,
7609200,
8657120,
10881736,
11274712,
11802440,
1213475776,
1213613240,
1214128920,
1214384968,
1214519736,
1214648312,
1214664424,
1248595144,
],
disagree[disagree['d2d'] > 3]['AGASC_ID'][:40]
]
))
large_d2d = disagree[np.in1d(disagree['agasc_id'], agasc_ids)][[
'agasc_id',
'gaia_id_direct',
'gaia_id_indirect',
'd_mag_direct',
'd2d_direct',
'd_mag_indirect',
'd2d_indirect',
'p_value_direct',
'p_value_indirect',
]]
np.random.seed(1248595144)
# generate_reports = True
if generate_reports:
print(f"Making report with {len(large_d2d)} stars")
description = """
<p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>
<p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
<ol>
<li> It does not take magnitude into account. </li>
<li> Some stars with proper motion seem to be wrongly matched. </li>
</ol>
<p>Anyway, these are all faint stars.</p>
<p>
The purpose of this report is to check whether outliers can be caused by misidentification.
When looking at the report for a given outlier, consider that if the true match is a star with no
proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
</p>
<p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
<h2> Notable examples:</h2>
<ul>
<li>
<a href="report_1185880.html"> 1185880 </a>.
direct method gives a star that is closer in magnitude and angular separation
no idea why the indirect method gives another
</li>
<li>
<a href="report_10881736.html"> 10881736 </a>.
two stars close in magnitude with the AGASC star right in between (AGASC was not resolved)
the direct method gives a star that is MUCH closer in magnitude and angular separation
</li>
<li>
<a href="report_1214128920.html"> 1214128920 </a>.
the direct method gives a star that is MUCH closer in magnitude and angular separation
</li>
<li>
<a href="report_1214648312.html"> 1214648312 </a>.
the direct method gives a star that is MUCH closer in magnitude and angular separation
still, this is a faint star, so irrelevant for us
</li>
</ul>
</ul>
<h2> Stars </h2>
"""
reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/large_d2d"
star_report.make_report_list(
data=large_d2d,
path=reports_dir,
title='Distance between direct and indirect matches > 3 arcsec',
description=description,
alternates=['direct', 'indirect'],
overwrite=True
)