Source code for agasc_gaia.datasets

import os
import re
import tables
import functools
import requests
import zipfile
import numpy as np
import networkx as nx

from bs4 import BeautifulSoup
from tqdm import tqdm
from pathlib import Path

from astroquery.vizier import Vizier
from astropy.table import Table, join, unique, vstack, hstack, MaskedColumn
from astropy import units as u
from astropy.coordinates import SkyCoord

from agasc import get_supplement_table
from Quaternion import Quat

from agasc_gaia import utils


Vizier.ROW_LIMIT = -1


# @functools.cache
[docs]@utils.cached(name="tycho2") def get_tycho(): """ Loads the Tycho catalog. """ print("Getting Tycho2") cols = [ "RA(ICRS)", "DE(ICRS)", "RAmdeg", "DEmdeg", "pmRA", "pmDE", "EpRAm", "EpDEm", "BTmag", "VTmag", "EpRA-1990", "EpDE-1990", "TYC1", "TYC2", "TYC3", ] vizier = Vizier(catalog="I/259", columns=cols, row_limit=-1) tycho = vizier.query_constraints()[0] # tycho = vizier.query_constraints(VTmag='0.0..4.0') return tycho
# @functools.cache
[docs]@utils.cached(name="tdsc") def get_tdsc(): """ Loads the TDSC catalog. """ print("Getting TDSC") cols = [ "TDSC", "RAJ2000", "DEJ2000", "pmRA", "pmDE", "EpRA", "EpDE", "BTmag", "VTmag", "Nmain", "Nsup", "TYC1", "TYC2", "TYC3", # "WDS", "Rcmp", "PA", "Sep" ] vizier = Vizier(catalog="I/276", columns=cols, row_limit=-1) tdsc = vizier.query_constraints()[0] return tdsc
[docs]@functools.cache @utils.cached(name="gsc23") def get_gsc23(): """ Loads the GSC 2.3 catalog. """ print("Getting GSC 2.3") mags = np.linspace(5, 15, 101) constraints = [] constraints += [{"Vmag": "<5.0"}] constraints += [ {"Vmag": f"{v1:.1f}..{v2:.1f}"} for v1, v2 in zip(mags[:-1], mags[1:]) ] queries = [] vizier = Vizier( catalog="I/305", columns=["GSC2.3", "GSC1", "Vmag", "RAJ2000", "DEJ2000", "Epoch"], row_limit=-1, ) for constraint in tqdm(constraints): queries.append(vizier.query_constraints(**constraint)[0]) gsc_23 = vstack(queries, metadata_conflicts="silent") gsc_23 = unique(gsc_23, keys=["GSC1", "GSC2.3"]) gsc_23.rename_columns( ["RAJ2000", "DEJ2000", "Epoch", "Vmag"], ["RA_GSC23", "DEC_GSC23", "Epoch_GSC23", "Vmag_GSC23"], ) return gsc_23
[docs]@functools.cache @utils.cached(name="gsc1") def get_gsc11(download=False): """ Loads the GSC 1.1 catalog. """ print("Getting GSC 1.1") if not download: raise Exception( "GSC 1.1 file is not present." "Set download=True to download it." "It might be a better idea to place it in the data directory by hand." ) url = "http://gsc.dc3.com/IndividualParts/gsc" r = requests.get(url) soup = BeautifulSoup(r.content.decode(), "html.parser") files = sorted( [ a.attrs["href"] for a in soup.find_all("a") if re.match("[NS][0-9]{4}.zip", a.attrs["href"]) ] ) root = Path("data/GSC_1.1") root.mkdir(parents=True, exist_ok=True) table_list = [] for file in tqdm(files): r = requests.get(f"{url}/{file}") with open(root / file, "wb") as fh: fh.write(r.content) zf = zipfile.ZipFile(root / file) zf.extractall(root / "gsc") for f in zf.namelist(): if not f.endswith(".gsc.gz"): continue t = Table.read(root / "gsc" / f) t["GSC_REGION"] = int(f[1:5]) del t.meta["EXTNAME"] table_list.append(t) gsc_11 = vstack(table_list) gsc_11.rename_columns( ["GSC_ID", "GSC_REGION", "RA_DEG", "DEC_DEG"], ["gsc1_star", "gsc1_region", "RA_GSC11", "DEC_GSC11"], ) return gsc_11
[docs]@functools.cache def get_plain_agasc(): """Loads the AGASC catalog and adds Tycho IDs.""" agasc = Table.read( Path(os.environ["SKA"], "data", "agasc", "agasc1p7.h5"), path="/data" ) has_tyc = agasc["XREF_ID3"] != -9999 agasc = agasc[has_tyc] agasc["tyc2"] = agasc["XREF_ID3"] agasc["tyc3"] = agasc["XREF_ID4"] agasc["tyc1"] = ( agasc["AGASC_ID"] - agasc["tyc2"] * 8 - (agasc["tyc3"] - 1) ) / 131072 agasc = agasc[ [ "AGASC_ID", "tyc1", "tyc2", "tyc3", "RA", "DEC", "EPOCH", "PM_RA", "PM_DEC", "MAG", "CLASS", ] ] agasc.rename_columns( *zip(*[(c, c.lower()) for c in agasc.colnames if c != c.lower()]) ) return agasc
[docs]def tycho_id(t1, t2, t3): """Fast function to assemble Tycho ID strings.""" f = np.frompyfunc("{:d}-{}-{}".format, 3, 1) return f(t1.astype(int), t2.astype(int), t3.astype(int)).astype(str)
[docs]def tycho2_tdsc_separation_prob(separation): """Separation probability used to match Tycho2 and TDSC.""" sigma_sep_2 = 0.75 sigma_sep_1 = 0.15 return 0.6 * np.exp(-(separation**2) / (2 * sigma_sep_1**2)) / ( sigma_sep_1 * np.sqrt(2 * np.pi) ) + 0.4 * np.exp(-(separation**2) / (2 * sigma_sep_2**2)) / ( sigma_sep_2 * np.sqrt(2 * np.pi) )
[docs]def tycho2_tdsc_mag_diff_prob(mag_diff): """Magnitude difference probability used to match Tycho2 and TDSC.""" sigma_mag = 0.25 return np.exp(-(mag_diff**2) / (2 * sigma_mag**2)) / np.sqrt( 2 * np.pi * sigma_mag )
[docs]def tycho2_tdsc_get_best_matches(matches): """ Given a list of matches (TYC3 values from Tycho2 and TDSC), return the best matches. The goodness of the match is determined by the xmatch_weight column. The algorithm is the following: - sort the matches by xmatch_weight in reverse order - iterate over matches, adding pairs that have not been added yet. """ matches = matches[ ["tyc3_tycho", "tyc3_tdsc", "separation", "mag_diff", "xmatch_weight"] ] i_sorted = np.argsort( matches[ [ "xmatch_weight", ] ] )[::-1] # i_sorted = np.argsort(matches[['separation', 'mag_diff',]]) result = {} i_result = [] for i in i_sorted: row = matches[i] if ( row["tyc3_tycho"] not in result.keys() and row["tyc3_tdsc"] not in result.values() ): result[row["tyc3_tycho"]] = row["tyc3_tdsc"] i_result.append(i) # return matches[indices] return np.array(i_result)
[docs]@utils.cached(name="tycho_tdsc") def get_tycho2_tdsc(): """Cross-match the positions of Tycho2 and TDSC and return a union table. The union table includes a "tdsc_id", which is the ID in TDSC. This is not always the same as the Tycho2 ID (hence this function). """ print("Getting Tycho2-TDSC (cross-matched)") # Plain Tycho2 tycho = get_tycho() tycho.rename_columns(["pmRA", "pmDE", "VTmag"], ["PM_RA", "PM_DEC", "MAG"]) tycho.rename_columns( *zip(*[(c, c.lower()) for c in tycho.colnames if c != c.lower()]) ) no_ra = tycho["ramdeg"].mask tycho["has_pm_tycho"] = ~no_ra # We are not using Tycho2's RAmdeg/DEmdeg, but instead the RAdeg/DEdeg (ICRS RA/DEC) # That is because ICRS RA/DEC are not modified by proper motion # (which we want to update from Gaia) tycho["epoch"] = 1990 + (tycho["epra-1990"] + tycho["epde-1990"]) / 2 tycho["ra"] = tycho["ra_icrs_"] tycho["dec"] = tycho["de_icrs_"] # TDSC tdsc = get_tdsc() tdsc.rename_columns( ["RAJ2000", "DEJ2000", "pmRA", "pmDE", "TYC1", "TYC2", "TYC3", "VTmag"], ["ra", "dec", "pm_ra", "pm_dec", "tyc1", "tyc2", "tyc3", "mag"], ) tdsc.add_index(["tyc1", "tyc2", "tyc3"]) tdsc["epoch"] = (tdsc["EpRA"] + tdsc["EpDE"]) / 2 # Join Tycho2 and TDSC # TYC3 ID is changed between Tycho2 and TDSC, but TYC1 and TYC2 are the same, # so we can join on those. tycho_tdsc_join = join( tycho[ [ "ra", "dec", "ra_icrs_", "de_icrs_", "tyc1", "tyc2", "tyc3", "mag", "pm_ra", "pm_dec", "epoch", "has_pm_tycho", "epram", "epdem", ] ], tdsc[ "ra", "dec", "tyc1", "tyc2", "tyc3", "mag", "pm_ra", "pm_dec", "epoch", "EpRA", "EpDE", ], keys=["tyc1", "tyc2"], join_type="inner", table_names=["tycho", "tdsc"], metadata_conflicts="silent", ) # BUG in astropy # this is to turn pm_ra_tdsc and pm_dec_tdsc into columns without mask # units fail with the masked column has_pm = ~tycho_tdsc_join["pm_ra_tdsc"].mask & ~tycho_tdsc_join["pm_dec_tdsc"].mask tycho_tdsc_join["has_pm_tdsc"] = has_pm tycho_tdsc_join["pm_ra_tdsc"][~has_pm] = 0 tycho_tdsc_join["pm_dec_tdsc"][~has_pm] = 0 tycho_tdsc_join["pm_dec_tdsc"] = tycho_tdsc_join["pm_dec_tdsc"].data.data tycho_tdsc_join["pm_ra_tdsc"] = tycho_tdsc_join["pm_ra_tdsc"].data.data # this is to turn pm_ra_tycho and pm_dec_tycho into columns without mask # units fail with the masked column (I think this ia a BUG in astropy) has_pm = ( ~tycho_tdsc_join["pm_ra_tycho"].mask & ~tycho_tdsc_join["pm_dec_tycho"].mask ) tycho_tdsc_join["has_pm_tycho"] = has_pm tycho_tdsc_join["pm_ra_tycho"][~has_pm] = 0 tycho_tdsc_join["pm_dec_tycho"][~has_pm] = 0 tycho_tdsc_join["pm_dec_tycho"] = tycho_tdsc_join["pm_dec_tycho"].data.data tycho_tdsc_join["pm_ra_tycho"] = tycho_tdsc_join["pm_ra_tycho"].data.data # otherwise, THE FOLLOWING GETS THE WRONG UNITS and causes SkyCoord to fail # dec = ( # tycho_tdsc['dec_tdsc'] + (tycho_tdsc['pm_dec_tdsc'] \ # * (2000. - tycho_tdsc['epoch_tdsc'])) / 1000 / 3600 # ) # add angular separation and magnitude difference between Tycho and TDSC tycho_tdsc_join["coord_tdsc"] = SkyCoord( ra=tycho_tdsc_join["ra_tdsc"] + (tycho_tdsc_join["pm_ra_tdsc"] * (2000.0 - tycho_tdsc_join["epoch_tdsc"])) / 1000 / 3600 / np.cos(np.deg2rad(tycho_tdsc_join["dec_tdsc"])), dec=tycho_tdsc_join["dec_tdsc"] + (tycho_tdsc_join["pm_dec_tdsc"] * (2000.0 - tycho_tdsc_join["epoch_tdsc"])) / 1000 / 3600, unit="deg", ) tycho_tdsc_join["coord_tycho"] = SkyCoord( ra=tycho_tdsc_join["ra_tycho"] + (tycho_tdsc_join["pm_ra_tycho"] * (2000.0 - tycho_tdsc_join["epoch_tycho"])) / 1000 / 3600 / np.cos(np.deg2rad(tycho_tdsc_join["dec_tycho"])), dec=tycho_tdsc_join["dec_tycho"] + (tycho_tdsc_join["pm_dec_tycho"] * (2000.0 - tycho_tdsc_join["epoch_tycho"])) / 1000 / 3600, unit="deg", ) tycho_tdsc_join["separation"] = ( tycho_tdsc_join["coord_tdsc"] .separation(tycho_tdsc_join["coord_tycho"]) .to("arcsec") ) tycho_tdsc_join["mag_diff"] = np.abs( tycho_tdsc_join["mag_tdsc"] - tycho_tdsc_join["mag_tycho"] ) tycho_tdsc_join["xmatch_weight"] = tycho2_tdsc_mag_diff_prob( tycho_tdsc_join["mag_diff"] ) * tycho2_tdsc_separation_prob(tycho_tdsc_join["separation"]) tycho_tdsc_join_groups = tycho_tdsc_join.group_by(["tyc1", "tyc2"]) arr = tycho_tdsc_join_groups[ ["tyc3_tycho", "tyc3_tdsc", "separation", "mag_diff", "xmatch_weight"] ].as_array() i_result = np.concatenate( [ tycho2_tdsc_get_best_matches(arr[i1:i2]) + i1 for i1, i2 in zip( tycho_tdsc_join_groups.groups.indices[:-1], tycho_tdsc_join_groups.groups.indices[1:], ) ] ) # This is the intersection of Tycho2 and TDSC with columns tyc3_tycho and tyc3_tdsc tycho_tdsc = tycho_tdsc_join_groups[i_result] # in these two cases, there is no row tyc3_tdsc==1, as if it is missing # after the cross-match this leads to large angular and magnitude differences # so I think there is something wrong somewhere, and I will not use them. tdsc_bad = (tdsc["tyc1"] == 2673) & (tdsc["tyc2"] == 3845) tdsc_bad |= (tdsc["tyc1"] == 6461) & (tdsc["tyc2"] == 1120) tycho_tdsc_bad = (tycho_tdsc["tyc1"] == 2673) & (tycho_tdsc["tyc2"] == 3845) tycho_tdsc_bad |= (tycho_tdsc["tyc1"] == 6461) & (tycho_tdsc["tyc2"] == 1120) # This is the complement of Tycho2 (stars in Tycho and not in TDSC) # BUG in astropy # if I do not do .copy(), I get this exception after 4 minutes: # NotImplementedError: join requires masking column 'tyc3_agasc' but column \ # type MaskedColumn does not support masking tycho_tdsc_complement = join( tycho[["tyc1", "tyc2", "tyc3", "ra", "dec", "epoch"]], tdsc[["tyc1", "tyc2", "tyc3"]][~tdsc_bad].copy(), keys=["tyc1", "tyc2"], join_type="left", table_names=["tycho", "tdsc"], metadata_conflicts="silent", ) tycho_tdsc_complement.rename_columns( ["ra", "dec", "epoch"], ["ra_tycho", "dec_tycho", "epoch_tycho"] ) tycho_tdsc_complement = tycho_tdsc_complement[ tycho_tdsc_complement["tyc3_tdsc"].mask ] tycho_tdsc_complement["separation"] = 0 tycho_tdsc_complement["mag_diff"] = 0 # and this is the union of Tycho2 and TDSC cols = [ "tyc1", "tyc2", "tyc3_tycho", "tyc3_tdsc", "separation", "mag_diff", "ra_tycho", "dec_tycho", "epoch_tycho", ] tycho_tdsc_union = vstack( [tycho_tdsc_complement[cols], tycho_tdsc[cols][~tycho_tdsc_bad].copy()] ) tycho_tdsc_union["in_tdsc"] = ~tycho_tdsc_union["tyc3_tdsc"].mask tycho_tdsc_union["tyc3_tdsc"][~tycho_tdsc_union["in_tdsc"]] = tycho_tdsc_union[ ~tycho_tdsc_union["in_tdsc"] ]["tyc3_tycho"] tycho_tdsc_union["tycho_id"] = tycho_id( tycho_tdsc_union["tyc1"].astype(int), tycho_tdsc_union["tyc2"], tycho_tdsc_union["tyc3_tycho"], ) tycho_tdsc_union["tdsc_id"] = tycho_id( tycho_tdsc_union["tyc1"].astype(int), tycho_tdsc_union["tyc2"], tycho_tdsc_union["tyc3_tdsc"], ) tycho_tdsc_union.rename_columns( ["tyc3_tycho", "separation", "mag_diff"], ["tyc3", "separation_tdsc", "mag_diff_tdsc"], ) tycho_tdsc_bad = (tycho_tdsc_union["tyc1"] == 2673) & ( tycho_tdsc_union["tyc2"] == 3845 ) tycho_tdsc_bad |= (tycho_tdsc_union["tyc1"] == 6461) & ( tycho_tdsc_union["tyc2"] == 1120 ) tycho_tdsc_union = tycho_tdsc_union[~tycho_tdsc_bad] tycho_tdsc_union.sort(["tyc1", "tyc2", "tyc3"]) return tycho_tdsc_union
[docs]@utils.cached(name="agasc_summary") def get_agasc_summary(file_in="agasc1p7.h5"): """ Loads the AGASC and the Tycho2 and GSC 2.3 catalogs and joins them, using GSC 1.1 IDs as intermediates. The final result is sorted by agasc_id. This is convenient for later use, so one does not have to join this table with others, but one can use np.searchsorted, which is faster. This function does the following: - determine the Tycho2, Tycho2-TDSC, and GSC1.1 IDs (Tycho2 is cross-matched with TDSC so we have the Tycho2-TDSC IDs) - add a random index to each AGASC entry. This is useful for cross-validation later on. - add a column 'R' with the estimated angular distance moved since its epoch to the Gaia reference epoch - add the GSC2.3 ID using the vivzier 2.3 catalog - add observed magnitudes from AGASC supplement - add boolean flags to denote stars that are guide/acq candidates - remove a duplicated entry in the AGASC catalog - For tycho stars, take the original Tycho RA/dec instead of the ones at 2000 epoch - sort the final result by agasc_id """ np.random.seed(10001) # make sure the random shuffle is always the same gsc_23 = get_gsc23() print("got GSC 2.3") # gsc_11 = get_gsc11() # print('got GSC 1.1') with tables.open_file(str(Path(os.environ["SKA"], "data", "agasc", file_in))) as h5: agasc = Table(h5.root.data[:]) # AGASC ID 154534513 is repeated in the AGASC catalogue. agasc = agasc[~((agasc["AGASC_ID"] == 154534513) & (agasc["MAG_CATID"] == 1))] # add a random index for easy cross-validation random = agasc[["AGASC_ID"]].copy() random.sort("AGASC_ID") i = np.arange(len(agasc)) np.random.shuffle(i) random["random_index"] = i agasc = join(agasc, random, keys="AGASC_ID") assert np.all(agasc["AGASC_ID"] == agasc["AGASC_ID"]) print("added random index") agasc["TYC1"] = np.ma.masked_all(len(agasc), dtype=int) agasc["TYC2"] = np.ma.masked_all(len(agasc), dtype=int) agasc["TYC3"] = np.ma.masked_all(len(agasc), dtype=int) agasc["gsc1_region"] = MaskedColumn(length=len(agasc), dtype=int) agasc["gsc1_star"] = MaskedColumn(length=len(agasc), dtype=int) agasc["GSC1"] = MaskedColumn(length=len(agasc), dtype="<U10") agasc["tycho_id"] = np.ma.masked_all(len(agasc), dtype="<U12") # the mask will be set later agasc["gsc1_region"] = agasc["gsc1_region"].fill_value agasc["gsc1_star"] = agasc["gsc1_star"].fill_value agasc["GSC1"] = agasc["GSC1"].fill_value has_gsc = agasc["XREF_ID1"] != -9999 has_tyc = agasc["XREF_ID3"] != -9999 # these all have (agasc['XREF_ID4'] != -9999) # has_2mass = agasc['XREF_ID2'] != -9999 gsc_id = np.frompyfunc("{:04d}-{:05d}".format, 2, 1) agasc["gsc1_star"][has_gsc] = agasc["XREF_ID1"][has_gsc] agasc["gsc1_region"][has_gsc] = ( (agasc["AGASC_ID"][has_gsc] - 8 * agasc["XREF_ID1"][has_gsc]) / 131072 ).astype(int) agasc["GSC1"][has_gsc] = gsc_id( agasc["gsc1_region"][has_gsc], agasc["gsc1_star"][has_gsc] ) # print('adding GSC1 coords', len(agasc), len(gsc_11)) # gsc1_ra_dec = join( # agasc[['gsc1_star', 'gsc1_region']], # gsc_11[['gsc1_star', 'gsc1_region', 'RA_GSC11', 'DEC_GSC11']], # keys=['gsc1_star', 'gsc1_region'], # join_type='left' # ) # print('added GSC1 coords', len(agasc), len(gsc1_ra_dec)) # assert len(gsc1_ra_dec) == len(agasc) # gcs1_coords = SkyCoord(ra=gsc1_ra_dec['RA_GSC11'], dec=gsc1_ra_dec['DEC_GSC11'], unit="deg") # agasc_coords = SkyCoord(ra=agasc['RA'], dec=agasc['DEC'], unit="deg") # assert len(gcs1_coords) == len(agasc_coords) # gsc1_separation = agasc_coords.separation(gcs1_coords).arcsecond # agasc['GSC1'][(gsc1_separation > 1)] = agasc['GSC1'].fill_value # the GSC1 mask is set later agasc["gsc1_star"].mask = agasc["gsc1_star"] == agasc["gsc1_star"].fill_value agasc["gsc1_region"].mask = agasc["gsc1_region"] == agasc["gsc1_region"].fill_value print("GSC1.1 ID done") agasc["TYC2"][has_tyc] = agasc["XREF_ID3"][has_tyc] agasc["TYC3"][has_tyc] = agasc["XREF_ID4"][has_tyc] agasc["TYC1"][has_tyc] = ( agasc["AGASC_ID"][has_tyc] - agasc["TYC2"][has_tyc] * 8 - (agasc["TYC3"][has_tyc] - 1) ) / 131072 tycho_id = np.frompyfunc("{}-{}-{}".format, 3, 1) agasc["tycho_id"][has_tyc] = tycho_id( agasc["TYC1"][has_tyc], agasc["TYC2"][has_tyc], agasc["TYC3"][has_tyc] ) print("Tycho ID done") # add GSC2.3 ID agasc = join( agasc, gsc_23[ ["GSC1", "GSC2.3", "RA_GSC23", "DEC_GSC23", "Epoch_GSC23", "Vmag_GSC23"] ][~gsc_23["GSC1"].mask], keys=["GSC1"], join_type="left", ) agasc["GSC2.3"].mask = agasc["GSC2.3"] == agasc["GSC2.3"].fill_value agasc["GSC1"].mask = agasc["GSC1"] == agasc["GSC1"].fill_value print("GSC2.3 ID done") agasc_gsc = agasc[~agasc["GSC2.3"].mask] gcs2_coords = SkyCoord( ra=agasc_gsc["RA_GSC23"], dec=agasc_gsc["DEC_GSC23"], unit="deg" ) agasc_coords = SkyCoord( ra=agasc_gsc["RA"].data, dec=agasc_gsc["DEC"].data, unit="deg" ) separation = np.ma.masked_all(len(agasc)) separation[~agasc["GSC2.3"].mask] = agasc_coords.separation(gcs2_coords).arcsecond agasc["separation_gsc2"] = separation agasc["mag_diff_gsc2"] = agasc["Vmag_GSC23"] - agasc["MAG"] print("GSC2.3 separation done") agasc.rename_columns( *zip(*[(c, c.lower()) for c in agasc.colnames if c != c.lower()]) ) # Add observed magnitudes mags = get_supplement_table("mags").copy() mags.rename_columns(["mag_aca", "mag_aca_err"], ["mag_aca_obs", "mag_aca_err_obs"]) agasc = join( agasc, mags[["agasc_id", "mag_aca_obs", "mag_aca_err_obs"]], keys="agasc_id", join_type="left", ) print("added observed mags") agasc["guide"] = ( (agasc["class"] == 0) & (agasc["mag_aca"] > 5.2) & (agasc["mag_aca"] < 10.3) & (agasc["aspq1"] < 20) & (agasc["aspq2"] == 0) # & (agasc["POS_ERR"] < 1250) & ((agasc["var"] == -9999) | (agasc["var"] == 5)) ) agasc["acq"] = ( (agasc["class"] == 0) & (agasc["mag_aca"] > 5.3) & (agasc["mag_aca"] < 10.3) & (agasc["aspq1"] < 40) & (agasc["aspq2"] == 0) # & (agasc["POS_ERR"] < 3000) & ((agasc["var"] == -9999) | (agasc["var"] == 5)) ) # add Tycho-2 TDSC information (this is where Tycho2 and TDSC are cross-matched) tycho_tdsc = get_tycho2_tdsc() # BUG in astropy: join does not accept masked columns in the join columns # another BUG: while trying to deal with the above, I found yet another issue when trying yo do # the following # agasc['tyc1'][agasc['tyc1'].mask] = agasc['tyc1'].fill_value # agasc['tyc2'][agasc['tyc2'].mask] = agasc['tyc2'].fill_value # agasc['tyc3'][agasc['tyc3'].mask] = agasc['tyc3'].fill_value # so I did this instead: agasc["a"] = ( np.ones(len(agasc), dtype=agasc["tyc1"].dtype) * agasc["tyc1"].fill_value ) agasc["a"][~agasc["tyc1"].mask] = agasc["tyc1"][~agasc["tyc1"].mask] agasc["b"] = ( np.ones(len(agasc), dtype=agasc["tyc2"].dtype) * agasc["tyc2"].fill_value ) agasc["b"][~agasc["tyc2"].mask] = agasc["tyc2"][~agasc["tyc2"].mask] agasc["c"] = ( np.ones(len(agasc), dtype=agasc["tyc3"].dtype) * agasc["tyc3"].fill_value ) agasc["c"][~agasc["tyc3"].mask] = agasc["tyc3"][~agasc["tyc3"].mask] agasc.remove_columns(["tyc1", "tyc2", "tyc3"]) agasc.rename_columns(["a", "b", "c"], ["tyc1", "tyc2", "tyc3"]) agasc = join( agasc, tycho_tdsc, keys=["tyc1", "tyc2", "tyc3"], join_type="left", metadata_conflicts="silent", ) agasc["tyc1"] = MaskedColumn( data=agasc["tyc1"], mask=agasc["tyc1"].fill_value == agasc["tyc1"] ) agasc["tyc2"] = MaskedColumn( data=agasc["tyc2"], mask=agasc["tyc2"].fill_value == agasc["tyc2"] ) agasc["tyc3"] = MaskedColumn( data=agasc["tyc3"], mask=agasc["tyc3"].fill_value == agasc["tyc3"] ) has_tyc = ~agasc["tyc3_tdsc"].mask agasc["tycho_tdsc_id"] = np.ma.masked_all(len(agasc), dtype="<U12") agasc["tycho_tdsc_id"][has_tyc] = tycho_id( agasc["tyc1"][has_tyc], agasc["tyc2"][has_tyc], agasc["tyc3_tdsc"][has_tyc] ) print("Tycho-TDSC ID done") assert np.unique(agasc["agasc_id"]).shape[0] == len(agasc) agasc.remove_column("tycho_id_1") agasc.rename_column("tycho_id_2", "tycho_id") # For tycho stars, take the original Tycho RA/dec sel = agasc["tycho_id"].mask agasc["ra"] = np.where(sel, agasc["ra"], agasc["ra_tycho"]) agasc["dec"] = np.where(sel, agasc["dec"], agasc["dec_tycho"]) agasc["epoch"] = np.where(sel, agasc["epoch"], agasc["epoch_tycho"]) # the following calculates a "conservative" proper motion value for the purposes of the # cone search in Gaia (if both PM_RA and PM_DEC are missing, they are set to zero, # if only one is missing it is set to the value of the other) pm_dec = agasc["pm_dec"].astype(dtype=np.int64) pm_ra = agasc["pm_ra"].astype(np.int64) k = (pm_dec == -9999) & (pm_ra != -9999) pm_dec[k] = pm_ra[k] k = (pm_dec != -9999) & (pm_ra == -9999) pm_ra[k] = pm_dec[k] k = (pm_dec == -9999) & (pm_ra == -9999) pm_ra[k] = 0 pm_dec[k] = 0 pm = np.full(len(agasc), 0.0) nz = (np.abs(pm_ra) != 0) | (np.abs(pm_dec) != 0) pm[nz] = np.sqrt(pm_ra[nz] ** 2 + pm_dec[nz] ** 2) agasc["pm"] = pm # The reference epoch for Gaia DR3 (both Gaia EDR3 and the full Gaia DR3) is J2016.0. agasc["r"] = agasc["pm"] * (2016.0 - agasc["epoch"]) # arguably, this is inefficient, but downstream I often join with this table, and that takes # time. If I sort it here, I can just do np.searchsorted agasc.convert_bytestring_to_unicode() # bug in astropy will drop mask unless string is unicode agasc.sort("agasc_id") agasc.add_index(["tyc1", "tyc2", "tyc3"]) agasc.add_index(["agasc_id"]) return agasc
[docs]@utils.cached(name="agasc_duplicates") def get_potential_duplicates(): """ Return a list of potential duplicate stars in AGASC. Two stars are considered potential duplicates if they are within 1 arcsec of each other and their positions are from different base catalogs. To decide which one of the two might be a duplicate, they are sorted by pos_catid using the following precedence order (from high to low): - 5: Tycho-2 - 6: GSC-ACT - 4: ACT - 3: Tycho-1 - 2: PPM - 1: GSC1.1 - 0: No catalog Two stars within 1 arcsec from the same catalog does occur, because there are binaries and such. But two stars within 1 arcsec and from different catalogs is unlikely unless there was an update of positions, and only one star of a multiple system was updated. Most probably, this coincidence happened when two datasets were merged without first removing cross-matches. Galaxies, non-stars and potential artifacts are excluded from this list. All the other classes are kept. Parameters ---------- agasc : Table The AGASC summary table. Returns ------- Table A table with potential duplicate pairs of stars. The second star in the pair corresponds to potential duplicates. The first star in the pair can be a duplicate but it not always is. """ agasc = get_agasc_summary() # exclude galaxies, non-stars and potential artifacts exclude = [1, 3, 5] agasc = agasc[~np.in1d(agasc['class'], exclude)] # this will be used to determine duplicates # (e.g.: a star with pos_precedence=5 that is within 1 arcsec of a star with pos_precedence=6 # is marked as a duplicate) pos_precedence = np.zeros(len(agasc)) pos_precedence[agasc["pos_catid"] == 0] = 0 pos_precedence[agasc["pos_catid"] == 1] = 1 pos_precedence[agasc["pos_catid"] == 2] = 2 pos_precedence[agasc["pos_catid"] == 3] = 3 pos_precedence[agasc["pos_catid"] == 4] = 4 pos_precedence[agasc["pos_catid"] == 5] = 6 pos_precedence[agasc["pos_catid"] == 6] = 5 agasc["pos_precedence"] = pos_precedence # mag precedence is not used to determine duplicates, but it can be useful to find issues # i.e.: what if a duplicate got introduced when updating magnitudes, as oppose to when merging # catalogs? mag_precedence = np.zeros(len(agasc)) mag_precedence[agasc["mag_catid"] == 0] = 0 mag_precedence[agasc["mag_catid"] == 1] = 1 mag_precedence[agasc["mag_catid"] == 2] = 2 mag_precedence[agasc["mag_catid"] == 3] = 3 mag_precedence[agasc["mag_catid"] == 4] = 4 mag_precedence[agasc["mag_catid"] == 5] = 6 mag_precedence[agasc["mag_catid"] == 6] = 5 agasc["mag_precedence"] = mag_precedence # do the cross-match of AGASC with itself cols = [ 'agasc_id', 'mag_aca', 'class', 'pos_catid', 'mag_catid', 'mag_precedence', 'pos_precedence' ] coord = SkyCoord(ra=agasc["ra"].data, dec=agasc["dec"].data, unit="deg") idxc, idxcatalog, d2d, _ = coord.search_around_sky(coord, 100 * u.arcsec) pairs = hstack([agasc[cols][idxc], agasc[cols][idxcatalog]]) pairs["idx_1"] = idxc pairs["idx_2"] = idxcatalog pairs["d2d"] = d2d.to(u.arcsec) pairs["d_mag"] = pairs["mag_aca_2"] - pairs["mag_aca_1"] pairs = pairs[(pairs["agasc_id_1"] != pairs["agasc_id_2"])] # remove self-matches duplicates = pairs[ (np.abs(pairs["d2d"]) < 1) & (pairs["pos_precedence_1"] > pairs["pos_precedence_2"]) ] # group all pairs by the connected components in the graph of pairs. # These groups are what are called "equivalence classes", with the following definition: # "a pair of stars are "equivalent" if d2d < 1 and pos_precedence differ". # We do this because it can happen that star A is within 1 arcsec of star B, and star B is # within 1 arcsec of star C, but star A is not within 1 arcsec of star C. We still want A, B and # C in the same "group" graph = nx.Graph() graph.add_nodes_from(duplicates['agasc_id_1']) graph.add_nodes_from(duplicates['agasc_id_2']) for pair in duplicates: graph.add_edge(pair['agasc_id_1'], pair['agasc_id_2'], d2d=pair['d2d'], d_mag=pair['d_mag']) groups = Table() groups['agasc_id'] = list(graph.nodes()) groups.sort('agasc_id') groups['group'] = -1 for i, comp in enumerate(nx.connected_components(graph)): idx = np.searchsorted(groups['agasc_id'], list(comp)) groups['group'][idx] = i idx = np.searchsorted(groups['agasc_id'], duplicates['agasc_id_1']) duplicates['group'] = groups['group'][idx] # all agasc_id_2 are duplicates by construction, but some agasc_id_1 are duplicates too # (just to keep in mind) duplicates['duplicate_2'] = True duplicates['duplicate_1'] = np.in1d(duplicates['agasc_id_1'], duplicates['agasc_id_2']) return duplicates
[docs]@utils.cached(name="agasc_summary_csv") def get_agasc_summary_csv(): """Return a subset of the agasc summary table, in CSV format to be uploaded to Gaia.""" agasc_all = get_agasc_summary() summary = agasc_all[ [ "agasc_id", "ra", "dec", "epoch", "pm", "pm_ra", "pm_dec", "gsc2.3", "tycho_tdsc_id", "random_index", ] ] summary.rename_columns(["gsc2.3", "tycho_tdsc_id"], ["gsc2_3", "tycho_id"]) return summary
[docs]@utils.cached(name="agasc_background_summary_csv") def get_background_agasc_summary_csv(): """Return a subset of the agasc summary table, with positions randomly shifted 1.5 degree, in CSV format to be uploaded to Gaia.""" summary = get_agasc_summary_csv() # the angle we want to rotate alpha = np.deg2rad(1.5) sin_alpha = np.sin(alpha / 2) cos_alpha = np.cos(alpha / 2) # draw random point in unit sphere (a random direction around which to rotate) u_dec = (np.arccos(np.random.uniform(-1, 1, len(summary))),) u_ra = (np.random.uniform(0, 2 * np.pi, len(summary)),) # quaternion rotation around direction (in cartesian coordinates) dq = Quat( q=np.vstack( [ sin_alpha * np.cos(u_dec) * np.cos(u_ra), sin_alpha * np.cos(u_dec) * np.sin(u_ra), sin_alpha * np.sin(u_dec), cos_alpha * np.ones(len(summary)), ] ).T ) # the original direction p = np.vstack( [ np.cos(np.deg2rad(summary["dec"])) * np.cos(np.deg2rad(summary["ra"])), np.cos(np.deg2rad(summary["dec"])) * np.sin(np.deg2rad(summary["ra"])), np.sin(np.deg2rad(summary["dec"])), ] ).T # the rotated direction p2 = np.einsum("ijk,ik->ij", dq.transform, p) ra = np.rad2deg(np.arctan2(p2[:, 1], p2[:, 0])) % 360 dec = np.rad2deg(np.arcsin(p2[:, 2])) # assert np.all((ra <= 360) & (ra >= 0)) # assert np.all((dec <= 90) & (dec > -90)) summary["ra"] = ra summary["dec"] = dec return summary
@utils.cached(name="gaia_min_bias_train_test") def get_gaia_min_bias_train_test(): gaia_min_bias = Table.read("data/gaia_min_bias-result.fits.gz") gaia_min_bias.rename_columns( ["phot_g_mean_mag", "phot_bp_mean_mag", "phot_rp_mean_mag"], ["g_mag", "bp_mag", "rp_mag"], ) gaia_min_bias["has_mag"] = ( 1 * (~gaia_min_bias["g_mag"].mask) + 2 * (~gaia_min_bias["rp_mag"].mask) + 4 * (~gaia_min_bias["bp_mag"].mask) ) gaia_min_bias["ave_gaia_mag"] = MaskedColumn( np.zeros(len(gaia_min_bias)), mask=np.ones(len(gaia_min_bias)) ) gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 1] = gaia_min_bias[ "g_mag" ][gaia_min_bias["has_mag"] == 1] gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 2] = gaia_min_bias[ "rp_mag" ][gaia_min_bias["has_mag"] == 2] gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 4] = gaia_min_bias[ "bp_mag" ][gaia_min_bias["has_mag"] == 4] gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 3] = ( gaia_min_bias["g_mag"][gaia_min_bias["has_mag"] == 3] + gaia_min_bias["rp_mag"][gaia_min_bias["has_mag"] == 3] ) / 2 gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 5] = ( gaia_min_bias["g_mag"][gaia_min_bias["has_mag"] == 5] + gaia_min_bias["bp_mag"][gaia_min_bias["has_mag"] == 5] ) / 2 gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 6] = ( gaia_min_bias["rp_mag"][gaia_min_bias["has_mag"] == 6] + gaia_min_bias["bp_mag"][gaia_min_bias["has_mag"] == 6] ) / 2 gaia_min_bias["ave_gaia_mag"][gaia_min_bias["has_mag"] == 7] = ( gaia_min_bias["g_mag"][gaia_min_bias["has_mag"] == 7] + gaia_min_bias["rp_mag"][gaia_min_bias["has_mag"] == 7] + gaia_min_bias["bp_mag"][gaia_min_bias["has_mag"] == 7] ) / 3 return ( gaia_min_bias[gaia_min_bias["random_index"] < 5000000], gaia_min_bias[gaia_min_bias["random_index"] >= 5000000], ) def get_all(): get_tycho() get_tdsc() get_gsc23() get_gsc11() get_tycho2_tdsc() get_agasc_summary() get_agasc_summary_csv() get_gaia_min_bias_train_test()