#!/usr/bin/env python

#
# Copyright (C) 2013-2022, 2024-2025 
# Smithsonian Astrophysical Observatory
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

import os
import sys

import ciao_contrib.logger_wrapper as lw


# Name and revision date of this tool.  Both are used to build the
# HTTP User-Agent header and the error message printed on failure.
toolname = "download_obsid_caldb"
__revision__ = "30 January 2025"

lw.initialize_logger(toolname)

# Short aliases for the logger's per-verbosity output methods.
lgr = lw.get_logger(toolname)
verb0 = lgr.verbose0
verb1 = lgr.verbose1
verb2 = lgr.verbose2
verb3 = lgr.verbose3
verb5 = lgr.verbose5

# Base URL used for every download when not running in "missing" mode.
CDA_SERVER="https://cxc.cfa.harvard.edu/cdaftp/arcftp/ChandraCalDB"

def wrap_urlopen(url):
    """Open *url* with a tool-specific User-Agent and no certificate checks.

    The python shipped by ciao-install does not set up the ssl
    certificates correctly, so the cxc site has to be accessed
    without verification.

    The User-Agent header is "<toolname>/<revision>".  The value
    returned is whatever urllib.request.urlopen returns for the request.
    """
    import ssl
    from urllib.request import Request, urlopen

    unverified = ssl._create_unverified_context()

    agent = "{}/{}".format(toolname, __revision__)
    req = Request(url)
    req.add_header("User-Agent", agent)

    return urlopen(req, context=unverified)


def retrieve_config_files(baseurl, newver, rootdir, outdir):
    """
    Retrieve the generic caldb.config file and the instrument specific
    CALDB caldb.indx and key.config files.

    The generic configuration file is downloaded from

        {baseurl}/software/tools/caldb.config

    and saved as {outdir}/caldb.config.

    For each of the acis, hrc, and default directories the caldb.indx
    and key.config files are downloaded once and written twice:

        {outdir}/data/chandra/{instrume}/caldb.indx (and key.config)

    which keeps the old-school caldb layout happy, and

        {outdir}/config/{instrume}-{version}.indx (and .config)

    which allows the user to change versions without having to
    manipulate symbolic links.

    Parameters
    ----------
    baseurl : str
        Root URL of the remote CALDB.
    newver : str
        CALDB version string used in the versioned file names.
    rootdir
        Not used by this routine.
    outdir : str
        Root of the local CALDB directory; it must already exist.
    """

    verb1("Retrieving CALDB index files")

    # Each HTTP response is closed as soon as it has been read, rather
    # than being left open until it happens to be garbage collected.
    url = "{}/software/tools/caldb.config".format(baseurl)
    with wrap_urlopen(url) as page:
        contents = page.read()
    with open("{}/caldb.config".format(outdir), "wb") as fp:
        fp.write(contents)

    #
    # We store both the chandra/$inst/caldb.indx file to make
    # old-school caldb happy, but then we also store versioned
    # copies of the index and key.config files in the config/
    # directory.
    #
    init_output_dir("{}/config".format(outdir))

    for dd in ["acis", "hrc", "default"]:
        init_output_dir("{}/data/chandra/{}".format(outdir, dd))

        for ff in ["caldb.indx", "key.config"]:

            url = "{}/data/chandra/{}/{}".format(baseurl, dd, ff)
            with wrap_urlopen(url) as page:
                contents = page.read()

            classic = "{}/data/chandra/{}/{}".format(outdir, dd, ff)
            with open(classic, "wb") as out_indx:
                out_indx.write(contents)

            ftype = "config" if ff.endswith("config") else "indx"
            versioned = "{}/config/{}-{}.{}".format(outdir, dd, newver, ftype)
            with open(versioned, "wb") as out_indx2:
                out_indx2.write(contents)

            # The message reports the data/chandra copy of the file.
            verb2(ftype + " name :" + classic)



def filter_config(newver, outdir):
    """
    Turn the generic CALDB configuration file into version specific
    configuration and setup files.

    The Chandra caldb uses a generic configuration file

        root/software/tools/caldb.config

    that points to each of the instrument specific index files and
    key config files:

        root/data/chandra/instrument/caldb.indx
        root/data/chandra/instrument/key.config

    where these are symbolic links to "version" specific files, e.g.

        caldb.indx -> ./index/caldbN0400.indx

    which makes keeping track of versions, or using several versions
    at once, a real headache.  Instead this routine rewrites the
    downloaded generic file so that it refers to versioned names:

        caldb.config -> config/CHANDRA-{version}.config
        caldb.indx   -> config/INSTRUMENT-{version}.indx
        key.config   -> config/INSTRUMENT-{version}.config

    Steps:

      - {outdir}/caldb.config is read and then deleted;
      - its contents are copied unchanged to the traditional
        {outdir}/software/tools/caldb.config location;
      - the CHANDRA lines, other than those for EPHIN, PCAD, SIM and
        PIMMS, are rewritten and saved to the CHANDRA-{version}.config
        file;
      - csh (caldbinit-{version}.unix) and sh (caldbinit-{version}.sh)
        setup scripts are written to the config directory.

    The return value is the name of the last file written, that is
    the caldbinit-{version}.sh script.
    """

    generic = "{}/caldb.config".format(outdir)
    with open(generic, "r") as fp:
        cfg = fp.readlines()
    os.unlink(generic)

    # Copy config file to standard/classic location
    init_output_dir("{}/software/tools".format(outdir))
    with open("{}/software/tools/caldb.config".format(outdir), "w") as fp:
        fp.writelines(cfg)

    # The generic caldb.config covers all the HEASARC missions; keep
    # only the chandra lines and drop ephin, sim, pcad, pimms.
    unwanted = ['EPHIN', 'PCAD', 'SIM', 'PIMMS']
    selected = [line for line in cfg
                if line.startswith("CHANDRA")
                and line.split()[1] not in unwanted]

    def versioned(line):
        "Point one config line at the versioned index/key files"
        inst = line.split(" ")[3].split("/")[2]
        line = line.replace("data/chandra/" + inst, "config/")
        line = line.replace("caldb.indx", "{}-{}.indx".format(inst, newver))
        return line.replace("key.config", "{}-{}.config".format(inst, newver))

    chandra_config = "{}/config/CHANDRA-{}.config".format(outdir, newver)
    with open(chandra_config, "w") as fp:
        fp.writelines([versioned(line) for line in selected])

    csh_setup = "{}/config/caldbinit-{}.unix".format(outdir, newver)
    with open(csh_setup, "w") as fp:
        fp.write("setenv CALDB {}\n".format(outdir))
        fp.write("setenv CALDBCONFIG {}\n".format(chandra_config))
        fp.write("setenv CALDBALIAS none\n")

    sh_setup = "{}/config/caldbinit-{}.sh".format(outdir, newver)
    with open(sh_setup, "w") as fp:
        fp.write("export CALDB={}\n".format(outdir))
        fp.write("export CALDBCONFIG={}\n".format(chandra_config))
        fp.write("export CALDBALIAS=none\n")

    return sh_setup


def get_version_file(baseurl, rootdir, outdir):
    """
    Read the Chandra caldb version file.  Get the old version if
    it exists then download the new file.

    The file is stored as

        {outdir}/docs/chandra/caldb_version/caldb_version.fits

    and the version is the value of the CALDB_VER column in its last
    row.

    Parameters
    ----------
    baseurl : str or None
        Root URL of the remote CALDB.  When it is not set nothing is
        downloaded and the file must already exist locally.
    rootdir
        Not used by this routine.
    outdir : str
        Root of the local CALDB directory.

    Returns
    -------
    The CALDB version read from the (possibly just downloaded) file.

    Raises
    ------
    RuntimeError
        If baseurl is not set and there is no local version file.
    """
    from pycrates import read_file

    def latest_version(filename):
        "Return CALDB_VER from the last row, as a string when possible"
        tab = read_file(filename)
        value = tab.get_column("CALDB_VER").values[-1]  # last row
        try:
            return value.decode("ascii")
        except Exception:
            # Not a bytes value (or not ASCII): use it unchanged.
            return value

    verdir = "{}/docs/chandra/caldb_version/".format(outdir)
    init_output_dir(verdir)
    outfile = "{}/caldb_version.fits".format(verdir)

    # There may be no previous file, or it may not be readable; either
    # way there is then no old version to report.
    try:
        oldver = latest_version(outfile)
    except Exception:
        oldver = None

    if baseurl:
        url = "{}/docs/chandra/caldb_version/caldb_version.fits".format(baseurl)
        # Close the HTTP response once the contents have been read.
        with wrap_urlopen(url) as page:
            contents = page.read()
        with open(outfile, "wb") as fp:
            fp.write(contents)

    elif not os.path.exists(outfile):
        raise RuntimeError("Error: cannot locate caldb version file")

    newver = latest_version(outfile)

    if oldver and oldver != newver:
        verb0("Old CALDB version '{}', New version will be '{}'".format(oldver, newver))

    verb1("Retrieving files for CALDB_VER = {}".format(newver))
    return newver


def init_output_dir(outdir):
    """Create the directory outdir, including any missing parents.

    An "already exists" error from os.makedirs is ignored; any other
    OSError is passed on to the caller.
    """
    try:
        os.makedirs(outdir)
    except FileExistsError:
        # Something is already at this path, so there is nothing to do.
        pass


def retrieve_caldb_file(baseurl, rootdir, looking_for, outdir, clobber, missing):
    """
    Retrieve the CALDB data files

    Parameters
    ----------
    baseurl : str or None
        Root URL of the remote CALDB (not used when missing is set).
    rootdir
        Not used by this routine.
    looking_for : sequence of str or None
        Local file names (under outdir), each optionally followed by a
        "[...]" extension specifier.  Nothing is done if it is None or
        empty.
    outdir : str
        Root of the local CALDB directory; it is stripped from the
        directory of each file to get the path used in the URL.
    clobber : bool
        Download a file even when the local copy has the same size as
        the remote file.
    missing : bool
        Do not download anything, just report whether each file is
        present locally.

    One line per file is written to stderr: the file name followed by
    a 20 character progress bar and a status.
    """
    if looking_for is None or 0 == len(looking_for):
        return

    # strip off the extension numbers, we want the whole file
    # get unique set of file names
    uniq = sorted({x.split('[')[0] for x in looking_for})

    for products in uniq:
        # This is used to graft the CALDB directory path onto the
        # outdir path. The os.path.join() is used to get the
        # outdir path w/ the trailing "/" (but only if it's not
        # already there).
        hasdir = os.path.dirname(products)
        init_output_dir(hasdir)
        zz = hasdir.replace(os.path.join(outdir, ""), "", 1)
        hasnam = os.path.basename(products)

        outfile = "{}/{}".format(hasdir, hasnam)
        updated = os.path.exists(outfile)
        outfile_size = os.path.getsize(outfile) if updated else 0

        sys.stderr.write("    {:<40s}".format(hasnam))

        if missing:
            sys.stderr.write("." * 20)
            good = "online" if updated else "* MISSING *"
            sys.stderr.write(" {}\n".format(good))
            continue

        #
        # Note to future self:  Since FITS files are stored in 2880
        # blocks, we can't rely on file size to indicate that
        # files have changed.  Only by looking at checksum/datasum
        # which means we need to redownload the file (or come
        # up with a way to stream part of the file until enough
        # is read to get the CHECKSUM/DATASUM (in all extensions!)
        # to know file is unchanged.
        #
        # But, if ftp size != on-disk-size then we know it's
        # wrong so then download it.
        #

        url = "{}/{}/{}".format(baseurl, zz, hasnam)

        # The with blocks make sure that both the HTTP response and the
        # output file are closed, including when the file is skipped
        # or the transfer fails part way through.
        with wrap_urlopen(url) as page:

            if 'Content-Length' in page.headers:
                fsize = int(page.headers['Content-Length'])
            else:
                fsize = None  # We don't know

            if (fsize == outfile_size) and not clobber:
                sys.stderr.write("." * 20)
                sys.stderr.write(" (skipped)\n")
                continue

            with open(outfile, "wb") as outfp:
                if fsize is None:
                    # page len unknown, skip hashes
                    outfp.write(page.read())
                else:
                    blk = fsize // 20
                    for _ in range(20):
                        outfp.write(page.read(blk))
                        sys.stderr.write("#")
                        sys.stderr.flush()

                    # The 20 equal blocks only cover the whole file
                    # when its size is a multiple of 20, so pick up
                    # whatever is left (this is everything for a file
                    # smaller than 20 bytes).
                    outfp.write(page.read())

        if updated:
            sys.stderr.write(' (updated)')

        sys.stderr.write("\n")


def retrieve_files_by_tool(baseurl, rootdir, tool, infile, outdir, clobber, missing):
    """
    Query the CALDB for each product code in tool and retrieve the
    files that match.

    tool is a dictionary keyed by CAL_CNAM product code; each value is
    either None or a dictionary of Caldb attribute values that are set
    on the query before the search is made.
    """
    from caldb4 import Caldb

    for codename, overrides in tool.items():
        verb3("Looking up CAL_CNAM={}".format(codename))

        # Ideally a single Caldb object would be created per infile,
        # but there looks to be some memory wackiness when the same
        # object has its .product changed -- it doesn't reset/reinit
        # cleanly -- so make a new one for every product.
        query = Caldb(infile=infile)
        query.product = codename

        if overrides:
            for attr in overrides:
                verb3("Overrding value {}={}".format(attr, overrides[attr]))
                setattr(query, attr, overrides[attr])

        # The CALDB interface is a little different to normal Python
        # modules, with some attribute accesses acting like method
        # calls: both .search and .close are used this way.
        matches = query.search
        retrieve_caldb_file(baseurl, rootdir, matches, outdir, clobber, missing)

        query.close


def get_all_products():
    """
    Not in use.

    This is a different way to find the caldb products -- it looks for
    all the index files and gets the unique set of CAL_CNAM values.

    The problem with this is that the same single fits file can have
    multiple extensions, and each extension may have a different
    CAL_CNAM (eg ACIS CTI file p2_resp files).  This then makes the
    retrieval look more messy since the file will be 'skipped' even in
    a clean caldb tree.  Guess we could filter out certain ones -- but
    not sure it is any more/less complicated than the get_tools method
    below.

    The configuration file named by the CALDBCONFIG environment
    variable is read, with CALDB entries replaced by the value of the
    CALDB environment variable, so both variables must be set.

    Returns
    -------
    A one-element list containing a dictionary which maps each
    cal_cnam value found to None.
    """
    from pycrates import read_file

    cidx = os.environ["CALDBCONFIG"]
    cdir = os.environ["CALDB"]

    # The file is only read, so do not ask for write access: that
    # would fail needlessly for a read-only CALDB installation.
    with open(cidx, "r") as fp:
        lines = fp.readlines()

    idxfiles = []
    for line in lines:
        if not line.startswith('CHANDRA'):
            continue
        if line.split()[1] in ['EPHIN', 'PCAD', 'SIM']:
            continue
        if 'pimms' in line.lower():
            continue

        # Fields 2 to 4 are the CALDB root, the directory, and the
        # name of the index file.
        paths = line.replace(' CALDB ', ' {} '.format(cdir)).split()
        idxfiles.append(os.path.join(paths[2], paths[3], paths[4]))

    retval = {}
    for idx in idxfiles:
        # Quality of 5 is bad, not retrieved
        tab = read_file(idx + "[cal_qual=:4.9]")
        # get list of unique code-names
        _prods = list(set(tab.get_column("cal_cnam").values))
        try:
            prods = [x.decode("ascii") for x in _prods]
        except Exception:
            prods = _prods

        for p in prods:
            retval[p] = None

    return [retval]


def get_tools(infile):
    """
    Identify the CALDB data product codes, grouped by tool.

    The return value is a list of dictionaries, one per tool (or
    library).  Each dictionary is keyed by CALDB product code and the
    value is either None or a dictionary of values to override when
    querying for that product.  The grating-specific groups are only
    included when the GRATING keyword of infile is HETG or LETG.

    This list is not complete.  Duplicates are not included or they
    would be retrieved twice; so while OSIP is needed by mkgarf and
    tg_resolve_events, it only shows up in tg_resolve_events.  Same
    thing with badpixel and gain files.

    A ValueError is raised if infile has no GRATING keyword.

    TODO:  HRC RMF, others?

    """

    from pycrates import read_file

    grating = read_file(infile, mode='r').get_key_value("GRATING")

    # If not set this probably indicates that the input file is
    # not an event file, or has been edited by an external process,
    # so error out.
    #
    if grating is None:
        raise ValueError(f'GRATING keyword is missing in {infile}')

    # The trailing comments give the tool parameter that each product
    # corresponds to.
    acis_process_events = dict.fromkeys([
        'grade',       # gradefile
        'grdimg',      # grade_image_file
        'det_gain',    # gainfile
        'evtsplt',     # threshfile
        'cti',         # ctifile
        't_gain',      # tgainfile
        'subpix',      # subpixfile
    ])

    pixlib = dict.fromkeys([
        "geom",        # instruments
        "aimpts",      # aimpoints
        "tdet",        # tdet
        "sky",         # sky
        "sgeom",       # shell
    ])

    # acis_build_badpix (badpix, its infile) is not listed separately
    # as badpix is already part of ardlib; the ardlib rmf_file and
    # gain_file entries are not covered.
    ardlib = {
        'axeffa': None,                  # effarea_file
        'vignet': None,                  # vignet_file
        'qe': None,                      # qe
        'qeu': None,                     # qeu
        'badpix': {'ccd_id': ""},        # badpix
        'contam': None,                  # acis_contam
        'greff': None,                   # tg_eff
        'lsfparm': None,                 # lsf
    }

    fp_temps = ",".join(str(temp) for temp in range(153, 178, 2))
    mkacisrmf = {
        'sc_matrix': {'fp_temp': fp_temps, 'fidelity': '0-999'},  # infile
    }

    mkarf = {'dead_area': None}          # dafile
    mkwarf = {'fef_pha': None}           # + dead_area
    mkpsfmap = {'reef': None}

    hrc_process_events = dict.fromkeys([
        'GAPLOOKUP',
        'DEGAP',
        'T_GMAP',
        'GMAP',
        'ADC',
        'FPTEST',
        'TAPRINGTEST',
        'EFTEST',
        'SATTEST',
        'AMP_SF_COR',
    ])

    misc = dict.fromkeys([
        'GTI_LIM',     # dmgti (mtl_build_gti)
        'MATRIX',      # HRC RMF files
    ])

    tools = [pixlib, ardlib, acis_process_events, mkwarf, mkarf,
             mkpsfmap, mkacisrmf, hrc_process_events, misc]

    if grating.lower() in ['hetg', 'letg']:
        tg_create_mask = {'wpsf': None}          # input_pos_tab
        tg_resolve_events = {'osip': None}
        tgextract = {'TGMASK2': None}            # inregion_file
        tgextract2 = {'DISP_REG': None}          # region_file
        misc_tg = {
            'lsfparm': {'grattype': 'HEG'},
            'TGPIMASK2': None,
        }

        tools += [tg_create_mask, tg_resolve_events, tgextract,
                  tgextract2, misc_tg]

    return tools


def create_config_files(baseurl, rootdir, outdir):
    """
    Create the config and index files for the partial CALDB directory
    and point the CALDB environment variables at it.

    The return value is the previous setting of the CALDB environment
    variable, or None if it was not set.
    """
    init_output_dir(outdir)

    version = get_version_file(baseurl, rootdir, outdir)
    retrieve_config_files(baseurl, version, rootdir, outdir)
    return setup_caldb_environment(outdir, filter_config(version, outdir))


def setup_caldb_environment(outdir, newconfig):
    """
    Point the CALDB environment variables at outdir.

    CALDBCONFIG is set to the software/tools/caldb.config file below
    outdir; the newconfig argument is not used.  The return value is
    the previous value of CALDB, or None if it was not set.
    """

    previous = os.environ.get("CALDB")

    settings = {
        "CALDB": outdir,
        "CALDBCONFIG": "{}/software/tools/caldb.config".format(outdir),
        "CALDBALIAS": "None",
    }
    for name, value in settings.items():
        os.environ[name] = value

    return previous


def get_background_files(baseurl, rootdir, infile, outdir, clobber, background_files, missing):
    """
    Retrieve the background files selected by the instrument's
    background lookup tool.

    The INSTRUME keyword of infile selects the tool: acis_bkgrnd_lookup
    is run once for ACIS, and hrc_bkgrnd_lookup is run twice for HRC,
    with the "event" and then the "spectrum" argument.  The files
    listed in the tool's outfile parameter are passed on to
    retrieve_caldb_file.

    This is a best-effort step: a failure in a lookup, or in the
    retrieval that follows it, does not stop the tool.  The failure is
    reported at verbose level 2 rather than being dropped silently.

    The background_files argument is not used by this routine.

    Raises
    ------
    RuntimeError
        If INSTRUME is neither ACIS nor HRC.
    """

    import stk as stk
    from pycrates import read_file

    def fetch(lookup, *args):
        "Run one lookup and retrieve its files, reporting any failure"
        try:
            lookup(*args)
            bkgfiles = stk.build(lookup.outfile)
            retrieve_caldb_file(baseurl, rootdir, bkgfiles, outdir, clobber, missing)
        except Exception as exc:
            # Keep going: the background files are an optional extra.
            verb2("Unable to retrieve background files for {}: {}".format(infile, exc))

    tab = read_file(infile)
    inst = tab.get_key_value("INSTRUME")

    if 'ACIS' == inst:
        from ciao_contrib.runtool import acis_bkgrnd_lookup
        acis_bkgrnd_lookup.punlearn()
        fetch(acis_bkgrnd_lookup, infile)

    elif 'HRC' == inst:
        from ciao_contrib.runtool import hrc_bkgrnd_lookup
        hrc_bkgrnd_lookup.punlearn()
        fetch(hrc_bkgrnd_lookup, infile, "event")
        fetch(hrc_bkgrnd_lookup, infile, "spectrum")

    else:
        # Should have failed much sooner than now!
        raise RuntimeError("Unknown instrument")


def get_caldb_files(baseurl, rootdir, infile, outdir, clobber, background_files, missing):
    """
    Get all possible CALDB files needed for this observation.

    A header line for the per-file progress display is written to
    stderr, then the files for each group of products returned by
    get_tools are retrieved, followed by the background files when
    background_files is set.
    """

    verb1("Retrieving CALDB data files")

    header = "    {:<40s}0------------------1\n".format('Filename:')
    sys.stderr.write(header)

    for products in get_tools(infile):
        retrieve_files_by_tool(baseurl, rootdir, products, infile, outdir, clobber, missing)

    if not background_files:
        return

    get_background_files(baseurl, rootdir, infile, outdir, clobber, background_files, missing)


def print_new_setup(oldcaldb, outdir):
    """
    Tell the user how to switch to the new partial CALDB when it is
    not the CALDB they started with.

    Nothing is done when oldcaldb is set and refers to the same
    directory as the current CALDB environment variable.  Otherwise
    the caldbinit.unix (csh) and caldbinit.sh (sh) setup scripts are
    written to the software/tools directory below outdir, and the
    commands needed to source them are displayed.
    """

    if oldcaldb and os.path.abspath(oldcaldb) == os.path.abspath(os.environ["CALDB"]):
        return

    verb0("\nBe sure to source the new setup scripts to")
    verb0("use these CALDB files.")

    # label to display, how to set a variable, name of the script
    shells = [("(t)csh:", "setenv {} {}\n", "caldbinit.unix"),
              ("bash:", "export {}={}\n", "caldbinit.sh")]

    for label, assign, script in shells:
        verb0("")
        verb0(label)

        settings = [("CALDB", os.path.abspath(os.environ["CALDB"])),
                    ("CALDBCONFIG", os.path.abspath(os.environ["CALDBCONFIG"])),
                    ("CALDBALIAS", "none")]
        contents = "".join(assign.format(name, value)
                           for name, value in settings)

        scriptname = outdir + "/software/tools/" + script
        with open(scriptname, "w") as fp:
            fp.write(contents)

        verb0("source {}".format(scriptname))

    verb0("")









def locate_infile(infile):
    """
    Return the name of the file to use for infile.

    Any filter (everything from the first "[") is removed, then the
    following are tried in order:

      - the name itself, if it is a file;
      - the name with ".gz" added, if that is a file;
      - if the name is a directory, the first match for "*evt*" in
        the directory or its repro, primary, or secondary
        sub-directories (searched in that order).

    An IOError is raised when nothing is found.
    """

    import glob as glob

    infile, _, _ = infile.partition("[")  # strip off any filters

    for candidate in (infile, infile + ".gz"):
        if os.path.isfile(candidate):
            return candidate

    if os.path.isdir(infile):
        for subdir in ['', '/repro', '/primary', '/secondary']:
            matches = glob.glob(infile + subdir + "/*evt*")
            if not matches:
                continue

            if len(matches) > 1:
                verb1("Multiple event file found, using {}".format(matches[0]))
            else:
                verb2("Using event file {}".format(matches[0]))

            return matches[0]

    raise IOError("Cannot locate event file in '{}'".format(infile))


def setup_check_missing(outdir):
    """
    Point the CALDB and CALDBCONFIG environment variables at an
    existing output directory, for use in missing mode.

    Both outdir and its software/tools/caldb.config file must already
    exist, otherwise a ValueError is raised.  The return value is the
    new value of CALDB, which is the absolute path to outdir.
    """
    config = "/software/tools/caldb.config"

    if not os.path.exists(outdir):
        raise ValueError("ERROR: The output directory must already exist in missing mode.")

    if not os.path.exists(outdir + config):
        raise ValueError("ERROR: The CALDB configuration file must already exist in missing mode.")

    caldb = os.path.abspath(outdir)
    os.environ["CALDB"] = caldb
    os.environ["CALDBCONFIG"] = caldb + config
    return caldb


def is_par_set(value):
    """Report whether a parameter value is 'yes'.

    The comparison is exact, so any other value, including 'YES',
    counts as not set.
    """
    answer = value == 'yes'
    return answer


class CaldbIOError(IOError):
    """An input file could not be used; the message says why."""


def validate_infile(infile):
    """Some basic checks on infile

    The file must be readable and have a TELESCOP keyword which is
    set to CHANDRA.  Nothing is returned when the checks pass.

    Raises
    ------
    CaldbIOError
        If any check fails.  The message is a complete warning that
        can be shown to the user as is.
    """
    from pycrates import read_file

    try:
        block = read_file(infile)
    except Exception as exc:
        # Only trap real errors: a bare except would also catch
        # KeyboardInterrupt and SystemExit.  The cause is chained so
        # that it is not lost.
        raise CaldbIOError("WARNING: Cannot open file '{}'. Skipping it.".format(infile)) from exc

    telescope = block.get_key_value("TELESCOP")
    if telescope is None:
        raise CaldbIOError("WARNING: Cannot find TELESCOP keyword in file '{}'.  Skipping it.".format(infile))

    if telescope != "CHANDRA":
        raise CaldbIOError("WARNING: '{}' does not appear to be a Chandra dataset. Skipping it.".format(infile))
        

#
# Main Routine
#
@lw.handle_ciao_errors(toolname, __revision__)
def main():
    """
    Run the tool: read the parameters, set up the output CALDB
    directory, and then retrieve (or, in missing mode, check for)
    the CALDB files needed by each input file.

    Input files that cannot be located or that fail validation are
    skipped with a warning; a RuntimeError is raised if none of them
    could be used.
    """
    from ciao_contrib.param_soaker import get_params

    # Load parameters
    pars = get_params(toolname, "rw", sys.argv,
                      verbose={"set": lw.set_verbosity, "cmd": verb1})

    infiles = pars["infile"]
    outdir = pars["outdir"]
    missing = is_par_set(pars["missing"])
    background_files = is_par_set(pars["background"])
    clobber = is_par_set(pars["clobber"])

    if not missing:
        baseurl = CDA_SERVER
        rootdir = ""
        oldcaldb = create_config_files(baseurl, rootdir, outdir)
    else:
        # Nothing is downloaded, so there is no server to talk to.
        baseurl = None
        rootdir = None
        oldcaldb = setup_check_missing(outdir)
        get_version_file(baseurl, rootdir, outdir)

    nprocessed = 0
    import stk as stk
    for name in stk.build(infiles):
        try:
            name = locate_infile(name)
            verb1("Processing infile={}".format(name))
        except Exception:
            verb0("WARNING: Unable to locate evt2 file in '{}'.  Skipping it.".format(name))
            continue

        try:
            validate_infile(name)
        except CaldbIOError as failed:
            print(failed)
            continue

        get_caldb_files(baseurl, rootdir, name, outdir, clobber, background_files, missing)
        nprocessed += 1

    if nprocessed == 0:
        raise RuntimeError("Could not find any valid input files")

    print_new_setup(oldcaldb, outdir)

if __name__ == "__main__":
    # Exit with status 1, after reporting the error to stderr, if
    # main fails, and with status 0 otherwise.
    try:
        main()
    except Exception as E:
        report = "\n# {} ({}): ERROR {}\n".format(toolname, __revision__, str(E))
        print(report, file=sys.stderr)
        sys.exit(1)
    else:
        sys.exit(0)
