Source code for rubix.galaxy.input_handler.api.illustris_api

import os
import requests
import h5py
from typing import List, Union
from rubix import config



[docs]
class IllustrisAPI:
    """
    This class is used to load data from the Illustris API.

    It loads both subhalo data and particle data from a given simulation, snapshot, and subhalo ID.

    Check the source for the API documentation for more information: https://www.tng-project.org/data/docs/api/
    """

    # Root of the web API; simulation and snapshot are appended per instance.
    URL = "http://www.tng-project.org/api/"
    # Mapping of particle type -> fields to request, read from the package config.
    DEFAULT_FIELDS = config["IllustrisAPI"]["DEFAULT_FIELDS"]

    def __init__(
        self,
        api_key,
        particle_type: Union[str, List[str], None] = None,
        simulation="TNG50-1",
        snapshot=99,
        save_data_path="./api_data",
        logger=None,
    ):
        """Illustris API class.

        Class to load data from the Illustris API.

        Parameters
        ----------
        api_key : str
            API key for the Illustris API.
        particle_type : str or list of str, optional
            Particle type(s) to load. A single string is wrapped into a
            one-element list. Default is ["stars", "gas"].
        simulation : str
            Simulation to load from. Default is "TNG50-1".
        snapshot : int
            Snapshot to load from. Default is 99.
        save_data_path : str
            Directory in which downloaded files are stored. Default is "./api_data".
        logger : logging.Logger, optional
            Logger to use. If None, ``logging.getLogger(__name__)`` is used.

        Raises
        ------
        ValueError
            If ``api_key`` is None.
        """

        if api_key is None:
            raise ValueError("Please set the API key.")

        # A None sentinel replaces the former list literal default, so no list
        # object is shared between instances created with the default.
        if particle_type is None:
            particle_type = ["stars", "gas"]
        elif isinstance(particle_type, str):
            # Iterating a bare string later would yield single characters.
            particle_type = [particle_type]

        self.headers = {"api-key": api_key}
        self.particle_type = particle_type
        self.snapshot = snapshot
        self.simulation = simulation
        self.baseURL = f"{self.URL}{self.simulation}/snapshots/{self.snapshot}"
        self.DATAPATH = save_data_path
        if logger is None:
            import logging

            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

    def _get(self, path, params=None, name=None):
        """Get data from the Illustris API.

        JSON responses are parsed and returned. Any other response is written
        to ``<DATAPATH>/<filename>.hdf5``.

        Parameters
        ----------
        path : str
            Path to load from.
        params : dict
            Parameters to pass to the API.
        name : str
            Name to save the file as. If None, the name will be taken from the content-disposition header.
        Returns
        -------
        dict or str
            The parsed JSON body for JSON responses, otherwise the name of the
            saved file without the ".hdf5" extension.

        Raises
        ------
        ValueError
            If the request returns an HTTP error status, or if a non-JSON
            response carries no usable content-disposition header.
        """

        os.makedirs(self.DATAPATH, exist_ok=True)
        self.logger.debug(
            f"Performing GET request from {path}, with parameters {params}"
        )
        try:
            r = requests.get(path, params=params, headers=self.headers)
            # raise exception if response code is not HTTP SUCCESS (200)
            r.raise_for_status()
        except requests.exceptions.HTTPError as err:
            # Keep the ValueError contract but preserve the original traceback.
            raise ValueError(err) from err

        # Compare only the media type: the header may be missing or may carry
        # parameters such as "; charset=utf-8".
        content_type = r.headers.get("content-type", "")
        if content_type.split(";")[0].strip().lower() == "application/json":
            return r.json()  # parse json responses automatically
        if "content-disposition" not in r.headers:
            raise ValueError("No content-disposition header found. Cannot save file.")

        if name is None:
            _, found, value = r.headers["content-disposition"].partition("filename=")
            if not found:
                raise ValueError(
                    "No filename found in content-disposition header. Cannot save file."
                )
            # Drop trailing header parameters, surrounding quotes and any
            # directory component sent by the server.
            filename = os.path.basename(value.split(";")[0].strip().strip("\"'"))
        else:
            filename = name

        # The extension is appended below; strip an existing one so the file is
        # not saved as "<name>.hdf5.hdf5", which _load_hdf5 would never find.
        if filename.endswith(".hdf5"):
            filename = filename[: -len(".hdf5")]

        file_path = os.path.join(self.DATAPATH, f"{filename}.hdf5")
        with open(file_path, "wb") as f:
            f.write(r.content)
        return filename  # return the filename string


[docs]
    def get_subhalo(self, id):
        """
        Request the data of a single subhalo from the Illustris API.

        The request goes to the ``subhalos/<id>`` endpoint below the
        simulation/snapshot URL of this instance.

        Args:
            id (int): Subhalo ID to load.

        Returns:
            Whatever ``self._get`` returns for the subhalo endpoint.

        Raises:
            ValueError: If ``id`` is not an integer.
        """
        if isinstance(id, int):
            subhalo_url = f"{self.baseURL}/subhalos/{id}"
            return self._get(subhalo_url)
        raise ValueError("ID should be an integer.")


    def _load_hdf5(self, filename):
        """Read an HDF5 file from the data directory into nested dictionaries.

        Parameters
        ----------
        filename : str
            Name of the file inside ``self.DATAPATH``, with or without the
            ".hdf5" extension.
        Returns
        -------
        dict
            One entry per top-level key of the file (except "Header"), each
            mapping the keys found below it to the values read from the file.

        Raises
        ------
        ValueError
            If the file does not exist.
        """
        # Accept the name with or without its extension.
        stem = filename[:-5] if filename.endswith(".hdf5") else filename
        file_path = os.path.join(self.DATAPATH, f"{stem}.hdf5")
        if not os.path.exists(file_path):
            raise ValueError(f"File {file_path} does not exist.")

        with h5py.File(file_path, "r") as h5file:
            # "[()]" reads each entry completely while the file is still open.
            return {
                group_name: {
                    field_name: h5file[group_name][field_name][()]  # type: ignore
                    for field_name in h5file[group_name].keys()  # type: ignore
                }
                for group_name in h5file.keys()
                if group_name != "Header"
            }


[docs]
    def get_particle_data(self, id: int, particle_type, fields: Union[str, List[str]]):
        """
        Download and load a particle cutout for one subhalo.

        The cutout is stored under the fixed name "cutout" in the data
        directory and then read back.

        Args:
            id (int): Subhalo ID to load.
            particle_type (str): One of "stars", "gas" or "dm".
            fields (str or list): Fields to load. If a string, the fields should be comma-separated.

        Returns:
            Dictionary containing the particle data in the given fields, as
            returned by ``self._load_hdf5``.

        Raises:
            ValueError: If ``fields`` is an empty string, ``id`` is not an
                integer, or ``particle_type`` is not supported.
        """
        # Normalise the requested fields to a list of names.
        field_names = fields
        if isinstance(field_names, str):
            if not field_names:
                raise ValueError("Fields should not be empty.")
            field_names = [field_names]

        if not isinstance(id, int):
            raise ValueError("ID should be an integer.")
        query_value = ",".join(field_names)

        supported_types = ("stars", "gas", "dm")
        if particle_type not in supported_types:
            raise ValueError("Particle type should be 'stars', 'gas', or 'dm'.")

        cutout_name = "cutout"
        self._get(
            f"{self.baseURL}/subhalos/{id}/cutout.hdf5?{particle_type}={query_value}",
            name=cutout_name,
        )
        return self._load_hdf5(cutout_name)



[docs]
    def load_galaxy(self, id: int, overwrite: bool = False, reuse: bool = False):
        """
        Download Galaxy Data from the Illustris API.

        This function downloads both the subhalo data and the particle data for the particle types
        configured on this instance, for the fields specified in DEFAULT_FIELDS.
        It saves the data in a HDF5 file named ``galaxy-id-<id>.hdf5`` in the data directory.

        Args:
            id (int): The ID of the subhalo to download.
            overwrite (bool): Whether to overwrite the file if it already exists. Default is False.
            reuse (bool): Whether to reuse the file if it already exists. Default is False.
                Ignored when ``overwrite`` is True.

        Returns:
            The galaxy data as dictionary.

        Raises:
            ValueError: If ``id`` is not an integer, if the file already exists and neither
                ``overwrite`` nor ``reuse`` is set, or if a configured particle type has no
                entry in DEFAULT_FIELDS.

        Example
        --------
        >>> illustris_api = IllustrisAPI(api_key, simulation="TNG50-1", snapshot=99, particle_type=["stars", "gas"])
        >>> data = illustris_api.load_galaxy(id=0, reuse=True)
        """

        # Validate before touching disk or network. Otherwise the check only
        # happens in get_subhalo, after the cutout was written, and a rejected
        # ID leaves a file without subhalo data that reuse=True would load.
        if not isinstance(id, int):
            raise ValueError("ID should be an integer.")

        filename = f"galaxy-id-{id}"

        # Decide what to do with a file left over from an earlier download.
        if os.path.exists(os.path.join(self.DATAPATH, f"{filename}.hdf5")):
            if overwrite:
                self.logger.info(
                    f"Found existing file galaxy-id-{id}.hdf5, but overwrite is set to True. Overwriting the file."
                )
            elif reuse:
                self.logger.info(
                    f"Reusing existing file galaxy-id-{id}.hdf5. If you want to download the data again, set reuse=False."
                )
                return self._load_hdf5(filename=filename)
            else:
                raise ValueError(
                    f"File with name galaxy-id-{id}.hdf5 already exists. Please remove it before downloading the data, or set overwrite=True, or reuse=True to load the data."
                )

        self.logger.debug(f"Loading galaxy with ID {id}")

        # One "<particle type>=<comma-separated fields>" query entry per type.
        query_parts = []
        for particle_type in self.particle_type:
            if particle_type not in self.DEFAULT_FIELDS:
                raise ValueError(
                    f"Got unsupported particle type. Supported types are {self.DEFAULT_FIELDS.keys()} and we got {particle_type}."
                )

            fields = self.DEFAULT_FIELDS[particle_type]
            # Fields may be configured as a list or as a ready-made string.
            if isinstance(fields, list):
                fields = ",".join(fields)
            query_parts.append(f"{particle_type}={fields}")

        url = f"{self.baseURL}/subhalos/{id}/cutout.hdf5?" + "&".join(query_parts)

        self._get(url, name=filename)
        subhalo_data = self.get_subhalo(id)
        self._append_subhalo_data(subhalo_data, id)
        return self._load_hdf5(filename=filename)


    def _append_subhalo_data(self, subhalo_data, id):
        """Add the subhalo data to the galaxy's HDF5 file.

        Opens ``galaxy-id-<id>.hdf5`` in the data directory in append mode,
        creates a "SubhaloData" group and writes one dataset per entry of
        ``subhalo_data``. Entries whose value is a dictionary are skipped.

        Parameters
        ----------
        subhalo_data : dict
            Subhalo data to store.
        id : int
            Subhalo ID; selects the target file.
        """
        self.logger.debug(f"Appending subhalo data for subhalo {id}")
        target_path = os.path.join(self.DATAPATH, f"galaxy-id-{id}.hdf5")
        with h5py.File(target_path, "a") as h5file:
            h5file.create_group("SubhaloData")
            for name, value in subhalo_data.items():
                # Nested dictionaries are not written to the file.
                if not isinstance(value, dict):
                    h5file["SubhaloData"].create_dataset(name, data=value)  # type: ignore

    def __str__(self) -> str:
        """Return a one-line summary of the simulation, snapshot and particle types."""
        summary = (
            f"IllustrisAPI: Simulation {self.simulation}, "
            f"Snapshot {self.snapshot}, "
            f"Particle Type {self.particle_type}"
        )
        return summary