Source code for rubix.galaxy.input_handler.api.illustris_api

import os
import requests
import h5py
from typing import List, Union
from rubix import config


class IllustrisAPI:
    """Load subhalo and particle data from the IllustrisTNG web API.

    The class downloads the subhalo metadata (JSON) and particle cutouts
    (HDF5) for a given simulation, snapshot and subhalo ID, and stores the
    downloaded files below ``save_data_path``.

    See the API documentation for more information:
    https://www.tng-project.org/data/docs/api/
    """

    URL = "http://www.tng-project.org/api/"
    DEFAULT_FIELDS = config["IllustrisAPI"]["DEFAULT_FIELDS"]

    def __init__(
        self,
        api_key,
        particle_type: Union[str, List[str], None] = None,
        simulation="TNG50-1",
        snapshot=99,
        save_data_path="./api_data",
        logger=None,
    ):
        """Create a client for the Illustris API.

        Parameters
        ----------
        api_key : str
            API key for the Illustris API.
        particle_type : str or list of str, optional
            Particle type(s) to load. A single string is wrapped into a
            one-element list. Default is ``["stars", "gas"]``.
        simulation : str
            Simulation to load from. Default is "TNG50-1".
        snapshot : int
            Snapshot to load from. Default is 99.
        save_data_path : str
            Directory in which downloaded files are stored.
            Default is "./api_data".
        logger : logging.Logger, optional
            Logger to use. If None, ``logging.getLogger(__name__)`` is used.

        Raises
        ------
        ValueError
            If ``api_key`` is None.
        """
        if api_key is None:
            raise ValueError("Please set the API key.")

        # A fresh list per instance: a list literal as the default argument
        # would be shared between all instances created without this argument.
        if particle_type is None:
            particle_type = ["stars", "gas"]
        elif isinstance(particle_type, str):
            particle_type = [particle_type]
        else:
            particle_type = list(particle_type)

        self.headers = {"api-key": api_key}
        self.particle_type = particle_type
        self.snapshot = snapshot
        self.simulation = simulation
        self.baseURL = f"{self.URL}{self.simulation}/snapshots/{self.snapshot}"
        self.DATAPATH = save_data_path

        if logger is None:
            import logging

            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

    @staticmethod
    def _strip_hdf5_suffix(filename):
        """Return ``filename`` without a trailing ``.hdf5`` extension."""
        if filename.endswith(".hdf5"):
            return filename[: -len(".hdf5")]
        return filename

    @staticmethod
    def _filename_from_header(content_disposition):
        """Extract the bare file name from a content-disposition header value."""
        _, found, raw = content_disposition.partition("filename=")
        # Drop trailing header parameters and the optional surrounding quotes.
        raw = raw.split(";")[0].strip().strip('"')
        # The name comes from the server; keep only its last path component so
        # the file is always written inside the data directory.
        filename = os.path.basename(raw)
        if not found or not filename:
            raise ValueError(
                "Could not determine a filename from the content-disposition header."
            )
        return filename

    def _get(self, path, params=None, name=None):
        """Get data from the Illustris API.

        JSON responses are parsed and returned. Any other response is written
        to ``<DATAPATH>/<name>.hdf5``.

        Parameters
        ----------
        path : str
            URL to load from.
        params : dict
            Parameters to pass to the API.
        name : str
            Name to save the file as. If None, the name will be taken from the
            content-disposition header. A trailing ".hdf5" is ignored.

        Returns
        -------
        dict or str
            The parsed JSON for JSON responses, otherwise the name of the
            saved file without the ".hdf5" extension (it can be passed
            directly to ``_load_hdf5``).

        Raises
        ------
        ValueError
            If the server answers with an HTTP error status, if a non-JSON
            response has no content-disposition header, or if no filename can
            be derived from that header.
        """
        os.makedirs(self.DATAPATH, exist_ok=True)
        self.logger.debug(
            f"Performing GET request from {path}, with parameters {params}"
        )
        try:
            r = requests.get(path, params=params, headers=self.headers)
            # raise exception if response code is not HTTP SUCCESS (200)
            r.raise_for_status()
        except requests.exceptions.HTTPError as err:
            # Keep the original HTTP error attached as the cause.
            raise ValueError(err) from err

        # Compare the media type only: the header may be missing or may carry
        # parameters such as "; charset=utf-8".
        media_type = r.headers.get("content-type", "").split(";")[0].strip().lower()
        if media_type == "application/json":
            return r.json()  # parse json responses automatically

        if "content-disposition" not in r.headers:
            raise ValueError("No content-disposition header found. Cannot save file.")

        if name is None:
            filename = self._filename_from_header(r.headers["content-disposition"])
        else:
            filename = name
        # Server-provided names usually already end in ".hdf5"; strip it so the
        # extension is not duplicated and _load_hdf5 finds the file again.
        filename = self._strip_hdf5_suffix(filename)

        file_path = os.path.join(self.DATAPATH, f"{filename}.hdf5")
        with open(file_path, "wb") as f:
            f.write(r.content)
        return filename  # return the filename string

    def get_subhalo(self, id):
        """Get subhalo data from the Illustris API.

        Returns the subhalo data for the given subhalo ID.

        Args:
            id (int): Subhalo ID to load.

        Returns:
            The subhalo data as a dictionary (r).

        Raises:
            ValueError: If ``id`` is not an integer or the request fails.
        """
        if not isinstance(id, int):
            raise ValueError("ID should be an integer.")
        return self._get(f"{self.baseURL}/subhalos/{id}")

    def _load_hdf5(self, filename):
        """Load HDF5 file.

        Loads the HDF5 file with the given filename from the data directory.
        The top-level "Header" entry is skipped.

        Parameters
        ----------
        filename : str
            Filename to load, with or without the ".hdf5" extension.

        Returns
        -------
        returndict : dict
            Dictionary mapping each top-level group name to a dictionary of
            its datasets, read fully into memory.

        Raises
        ------
        ValueError
            If the file does not exist.
        """
        filename = self._strip_hdf5_suffix(filename)
        file_path = os.path.join(self.DATAPATH, f"{filename}.hdf5")
        if not os.path.exists(file_path):
            raise ValueError(f"File {file_path} does not exist.")

        returndict = {}
        with h5py.File(file_path, "r") as f:
            for group_name in f.keys():
                if group_name == "Header":
                    continue
                group = f[group_name]
                # "[()]" reads the whole dataset while the file is still open.
                returndict[group_name] = {
                    field_name: group[field_name][()]  # type: ignore
                    for field_name in group.keys()  # type: ignore
                }
        return returndict

    def get_particle_data(self, id: int, particle_type, fields: Union[str, List[str]]):
        """Get particle data from the Illustris API.

        Returns the particle data for the given subhalo ID. The cutout is
        always saved as ``cutout.hdf5`` in the data directory, so every call
        replaces the file written by the previous one.

        Args:
            id (int): Subhalo ID to load.
            particle_type (str): One of "stars", "gas" or "dm".
            fields (str or list): Fields to load. If a string, the fields should be comma-separated.

        Returns:
            Dictionary containing the particle data in the given fields (data).

        Raises:
            ValueError: If ``fields`` is empty, ``id`` is not an integer,
                ``particle_type`` is not supported, or the request fails.
        """
        # An empty string or empty list would produce a query without fields.
        if not fields:
            raise ValueError("Fields should not be empty.")
        # Get fields in the right format
        if isinstance(fields, str):
            fields = [fields]
        if not isinstance(id, int):
            raise ValueError("ID should be an integer.")
        fields = ",".join(fields)

        if particle_type not in ["stars", "gas", "dm"]:
            raise ValueError("Particle type should be 'stars', 'gas', or 'dm'.")

        url = f"{self.baseURL}/subhalos/{id}/cutout.hdf5?{particle_type}={fields}"
        self._get(url, name="cutout")
        return self._load_hdf5("cutout")

    def load_galaxy(self, id: int, overwrite: bool = False, reuse: bool = False):
        """Download Galaxy Data from the Illustris API.

        This function downloads both the subhalo data and the particle data
        for the particle types of this instance, for the fields specified in
        DEFAULT_FIELDS. It saves the data in the HDF5 file
        ``galaxy-id-<id>.hdf5`` in the data directory.

        Args:
            id (int): The ID of the subhalo to download.
            overwrite (bool): Whether to overwrite the file if it already exists. Default is False.
                Takes precedence over ``reuse``.
            reuse (bool): Whether to reuse the file if it already exists. Default is False.

        Returns:
            The galaxy data as dictionary.

        Raises:
            ValueError: If the file already exists and neither ``overwrite``
                nor ``reuse`` is set, if ``id`` is not an integer, if a
                particle type is not in DEFAULT_FIELDS, or if a request fails.

        Example
        --------
        >>> illustris_api = IllustrisAPI(api_key, simulation="TNG50-1", snapshot=99, particle_type=["stars", "gas"])
        >>> data = illustris_api.load_galaxy(id=0, reuse=True)
        """
        filename = f"galaxy-id-{id}"

        # Check if there is already a file with the same name
        if os.path.exists(os.path.join(self.DATAPATH, f"{filename}.hdf5")):
            if overwrite:
                self.logger.info(
                    f"Found existing file galaxy-id-{id}.hdf5, but overwrite is set to True. Overwriting the file."
                )
            elif reuse:
                self.logger.info(
                    f"Reusing existing file galaxy-id-{id}.hdf5. If you want to download the data again, set reuse=False."
                )
                return self._load_hdf5(filename=filename)
            else:
                raise ValueError(
                    f"File with name galaxy-id-{id}.hdf5 already exists. Please remove it before downloading the data, or set overwrite=True, or reuse=True to load the data."
                )

        # Validate before downloading: get_subhalo would reject the ID only
        # after the cutout had already been written to disk.
        if not isinstance(id, int):
            raise ValueError("ID should be an integer.")

        self.logger.debug(f"Loading galaxy with ID {id}")

        # Build one "<particle_type>=<field,field,...>" query entry per type.
        query = []
        for particle_type in self.particle_type:
            if particle_type not in self.DEFAULT_FIELDS:
                raise ValueError(
                    f"Got unsupported particle type. Supported types are {self.DEFAULT_FIELDS.keys()} and we got {particle_type}."
                )
            fields = self.DEFAULT_FIELDS[particle_type]
            if isinstance(fields, list):
                fields = ",".join(fields)
            query.append(f"{particle_type}={fields}")

        url = f"{self.baseURL}/subhalos/{id}/cutout.hdf5?" + "&".join(query)
        self._get(url, name=filename)

        subhalo_data = self.get_subhalo(id)
        self._append_subhalo_data(subhalo_data, id)
        return self._load_hdf5(filename=filename)

    def _append_subhalo_data(self, subhalo_data, id):
        """Append subhalo data to the galaxy HDF5 file.

        Creates the group "SubhaloData" in ``galaxy-id-<id>.hdf5`` and stores
        every entry of ``subhalo_data`` in it as a dataset. Nested
        dictionaries and None values are skipped.

        Parameters
        ----------
        subhalo_data : dict
            Subhalo data as returned by ``get_subhalo``.
        id : int
            ID of the subhalo; selects the file to append to.
        """
        self.logger.debug(f"Appending subhalo data for subhalo {id}")
        file_path = os.path.join(self.DATAPATH, f"galaxy-id-{id}.hdf5")
        with h5py.File(file_path, "a") as f:
            group = f.create_group("SubhaloData")
            for key, value in subhalo_data.items():
                # Nested dictionaries cannot be stored as a single dataset, and
                # h5py cannot create a dataset from None (JSON null).
                if value is None or isinstance(value, dict):
                    continue
                group.create_dataset(key, data=value)  # type: ignore

    def __str__(self) -> str:
        return f"IllustrisAPI: Simulation {self.simulation}, Snapshot {self.snapshot}, Particle Type {self.particle_type}"