Source code for dc2_sampler

import os
import numpy as np
import pandas as pd


[docs]class DC2Sampler: def __init__(self, seed=123, bandpasses=list('ugrizy'), idx=None): self.seed = seed self.rng = np.random.default_rng(self.seed) self.bandpasses = bandpasses self.zp = -2.5*np.log10(3631) self.to_Jy = 1.e-9 # nanoJy to Jy self.idx = idx import magnificat.input_data as in_data self.in_data = in_data.__path__[0] self.flux_cols = [f'flux_{bp}' for bp in self.bandpasses] self.mag_cols = [f'mag_{bp}' for bp in self.bandpasses] self._load_catalog() self.count = 0
[docs] def _load_param_cat(self): df = pd.read_csv(os.path.join(self.in_data, 'joined_0.csv')) cols = ['blackHoleAccretionRate', 'blackHoleEddingtonRatio', 'redshift_x', 'galaxy_id', 'blackHoleMass', 'M_i', 'agn_sf_u', 'agn_sf_g', 'agn_sf_r', 'agn_sf_i', 'agn_sf_z', 'agn_sf_y', 'agn_tau_u', 'agn_tau_g', 'agn_tau_r', 'agn_tau_i', 'agn_tau_z', 'agn_tau_y'] df = df[cols].copy() rename_dict = {'blackHoleAccretionRate': 'accretion_rate', 'blackHoleEddingtonRatio': 'edd_ratio', 'redshift_x': 'redshift', 'blackHoleMass': 'BH_mass'} for bp in list('ugrizy'): # Log DRW params, since logged is closer to normal df[f'log_sf_inf_{bp}'] = np.log10(df[f'agn_sf_{bp}'].values) df[f'log_rf_tau_{bp}'] = np.log10(df[f'agn_tau_{bp}'].values) df.drop([f'agn_sf_{bp}', f'agn_tau_{bp}'], columns=True, inplace=True) df.rename(rename_dict, inplace=True, axis=1) return df
[docs] def _load_truth_cat(self): truth_cols = ['host_galaxy'] + self.flux_cols truth = pd.read_csv(os.path.join(self.in_data, 'dc2_truth.csv'), index_col=None, usecols=truth_cols ) # Convert flux to mag flux = truth[self.flux_cols].values truth[self.mag_cols] = self.flux_to_mag(flux*self.to_Jy) return truth[['host_galaxy'] + self.mag_cols]
[docs] def _load_catalog(self): params = self._load_param_cat() truth = self._load_truth_cat() cat = params.merge(truth, how='inner', suffixes=['', '_duplicate'], left_on='galaxy_id', right_on='host_galaxy', validate='one_to_one') master_cols = ['galaxy_id', 'redshift', 'BH_mass', 'M_i'] # 'accretion_rate', 'edd_ratio' master_cols += self.mag_cols master_cols += [f'SF_inf_{bp}' for bp in self.bandpasses] master_cols += [f'tau_{bp}' for bp in self.bandpasses] cat.reset_index(drop=True, inplace=True) self.cat = cat[master_cols].iloc[self.idx].copy().reset_index(drop=True) self.cat['BH_mass'] = np.log10(self.cat['BH_mass'].values) self.size = self.cat.shape[0]
[docs] def sample(self): # i = self.rng.integers(low=0, high=self.size) sample = self.cat.iloc[self.count] self.count += 1 return sample
[docs] def flux_to_mag(self, flux): mag = -2.5*np.log10(flux) - self.zp return mag
if __name__ == '__main__':
[docs] sampler = DC2Sampler(123, ['i'], idx=[5, 6, 8])
params = sampler.sample() print(params) params = sampler.sample() print(params) assert params['redshift'].dtype == np.float64