Source code for macauff.photometric_likelihood

# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module provides the framework for the creation of the photometric likelihoods
used in the cross-matching of the two catalogues.
'''

import datetime
import sys
import warnings

import numpy as np
from scipy.optimize import minimize

# pylint: disable=import-error,no-name-in-module
from macauff.misc_functions import coord_inside_convex_hull
from macauff.misc_functions_fortran import misc_functions_fortran as mff
from macauff.photometric_likelihood_fortran import photometric_likelihood_fortran as plf

# pylint: enable=import-error,no-name-in-module

__all__ = ['compute_photometric_likelihoods']


# pylint: disable-next=too-many-locals
[docs] def compute_photometric_likelihoods(cm): ''' Derives the photometric likelihoods and priors for use in the catalogue cross-match process. Parameters ---------- cm : Class The cross-match wrapper, containing all of the necessary metadata to perform the cross-match and determine photometric likelihoods. ''' t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Creating c(m, m) and f(m)...") t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Distributing sources into sky slices...") sys.stdout.flush() a_astro = cm.a_astro b_astro = cm.b_astro a_photo = cm.a_photo b_photo = cm.b_photo auf_cdf_a = cm.auf_cdf_a auf_cdf_b = cm.auf_cdf_b a_sky_inds = mff.find_nearest_point(a_astro[:, 0], a_astro[:, 1], cm.cf_region_points[:, 0], cm.cf_region_points[:, 1]) b_sky_inds = mff.find_nearest_point(b_astro[:, 0], b_astro[:, 1], cm.cf_region_points[:, 0], cm.cf_region_points[:, 1]) t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Making bins...") sys.stdout.flush() abinlengths, abinsarray, longabinlen = create_magnitude_bins(cm.cf_region_points, cm.a_filt_names, a_photo, a_sky_inds) bbinlengths, bbinsarray, longbbinlen = create_magnitude_bins(cm.cf_region_points, cm.b_filt_names, b_photo, b_sky_inds) t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating PDFs...") sys.stdout.flush() c_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') fa_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') fb_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') c_array = np.zeros(dtype=float, shape=(longbbinlen-1, longabinlen-1, len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') fa_array = np.zeros(dtype=float, shape=(longabinlen-1, len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') fb_array = np.zeros(dtype=float, shape=(longbbinlen-1, len(cm.b_filt_names), len(cm.a_filt_names), len(cm.cf_region_points)), order='F') a_in_joint_area = np.array([ coord_inside_convex_hull([a_astro[p, 0] + cm.a_hull_x_shift, a_astro[p, 1]], cm.a_hull_points) & coord_inside_convex_hull([a_astro[p, 0] + cm.b_hull_x_shift, a_astro[p, 1]], cm.b_hull_points) for p in range(len(a_astro))]) b_in_joint_area = np.array([ coord_inside_convex_hull([b_astro[p, 0] + cm.a_hull_x_shift, b_astro[p, 1]], cm.a_hull_points) & coord_inside_convex_hull([b_astro[p, 0] + cm.b_hull_x_shift, b_astro[p, 1]], cm.b_hull_points) for p in range(len(b_astro))]) for m in range(0, len(cm.cf_region_points)): area = cm.cf_areas[m] a_sky_cut = (a_sky_inds == m) & a_in_joint_area a_photo_cut = a_photo[a_sky_cut] if cm.include_phot_like or cm.use_phot_priors: a_auf_cdf = auf_cdf_a[:, a_sky_cut] a_b_area_cut, a_f_area_cut, a_inds_cut, a_size_cut = ( cm.ab_area[a_sky_cut], cm.af_area[a_sky_cut], cm.ainds[:, a_sky_cut], cm.asize[a_sky_cut]) b_sky_cut = (b_sky_inds == m) & b_in_joint_area b_photo_cut = b_photo[b_sky_cut] if cm.include_phot_like or cm.use_phot_priors: b_auf_cdf = auf_cdf_b[:, b_sky_cut] b_b_area_cut, b_f_area_cut, b_inds_cut, b_size_cut = ( cm.bb_area[b_sky_cut], cm.bf_area[b_sky_cut], cm.binds[:, b_sky_cut], cm.bsize[b_sky_cut]) for i in range(0, len(cm.a_filt_names)): # pylint: disable=consider-using-enumerate if not cm.include_phot_like and not cm.use_phot_priors: a_num_photo_cut = np.sum(~np.isnan(a_photo_cut[:, i])) na = a_num_photo_cut / area else: a_bins = abinsarray[:abinlengths[i, m], i, m] a_mag = a_photo[:, i] a_mag_cut = a_photo_cut[:, i] a_flags = ~np.isnan(a_mag) a_flags_cut = a_flags[a_sky_cut] for j in range(0, len(cm.b_filt_names)): # pylint: disable=consider-using-enumerate if not cm.include_phot_like and not cm.use_phot_priors: b_num_photo_cut = np.sum(~np.isnan(b_photo_cut[:, j])) nb = b_num_photo_cut / area # Without using photometric-based priors, all we can # do is set the prior on one catalogue to 0.5 -- that # is, equal chance of match or non-match; for this we # use the less dense of the two catalogues as our # "one-sided" match. Then, accordingly, we update the # "field" source density of the more dense catalogue # with its corresponding density, based on the input # density and the counterpart density calculated. c_prior = min(na, nb) / 2 fa_prior = na - c_prior fb_prior = nb - c_prior # To fake no photometric likelihoods, simply set all # values to one, to cancel in the ratio later. c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10 else: b_bins = bbinsarray[:bbinlengths[j, m], j, m] b_mag = b_photo[:, j] b_mag_cut = b_photo_cut[:, j] b_flags = ~np.isnan(b_mag) b_flags_cut = b_flags[b_sky_cut] # Before calling c&f creation routines, check if either # catalogue's band is significantly under-populated # relative to the main catalogue. The obvious case is the # verification of "empty" bands where surveys are made of # portions of sky where bands were not used and others, # but more generally cases where a band detects very few # objects from the overall survey will be caught here too. if (np.sum(b_flags_cut) / len(b_mag_cut) < 0.001 or np.sum(a_flags_cut) / len(a_mag_cut) < 0.001): warnings.warn(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Catalogue a's " f"{cm.a_filt_names[i]} and b's {cm.b_filt_names[j]} have poor overlap, " "skipping c and f generation and falling back to default values.") a_num_photo_cut = np.sum(~np.isnan(a_photo_cut[:, i])) # Pad densities against a literally empty band, just in # case. It should cancel in the c/f ratio of the smaller # of the two -- the one we actually want to pad. na = a_num_photo_cut / area + 1e-10 b_num_photo_cut = np.sum(~np.isnan(b_photo_cut[:, j])) nb = b_num_photo_cut / area + 1e-10 c_prior = min(na, nb) / 2 fa_prior = na - c_prior fb_prior = nb - c_prior c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10 continue bright_frac, field_frac = cm.int_fracs[[0, 1]] c_prior, c_like, fa_prior, fa_like, fb_prior, fb_like = create_c_and_f( # pylint: disable-next=possibly-used-before-assignment a_mag, b_mag, a_mag_cut, b_mag_cut, a_inds_cut, a_size_cut, b_inds_cut, b_size_cut, # pylint: disable-next=possibly-used-before-assignment a_b_area_cut, b_b_area_cut, a_f_area_cut, b_f_area_cut, a_auf_cdf, b_auf_cdf, a_bins, b_bins, bright_frac, field_frac, a_flags, b_flags, a_flags_cut, b_flags_cut, area) if cm.use_phot_priors and not cm.include_phot_like: # If we only used the create_c_and_f routine to derive # priors, then quickly update likelihoods here. c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10 # Have to add a very small "fire extinguisher" value to all # likelihoods and priors, to avoid ever having exactly zero # value in either, which would mean all island permutations # were rejected. c_priors[j, i, m] = c_prior + 1e-100 fa_priors[j, i, m] = fa_prior + 1e-10 fb_priors[j, i, m] = fb_prior + 1e-10 # c should be equal to f^2 in the limit of indifference, so # add 1e-10 and (1e-10)^2 to f and c respectively. c_array[:bbinlengths[j, m]-1, :abinlengths[i, m]-1, j, i, m] = c_like + 1e-100 fa_array[:abinlengths[i, m]-1, j, i, m] = fa_like + 1e-10 fb_array[:bbinlengths[j, m]-1, j, i, m] = fb_like + 1e-10 cm.abinsarray = abinsarray cm.abinlengths = abinlengths cm.bbinsarray = bbinsarray cm.bbinlengths = bbinlengths cm.a_sky_inds = a_sky_inds cm.b_sky_inds = b_sky_inds cm.c_priors = c_priors cm.c_array = c_array cm.fa_priors = fa_priors cm.fa_array = fa_array cm.fb_priors = fb_priors cm.fb_array = fb_array
def create_magnitude_bins(cf_points, filts, a_photo, sky_inds): ''' Creates the N-dimensional arrays of single-band photometric bins, and corresponding array lengths. Parameters ---------- cf_points : numpy.ndarray List of the two-dimensional on-sky coordinates defining the centers of each cutout for which the photometric likelihoods should be calculated. filts : list of strings List of the filters to create magnitude bins for in this catalogue. a_photo : numpy.ndarray Photometric detections from which to create magnitude bins. sky_inds : numpy.ndarray Array of indices, showing which on-sky photometric point, from ``cf_points``, each source in the catalogue is closest to. Returns ------- binlengths : numpy.ndarray Two-dimensional array, indicating the length of the magnitude bins for each filter-sky coordinate combination. binsarray : numpy.ndarray Three-dimensional array, containing the values of the magnitude bins for the filter-sky combinations. longbinlen : integer Value of the largest of all filter-sky combinations of ``binlengths``. ''' binlengths = np.empty((len(filts), len(cf_points)), int) for m in range(0, len(cf_points)): sky_cut = sky_inds == m for i in range(0, len(filts)): a = a_photo[sky_cut, i] if np.sum(~np.isnan(a)) > 0: f = make_bins(a[~np.isnan(a)]) else: f = np.array([0]) del a binlengths[i, m] = len(f) longbinlen = np.amax(binlengths) binsarray = np.full(dtype=float, shape=(longbinlen, len(filts), len(cf_points)), fill_value=-1, order='F') for m in range(0, len(cf_points)): sky_cut = sky_inds == m for i in range(0, len(filts)): a = a_photo[sky_cut, i] if np.sum(~np.isnan(a)) > 0: f = make_bins(a[~np.isnan(a)]) else: f = np.array([0]) del a binsarray[:binlengths[i, m], i, m] = f return binlengths, binsarray, longbinlen def make_bins(input_mags): ''' Calculate bins for a catalogue's magnitude distribution, ensuring all stars are in histogram bins of sufficient number statistics. Parameters ---------- input_mags : numpy.ndarray Array of magnitudes of given filter from the specific catalogue, to be placed in a histogram. Returns ------- output_bins : numpy.ndarray Bins for the given catalogue-filter combination that produce robust numbers of sources within each magnitude interval. ''' minamag = np.amin(input_mags) maxamag = np.amax(input_mags) da = 0.1 maxa = min(35, da*np.ceil(maxamag/da)) mina = max(-5, da*np.floor(minamag/da)) na = int(np.ceil((maxa - mina)/da) + 1) output_bins = np.linspace(mina, maxa, na) # If min/max magnitudes that define magnitude bins happen to lie exactly # on a bin edge (i.e., maxamag % da == 0), then just pad bin edge slightly. if np.abs(mina - minamag) < 1e-5: output_bins[0] -= 1e-4 if np.abs(maxa - maxamag) < 1e-5: output_bins[-1] += 1e-4 hist, output_bins = np.histogram(input_mags, bins=output_bins) smalllist = [] # Minimum number statistics in each 1-D bin. minnum = 250 for i in range(0, len(output_bins)-1): if hist[i] < minnum: smalllist.extend([i]) smalllist = np.array(smalllist) dellist = [] if len(smalllist) > 0: for i in smalllist: if i not in dellist: flag = 0 for j in range(i+1, len(output_bins)-1): if np.sum(hist[i:j+1]) > minnum: dellist.extend(list(range(i+1, j+1))) flag = 1 break if flag == 0: dellist.extend(list(range(i+1, len(output_bins)-1))) output_bins = np.delete(output_bins, dellist) return output_bins # pylint: disable-next=too-many-locals,too-many-arguments,too-many-statements def create_c_and_f(a_mag, b_mag, a_mag_cut, b_mag_cut, a_inds, a_size, b_inds, b_size, a_b_area, b_b_area, a_f_area, b_f_area, auf_cdf_a, auf_cdf_b, a_bins, b_bins, bright_frac, field_frac, a_flags, b_flags, a_flags_cut, b_flags_cut, area): ''' Functionality to create the photometric likelihood and priors from a set of photometric data in a given pair of filters. Parameters ---------- a_mag : numpy.ndarray Catalogue "a" magnitudes for the full catalogue, such that indexing from e.g. ``b_inds`` is correct. b_mag : numpy.ndarray Catalogue "b" magnitudes for indexing purposes. a_mag_cut : numpy.ndarray Catalogue "a" magnitudes for sky area in which to determine photometric priors and likelihoods. b_mag_cut : numpy.ndarray Catalogue "b" magnitudes for sources in sky region. a_inds : numpy.ndarray Indices into catalogue "b" for each "a" object, indicating potential overlaps in counterparts. a_size : numpy.ndarray The number of potential overlapping sources in catalogue "b", for each catalogue "a" object. b_inds : numpy.ndarray Overlap indices into catalogue "a" for each catalogue "b" object. b_size : numpy.ndarray Number of overlaps from catalogue "b" into catalogue "a". a_b_area : numpy.ndarray The "bright" error circle area, integrating the joint AUF convolution out to ``expected_frac`` for largest "a"-"b" potential pairing, for each catalogue "a" object. b_b_area : numpy.narray Catalogue b's "bright" error circle area. a_f_area : numpy.ndarray The "field" error circle area, integrating the joint AUF convolution out to ``expected_frac`` for largest "a"-"b" potential pairing, for each catalogue "a" object. b_f_area : numpy.ndarray Catalogue b's "field" error circle area. auf_cdf_a : numpy.ndarray Evaluations of the astrometric uncertainty function, integrated out to the separation of the potential overlaps to all potential counterparts to all catalogue "a" objects. auf_cdf_b : numpy.ndarray Evaluations of the CDF of the AUF, for all potential counterparts in catalogue "a", for each catalogue "b" source. a_bins : numpy.ndarray Array containing the magnitude bins into which to place catalogue "a" sources. b_bins : numpy.ndarray Array containing the magnitude bins into which to place catalogue "b" sources. bright_frac : float Fraction of total probability integral to consider potential counterparts out to, when considering potential overlaps between catalogue-catalogue pairings. field_frac : float Fraction of total probability integral out to which sources are removed from consideration as "field" sources (i.e., they are not assumed to be ruled out as potential counterparts), when considering potential overlaps in pairs of objects between the two catalogues. a_flags : numpy.ndarray Boolean flags for whether a source in catalogue "a" has a detected magnitude in ``a_mag``. b_flags : numpy.ndarray Detection flags for catalogue "b" sources in ``b_mag``. a_flags_cut : numpy.ndarray Boolean flags for whether a source in catalogue "a" has a detected magnitude in ``a_mag_cut``. b_flags_cut : numpy.ndarray Detection flags for catalogue "b" sources in ``b_mag_cut``. area : float Area of sky region for which photometric likelihood and prior are being calculated, in square degrees. Returns ------- nc : float The prior density of counterpart sources between the catalogues. cdmdm : numpy.ndarray Two-dimensional array of the photometric likelihood of counterpart between the two catalogues. nfa : float So-called "field" source density in catalogue "a". fa : numpy.ndarray Probability density array of field sources for catalogue "a". nfb : float Field source density prior for catalogue "b". fb : numpy.ndarray Field source PDF for catalogue "b". ''' a_hist, a_bins = np.histogram(a_mag_cut[a_flags_cut], bins=a_bins) pa = a_hist/(np.sum(a_hist)*np.diff(a_bins)) a_cuts = plf.find_mag_bin_inds(a_mag_cut, a_flags_cut, a_bins) # get_field_dists allows for magnitude slicing, to get f(m | m) instead of f(m), # but when we do want f(m) we just pass two impossible magnitudes as the limits. # Note that the "primary" catalogue passed is the *_mag_cut array, matching # *_inds in length, but the second catalogue has to be the non-cut array such that # counterpart indexes are valid. a_masks = plf.get_field_dists(auf_cdf_a, a_inds, a_size, np.array([bright_frac, field_frac]), a_flags_cut, a_mag_cut, b_flags, b_mag, -999, 999, -999, 999) b_masks = plf.get_field_dists(auf_cdf_b, b_inds, b_size, np.array([bright_frac, field_frac]), b_flags_cut, b_mag_cut, a_flags, a_mag, -999, 999, -999, 999) # Generate our chosen field-frac's field source histograms. a_mask = a_masks[:, 1].astype(bool) b_mask = b_masks[:, 1].astype(bool) a_left = a_mag_cut[a_mask] b_left = b_mag_cut[b_mask] hist, a_bins = np.histogram(a_left, bins=a_bins) fa = hist / (np.sum(hist)*np.diff(a_bins)) hist, b_bins = np.histogram(b_left, bins=b_bins) fb = hist / (np.sum(hist)*np.diff(b_bins)) # Calculate the Nc and two Nf values from our biased densities. # These are generated from the measured density being counterparts (X) # and field density (Y), mitigated by deliberated removed objects inside # our chosen counterpart CDF fraction F and unintentional removal due to # random-chance alignment. measured_density_a = np.sum(a_masks, axis=0) / area measured_density_b = np.sum(b_masks, axis=0) / area # rho = x / a, sig_x = sqrt(x), sig_rho = sig_x * drho/dx = sig_x / a # sig_rho = sqrt(x) / a = sqrt(x / a) / sqrt(a) = sqrt(rho) / sqrt(a) # = sqrt(rho / a). meas_dens_a_uncert = np.sqrt(measured_density_a / area) meas_dens_b_uncert = np.sqrt(measured_density_b / area) # Values for the case where we don't remove anything, and F = 0. This # is used to get a handle on the total combined value of our two contributing # densities, X and Y. tot_density_a = np.sum(a_flags_cut) / area tot_dens_a_uncert = np.sqrt(tot_density_a / area) tot_density_b = np.sum(b_flags_cut) / area tot_dens_b_uncert = np.sqrt(tot_density_b / area) measured_density_a = np.array([tot_density_a, *measured_density_a]) measured_density_b = np.array([tot_density_b, *measured_density_b]) # Add a floor of 1/sq deg in quadrature to avoid empty measurements # causing NaNs. meas_dens_a_uncert = np.sqrt(np.array([tot_dens_a_uncert, *meas_dens_a_uncert])**2 + 1**2) meas_dens_b_uncert = np.sqrt(np.array([tot_dens_b_uncert, *meas_dens_b_uncert])**2 + 1**2) # Filter for completely isolated sources, which will have zero area, # to take a meaningful sample. pc = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95] if np.sum(a_b_area > 0) > 0: aba_perc = np.percentile(a_b_area[a_b_area > 0], pc) else: aba_perc = np.zeros(len(pc), float) if np.sum(a_f_area > 0) > 0: afa_perc = np.percentile(a_f_area[a_f_area > 0], pc) else: afa_perc = np.zeros(len(pc), float) if np.sum(b_b_area > 0) > 0: bba_perc = np.percentile(b_b_area[b_b_area > 0], pc) else: bba_perc = np.zeros(len(pc), float) if np.sum(b_f_area > 0) > 0: bfa_perc = np.percentile(b_f_area[b_f_area > 0], pc) else: bfa_perc = np.zeros(len(pc), float) avg_a_areas = np.array([aba_perc, afa_perc]).T avg_b_areas = np.array([bba_perc, bfa_perc]).T # With a percentile in steps of 10 percentage points, each area will be # equally weighted with 1/10th of the objects. wht_a_areas = 0.1 * np.ones_like(avg_a_areas) wht_b_areas = 0.1 * np.ones_like(avg_b_areas) res = minimize(calculate_prior_densities, args=(measured_density_a, measured_density_b, meas_dens_a_uncert, meas_dens_b_uncert, np.array([bright_frac, field_frac]), avg_a_areas, avg_b_areas, wht_a_areas, wht_b_areas), x0=[1, tot_density_a, tot_density_b], jac=True, method='L-BFGS-B', bounds=[(0, None), (0, None), (0, None)]) nc, nfa, nfb = res.x bm = np.empty((len(b_bins)-1, len(a_bins)-1), float, order='F') z = np.empty(len(a_bins)-1, float) # Average area here might be wrong too, but since we deal in small magnitude # slices the approximations should hold. Again, both a-catalogue magnitude # arrays need to be the "cut" versions to match a_size's shape, but catalogue # b versions need to be full to ensure cross-catalogue indexing works. mag_mask, aa = plf.brightest_mag(auf_cdf_a, a_mag_cut, b_mag, a_inds, a_size, bright_frac, a_b_area, a_flags_cut, b_flags, a_bins) mag_mask = mag_mask.astype(bool) for i in range(0, len(a_bins)-1): hist, b_bins = np.histogram(b_mag[mag_mask[:, i]], bins=b_bins) q = np.sum(mag_mask[:, i]) if q > 0: bm[:, i] = hist/(np.diff(b_bins)*q) else: bm[:, i] = 0 z[i] = np.sum(hist)/np.sum(a_cuts[i]) cdmdm = np.empty((len(b_bins)-1, len(a_bins)-1), float, order='F') for i in range(0, len(a_bins)-1): # Within the loop, we only want to consider a catalogue, effectively, of # "a" sources with magitude [a_bins[i], a_bins[i+1]]. We therefore # get_field_dists with a slice in catalogue a, but no slice in b. b_masks = plf.get_field_dists( auf_cdf_b, b_inds, b_size, np.array([bright_frac, field_frac]), b_flags_cut, b_mag_cut, a_flags, a_mag, -999, 999, a_bins[i], a_bins[i+1]) # However, to calculate the opposite masking -- and get biased-density # measurements of the "field" density of surviving objects in catalogue # "a" with narrow-range brightnesses -- we slice in a still, and not in b, # remembering that in the function call a and b will be swapped, with "a" # now the first-passed catalogue (either a or b). a_masks = plf.get_field_dists( auf_cdf_a, a_inds, a_size, np.array([bright_frac, field_frac]), a_flags_cut, a_mag_cut, b_flags, b_mag, a_bins[i], a_bins[i+1], -999, 999) a_mag_filter = (a_mag_cut >= a_bins[i]) & (a_mag_cut <= a_bins[i+1]) measured_density_a = np.sum(a_masks, axis=0) / area measured_density_b = np.sum(b_masks, axis=0) / area meas_dens_a_uncert = np.sqrt(measured_density_a / area) meas_dens_b_uncert = np.sqrt(measured_density_b / area) tot_density_a = np.sum(a_flags_cut & a_mag_filter) / area tot_dens_a_uncert = np.sqrt(tot_density_a / area) tot_density_b = np.sum(b_flags_cut) / area tot_dens_b_uncert = np.sqrt(tot_density_b / area) measured_density_a = np.array([tot_density_a, *measured_density_a]) measured_density_b = np.array([tot_density_b, *measured_density_b]) meas_dens_a_uncert = np.sqrt(np.array([tot_dens_a_uncert, *meas_dens_a_uncert])**2 + 1**2) meas_dens_b_uncert = np.sqrt(np.array([tot_dens_b_uncert, *meas_dens_b_uncert])**2 + 1**2) # Also filter the a-catalogue objects for being within our magnitude # range, but don't do anything to b, so re-use the previous one. if np.sum((a_b_area > 0) & a_mag_filter) > 0: aba_perc = np.percentile(a_b_area[(a_b_area > 0) & a_mag_filter], pc) else: aba_perc = np.zeros(len(pc), float) if np.sum((a_f_area > 0) & a_mag_filter) > 0: afa_perc = np.percentile(a_f_area[(a_f_area > 0) & a_mag_filter], pc) else: afa_perc = np.zeros(len(pc), float) avg_a_areas = np.array([aba_perc, afa_perc]).T wht_a_areas = 0.1 * np.ones_like(avg_a_areas) res = minimize(calculate_prior_densities, args=(measured_density_a, measured_density_b, meas_dens_a_uncert, meas_dens_b_uncert, np.array([bright_frac, field_frac]), avg_a_areas, avg_b_areas, wht_a_areas, wht_b_areas), x0=[1, tot_density_a, tot_density_b], jac=True, method='L-BFGS-B', bounds=[(0, None), (0, None), (0, None)]) _, _, _nfb = res.x bmask = b_masks[:, 1].astype(bool) b_left = b_mag_cut[bmask] hist, b_bins = np.histogram(b_left, bins=b_bins) _fb = hist / (np.sum(hist)*np.diff(b_bins)) fm = np.append(0, np.cumsum(_fb[:-1] * np.diff(b_bins[:-1]))) for j in range(0, len(b_bins)-1): cm = np.sum(cdmdm[:j, i]*np.diff(b_bins[:j+1])) cdmdm[j, i] = max(0, z[i]*bm[j, i]*np.exp(aa[i]*_nfb*fm[j]) - (1-cm)*aa[i]*_nfb*_fb[j]) # What we get at the end is, as per Naylor et al. (2013), Zc c. They # convert directly to X c, but we have split that out above into Nc # already. Here we therefore don't care about the constant in front # of c(m | m), and simply normalise. if np.sum(cdmdm[:, i] * np.diff(b_bins)) > 0: cdmdm[:, i] /= np.sum(cdmdm[:, i] * np.diff(b_bins)) integral = 0 for i in range(0, len(a_bins)-1): cdmdm[:, i] *= pa[i] integral = integral + np.sum(cdmdm[:, i]*np.diff(b_bins))*(a_bins[i+1] - a_bins[i]) if integral > 0: cdmdm /= integral # If any density is NaN, or Nc and at least one of Nfa or Nfb are # non-positive, then we have to warn and quit, since we won't be able to # recover that below. Similarly, we can't allow for a dependency-based # density condition where Nc is set based on one Nf and then the other Nf # is set based on Nc, since we don't know which counterpart density to use. # pylint: disable-next=too-many-boolean-expressions if (np.any(np.isnan([nc, nfa, nfb])) or (nc <= 0 and (nfa <= 0 or nfb <= 0)) or (nfa <= 0.01 * nc and nc <= 0.01 * nfb) or (nfb <= 0.01 * nc and nc <= 0.01 * nfa)): raise ValueError("Incorrect prior densities, unable to process chunk.") # Otherwise, if simply set to 1% the lowest of the (valid) field densities. if nfa > 0 and nfb > 0 and nc <= 0.01 * min(nfa, nfb): nc = 0.01 * min(nfa, nfb) # We would have conditions something like nfb <= 0 and nc <= 0.01 * nfa # and nfa > 0, to test the asymmetric cases, implicitly requiring nc > 0 to # avoid the raise above, but these will always trigger one of the latter # two error-criteria above. # If either field density is too small, set to 1% the counterpart density, # similarly. nfa <= 0.01 * nc implies nc > 0.01 * nfb or we'd've triggered # the latter two error-criteria again, though, so nc > 0 requires nfb > 0. if nfa <= 0.01 * nc and nc > 0: nfa = 0.01 * nc if nfb <= 0.01 * nc and nc > 0: nfb = 0.01 * nc return nc, cdmdm, nfa, fa, nfb, fb def calculate_prior_densities(model_densities, rho, phi, o_rho, o_phi, fs, as_rho, as_phi, ws_rho, ws_phi): ''' Calculate the joint-catalogue counterpart density and separate catalogue non-matched ("field") densities of two catalogues based on a series of measured densities. In each case, objects within a particular integrated fraction of object pairs being counterparts given their separation and corresponding positional uncertainties are removed from consideration, and "surviving" object densities are computed, to break the degeneracy between counterparts and non-counterparts. Parameters ---------- model_densities : list or numpy.ndarray Three-element array with counterpart, catalogue a's field density, and catalogue b's field density in, evaluated by the minimisation. rho : list or numpy.ndarray Measured densities for catalogue a, at each fraction in ``fs``. phi : list or numpy.ndarray Catalogue b density measurements, corresponding to ``fs``. o_rho : list or numpy.ndarray Uncertainty in measured ``rho`` densities. o_phi : list or numpy.ndarray ``phi`` uncertainties. fs : list or numpy.ndarray Each CDF fraction used to remove potential counterparts (true or otherwise) in measuring ``rho`` and ``phi``. as_rho : list of lists or numpy.ndarray Average "error circle" area, the average area for a radius out to which all objects in catalogue a integrated to get to each ``fs`` CDF, for each CDF fraction. A weighted distribution is given for each ``fs``, corresponding to ``ws_rho``. as_phi : list of lists or numpy.ndarray Average catalogue b error circle area, corresponding to each ``fs`` CDF with an associated weight in ``ws_phi``. ws_rho : list of lists or numpy.ndarray Array of shape ``(X, Y)``, with fractions ``fs`` of length ``Y``. Each fraction has a set of error circle areas with corresponding weights, such that a sum along ``ws_rho[:, i]`` is unity, weighting the ``i``th fraction's error circles. ws_phi : list of lists or numpy.ndarray The joint error circle-fraction weights for catalogue ``phi``. Returns ------- lst_sq : float The chi-squared fit between the model measured-densities, evaluated at each ``fs``, and ``rho`` and ``phi``. lst_sq_grad : numpy.ndarray The gradient of ``lst_sq`` with respect to each element of ``model_densities``. ''' def calculate_dens_and_grad(t, u, v, f_s, a_s, w_s): ''' Calculate the model for measured number counts based on joint-counterpart and randomly aligned source densities of two catalogues. Parameters ---------- t : float Density, sources per square degree, of counterparts u : float Catalogue a's field density. v : float Catalogue b's field density. f_s : list or numpy.ndarray Set of CDF fractions at which catalogue densities have been sampled, removing some percentage of counterparts from the measured densities. a_s : list of lists or numpy.ndarray Set of average "error circle" areas, shape ``(X, Y)``, with ``X`` the number of different error circle areas to weight by and ``Y`` corresponding to each ``f_s`` integral, which accounts for some percentage of removed chance-alignment sources from the measured density. w_s : list of lists or numpy.ndarray Two-dimensional array, shape ``(X, Y)``, the weights for each area for each set of fractions ``f_s``. Returns ------- weighted_model : float The expected calculated overlap density between the two catalogues based on counterpart and non-matching densities and percentile integration for removal from measurements. weighted_model_grad : numpy.ndarray The gradient of the model with respect to ``t``, ``u``, and ``v``. ''' weighted_model = np.zeros((len(f_s)), float) weighted_model_grad = np.zeros((3, len(f_s)), float) for a, w in zip(a_s, w_s): # d/d[tv] o_m_e = -pi r^2 o_m_e = -a o_m_e o_m_e = 1 - e(t, v, a) model = w * (((1 - f_s) * t + u) * o_m_e) dmodel_dt = w * ((1 - f_s) * o_m_e - a * model) dmodel_du = w * o_m_e dmodel_dv = w * -a * model weighted_model += model weighted_model_grad += np.array([dmodel_dt, dmodel_du, dmodel_dv]) return weighted_model, weighted_model_grad def e(g, h, a): ''' Calculate CDF of randomly aligned nearest-neighbour distribution. Parameters ---------- g : float Density of one dataset. h : float Density of second dataset. a : float Area enclosed by radius out to which to consider potential neighbours. Returns ------- float CDF, 1 - exp(-pi r^2 (g + h)). ''' return 1 - np.exp(-a * (g + h)) x, y, z = model_densities # The first measurement we make is for F=0, not passed through the fs array, # but IS passed through rho and phi as their first elements. This is a # simple function, and gives P = X + Y, Q = X + Z. p_0 = x + y q_0 = x + z p_0_grad = np.array([1, 1, 0]) q_0_grad = np.array([1, 0, 1]) # Flip y and z for the two calls! p, p_grad = calculate_dens_and_grad(x, y, z, fs, as_rho, ws_rho) q, q_grad = calculate_dens_and_grad(x, z, y, fs, as_phi, ws_phi) # Reverse the q_grad order, so that what is actually y is the 2nd element. q_grad = np.array([q_grad[0], q_grad[2], q_grad[1]]) p = np.array([p_0, *p]) q = np.array([q_0, *q]) p_grad = np.array([[p_0_grad[0], *p_grad[0]], [p_0_grad[1], *p_grad[1]], [p_0_grad[2], *p_grad[2]]]) q_grad = np.array([[q_0_grad[0], *q_grad[0]], [q_0_grad[1], *q_grad[1]], [q_0_grad[2], *q_grad[2]]]) lst_sq = np.sum((p - rho)**2 / o_rho**2 + (q - phi)**2 / o_phi**2) lst_sq_grad = np.empty(3, float) lst_sq_grad[0] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[0] + 2 * (q - phi) / o_phi**2 * q_grad[0]) lst_sq_grad[1] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[1] + 2 * (q - phi) / o_phi**2 * q_grad[1]) lst_sq_grad[2] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[2] + 2 * (q - phi) / o_phi**2 * q_grad[2]) return lst_sq, lst_sq_grad