Source code for macauff.photometric_likelihood

# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module provides the framework for the creation of the photometric likelihoods
used in the cross-matching of the two catalogues.
'''

import datetime
import sys
import warnings

import numpy as np
from scipy.optimize import minimize

# pylint: disable=import-error,no-name-in-module
from macauff.misc_functions import coord_inside_convex_hull
from macauff.misc_functions_fortran import misc_functions_fortran as mff
from macauff.photometric_likelihood_fortran import photometric_likelihood_fortran as plf

# pylint: enable=import-error,no-name-in-module

__all__ = ['compute_photometric_likelihoods']


# pylint: disable-next=too-many-locals

[docs]
def compute_photometric_likelihoods(cm):
    '''
    Derives the photometric likelihoods and priors for use in the catalogue
    cross-match process.

    Parameters
    ----------
    cm : Class
        The cross-match wrapper, containing all of the necessary metadata to
        perform the cross-match and determine photometric likelihoods.
    '''

    t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Creating c(m, m) and f(m)...")

    t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Distributing sources into sky slices...")
    sys.stdout.flush()

    a_astro = cm.a_astro
    b_astro = cm.b_astro
    a_photo = cm.a_photo
    b_photo = cm.b_photo
    auf_cdf_a = cm.auf_cdf_a
    auf_cdf_b = cm.auf_cdf_b

    a_sky_inds = mff.find_nearest_point(a_astro[:, 0], a_astro[:, 1], cm.cf_region_points[:, 0],
                                        cm.cf_region_points[:, 1])
    b_sky_inds = mff.find_nearest_point(b_astro[:, 0], b_astro[:, 1], cm.cf_region_points[:, 0],
                                        cm.cf_region_points[:, 1])

    t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Making bins...")
    sys.stdout.flush()

    abinlengths, abinsarray, longabinlen = create_magnitude_bins(cm.cf_region_points, cm.a_filt_names,
                                                                 a_photo, a_sky_inds)
    bbinlengths, bbinsarray, longbbinlen = create_magnitude_bins(cm.cf_region_points, cm.b_filt_names,
                                                                 b_photo, b_sky_inds)

    t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating PDFs...")
    sys.stdout.flush()

    c_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names),
                                            len(cm.cf_region_points)), order='F')
    fa_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names),
                                             len(cm.cf_region_points)), order='F')
    fb_priors = np.zeros(dtype=float, shape=(len(cm.b_filt_names), len(cm.a_filt_names),
                                             len(cm.cf_region_points)), order='F')
    c_array = np.zeros(dtype=float, shape=(longbbinlen-1, longabinlen-1, len(cm.b_filt_names),
                                           len(cm.a_filt_names), len(cm.cf_region_points)), order='F')
    fa_array = np.zeros(dtype=float, shape=(longabinlen-1, len(cm.b_filt_names), len(cm.a_filt_names),
                                            len(cm.cf_region_points)), order='F')
    fb_array = np.zeros(dtype=float, shape=(longbbinlen-1, len(cm.b_filt_names), len(cm.a_filt_names),
                                            len(cm.cf_region_points)), order='F')

    a_in_joint_area = np.array([
        coord_inside_convex_hull([a_astro[p, 0] + cm.a_hull_x_shift, a_astro[p, 1]], cm.a_hull_points) &
        coord_inside_convex_hull([a_astro[p, 0] + cm.b_hull_x_shift, a_astro[p, 1]], cm.b_hull_points)
        for p in range(len(a_astro))])
    b_in_joint_area = np.array([
        coord_inside_convex_hull([b_astro[p, 0] + cm.a_hull_x_shift, b_astro[p, 1]], cm.a_hull_points) &
        coord_inside_convex_hull([b_astro[p, 0] + cm.b_hull_x_shift, b_astro[p, 1]], cm.b_hull_points)
        for p in range(len(b_astro))])
    for m in range(0, len(cm.cf_region_points)):
        area = cm.cf_areas[m]
        a_sky_cut = (a_sky_inds == m) & a_in_joint_area
        a_photo_cut = a_photo[a_sky_cut]
        if cm.include_phot_like or cm.use_phot_priors:
            a_auf_cdf = auf_cdf_a[:, a_sky_cut]
            a_b_area_cut, a_f_area_cut, a_inds_cut, a_size_cut = (
                cm.ab_area[a_sky_cut], cm.af_area[a_sky_cut], cm.ainds[:, a_sky_cut], cm.asize[a_sky_cut])

        b_sky_cut = (b_sky_inds == m) & b_in_joint_area
        b_photo_cut = b_photo[b_sky_cut]
        if cm.include_phot_like or cm.use_phot_priors:
            b_auf_cdf = auf_cdf_b[:, b_sky_cut]
            b_b_area_cut, b_f_area_cut, b_inds_cut, b_size_cut = (
                cm.bb_area[b_sky_cut], cm.bf_area[b_sky_cut], cm.binds[:, b_sky_cut], cm.bsize[b_sky_cut])

        for i in range(0, len(cm.a_filt_names)):  # pylint: disable=consider-using-enumerate
            if not cm.include_phot_like and not cm.use_phot_priors:
                a_num_photo_cut = np.sum(~np.isnan(a_photo_cut[:, i]))
                na = a_num_photo_cut / area
            else:
                a_bins = abinsarray[:abinlengths[i, m], i, m]
                a_mag = a_photo[:, i]
                a_mag_cut = a_photo_cut[:, i]
                a_flags = ~np.isnan(a_mag)
                a_flags_cut = a_flags[a_sky_cut]
            for j in range(0, len(cm.b_filt_names)):  # pylint: disable=consider-using-enumerate
                if not cm.include_phot_like and not cm.use_phot_priors:
                    b_num_photo_cut = np.sum(~np.isnan(b_photo_cut[:, j]))
                    nb = b_num_photo_cut / area
                    # Without using photometric-based priors, all we can
                    # do is set the prior on one catalogue to 0.5 -- that
                    # is, equal chance of match or non-match; for this we
                    # use the less dense of the two catalogues as our
                    # "one-sided" match. Then, accordingly, we update the
                    # "field" source density of the more dense catalogue
                    # with its corresponding density, based on the input
                    # density and the counterpart density calculated.
                    c_prior = min(na, nb) / 2
                    fa_prior = na - c_prior
                    fb_prior = nb - c_prior
                    # To fake no photometric likelihoods, simply set all
                    # values to one, to cancel in the ratio later.
                    c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10
                else:
                    b_bins = bbinsarray[:bbinlengths[j, m], j, m]
                    b_mag = b_photo[:, j]
                    b_mag_cut = b_photo_cut[:, j]
                    b_flags = ~np.isnan(b_mag)
                    b_flags_cut = b_flags[b_sky_cut]

                    # Before calling c&f creation routines, check if either
                    # catalogue's band is significantly under-populated
                    # relative to the main catalogue. The obvious case is the
                    # verification of "empty" bands where surveys are made of
                    # portions of sky where bands were not used and others,
                    # but more generally cases where a band detects very few
                    # objects from the overall survey will be caught here too.
                    if (np.sum(b_flags_cut) / len(b_mag_cut) < 0.001 or
                            np.sum(a_flags_cut) / len(a_mag_cut) < 0.001):
                        warnings.warn(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Catalogue a's "
                                      f"{cm.a_filt_names[i]} and b's {cm.b_filt_names[j]} have poor overlap, "
                                      "skipping c and f generation and falling back to default values.")
                        a_num_photo_cut = np.sum(~np.isnan(a_photo_cut[:, i]))
                        # Pad densities against a literally empty band, just in
                        # case. It should cancel in the c/f ratio of the smaller
                        # of the two -- the one we actually want to pad.
                        na = a_num_photo_cut / area + 1e-10
                        b_num_photo_cut = np.sum(~np.isnan(b_photo_cut[:, j]))
                        nb = b_num_photo_cut / area + 1e-10
                        c_prior = min(na, nb) / 2
                        fa_prior = na - c_prior
                        fb_prior = nb - c_prior
                        c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10
                        continue

                    bright_frac, field_frac = cm.int_fracs[[0, 1]]
                    c_prior, c_like, fa_prior, fa_like, fb_prior, fb_like = create_c_and_f(
                        # pylint: disable-next=possibly-used-before-assignment
                        a_mag, b_mag, a_mag_cut, b_mag_cut, a_inds_cut, a_size_cut, b_inds_cut, b_size_cut,
                        # pylint: disable-next=possibly-used-before-assignment
                        a_b_area_cut, b_b_area_cut, a_f_area_cut, b_f_area_cut, a_auf_cdf, b_auf_cdf, a_bins,
                        b_bins, bright_frac, field_frac, a_flags, b_flags, a_flags_cut, b_flags_cut, area)
                if cm.use_phot_priors and not cm.include_phot_like:
                    # If we only used the create_c_and_f routine to derive
                    # priors, then quickly update likelihoods here.
                    c_like, fa_like, fb_like = (1-1e-10)**2, 1-1e-10, 1-1e-10

                # Have to add a very small "fire extinguisher" value to all
                # likelihoods and priors, to avoid ever having exactly zero
                # value in either, which would mean all island permutations
                # were rejected.
                c_priors[j, i, m] = c_prior + 1e-100
                fa_priors[j, i, m] = fa_prior + 1e-10
                fb_priors[j, i, m] = fb_prior + 1e-10
                # c should be equal to f^2 in the limit of indifference, so
                # add 1e-10 and (1e-10)^2 to f and c respectively.
                c_array[:bbinlengths[j, m]-1,
                        :abinlengths[i, m]-1, j, i, m] = c_like + 1e-100
                fa_array[:abinlengths[i, m]-1, j, i, m] = fa_like + 1e-10
                fb_array[:bbinlengths[j, m]-1, j, i, m] = fb_like + 1e-10
    cm.abinsarray = abinsarray
    cm.abinlengths = abinlengths
    cm.bbinsarray = bbinsarray
    cm.bbinlengths = bbinlengths
    cm.a_sky_inds = a_sky_inds
    cm.b_sky_inds = b_sky_inds
    cm.c_priors = c_priors
    cm.c_array = c_array
    cm.fa_priors = fa_priors
    cm.fa_array = fa_array
    cm.fb_priors = fb_priors
    cm.fb_array = fb_array



def create_magnitude_bins(cf_points, filts, a_photo, sky_inds):
    '''
    Creates the N-dimensional arrays of single-band photometric bins, and
    corresponding array lengths.

    Parameters
    ----------
    cf_points : numpy.ndarray
        List of the two-dimensional on-sky coordinates defining the centers
        of each cutout for which the photometric likelihoods should be
        calculated.
    filts : list of strings
        List of the filters to create magnitude bins for in this catalogue.
    a_photo : numpy.ndarray
        Photometric detections from which to create magnitude bins.
    sky_inds : numpy.ndarray
         Array of indices, showing which on-sky photometric point, from
        ``cf_points``, each source in the catalogue is closest to.

    Returns
    -------
    binlengths : numpy.ndarray
        Two-dimensional array, indicating the length of the magnitude bins for
        each filter-sky coordinate combination.
    binsarray : numpy.ndarray
        Three-dimensional array, containing the values of the magnitude bins for
        the filter-sky combinations.
    longbinlen : integer
        Value of the largest of all filter-sky combinations of ``binlengths``.
    '''
    binlengths = np.empty((len(filts), len(cf_points)), int)

    for m in range(0, len(cf_points)):
        sky_cut = sky_inds == m
        for i in range(0, len(filts)):
            a = a_photo[sky_cut, i]
            if np.sum(~np.isnan(a)) > 0:
                f = make_bins(a[~np.isnan(a)])
            else:
                f = np.array([0])
            del a
            binlengths[i, m] = len(f)

    longbinlen = np.amax(binlengths)

    binsarray = np.full(dtype=float, shape=(longbinlen, len(filts), len(cf_points)), fill_value=-1, order='F')
    for m in range(0, len(cf_points)):
        sky_cut = sky_inds == m
        for i in range(0, len(filts)):
            a = a_photo[sky_cut, i]
            if np.sum(~np.isnan(a)) > 0:
                f = make_bins(a[~np.isnan(a)])
            else:
                f = np.array([0])
            del a
            binsarray[:binlengths[i, m], i, m] = f

    return binlengths, binsarray, longbinlen


def make_bins(input_mags):
    '''
    Calculate bins for a catalogue's magnitude distribution, ensuring all stars
    are in histogram bins of sufficient number statistics.

    Parameters
    ----------
    input_mags : numpy.ndarray
        Array of magnitudes of given filter from the specific catalogue, to be
        placed in a histogram.

    Returns
    -------
    output_bins : numpy.ndarray
        Bins for the given catalogue-filter combination that produce robust
        numbers of sources within each magnitude interval.
    '''
    minamag = np.amin(input_mags)
    maxamag = np.amax(input_mags)
    da = 0.1
    maxa = min(35, da*np.ceil(maxamag/da))
    mina = max(-5, da*np.floor(minamag/da))
    na = int(np.ceil((maxa - mina)/da) + 1)
    output_bins = np.linspace(mina, maxa, na)
    # If min/max magnitudes that define magnitude bins happen to lie exactly
    # on a bin edge (i.e., maxamag % da == 0), then just pad bin edge slightly.
    if np.abs(mina - minamag) < 1e-5:
        output_bins[0] -= 1e-4
    if np.abs(maxa - maxamag) < 1e-5:
        output_bins[-1] += 1e-4

    hist, output_bins = np.histogram(input_mags, bins=output_bins)
    smalllist = []
    # Minimum number statistics in each 1-D bin.
    minnum = 250

    for i in range(0, len(output_bins)-1):
        if hist[i] < minnum:
            smalllist.extend([i])
    smalllist = np.array(smalllist)
    dellist = []
    if len(smalllist) > 0:
        for i in smalllist:
            if i not in dellist:
                flag = 0
                for j in range(i+1, len(output_bins)-1):
                    if np.sum(hist[i:j+1]) > minnum:
                        dellist.extend(list(range(i+1, j+1)))
                        flag = 1
                        break
                if flag == 0:
                    dellist.extend(list(range(i+1, len(output_bins)-1)))
    output_bins = np.delete(output_bins, dellist)

    return output_bins


# pylint: disable-next=too-many-locals,too-many-arguments,too-many-statements
def create_c_and_f(a_mag, b_mag, a_mag_cut, b_mag_cut, a_inds, a_size, b_inds, b_size, a_b_area, b_b_area,
                   a_f_area, b_f_area, auf_cdf_a, auf_cdf_b, a_bins, b_bins, bright_frac, field_frac, a_flags,
                   b_flags, a_flags_cut, b_flags_cut, area):
    '''
    Functionality to create the photometric likelihood and priors from a set
    of photometric data in a given pair of filters.

    Parameters
    ----------
    a_mag : numpy.ndarray
        Catalogue "a" magnitudes for the full catalogue, such that indexing from
        e.g. ``b_inds`` is correct.
    b_mag : numpy.ndarray
        Catalogue "b" magnitudes for indexing purposes.
    a_mag_cut : numpy.ndarray
        Catalogue "a" magnitudes for sky area in which to determine photometric
        priors and likelihoods.
    b_mag_cut : numpy.ndarray
        Catalogue "b" magnitudes for sources in sky region.
    a_inds : numpy.ndarray
        Indices into catalogue "b" for each "a" object, indicating potential
        overlaps in counterparts.
    a_size : numpy.ndarray
        The number of potential overlapping sources in catalogue "b", for each
        catalogue "a" object.
    b_inds : numpy.ndarray
        Overlap indices into catalogue "a" for each catalogue "b" object.
    b_size : numpy.ndarray
        Number of overlaps from catalogue "b" into catalogue "a".
    a_b_area : numpy.ndarray
        The "bright" error circle area, integrating the joint AUF convolution
        out to ``expected_frac`` for largest "a"-"b" potential pairing, for each
        catalogue "a" object.
    b_b_area : numpy.narray
        Catalogue b's "bright" error circle area.
    a_f_area : numpy.ndarray
        The "field" error circle area, integrating the joint AUF convolution
        out to ``expected_frac`` for largest "a"-"b" potential pairing, for each
        catalogue "a" object.
    b_f_area : numpy.ndarray
        Catalogue b's "field" error circle area.
    auf_cdf_a : numpy.ndarray
        Evaluations of the astrometric uncertainty function, integrated out to
        the separation of the potential overlaps to all potential counterparts
        to all catalogue "a" objects.
    auf_cdf_b : numpy.ndarray
        Evaluations of the CDF of the AUF, for all potential counterparts in
        catalogue "a", for each catalogue "b" source.
    a_bins : numpy.ndarray
        Array containing the magnitude bins into which to place catalogue "a"
        sources.
    b_bins : numpy.ndarray
        Array containing the magnitude bins into which to place catalogue "b"
        sources.
    bright_frac : float
        Fraction of total probability integral to consider potential counterparts
        out to, when considering potential overlaps between catalogue-catalogue
        pairings.
    field_frac : float
        Fraction of total probability integral out to which sources are removed
        from consideration as "field" sources (i.e., they are not assumed to be
        ruled out as potential counterparts), when considering potential overlaps
        in pairs of objects between the two catalogues.
    a_flags : numpy.ndarray
        Boolean flags for whether a source in catalogue "a" has a detected
        magnitude in ``a_mag``.
    b_flags : numpy.ndarray
        Detection flags for catalogue "b" sources in ``b_mag``.
    a_flags_cut : numpy.ndarray
        Boolean flags for whether a source in catalogue "a" has a detected
        magnitude in ``a_mag_cut``.
    b_flags_cut : numpy.ndarray
        Detection flags for catalogue "b" sources in ``b_mag_cut``.
    area : float
        Area of sky region for which photometric likelihood and prior are
        being calculated, in square degrees.

    Returns
    -------
    nc : float
        The prior density of counterpart sources between the catalogues.
    cdmdm : numpy.ndarray
        Two-dimensional array of the photometric likelihood of counterpart between
        the two catalogues.
    nfa : float
        So-called "field" source density in catalogue "a".
    fa : numpy.ndarray
        Probability density array of field sources for catalogue "a".
    nfb : float
        Field source density prior for catalogue "b".
    fb : numpy.ndarray
        Field source PDF for catalogue "b".
    '''
    a_hist, a_bins = np.histogram(a_mag_cut[a_flags_cut], bins=a_bins)
    pa = a_hist/(np.sum(a_hist)*np.diff(a_bins))

    a_cuts = plf.find_mag_bin_inds(a_mag_cut, a_flags_cut, a_bins)

    # get_field_dists allows for magnitude slicing, to get f(m | m) instead of f(m),
    # but when we do want f(m) we just pass two impossible magnitudes as the limits.
    # Note that the "primary" catalogue passed is the *_mag_cut array, matching
    # *_inds in length, but the second catalogue has to be the non-cut array such that
    # counterpart indexes are valid.
    a_masks = plf.get_field_dists(auf_cdf_a, a_inds, a_size, np.array([bright_frac, field_frac]), a_flags_cut,
                                  a_mag_cut, b_flags, b_mag, -999, 999, -999, 999)
    b_masks = plf.get_field_dists(auf_cdf_b, b_inds, b_size, np.array([bright_frac, field_frac]), b_flags_cut,
                                  b_mag_cut, a_flags, a_mag, -999, 999, -999, 999)

    # Generate our chosen field-frac's field source histograms.
    a_mask = a_masks[:, 1].astype(bool)
    b_mask = b_masks[:, 1].astype(bool)
    a_left = a_mag_cut[a_mask]
    b_left = b_mag_cut[b_mask]

    hist, a_bins = np.histogram(a_left, bins=a_bins)
    fa = hist / (np.sum(hist)*np.diff(a_bins))

    hist, b_bins = np.histogram(b_left, bins=b_bins)
    fb = hist / (np.sum(hist)*np.diff(b_bins))

    # Calculate the Nc and two Nf values from our biased densities.
    # These are generated from the measured density being counterparts (X)
    # and field density (Y), mitigated by deliberated removed objects inside
    # our chosen counterpart CDF fraction F and unintentional removal due to
    # random-chance alignment.
    measured_density_a = np.sum(a_masks, axis=0) / area
    measured_density_b = np.sum(b_masks, axis=0) / area
    # rho = x / a, sig_x = sqrt(x), sig_rho = sig_x * drho/dx = sig_x / a
    # sig_rho = sqrt(x) / a = sqrt(x / a) / sqrt(a) = sqrt(rho) / sqrt(a)
    # = sqrt(rho / a).
    meas_dens_a_uncert = np.sqrt(measured_density_a / area)
    meas_dens_b_uncert = np.sqrt(measured_density_b / area)

    # Values for the case where we don't remove anything, and F = 0. This
    # is used to get a handle on the total combined value of our two contributing
    # densities, X and Y.
    tot_density_a = np.sum(a_flags_cut) / area
    tot_dens_a_uncert = np.sqrt(tot_density_a / area)
    tot_density_b = np.sum(b_flags_cut) / area
    tot_dens_b_uncert = np.sqrt(tot_density_b / area)

    measured_density_a = np.array([tot_density_a, *measured_density_a])
    measured_density_b = np.array([tot_density_b, *measured_density_b])
    # Add a floor of 1/sq deg in quadrature to avoid empty measurements
    # causing NaNs.
    meas_dens_a_uncert = np.sqrt(np.array([tot_dens_a_uncert, *meas_dens_a_uncert])**2 + 1**2)
    meas_dens_b_uncert = np.sqrt(np.array([tot_dens_b_uncert, *meas_dens_b_uncert])**2 + 1**2)

    # Filter for completely isolated sources, which will have zero area,
    # to take a meaningful sample.
    pc = [5, 15, 25, 35, 45, 55, 65, 75, 85, 95]
    if np.sum(a_b_area > 0) > 0:
        aba_perc = np.percentile(a_b_area[a_b_area > 0], pc)
    else:
        aba_perc = np.zeros(len(pc), float)
    if np.sum(a_f_area > 0) > 0:
        afa_perc = np.percentile(a_f_area[a_f_area > 0], pc)
    else:
        afa_perc = np.zeros(len(pc), float)
    if np.sum(b_b_area > 0) > 0:
        bba_perc = np.percentile(b_b_area[b_b_area > 0], pc)
    else:
        bba_perc = np.zeros(len(pc), float)
    if np.sum(b_f_area > 0) > 0:
        bfa_perc = np.percentile(b_f_area[b_f_area > 0], pc)
    else:
        bfa_perc = np.zeros(len(pc), float)
    avg_a_areas = np.array([aba_perc, afa_perc]).T
    avg_b_areas = np.array([bba_perc, bfa_perc]).T

    # With a percentile in steps of 10 percentage points, each area will be
    # equally weighted with 1/10th of the objects.
    wht_a_areas = 0.1 * np.ones_like(avg_a_areas)
    wht_b_areas = 0.1 * np.ones_like(avg_b_areas)

    res = minimize(calculate_prior_densities, args=(measured_density_a, measured_density_b,
                   meas_dens_a_uncert, meas_dens_b_uncert, np.array([bright_frac, field_frac]), avg_a_areas,
                   avg_b_areas, wht_a_areas, wht_b_areas), x0=[1, tot_density_a, tot_density_b], jac=True,
                   method='L-BFGS-B', bounds=[(0, None), (0, None), (0, None)])

    nc, nfa, nfb = res.x

    bm = np.empty((len(b_bins)-1, len(a_bins)-1), float, order='F')
    z = np.empty(len(a_bins)-1, float)

    # Average area here might be wrong too, but since we deal in small magnitude
    # slices the approximations should hold. Again, both a-catalogue magnitude
    # arrays need to be the "cut" versions to match a_size's shape, but catalogue
    # b versions need to be full to ensure cross-catalogue indexing works.
    mag_mask, aa = plf.brightest_mag(auf_cdf_a, a_mag_cut, b_mag, a_inds, a_size, bright_frac, a_b_area,
                                     a_flags_cut, b_flags, a_bins)
    mag_mask = mag_mask.astype(bool)
    for i in range(0, len(a_bins)-1):
        hist, b_bins = np.histogram(b_mag[mag_mask[:, i]], bins=b_bins)
        q = np.sum(mag_mask[:, i])
        if q > 0:
            bm[:, i] = hist/(np.diff(b_bins)*q)
        else:
            bm[:, i] = 0
        z[i] = np.sum(hist)/np.sum(a_cuts[i])
    cdmdm = np.empty((len(b_bins)-1, len(a_bins)-1), float, order='F')
    for i in range(0, len(a_bins)-1):
        # Within the loop, we only want to consider a catalogue, effectively, of
        # "a" sources with magitude [a_bins[i], a_bins[i+1]]. We therefore
        # get_field_dists with a slice in catalogue a, but no slice in b.
        b_masks = plf.get_field_dists(
            auf_cdf_b, b_inds, b_size, np.array([bright_frac, field_frac]), b_flags_cut, b_mag_cut, a_flags,
            a_mag, -999, 999, a_bins[i], a_bins[i+1])
        # However, to calculate the opposite masking -- and get biased-density
        # measurements of the "field" density of surviving objects in catalogue
        # "a" with narrow-range brightnesses -- we slice in a still, and not in b,
        # remembering that in the function call a and b will be swapped, with "a"
        # now the first-passed catalogue (either a or b).
        a_masks = plf.get_field_dists(
            auf_cdf_a, a_inds, a_size, np.array([bright_frac, field_frac]), a_flags_cut, a_mag_cut, b_flags,
            b_mag, a_bins[i], a_bins[i+1], -999, 999)

        a_mag_filter = (a_mag_cut >= a_bins[i]) & (a_mag_cut <= a_bins[i+1])

        measured_density_a = np.sum(a_masks, axis=0) / area
        measured_density_b = np.sum(b_masks, axis=0) / area
        meas_dens_a_uncert = np.sqrt(measured_density_a / area)
        meas_dens_b_uncert = np.sqrt(measured_density_b / area)
        tot_density_a = np.sum(a_flags_cut & a_mag_filter) / area
        tot_dens_a_uncert = np.sqrt(tot_density_a / area)
        tot_density_b = np.sum(b_flags_cut) / area
        tot_dens_b_uncert = np.sqrt(tot_density_b / area)

        measured_density_a = np.array([tot_density_a, *measured_density_a])
        measured_density_b = np.array([tot_density_b, *measured_density_b])
        meas_dens_a_uncert = np.sqrt(np.array([tot_dens_a_uncert, *meas_dens_a_uncert])**2 + 1**2)
        meas_dens_b_uncert = np.sqrt(np.array([tot_dens_b_uncert, *meas_dens_b_uncert])**2 + 1**2)

        # Also filter the a-catalogue objects for being within our magnitude
        # range, but don't do anything to b, so re-use the previous one.
        if np.sum((a_b_area > 0) & a_mag_filter) > 0:
            aba_perc = np.percentile(a_b_area[(a_b_area > 0) & a_mag_filter], pc)
        else:
            aba_perc = np.zeros(len(pc), float)
        if np.sum((a_f_area > 0) & a_mag_filter) > 0:
            afa_perc = np.percentile(a_f_area[(a_f_area > 0) & a_mag_filter], pc)
        else:
            afa_perc = np.zeros(len(pc), float)
        avg_a_areas = np.array([aba_perc, afa_perc]).T
        wht_a_areas = 0.1 * np.ones_like(avg_a_areas)

        res = minimize(calculate_prior_densities, args=(measured_density_a, measured_density_b,
                       meas_dens_a_uncert, meas_dens_b_uncert, np.array([bright_frac, field_frac]),
                       avg_a_areas, avg_b_areas, wht_a_areas, wht_b_areas),
                       x0=[1, tot_density_a, tot_density_b], jac=True, method='L-BFGS-B',
                       bounds=[(0, None), (0, None), (0, None)])

        _, _, _nfb = res.x

        bmask = b_masks[:, 1].astype(bool)
        b_left = b_mag_cut[bmask]
        hist, b_bins = np.histogram(b_left, bins=b_bins)
        _fb = hist / (np.sum(hist)*np.diff(b_bins))

        fm = np.append(0, np.cumsum(_fb[:-1] * np.diff(b_bins[:-1])))
        for j in range(0, len(b_bins)-1):
            cm = np.sum(cdmdm[:j, i]*np.diff(b_bins[:j+1]))
            cdmdm[j, i] = max(0, z[i]*bm[j, i]*np.exp(aa[i]*_nfb*fm[j]) - (1-cm)*aa[i]*_nfb*_fb[j])
        # What we get at the end is, as per Naylor et al. (2013), Zc c. They
        # convert directly to X c, but we have split that out above into Nc
        # already. Here we therefore don't care about the constant in front
        # of c(m | m), and simply normalise.
        if np.sum(cdmdm[:, i] * np.diff(b_bins)) > 0:
            cdmdm[:, i] /= np.sum(cdmdm[:, i] * np.diff(b_bins))

    integral = 0
    for i in range(0, len(a_bins)-1):
        cdmdm[:, i] *= pa[i]
        integral = integral + np.sum(cdmdm[:, i]*np.diff(b_bins))*(a_bins[i+1] - a_bins[i])
    if integral > 0:
        cdmdm /= integral

    # If any density is NaN, or Nc and at least one of Nfa or Nfb are
    # non-positive, then we have to warn and quit, since we won't be able to
    # recover that below. Similarly, we can't allow for a dependency-based
    # density condition where Nc is set based on one Nf and then the other Nf
    # is set based on Nc, since we don't know which counterpart density to use.
    # pylint: disable-next=too-many-boolean-expressions
    if (np.any(np.isnan([nc, nfa, nfb])) or (nc <= 0 and (nfa <= 0 or nfb <= 0)) or
            (nfa <= 0.01 * nc and nc <= 0.01 * nfb) or (nfb <= 0.01 * nc and nc <= 0.01 * nfa)):
        raise ValueError("Incorrect prior densities, unable to process chunk.")
    # Otherwise, if simply set to 1% the lowest of the (valid) field densities.
    if nfa > 0 and nfb > 0 and nc <= 0.01 * min(nfa, nfb):
        nc = 0.01 * min(nfa, nfb)
    # We would have conditions something like nfb <= 0 and nc <= 0.01 * nfa
    # and nfa > 0, to test the asymmetric cases, implicitly requiring nc > 0 to
    # avoid the raise above, but these will always trigger one of the latter
    # two error-criteria above.

    # If either field density is too small, set to 1% the counterpart density,
    # similarly. nfa <= 0.01 * nc implies nc > 0.01 * nfb or we'd've triggered
    # the latter two error-criteria again, though, so nc > 0 requires nfb > 0.
    if nfa <= 0.01 * nc and nc > 0:
        nfa = 0.01 * nc
    if nfb <= 0.01 * nc and nc > 0:
        nfb = 0.01 * nc

    return nc, cdmdm, nfa, fa, nfb, fb


def calculate_prior_densities(model_densities, rho, phi, o_rho, o_phi, fs, as_rho, as_phi, ws_rho, ws_phi):
    '''
    Calculate the joint-catalogue counterpart density and separate catalogue
    non-matched ("field") densities of two catalogues based on a series of
    measured densities. In each case, objects within a particular integrated
    fraction of object pairs being counterparts given their separation and
    corresponding positional uncertainties are removed from consideration,
    and "surviving" object densities are computed, to break the degeneracy
    between counterparts and non-counterparts.

    Parameters
    ----------
    model_densities : list or numpy.ndarray
        Three-element array with counterpart, catalogue a's field density,
        and catalogue b's field density in, evaluated by the minimisation.
    rho : list or numpy.ndarray
        Measured densities for catalogue a, at each fraction in ``fs``.
    phi : list or numpy.ndarray
        Catalogue b density measurements, corresponding to ``fs``.
    o_rho : list or numpy.ndarray
        Uncertainty in measured ``rho`` densities.
    o_phi : list or numpy.ndarray
        ``phi`` uncertainties.
    fs : list or numpy.ndarray
        Each CDF fraction used to remove potential counterparts (true or
        otherwise) in measuring ``rho`` and ``phi``.
    as_rho : list of lists or numpy.ndarray
        Average "error circle" area, the average area for a radius out to which
        all objects in catalogue a integrated to get to each ``fs`` CDF, for
        each CDF fraction. A weighted distribution is given for each ``fs``,
        corresponding to ``ws_rho``.
    as_phi : list of lists or numpy.ndarray
        Average catalogue b error circle area, corresponding to each
        ``fs`` CDF with an associated weight in ``ws_phi``.
    ws_rho : list of lists or numpy.ndarray
        Array of shape ``(X, Y)``, with fractions ``fs`` of length ``Y``. Each
        fraction has a set of error circle areas with corresponding weights,
        such that a sum along ``ws_rho[:, i]`` is unity, weighting the ``i``th
        fraction's error circles.
    ws_phi : list of lists or numpy.ndarray
        The joint error circle-fraction weights for catalogue ``phi``.

    Returns
    -------
    lst_sq : float
        The chi-squared fit between the model measured-densities, evaluated at
        each ``fs``, and ``rho`` and ``phi``.
    lst_sq_grad : numpy.ndarray
        The gradient of ``lst_sq`` with respect to each element of
        ``model_densities``.
    '''
    def calculate_dens_and_grad(t, u, v, f_s, a_s, w_s):
        '''
        Calculate the model for measured number counts based on
        joint-counterpart and randomly aligned source densities of two
        catalogues.

        Parameters
        ----------
        t : float
            Density, sources per square degree, of counterparts
        u : float
            Catalogue a's field density.
        v : float
            Catalogue b's field density.
        f_s : list or numpy.ndarray
            Set of CDF fractions at which catalogue densities have been sampled,
            removing some percentage of counterparts from the measured densities.
        a_s : list of lists or numpy.ndarray
            Set of average "error circle" areas, shape ``(X, Y)``, with ``X`` the
            number of different error circle areas to weight by and ``Y``
            corresponding to each ``f_s`` integral, which accounts for some
            percentage of removed chance-alignment sources from the measured
            density.
        w_s : list of lists or numpy.ndarray
            Two-dimensional array, shape ``(X, Y)``, the weights for each area
            for each set of fractions ``f_s``.

        Returns
        -------
        weighted_model : float
            The expected calculated overlap density between the two catalogues
            based on counterpart and non-matching densities and percentile
            integration for removal from measurements.
        weighted_model_grad : numpy.ndarray
            The gradient of the model with respect to ``t``, ``u``, and ``v``.
        '''
        weighted_model = np.zeros((len(f_s)), float)
        weighted_model_grad = np.zeros((3, len(f_s)), float)
        for a, w in zip(a_s, w_s):
            # d/d[tv] o_m_e = -pi r^2 o_m_e = -a o_m_e
            o_m_e = 1 - e(t, v, a)
            model = w * (((1 - f_s) * t + u) * o_m_e)

            dmodel_dt = w * ((1 - f_s) * o_m_e - a * model)
            dmodel_du = w * o_m_e
            dmodel_dv = w * -a * model

            weighted_model += model
            weighted_model_grad += np.array([dmodel_dt, dmodel_du, dmodel_dv])

        return weighted_model, weighted_model_grad

    def e(g, h, a):
        '''
        Calculate CDF of randomly aligned nearest-neighbour distribution.

        Parameters
        ----------
        g : float
            Density of one dataset.
        h : float
            Density of second dataset.
        a : float
            Area enclosed by radius out to which to consider potential
            neighbours.

        Returns
        -------
        float
            CDF, 1 - exp(-pi r^2 (g + h)).
        '''
        return 1 - np.exp(-a * (g + h))

    x, y, z = model_densities
    # The first measurement we make is for F=0, not passed through the fs array,
    # but IS passed through rho and phi as their first elements. This is a
    # simple function, and gives P = X + Y, Q = X + Z.
    p_0 = x + y
    q_0 = x + z
    p_0_grad = np.array([1, 1, 0])
    q_0_grad = np.array([1, 0, 1])
    # Flip y and z for the two calls!
    p, p_grad = calculate_dens_and_grad(x, y, z, fs, as_rho, ws_rho)
    q, q_grad = calculate_dens_and_grad(x, z, y, fs, as_phi, ws_phi)
    # Reverse the q_grad order, so that what is actually y is the 2nd element.
    q_grad = np.array([q_grad[0], q_grad[2], q_grad[1]])

    p = np.array([p_0, *p])
    q = np.array([q_0, *q])
    p_grad = np.array([[p_0_grad[0], *p_grad[0]], [p_0_grad[1], *p_grad[1]], [p_0_grad[2], *p_grad[2]]])
    q_grad = np.array([[q_0_grad[0], *q_grad[0]], [q_0_grad[1], *q_grad[1]], [q_0_grad[2], *q_grad[2]]])

    lst_sq = np.sum((p - rho)**2 / o_rho**2 + (q - phi)**2 / o_phi**2)
    lst_sq_grad = np.empty(3, float)
    lst_sq_grad[0] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[0] + 2 * (q - phi) / o_phi**2 * q_grad[0])
    lst_sq_grad[1] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[1] + 2 * (q - phi) / o_phi**2 * q_grad[1])
    lst_sq_grad[2] = np.sum(2 * (p - rho) / o_rho**2 * p_grad[2] + 2 * (q - phi) / o_phi**2 * q_grad[2])

    return lst_sq, lst_sq_grad