# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module provides the framework for grouping sources from two photometric catalogues into
distinct "islands" of sources, along with calculating whether they are within overlap for
various photometric integral purposes.
'''
import datetime
import sys
import numpy as np
# pylint: disable=import-error,no-name-in-module
from macauff.group_sources_fortran import group_sources_fortran as gsf
from macauff.make_set_list import set_list
from macauff.misc_functions import calculate_overlap_counts, convex_hull_area
from macauff.misc_functions_fortran import misc_functions_fortran as mff
# pylint: enable=import-error,no-name-in-module
__all__ = ['make_island_groupings']
# pylint: disable-next=too-many-locals,too-many-statements
[docs]
def make_island_groupings(cm):
'''
Function to handle the creation of "islands" of astrometrically coeval
sources, and identify which overlap to some probability based on their
combined AUFs.
Parameters
----------
cm : Class
The cross-match wrapper, containing all of the necessary metadata to
perform the cross-match and determine match islands.
'''
# Convert from arcseconds to degrees internally.
max_sep = np.copy(cm.pos_corr_dist) / 3600
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating maximum overlap...")
sys.stdout.flush()
# Load the astrometry of each catalogue.
a_full = cm.a_astro
b_full = cm.b_astro
# The initial step to create island groupings is to find the largest number
# of overlaps for a single source, to minimise the size of the array of
# overlap indices.
_ainds = calculate_overlap_counts(a_full, b_full, -999, 999, max_sep, cm.n_pool, np.nan, 0, 1,
cm.cf_region_frame, 'array', cm.chunk_id)
_binds = calculate_overlap_counts(b_full, a_full, -999, 999, max_sep, cm.n_pool, np.nan, 0, 1,
cm.cf_region_frame, 'array', cm.chunk_id)
amaxsize = int(np.amax([len(x) for x in _ainds]))
bmaxsize = int(np.amax([len(x) for x in _binds]))
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Truncating star overlaps by AUF integral...")
sys.stdout.flush()
ainds = np.ones(dtype=int, shape=(amaxsize, len(a_full)), order='F') * -1
binds = np.ones(dtype=int, shape=(bmaxsize, len(b_full)), order='F') * -1
# Populate the indices into a fortran-acceptable grid, rather than nested list.
for i, x in enumerate(_ainds):
ainds[:len(x), i] = x
for i, x in enumerate(_binds):
binds[:len(x), i] = x
asize = np.array([np.sum(ainds[:, i] >= 0) for i in range(ainds.shape[1])])
bsize = np.array([np.sum(binds[:, i] >= 0) for i in range(binds.shape[1])])
ainds, binds, asize, bsize, auf_cdf_a, auf_cdf_b = gsf.get_overlap_indices(
a_full[:, 0], a_full[:, 1], b_full[:, 0], b_full[:, 1], ainds, asize, binds, bsize, amaxsize,
bmaxsize, a_full[:, 2], b_full[:, 2], cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s,
cm.a_perturb_auf_outputs['fourier_grid'], cm.b_perturb_auf_outputs['fourier_grid'], cm.a_modelrefinds,
cm.b_modelrefinds, cm.int_fracs[2])
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Cleaning overlaps...")
sys.stdout.flush()
# Clean arrays for any potential unnecessary extra rows caused by the largest
# overlap number being reduced due to the additional criteria for match in
# get_overlap_indices vs a naive radius-based search. In some cases we will
# then have e.g. np.all(ainds[-1, :] == -1) evaluating to True, and might
# as well get rid of the extraneous column.
ainds = np.asfortranarray(ainds[:np.amax(asize), :])
binds = np.asfortranarray(binds[:np.amax(bsize), :])
amaxsize = int(np.amax(asize))
bmaxsize = int(np.amax(bsize))
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating integral lengths...")
sys.stdout.flush()
if cm.include_phot_like or cm.use_phot_priors:
a_err = a_full[:, 2]
b_err = b_full[:, 2]
a_fouriergrid = cm.a_perturb_auf_outputs['fourier_grid']
b_fouriergrid = cm.b_perturb_auf_outputs['fourier_grid']
a_int_areas = gsf.get_integral_length(
a_err, b_err, cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s, a_fouriergrid, b_fouriergrid,
cm.a_modelrefinds, cm.b_modelrefinds, ainds, asize, cm.int_fracs[0:2])
ab_area = a_int_areas[:, 0]
af_area = a_int_areas[:, 1]
b_int_areas = gsf.get_integral_length(
b_err, a_err, cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s, b_fouriergrid, a_fouriergrid,
cm.b_modelrefinds, cm.a_modelrefinds, binds, bsize, cm.int_fracs[0:2])
bb_area = b_int_areas[:, 0]
bf_area = b_int_areas[:, 1]
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Maximum overlaps are:", amaxsize, bmaxsize)
sys.stdout.flush()
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Finding unique sets...")
sys.stdout.flush()
set_list_items = set_list(ainds, binds, asize, bsize, cm.n_pool)
if len(set_list_items) == 6:
alist, blist, agrplen, bgrplen, areject, breject = set_list_items
reject_flag = True
else:
alist, blist, agrplen, bgrplen = set_list_items # pylint: disable=unbalanced-tuple-unpacking
reject_flag = False
# The final act of creating island groups is to clear out any sources too
# close to the edge of the catalogue -- defined by its rectangular extend.
# pylint: disable-next=fixme
# TODO: add flag for allowing the keeping of potentially incomplete islands
# in the main catalogue; here we default to, and only allow, their removal.
passed_check = np.zeros(dtype=bool, shape=(alist.shape[1],))
failed_check = np.ones(dtype=bool, shape=(alist.shape[1],))
num_a_failed_checks = 0
num_b_failed_checks = 0
_, a_hull_points, a_hull_x_shift = convex_hull_area(
a_full[:, 0], a_full[:, 1], return_hull=True)
_, b_hull_points, b_hull_x_shift = convex_hull_area(
b_full[:, 0], b_full[:, 1], return_hull=True)
cm.a_hull_points = a_hull_points
cm.a_hull_x_shift = a_hull_x_shift
cm.b_hull_points = b_hull_points
cm.b_hull_x_shift = b_hull_x_shift
# "Pad factor" to allow for a percentage of missing search circle in cases
# where e.g. we are 0-360 wrapped and have a "missing" meridian slice.
# acceptable_lost_circle_frac here is equal to A_segment / A_circle, giving
# an inflation factor for our search radius x \equiv d/r.
acceptable_lost_circle_frac = 0.1
x = np.linspace(0, 1, 1001)
f = np.arccos(x) - x * np.sqrt(1 - x**2)
ind = np.argmin(np.abs(f - np.pi * acceptable_lost_circle_frac))
new_max_sep = max_sep / x[ind]
seed = np.random.default_rng().choice(100000, size=(mff.get_random_seed_size(), len(a_full)))
a_hull_radius_overlap = mff.get_circle_area_overlap(
a_full[:, 0] + a_hull_x_shift, a_full[:, 1], new_max_sep,
np.append(a_hull_points[:, 0], a_hull_points[0, 0]),
np.append(a_hull_points[:, 1], a_hull_points[0, 1]), seed)
a_hull_overlap_frac = a_hull_radius_overlap / (np.pi * new_max_sep**2)
seed = np.random.default_rng().choice(100000, size=(mff.get_random_seed_size(), len(b_full)))
b_hull_radius_overlap = mff.get_circle_area_overlap(
b_full[:, 0] + b_hull_x_shift, b_full[:, 1], new_max_sep,
np.append(b_hull_points[:, 0], b_hull_points[0, 0]),
np.append(b_hull_points[:, 1], b_hull_points[0, 1]), seed)
b_hull_overlap_frac = b_hull_radius_overlap / (np.pi * new_max_sep**2)
for i in range(alist.shape[1]):
ahof = a_hull_overlap_frac[alist[:agrplen[i], i]]
bhof = b_hull_overlap_frac[blist[:bgrplen[i], i]]
# A failed check would be any where *hof is too small, and hence we
# lost some of the circle to being outside of the hull. If no objects
# have fractions that are below some critical value then we can keep
# the island.
dist_check = np.all(ahof > (1 - acceptable_lost_circle_frac)) & np.all(
bhof > (1 - acceptable_lost_circle_frac))
if dist_check:
passed_check[i] = 1
failed_check[i] = 0
else:
# While "good" islands just need their total number incrementing
# for the group, "failed" islands we need to track the number of
# sources in each catalogue for.
num_a_failed_checks += agrplen[i]
num_b_failed_checks += bgrplen[i]
# If set_list returned any rejected sources, then add any sources too close
# to match extent to those now. Ensure that we only reject the unique source IDs
# across each island group, ignoring the "default" -1 index.
if reject_flag:
a_first_rejected_len = len(areject) # pylint: disable=possibly-used-before-assignment
else:
a_first_rejected_len = 0
if num_a_failed_checks + a_first_rejected_len > 0:
reject_a = np.zeros(dtype=int, shape=(num_a_failed_checks+a_first_rejected_len,))
if reject_flag:
reject_a[num_a_failed_checks:] = areject # pylint: disable=used-before-assignment
if reject_flag:
b_first_rejected_len = len(breject) # pylint: disable=possibly-used-before-assignment
else:
b_first_rejected_len = 0
if num_b_failed_checks + b_first_rejected_len > 0:
reject_b = np.zeros(dtype=int, shape=(num_b_failed_checks+b_first_rejected_len,))
if reject_flag:
reject_b[num_b_failed_checks:] = breject # pylint: disable=used-before-assignment
if reject_flag:
alist_reject = alist[:, failed_check]
reject_a[:num_a_failed_checks] = alist_reject[alist_reject > -1]
blist_reject = blist[:, failed_check]
reject_b[:num_b_failed_checks] = blist_reject[blist_reject > -1]
else:
reject_a = alist[:, failed_check]
reject_a = reject_a[reject_a > -1]
reject_b = blist[:, failed_check]
reject_b = reject_b[reject_b > -1]
# This should basically be alist = alist[:, passed_check] and
# agrplen = agrplen[passed_check], simply removing those above failed
# islands from the list, analagous to the same functionality in set_list.
alist = alist[:, passed_check]
agrplen = agrplen[passed_check]
blist = blist[:, passed_check]
bgrplen = bgrplen[passed_check]
# Only return a[bf]_area and b[bf]_area if they were created
if not (cm.include_phot_like or cm.use_phot_priors):
ab_area = bb_area = None
af_area = bf_area = None
auf_cdf_a = auf_cdf_b = None
# Only return reject counts if they were created
if num_a_failed_checks + a_first_rejected_len > 0:
lenrejecta = len(reject_a)
# Save rejects output files.
cm.reject_a = reject_a
else:
lenrejecta = 0
cm.reject_a = None
if num_b_failed_checks + b_first_rejected_len > 0:
lenrejectb = len(reject_b)
cm.reject_b = reject_b
else:
lenrejectb = 0
cm.reject_b = None
cm.ab_area = ab_area # pylint: disable=possibly-used-before-assignment
cm.bb_area = bb_area # pylint: disable=possibly-used-before-assignment
cm.ainds = ainds
cm.binds = binds
cm.asize = asize
cm.bsize = bsize
cm.af_area = af_area # pylint: disable=possibly-used-before-assignment
cm.bf_area = bf_area # pylint: disable=possibly-used-before-assignment
cm.alist = alist
cm.blist = blist
cm.agrplen = agrplen
cm.bgrplen = bgrplen
cm.lenrejecta = lenrejecta
cm.lenrejectb = lenrejectb
cm.auf_cdf_a = auf_cdf_a
cm.auf_cdf_b = auf_cdf_b