Source code for macauff.group_sources

# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module provides the framework for grouping sources from two photometric catalogues into
distinct "islands" of sources, along with calculating whether they are within overlap for
various photometric integral purposes.
'''

import datetime
import sys

import numpy as np

# pylint: disable=import-error,no-name-in-module
from macauff.group_sources_fortran import group_sources_fortran as gsf
from macauff.make_set_list import set_list
from macauff.misc_functions import calculate_overlap_counts, convex_hull_area
from macauff.misc_functions_fortran import misc_functions_fortran as mff

# pylint: enable=import-error,no-name-in-module

__all__ = ['make_island_groupings']


# pylint: disable-next=too-many-locals,too-many-statements
[docs] def make_island_groupings(cm): ''' Function to handle the creation of "islands" of astrometrically coeval sources, and identify which overlap to some probability based on their combined AUFs. Parameters ---------- cm : Class The cross-match wrapper, containing all of the necessary metadata to perform the cross-match and determine match islands. ''' # Convert from arcseconds to degrees internally. max_sep = np.copy(cm.pos_corr_dist) / 3600 t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating maximum overlap...") sys.stdout.flush() # Load the astrometry of each catalogue. a_full = cm.a_astro b_full = cm.b_astro # The initial step to create island groupings is to find the largest number # of overlaps for a single source, to minimise the size of the array of # overlap indices. _ainds = calculate_overlap_counts(a_full, b_full, -999, 999, max_sep, cm.n_pool, np.nan, 0, 1, cm.cf_region_frame, 'array', cm.chunk_id) _binds = calculate_overlap_counts(b_full, a_full, -999, 999, max_sep, cm.n_pool, np.nan, 0, 1, cm.cf_region_frame, 'array', cm.chunk_id) amaxsize = int(np.amax([len(x) for x in _ainds])) bmaxsize = int(np.amax([len(x) for x in _binds])) t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Truncating star overlaps by AUF integral...") sys.stdout.flush() ainds = np.ones(dtype=int, shape=(amaxsize, len(a_full)), order='F') * -1 binds = np.ones(dtype=int, shape=(bmaxsize, len(b_full)), order='F') * -1 # Populate the indices into a fortran-acceptable grid, rather than nested list. for i, x in enumerate(_ainds): ainds[:len(x), i] = x for i, x in enumerate(_binds): binds[:len(x), i] = x asize = np.array([np.sum(ainds[:, i] >= 0) for i in range(ainds.shape[1])]) bsize = np.array([np.sum(binds[:, i] >= 0) for i in range(binds.shape[1])]) ainds, binds, asize, bsize, auf_cdf_a, auf_cdf_b = gsf.get_overlap_indices( a_full[:, 0], a_full[:, 1], b_full[:, 0], b_full[:, 1], ainds, asize, binds, bsize, amaxsize, bmaxsize, a_full[:, 2], b_full[:, 2], cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s, cm.a_perturb_auf_outputs['fourier_grid'], cm.b_perturb_auf_outputs['fourier_grid'], cm.a_modelrefinds, cm.b_modelrefinds, cm.int_fracs[2]) t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Cleaning overlaps...") sys.stdout.flush() # Clean arrays for any potential unnecessary extra rows caused by the largest # overlap number being reduced due to the additional criteria for match in # get_overlap_indices vs a naive radius-based search. In some cases we will # then have e.g. np.all(ainds[-1, :] == -1) evaluating to True, and might # as well get rid of the extraneous column. ainds = np.asfortranarray(ainds[:np.amax(asize), :]) binds = np.asfortranarray(binds[:np.amax(bsize), :]) amaxsize = int(np.amax(asize)) bmaxsize = int(np.amax(bsize)) t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Calculating integral lengths...") sys.stdout.flush() if cm.include_phot_like or cm.use_phot_priors: a_err = a_full[:, 2] b_err = b_full[:, 2] a_fouriergrid = cm.a_perturb_auf_outputs['fourier_grid'] b_fouriergrid = cm.b_perturb_auf_outputs['fourier_grid'] a_int_areas = gsf.get_integral_length( a_err, b_err, cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s, a_fouriergrid, b_fouriergrid, cm.a_modelrefinds, cm.b_modelrefinds, ainds, asize, cm.int_fracs[0:2]) ab_area = a_int_areas[:, 0] af_area = a_int_areas[:, 1] b_int_areas = gsf.get_integral_length( b_err, a_err, cm.r[:-1]+cm.dr/2, cm.rho[:-1], cm.drho, cm.j1s, b_fouriergrid, a_fouriergrid, cm.b_modelrefinds, cm.a_modelrefinds, binds, bsize, cm.int_fracs[0:2]) bb_area = b_int_areas[:, 0] bf_area = b_int_areas[:, 1] t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Maximum overlaps are:", amaxsize, bmaxsize) sys.stdout.flush() t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"{t} Rank {cm.rank}, chunk {cm.chunk_id}: Finding unique sets...") sys.stdout.flush() set_list_items = set_list(ainds, binds, asize, bsize, cm.n_pool) if len(set_list_items) == 6: alist, blist, agrplen, bgrplen, areject, breject = set_list_items reject_flag = True else: alist, blist, agrplen, bgrplen = set_list_items # pylint: disable=unbalanced-tuple-unpacking reject_flag = False # The final act of creating island groups is to clear out any sources too # close to the edge of the catalogue -- defined by its rectangular extend. # pylint: disable-next=fixme # TODO: add flag for allowing the keeping of potentially incomplete islands # in the main catalogue; here we default to, and only allow, their removal. passed_check = np.zeros(dtype=bool, shape=(alist.shape[1],)) failed_check = np.ones(dtype=bool, shape=(alist.shape[1],)) num_a_failed_checks = 0 num_b_failed_checks = 0 _, a_hull_points, a_hull_x_shift = convex_hull_area( a_full[:, 0], a_full[:, 1], return_hull=True) _, b_hull_points, b_hull_x_shift = convex_hull_area( b_full[:, 0], b_full[:, 1], return_hull=True) cm.a_hull_points = a_hull_points cm.a_hull_x_shift = a_hull_x_shift cm.b_hull_points = b_hull_points cm.b_hull_x_shift = b_hull_x_shift # "Pad factor" to allow for a percentage of missing search circle in cases # where e.g. we are 0-360 wrapped and have a "missing" meridian slice. # acceptable_lost_circle_frac here is equal to A_segment / A_circle, giving # an inflation factor for our search radius x \equiv d/r. acceptable_lost_circle_frac = 0.1 x = np.linspace(0, 1, 1001) f = np.arccos(x) - x * np.sqrt(1 - x**2) ind = np.argmin(np.abs(f - np.pi * acceptable_lost_circle_frac)) new_max_sep = max_sep / x[ind] seed = np.random.default_rng().choice(100000, size=(mff.get_random_seed_size(), len(a_full))) a_hull_radius_overlap = mff.get_circle_area_overlap( a_full[:, 0] + a_hull_x_shift, a_full[:, 1], new_max_sep, np.append(a_hull_points[:, 0], a_hull_points[0, 0]), np.append(a_hull_points[:, 1], a_hull_points[0, 1]), seed) a_hull_overlap_frac = a_hull_radius_overlap / (np.pi * new_max_sep**2) seed = np.random.default_rng().choice(100000, size=(mff.get_random_seed_size(), len(b_full))) b_hull_radius_overlap = mff.get_circle_area_overlap( b_full[:, 0] + b_hull_x_shift, b_full[:, 1], new_max_sep, np.append(b_hull_points[:, 0], b_hull_points[0, 0]), np.append(b_hull_points[:, 1], b_hull_points[0, 1]), seed) b_hull_overlap_frac = b_hull_radius_overlap / (np.pi * new_max_sep**2) for i in range(alist.shape[1]): ahof = a_hull_overlap_frac[alist[:agrplen[i], i]] bhof = b_hull_overlap_frac[blist[:bgrplen[i], i]] # A failed check would be any where *hof is too small, and hence we # lost some of the circle to being outside of the hull. If no objects # have fractions that are below some critical value then we can keep # the island. dist_check = np.all(ahof > (1 - acceptable_lost_circle_frac)) & np.all( bhof > (1 - acceptable_lost_circle_frac)) if dist_check: passed_check[i] = 1 failed_check[i] = 0 else: # While "good" islands just need their total number incrementing # for the group, "failed" islands we need to track the number of # sources in each catalogue for. num_a_failed_checks += agrplen[i] num_b_failed_checks += bgrplen[i] # If set_list returned any rejected sources, then add any sources too close # to match extent to those now. Ensure that we only reject the unique source IDs # across each island group, ignoring the "default" -1 index. if reject_flag: a_first_rejected_len = len(areject) # pylint: disable=possibly-used-before-assignment else: a_first_rejected_len = 0 if num_a_failed_checks + a_first_rejected_len > 0: reject_a = np.zeros(dtype=int, shape=(num_a_failed_checks+a_first_rejected_len,)) if reject_flag: reject_a[num_a_failed_checks:] = areject # pylint: disable=used-before-assignment if reject_flag: b_first_rejected_len = len(breject) # pylint: disable=possibly-used-before-assignment else: b_first_rejected_len = 0 if num_b_failed_checks + b_first_rejected_len > 0: reject_b = np.zeros(dtype=int, shape=(num_b_failed_checks+b_first_rejected_len,)) if reject_flag: reject_b[num_b_failed_checks:] = breject # pylint: disable=used-before-assignment if reject_flag: alist_reject = alist[:, failed_check] reject_a[:num_a_failed_checks] = alist_reject[alist_reject > -1] blist_reject = blist[:, failed_check] reject_b[:num_b_failed_checks] = blist_reject[blist_reject > -1] else: reject_a = alist[:, failed_check] reject_a = reject_a[reject_a > -1] reject_b = blist[:, failed_check] reject_b = reject_b[reject_b > -1] # This should basically be alist = alist[:, passed_check] and # agrplen = agrplen[passed_check], simply removing those above failed # islands from the list, analagous to the same functionality in set_list. alist = alist[:, passed_check] agrplen = agrplen[passed_check] blist = blist[:, passed_check] bgrplen = bgrplen[passed_check] # Only return a[bf]_area and b[bf]_area if they were created if not (cm.include_phot_like or cm.use_phot_priors): ab_area = bb_area = None af_area = bf_area = None auf_cdf_a = auf_cdf_b = None # Only return reject counts if they were created if num_a_failed_checks + a_first_rejected_len > 0: lenrejecta = len(reject_a) # Save rejects output files. cm.reject_a = reject_a else: lenrejecta = 0 cm.reject_a = None if num_b_failed_checks + b_first_rejected_len > 0: lenrejectb = len(reject_b) cm.reject_b = reject_b else: lenrejectb = 0 cm.reject_b = None cm.ab_area = ab_area # pylint: disable=possibly-used-before-assignment cm.bb_area = bb_area # pylint: disable=possibly-used-before-assignment cm.ainds = ainds cm.binds = binds cm.asize = asize cm.bsize = bsize cm.af_area = af_area # pylint: disable=possibly-used-before-assignment cm.bf_area = bf_area # pylint: disable=possibly-used-before-assignment cm.alist = alist cm.blist = blist cm.agrplen = agrplen cm.bgrplen = bgrplen cm.lenrejecta = lenrejecta cm.lenrejectb = lenrejectb cm.auf_cdf_a = auf_cdf_a cm.auf_cdf_b = auf_cdf_b