Source code for macauff.make_set_list

# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module hosts the functions to create overlapping groups of "islands" of
objects, astrometrically in common based on relative sky separation compared to
their respective uncertainties across two catalogues.
'''

import itertools
import sys
import warnings

import numpy as np
import scipy.special

__all__ = ['set_list']

from macauff.misc_functions import make_pool


[docs] def set_list(aindices, bindices, aoverlap, boverlap, n_pool): ''' Creates the inter-catalogue groupings between catalogues "a" and "b", based on previously determined individual source "overlaps" in astrometry. Parameters ---------- aindices : numpy.ndarray The indices into catalogue "b", for each catalogue "a" source, that have been determined to be potentially positionally correlated. bindices : numpy.ndarray The equivalent to ``aindices``, but mapping the overlaps in catalogue "a" for each catalogue "b" source. aoverlap : numpy.ndarray The number of overlaps for each catalogue "a" source (i.e., the length of each row in ``aindices`` for each source). boverlap : numpy.ndarray The equivalent number of overlaps for each catalogue "b" object. n_pool : integer Number of multiprocessing pools to use when parallelising. Returns ------- alist : numpy.ndarray The indices of all catalogue "a" sources that are in a common "island" group together. Each row of ``alist`` indicates all "a" sources potentially positionally correlated. blist : numpy.ndarray The indices of all catalogue "b" objects in the same groups (as other "b" sources, as well as mapping to catalogue "a" objects). agrouplengths : numpy.ndarray The number of catalogue "a" sources in each unique "island". bgrouplengths : numpy.ndarray The number of catalogue "b" sources in each island grouping. ''' things = _initial_group_numbering(aindices, bindices, aoverlap, boverlap) if len(things) == 3: agroup, bgroup, no_fly_list = things else: agroup, bgroup = things groupmax = max(np.amax(agroup), np.amax(bgroup)) if len(things) == 3: no_fly_flag = True a_no_fly = no_fly_list[0] b_no_fly = no_fly_list[1] warnings.warn(f"{len(a_no_fly)}/{len(aoverlap)} catalogue a and {len(b_no_fly)}/{len(boverlap)} " "catalogue b stars were removed for failing to have their island solutions solved. " "Please check any results carefully.") sys.stdout.flush() else: no_fly_flag = False agrouplengths = np.zeros(dtype=int, shape=(groupmax,)) bgrouplengths = np.zeros(dtype=int, shape=(groupmax,)) for agrp in agroup: if agrp > 0: agrouplengths[agrp-1] += 1 for bgrp in bgroup: if bgrp > 0: bgrouplengths[bgrp-1] += 1 # Search for any island groupings which are too large to calculate the # permutations of reasonably (limiting at 50,000). When considering a set, # the total number of matches considered is: # sum k from 0 to min(len(a), len(b)) [#] of k-combinations of a times # [#] of k-permutations of b, with # number of k-combinations = len(a)! / (k! (len(a) - k)!) # and number of k-permutations = len(b)! / (len(b) - k)! # However we can't do x! / (x-k)! so we do prod(range(x-k+1, x, 1)) for # computational simplicity. # Since 21! is larger than a 64-bit integer (see factorial maths in # counterpart_pairing_fortran.find_single_island_prob), we also filter for # islands with more than 20 objects in both catalogues within # _calc_group_length_exceeded. maxiters = 5000000 grouplengthexceeded = np.zeros(dtype=bool, shape=(len(agrouplengths),)) counter = np.arange(0, len(agrouplengths)) iter_group = zip(counter, agrouplengths, bgrouplengths, itertools.repeat(maxiters)) with make_pool(n_pool) as pool: for return_items in pool.imap_unordered(_calc_group_length_exceeded, iter_group, chunksize=max(1, len(counter) // n_pool)): i, len_exceeded_flag = return_items grouplengthexceeded[i] = len_exceeded_flag pool.join() if np.any(grouplengthexceeded): nremoveisland = np.sum(grouplengthexceeded) nacatremove = np.sum(agrouplengths[grouplengthexceeded]) nbcatremove = np.sum(bgrouplengths[grouplengthexceeded]) warnings.warn(f"{nremoveisland} island{'' if nremoveisland == 1 else 's'}, containing " f"{nacatremove}/{len(aoverlap)} catalogue a and {nbcatremove}/{len(boverlap)} " f"catalogue b stars, {'was' if nremoveisland == 1 else 'were'} removed for having " f"more than {maxiters} possible iterations. Please check any results carefully.") sys.stdout.flush() rejectgroupnum = np.arange(1, groupmax+1)[grouplengthexceeded] areject = np.arange(0, len(aoverlap))[np.isin(agroup, rejectgroupnum)] breject = np.arange(0, len(boverlap))[np.isin(bgroup, rejectgroupnum)] reject_flag = True else: reject_flag = False # Combine the potential "no fly" rejections with the island-too-long ones. if reject_flag and no_fly_flag: areject = np.concatenate((areject, a_no_fly)) breject = np.concatenate((breject, b_no_fly)) elif no_fly_flag: # If no-fly rejections but no too-big-island rejections, move the # variables across. Update the flag variable to match. areject = a_no_fly breject = b_no_fly reject_flag = True # If no no-fly objects but we have some too-big-island rejected objects # then we can just do nothing, since we already set that up above. # Keep track of which sources have "good" group sizes, and the size of each # group in the two catalogues (e.g., group 1 has 2 "a" and 3 "b" sources). goodlength = np.logical_not(grouplengthexceeded) acounters = np.zeros(dtype=int, shape=(groupmax,)) bcounters = np.zeros(dtype=int, shape=(groupmax,)) if np.sum(goodlength) > 0: amaxlen = int(np.amax(agrouplengths[goodlength])) bmaxlen = int(np.amax(bgrouplengths[goodlength])) alist = np.full(dtype=int, shape=(amaxlen, groupmax), fill_value=-1, order='F') blist = np.full(dtype=int, shape=(bmaxlen, groupmax), fill_value=-1, order='F') # Remember that we started groups from one, so convert to zero-indexing. # Loop over each source in turn, skipping any which belong to an island # too large to run, updating alist or blist with the corresponding island # number the source belongs to. for i, agrp in enumerate(agroup): if agrp > 0 and goodlength[agrp-1]: alist[acounters[agrp-1], agrp-1] = i acounters[agrp-1] += 1 for i, bgrp in enumerate(bgroup): if bgrp > 0 and goodlength[bgrp-1]: blist[bcounters[bgrp-1], bgrp-1] = i bcounters[bgrp-1] += 1 # Now, we simply want to remove any sources from the list with islands too # large to run. alist = alist[:, goodlength] blist = blist[:, goodlength] agrouplengths = agrouplengths[goodlength] bgrouplengths = bgrouplengths[goodlength] else: # If we don't have any valid islands at all, then we just need to fake # some variables. alist, blist = [], [] agrouplengths, bgrouplengths = [], [] if reject_flag: # pylint: disable-next=possibly-used-before-assignment return alist, blist, agrouplengths, bgrouplengths, areject, breject return alist, blist, agrouplengths, bgrouplengths
def _initial_group_numbering(aindices, bindices, aoverlap, boverlap): ''' Iterates over the indices mapping overlaps between the two catalogues, assigning initial group numbers to sources to be placed in "islands". Loops through all "lonely", single-source islands in each catalogue, and those with a one-to-one mapping of a single "a" object and one "b" source, before iteratively grouping all multi-object islands together. Parameters ---------- aindices : numpy.ndarray The indices into catalogue "b", for each catalogue "a" source, that have been determined to be potentially positionally correlated. bindices : numpy.ndarray The equivalent to ``aindices``, but mapping the overlaps in catalogue "a" for each catalogue "b" source. aoverlap : numpy.ndarray The number of overlaps for each catalogue "a" source (i.e., the length of each row in ``aindices`` for each source). boverlap : numpy.ndarray The equivalent number of overlaps for each catalogue "b" object. Returns ------- agroup : numpy.ndarray Array detailing the group number of each catalogue "a" source. bgroup : numpy.ndarray Array detailing the group number of each catalogue "b" source. no_fly_list : list of lists, optional List containing any groups that hit a recursive limit and could not be assigned a group number. Only returned if something is in it. ''' agroup = np.zeros(dtype=int, shape=(len(aoverlap),)) bgroup = np.zeros(dtype=int, shape=(len(boverlap),)) group_num = 0 # First search for catalogue "a" sources that are either lonely, with no # catalogue "b" object near them, or those with only one corresponding # object in catalogue "b", then iteratively group from a -> b -> a -> ... # any remaining objects. for i in range(0, len(agroup)): # pylint: disable=consider-using-enumerate if aoverlap[i] == 0: group_num += 1 agroup[i] = group_num elif aoverlap[i] == 1 and boverlap[aindices[0, i]] == 1: group_num += 1 agroup[i] = group_num bgroup[aindices[0, i]] = group_num for i in range(0, len(bgroup)): # pylint: disable=consider-using-enumerate if boverlap[i] == 0: group_num += 1 bgroup[i] = group_num # Keep track of a "no fly" list, in which the python recursion limit # has been reached. Objects should be removed from it if they succeed # in getting put in an island in a future run. no_fly_list = [[], []] for i, agrp in enumerate(agroup): if agrp == 0: group_num += 1 try: _a_to_b(i, group_num, aoverlap[i], aindices, bindices, aoverlap, boverlap, agroup, bgroup) # If we succeed, check the no fly list for removals: successful_a = np.argwhere(agroup == group_num) successful_b = np.argwhere(bgroup == group_num) for j in successful_a: if j in no_fly_list[0]: k = np.nonzero(no_fly_list[0] == j)[0][0] no_fly_list[0] = np.concatenate((no_fly_list[0][:k], no_fly_list[0][k+1:])) for j in successful_b: if j in no_fly_list[1]: k = np.nonzero(no_fly_list[1] == j)[0][0] no_fly_list[1] = np.concatenate((no_fly_list[1][:k], no_fly_list[1][k+1:])) sys.stdout.flush() except RecursionError: too_many_a = np.nonzero(agroup == group_num)[0] too_many_b = np.nonzero(bgroup == group_num)[0] no_fly_list[0] = np.unique(np.concatenate((no_fly_list[0], too_many_a))) no_fly_list[1] = np.unique(np.concatenate((no_fly_list[1], too_many_b))) sys.stdout.flush() agroup[too_many_a] = 0 bgroup[too_many_b] = 0 group_num -= 1 # Usually the fact that we entirely iterate over agroup is fine, as we # should touch every detection in both catalogues. However, in cases where # we hit recursive depths we may either fail to hit a source, or mess around # with sources, in catalogue b. We then just need to assign any "missed" # catalogue b entries as no-fly list in these cases, as we can't do # anything else. if np.any(bgroup == 0): failed_b_indices = np.nonzero(bgroup == 0)[0] no_fly_list[1] = np.unique(np.concatenate((no_fly_list[1], failed_b_indices))) if len(no_fly_list[0]) > 0 or len(no_fly_list[1]) > 0: return agroup, bgroup, no_fly_list return agroup, bgroup def _a_to_b(ind, grp, n, aindices, bindices, aoverlap, boverlap, agroup, bgroup): ''' Iterative function, along with ``_b_to_a``, to assign all sources overlapping this catalogue "a" object in catalogue "b" as being in the same group as it. This subsequently calls ``_b_to_a`` for each of those "b" sources, to assign any "a" sources that overlap those objects to the same group, until there are no more overlaps. Parameters ---------- ind : integer The index into ``agroup``, the specific source in question to be assigned this group number. grp : integer The group number of this "island" to be assigned. n : integer The number of sources that overlap this catalogue "a" source. aindices : numpy.ndarray The indices into catalogue "b", for each catalogue "a" source, that have been determined to be potentially positionally correlated. bindices : numpy.ndarray The equivalent to ``aindices``, but mapping the overlaps in catalogue "a" for each catalogue "b" source. aoverlap : numpy.ndarray The number of overlaps for each catalogue "a" source (i.e., the length of each row in ``aindices`` for each source). boverlap : numpy.ndarray The equivalent number of overlaps for each catalogue "b" object. agroup : numpy.ndarray Array detailing the group number of each catalogue "a" source. bgroup : numpy.ndarray Array detailing the group number of each catalogue "b" source. ''' agroup[ind] = grp for q in aindices[:n, ind]: if bgroup[q] != grp: _b_to_a(q, grp, boverlap[q], aindices, bindices, aoverlap, boverlap, agroup, bgroup) def _b_to_a(ind, grp, n, aindices, bindices, aoverlap, boverlap, agroup, bgroup): ''' Iterative function, equivalent to ``_a_to_b``, to assign all sources overlapping this catalogue "b" object in catalogue "a" with its group number. Parameters ---------- ind : integer The index into ``bgroup``, the specific source in question to be assigned this group number. grp : integer The group number of this "island" to be assigned. n : integer The number of sources that overlap this catalogue "b" source. aindices : numpy.ndarray The indices into catalogue "b", for each catalogue "a" source, that have been determined to be potentially positionally correlated. bindices : numpy.ndarray The equivalent to ``aindices``, but mapping the overlaps in catalogue "a" for each catalogue "b" source. aoverlap : numpy.ndarray The number of overlaps for each catalogue "a" source (i.e., the length of each row in ``aindices`` for each source). boverlap : numpy.ndarray The equivalent number of overlaps for each catalogue "b" object. agroup : numpy.ndarray Array detailing the group number of each catalogue "a" source. bgroup : numpy.ndarray Array detailing the group number of each catalogue "b" source. ''' bgroup[ind] = grp for f in bindices[:n, ind]: if agroup[f] != grp: _a_to_b(f, grp, aoverlap[f], aindices, bindices, aoverlap, boverlap, agroup, bgroup) def _calc_group_length_exceeded(iterable): i, n_a, n_b, maxiters = iterable if max(n_a, n_b) > 20: return i, 1 counter = 0 for k in np.arange(0, min(n_a, n_b)+1e-10, 1): kcomb = np.prod(np.arange(n_a-k+1, n_a+1e-10, 1)) / scipy.special.factorial(k) kperm = np.prod(np.arange(n_b-k+1, n_b+1e-10, 1)) counter += kcomb*kperm if counter > maxiters: exceed_flag = 1 return i, exceed_flag exceed_flag = 0 return i, exceed_flag