# Licensed under a 3-clause BSD style license - see LICENSE
'''
This module hosts the functions to create overlapping groups of "islands" of
objects, astrometrically in common based on relative sky separation compared to
their respective uncertainties across two catalogues.
'''
import itertools
import sys
import warnings
import numpy as np
import scipy.special
__all__ = ['set_list']
from macauff.misc_functions import make_pool
[docs]
def set_list(aindices, bindices, aoverlap, boverlap, n_pool):
'''
Creates the inter-catalogue groupings between catalogues "a" and "b", based
on previously determined individual source "overlaps" in astrometry.
Parameters
----------
aindices : numpy.ndarray
The indices into catalogue "b", for each catalogue "a" source, that have
been determined to be potentially positionally correlated.
bindices : numpy.ndarray
The equivalent to ``aindices``, but mapping the overlaps in catalogue "a"
for each catalogue "b" source.
aoverlap : numpy.ndarray
The number of overlaps for each catalogue "a" source (i.e., the length
of each row in ``aindices`` for each source).
boverlap : numpy.ndarray
The equivalent number of overlaps for each catalogue "b" object.
n_pool : integer
Number of multiprocessing pools to use when parallelising.
Returns
-------
alist : numpy.ndarray
The indices of all catalogue "a" sources that are in a common "island"
group together. Each row of ``alist`` indicates all "a" sources
potentially positionally correlated.
blist : numpy.ndarray
The indices of all catalogue "b" objects in the same groups (as other
"b" sources, as well as mapping to catalogue "a" objects).
agrouplengths : numpy.ndarray
The number of catalogue "a" sources in each unique "island".
bgrouplengths : numpy.ndarray
The number of catalogue "b" sources in each island grouping.
'''
things = _initial_group_numbering(aindices, bindices, aoverlap, boverlap)
if len(things) == 3:
agroup, bgroup, no_fly_list = things
else:
agroup, bgroup = things
groupmax = max(np.amax(agroup), np.amax(bgroup))
if len(things) == 3:
no_fly_flag = True
a_no_fly = no_fly_list[0]
b_no_fly = no_fly_list[1]
warnings.warn(f"{len(a_no_fly)}/{len(aoverlap)} catalogue a and {len(b_no_fly)}/{len(boverlap)} "
"catalogue b stars were removed for failing to have their island solutions solved. "
"Please check any results carefully.")
sys.stdout.flush()
else:
no_fly_flag = False
agrouplengths = np.zeros(dtype=int, shape=(groupmax,))
bgrouplengths = np.zeros(dtype=int, shape=(groupmax,))
for agrp in agroup:
if agrp > 0:
agrouplengths[agrp-1] += 1
for bgrp in bgroup:
if bgrp > 0:
bgrouplengths[bgrp-1] += 1
# Search for any island groupings which are too large to calculate the
# permutations of reasonably (limiting at 50,000). When considering a set,
# the total number of matches considered is:
# sum k from 0 to min(len(a), len(b)) [#] of k-combinations of a times
# [#] of k-permutations of b, with
# number of k-combinations = len(a)! / (k! (len(a) - k)!)
# and number of k-permutations = len(b)! / (len(b) - k)!
# However we can't do x! / (x-k)! so we do prod(range(x-k+1, x, 1)) for
# computational simplicity.
# Since 21! is larger than a 64-bit integer (see factorial maths in
# counterpart_pairing_fortran.find_single_island_prob), we also filter for
# islands with more than 20 objects in both catalogues within
# _calc_group_length_exceeded.
maxiters = 5000000
grouplengthexceeded = np.zeros(dtype=bool, shape=(len(agrouplengths),))
counter = np.arange(0, len(agrouplengths))
iter_group = zip(counter, agrouplengths, bgrouplengths, itertools.repeat(maxiters))
with make_pool(n_pool) as pool:
for return_items in pool.imap_unordered(_calc_group_length_exceeded, iter_group,
chunksize=max(1, len(counter) // n_pool)):
i, len_exceeded_flag = return_items
grouplengthexceeded[i] = len_exceeded_flag
pool.join()
if np.any(grouplengthexceeded):
nremoveisland = np.sum(grouplengthexceeded)
nacatremove = np.sum(agrouplengths[grouplengthexceeded])
nbcatremove = np.sum(bgrouplengths[grouplengthexceeded])
warnings.warn(f"{nremoveisland} island{'' if nremoveisland == 1 else 's'}, containing "
f"{nacatremove}/{len(aoverlap)} catalogue a and {nbcatremove}/{len(boverlap)} "
f"catalogue b stars, {'was' if nremoveisland == 1 else 'were'} removed for having "
f"more than {maxiters} possible iterations. Please check any results carefully.")
sys.stdout.flush()
rejectgroupnum = np.arange(1, groupmax+1)[grouplengthexceeded]
areject = np.arange(0, len(aoverlap))[np.isin(agroup, rejectgroupnum)]
breject = np.arange(0, len(boverlap))[np.isin(bgroup, rejectgroupnum)]
reject_flag = True
else:
reject_flag = False
# Combine the potential "no fly" rejections with the island-too-long ones.
if reject_flag and no_fly_flag:
areject = np.concatenate((areject, a_no_fly))
breject = np.concatenate((breject, b_no_fly))
elif no_fly_flag:
# If no-fly rejections but no too-big-island rejections, move the
# variables across. Update the flag variable to match.
areject = a_no_fly
breject = b_no_fly
reject_flag = True
# If no no-fly objects but we have some too-big-island rejected objects
# then we can just do nothing, since we already set that up above.
# Keep track of which sources have "good" group sizes, and the size of each
# group in the two catalogues (e.g., group 1 has 2 "a" and 3 "b" sources).
goodlength = np.logical_not(grouplengthexceeded)
acounters = np.zeros(dtype=int, shape=(groupmax,))
bcounters = np.zeros(dtype=int, shape=(groupmax,))
if np.sum(goodlength) > 0:
amaxlen = int(np.amax(agrouplengths[goodlength]))
bmaxlen = int(np.amax(bgrouplengths[goodlength]))
alist = np.full(dtype=int, shape=(amaxlen, groupmax), fill_value=-1, order='F')
blist = np.full(dtype=int, shape=(bmaxlen, groupmax), fill_value=-1, order='F')
# Remember that we started groups from one, so convert to zero-indexing.
# Loop over each source in turn, skipping any which belong to an island
# too large to run, updating alist or blist with the corresponding island
# number the source belongs to.
for i, agrp in enumerate(agroup):
if agrp > 0 and goodlength[agrp-1]:
alist[acounters[agrp-1], agrp-1] = i
acounters[agrp-1] += 1
for i, bgrp in enumerate(bgroup):
if bgrp > 0 and goodlength[bgrp-1]:
blist[bcounters[bgrp-1], bgrp-1] = i
bcounters[bgrp-1] += 1
# Now, we simply want to remove any sources from the list with islands too
# large to run.
alist = alist[:, goodlength]
blist = blist[:, goodlength]
agrouplengths = agrouplengths[goodlength]
bgrouplengths = bgrouplengths[goodlength]
else:
# If we don't have any valid islands at all, then we just need to fake
# some variables.
alist, blist = [], []
agrouplengths, bgrouplengths = [], []
if reject_flag:
# pylint: disable-next=possibly-used-before-assignment
return alist, blist, agrouplengths, bgrouplengths, areject, breject
return alist, blist, agrouplengths, bgrouplengths
def _initial_group_numbering(aindices, bindices, aoverlap, boverlap):
'''
Iterates over the indices mapping overlaps between the two catalogues,
assigning initial group numbers to sources to be placed in "islands".
Loops through all "lonely", single-source islands in each catalogue, and
those with a one-to-one mapping of a single "a" object and one "b" source,
before iteratively grouping all multi-object islands together.
Parameters
----------
aindices : numpy.ndarray
The indices into catalogue "b", for each catalogue "a" source, that have
been determined to be potentially positionally correlated.
bindices : numpy.ndarray
The equivalent to ``aindices``, but mapping the overlaps in catalogue "a"
for each catalogue "b" source.
aoverlap : numpy.ndarray
The number of overlaps for each catalogue "a" source (i.e., the length
of each row in ``aindices`` for each source).
boverlap : numpy.ndarray
The equivalent number of overlaps for each catalogue "b" object.
Returns
-------
agroup : numpy.ndarray
Array detailing the group number of each catalogue "a" source.
bgroup : numpy.ndarray
Array detailing the group number of each catalogue "b" source.
no_fly_list : list of lists, optional
List containing any groups that hit a recursive limit and could
not be assigned a group number. Only returned if something is in
it.
'''
agroup = np.zeros(dtype=int, shape=(len(aoverlap),))
bgroup = np.zeros(dtype=int, shape=(len(boverlap),))
group_num = 0
# First search for catalogue "a" sources that are either lonely, with no
# catalogue "b" object near them, or those with only one corresponding
# object in catalogue "b", then iteratively group from a -> b -> a -> ...
# any remaining objects.
for i in range(0, len(agroup)): # pylint: disable=consider-using-enumerate
if aoverlap[i] == 0:
group_num += 1
agroup[i] = group_num
elif aoverlap[i] == 1 and boverlap[aindices[0, i]] == 1:
group_num += 1
agroup[i] = group_num
bgroup[aindices[0, i]] = group_num
for i in range(0, len(bgroup)): # pylint: disable=consider-using-enumerate
if boverlap[i] == 0:
group_num += 1
bgroup[i] = group_num
# Keep track of a "no fly" list, in which the python recursion limit
# has been reached. Objects should be removed from it if they succeed
# in getting put in an island in a future run.
no_fly_list = [[], []]
for i, agrp in enumerate(agroup):
if agrp == 0:
group_num += 1
try:
_a_to_b(i, group_num, aoverlap[i], aindices, bindices,
aoverlap, boverlap, agroup, bgroup)
# If we succeed, check the no fly list for removals:
successful_a = np.argwhere(agroup == group_num)
successful_b = np.argwhere(bgroup == group_num)
for j in successful_a:
if j in no_fly_list[0]:
k = np.nonzero(no_fly_list[0] == j)[0][0]
no_fly_list[0] = np.concatenate((no_fly_list[0][:k], no_fly_list[0][k+1:]))
for j in successful_b:
if j in no_fly_list[1]:
k = np.nonzero(no_fly_list[1] == j)[0][0]
no_fly_list[1] = np.concatenate((no_fly_list[1][:k], no_fly_list[1][k+1:]))
sys.stdout.flush()
except RecursionError:
too_many_a = np.nonzero(agroup == group_num)[0]
too_many_b = np.nonzero(bgroup == group_num)[0]
no_fly_list[0] = np.unique(np.concatenate((no_fly_list[0], too_many_a)))
no_fly_list[1] = np.unique(np.concatenate((no_fly_list[1], too_many_b)))
sys.stdout.flush()
agroup[too_many_a] = 0
bgroup[too_many_b] = 0
group_num -= 1
# Usually the fact that we entirely iterate over agroup is fine, as we
# should touch every detection in both catalogues. However, in cases where
# we hit recursive depths we may either fail to hit a source, or mess around
# with sources, in catalogue b. We then just need to assign any "missed"
# catalogue b entries as no-fly list in these cases, as we can't do
# anything else.
if np.any(bgroup == 0):
failed_b_indices = np.nonzero(bgroup == 0)[0]
no_fly_list[1] = np.unique(np.concatenate((no_fly_list[1], failed_b_indices)))
if len(no_fly_list[0]) > 0 or len(no_fly_list[1]) > 0:
return agroup, bgroup, no_fly_list
return agroup, bgroup
def _a_to_b(ind, grp, n, aindices, bindices, aoverlap, boverlap, agroup, bgroup):
'''
Iterative function, along with ``_b_to_a``, to assign all sources overlapping
this catalogue "a" object in catalogue "b" as being in the same group as it.
This subsequently calls ``_b_to_a`` for each of those "b" sources, to
assign any "a" sources that overlap those objects to the same group, until
there are no more overlaps.
Parameters
----------
ind : integer
The index into ``agroup``, the specific source in question to be assigned
this group number.
grp : integer
The group number of this "island" to be assigned.
n : integer
The number of sources that overlap this catalogue "a" source.
aindices : numpy.ndarray
The indices into catalogue "b", for each catalogue "a" source, that have
been determined to be potentially positionally correlated.
bindices : numpy.ndarray
The equivalent to ``aindices``, but mapping the overlaps in catalogue "a"
for each catalogue "b" source.
aoverlap : numpy.ndarray
The number of overlaps for each catalogue "a" source (i.e., the length
of each row in ``aindices`` for each source).
boverlap : numpy.ndarray
The equivalent number of overlaps for each catalogue "b" object.
agroup : numpy.ndarray
Array detailing the group number of each catalogue "a" source.
bgroup : numpy.ndarray
Array detailing the group number of each catalogue "b" source.
'''
agroup[ind] = grp
for q in aindices[:n, ind]:
if bgroup[q] != grp:
_b_to_a(q, grp, boverlap[q], aindices, bindices, aoverlap, boverlap, agroup, bgroup)
def _b_to_a(ind, grp, n, aindices, bindices, aoverlap, boverlap, agroup, bgroup):
'''
Iterative function, equivalent to ``_a_to_b``, to assign all sources
overlapping this catalogue "b" object in catalogue "a" with its group number.
Parameters
----------
ind : integer
The index into ``bgroup``, the specific source in question to be assigned
this group number.
grp : integer
The group number of this "island" to be assigned.
n : integer
The number of sources that overlap this catalogue "b" source.
aindices : numpy.ndarray
The indices into catalogue "b", for each catalogue "a" source, that have
been determined to be potentially positionally correlated.
bindices : numpy.ndarray
The equivalent to ``aindices``, but mapping the overlaps in catalogue "a"
for each catalogue "b" source.
aoverlap : numpy.ndarray
The number of overlaps for each catalogue "a" source (i.e., the length
of each row in ``aindices`` for each source).
boverlap : numpy.ndarray
The equivalent number of overlaps for each catalogue "b" object.
agroup : numpy.ndarray
Array detailing the group number of each catalogue "a" source.
bgroup : numpy.ndarray
Array detailing the group number of each catalogue "b" source.
'''
bgroup[ind] = grp
for f in bindices[:n, ind]:
if agroup[f] != grp:
_a_to_b(f, grp, aoverlap[f], aindices, bindices, aoverlap, boverlap, agroup, bgroup)
def _calc_group_length_exceeded(iterable):
i, n_a, n_b, maxiters = iterable
if max(n_a, n_b) > 20:
return i, 1
counter = 0
for k in np.arange(0, min(n_a, n_b)+1e-10, 1):
kcomb = np.prod(np.arange(n_a-k+1, n_a+1e-10, 1)) / scipy.special.factorial(k)
kperm = np.prod(np.arange(n_b-k+1, n_b+1e-10, 1))
counter += kcomb*kperm
if counter > maxiters:
exceed_flag = 1
return i, exceed_flag
exceed_flag = 0
return i, exceed_flag