Source code for pointpats.random

"""Spatial Point Pattern Simulation Module
=======================================

This module provides tools to simulate spatial point patterns for use
in spatial statistics, geographic analysis, and spatial modeling.
Patterns can be generated within arbitrary hulls, defined as polygons,
bounding boxes, or other spatial objects.

Currently supported point process models include:

- `poisson`: Simulates a homogeneous Poisson point process.
- `normal`: Simulates points from a multivariate normal distribution,
   constrained within a hull.
- `cluster_poisson`: Simulates cluster patterns using a two-stage Poisson
  process.
- `cluster_normal`: Simulates clusters around randomly placed seed points using
   a normal distribution.
- `strauss`: Simulates a Strauss process with pairwise interaction constraints.

Each distribution supports control over intensity, point count,
replication, and randomness. Geometry utilities support arbitrary
spatial hulls and containment logic.

"""

import numpy
from scipy.spatial import cKDTree

from .geometry import (
    spatial,
    area as _area,
    centroid as _centroid,
    contains as _contains,
    bbox as _bbox,
    prepare_hull as _prepare_hull,
    HULL_TYPES,
)

# ------------------------------------------------------------ #
# Utilities                                                    #
# ------------------------------------------------------------ #


def parse_size_and_intensity(hull, intensity=None, size=None):
    """
    Given a hull, an intensity, and a size int/tuple, correctly
    compute the resulting missing quantities. Defaults to 100 points in one
    replication, meaning the intensity will be computed on the fly
    if nothing is provided.

    Parameters
    ----------
    hull : A geometry-like object
        This encodes the "space" in which to simulate the normal pattern. All points will
        lie within this hull. Supported values are:
        - a bounding box encoded in a numpy array as numpy.array([xmin, ymin, xmax, ymax])
        - an (N,2) array of points for which the bounding box will be computed & used
        - a shapely polygon/multipolygon
        - a scipy convexh hull
    intensity : float
        the number of observations per unit area in the hull to use. If provided,
        then the number of observations is determined using the intensity * area(hull) and
        the size is assumed to represent n_replications (if provided).
    size : tuple or int
        a tuple of (n_observations, n_replications), where the first number is the number
        of points to simulate in each replication and the second number is the number of
        total replications. So, (10, 4) indicates 10 points, 4 times.
        If an integer is provided and intensity is None, n_replications is assumed to be 1.
        If size is an integer and intensity is also provided, then size indicates n_replications,
        and the number of observations is computed on the fly using intensity and area.
    """
    if size is None:
        if intensity is not None:
            # if intensity is provided, assume
            # n_observations
            n_observations = int(_area(hull) * intensity)
        else:
            # default to 100 points
            n_observations = 100
            intensity = n_observations / _area(hull)
        n_simulations = 1
        size = (n_observations, n_simulations)
    elif isinstance(size, tuple):
        if len(size) == 2 and intensity is None:
            n_observations, n_simulations = size
            intensity = n_observations / _area(hull)
        elif len(size) == 2 and intensity is not None:
            raise ValueError(
                "Either intensity or size as (n observations, n simulations)"
                " can be provided. Providing both creates statistical conflicts."
                " between the requested intensity and implied intensity by"
                " the number of observations and the area of the hull. If"
                " you want to specify the intensity, use the intensity argument"
                " and set size equal to the number of simulations."
            )
        else:
            raise ValueError(
                f"Intensity and size not understood. Provide size as a tuple"
                f" containing (number of observations, number of simulations)"
                f" with no specified intensity, or an intensity and size equal"
                f" to the number of simulations."
                f" Recieved: `intensity={intensity}, size={size}`"
            )
    elif isinstance(size, int):
        # assume int size with specified intensity means n_simulations at x intensity
        if intensity is not None:
            n_observations = int(intensity * _area(hull))
            n_simulations = size
        else:  # assume we have one replication at the specified number of points
            n_simulations = 1
            n_observations = size
            intensity = n_observations / _area(hull)
    else:
        raise ValueError(
            f"Intensity and size not understood. Provide size as a tuple"
            f" containing (number of observations, number of simulations)"
            f" with no specified intensity, or an intensity and size equal"
            f" to the number of simulations."
            f" Recieved: `intensity={intensity}, size={size}`"
        )
    return (n_observations, n_simulations, intensity)


# ------------------------------------------------------------ #
# Distributions                                                #
# ------------------------------------------------------------ #


[docs] def poisson(hull, intensity=None, size=None, rng=None): """ Simulate a poisson random point process with a specified intensity. Parameters ---------- hull : A geometry-like object This encodes the "space" in which to simulate the normal pattern. All points will lie within this hull. Supported values are: - a bounding box encoded in a numpy array as numpy.array([xmin, ymin, xmax, ymax]) - an (N,2) array of points for which the bounding box will be computed & used - a shapely polygon/multipolygon - a scipy convexh hull intensity : float the number of observations per unit area in the hull to use. If provided, then size must be an integer describing the number of replications to use. size : tuple or int a tuple of (n_observations, n_replications), where the first number is the number of points to simulate in each replication and the second number is the number of total replications. So, (10, 4) indicates 10 points, 4 times. If an integer is provided and intensity is None, n_replications is assumed to be 1. If size is an integer and intensity is also provided, then size indicates n_replications, and the number of observations is computed from the intensity. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. Returns ------- : numpy.ndarray either an (n_replications, n_observations, 2) or (n_observations,2) array containing the simulated realizations. """ if isinstance(hull, numpy.ndarray): if hull.shape == (4,): hull = hull else: hull = _prepare_hull(hull) n_observations, n_simulations, intensity = parse_size_and_intensity( hull, intensity=intensity, size=size ) result = numpy.empty((n_simulations, n_observations, 2)) bbox = _bbox(hull) rng = numpy.random.default_rng(rng) for i_replication in range(n_simulations): generating = True i_observation = 0 while i_observation < n_observations: x, y = ( rng.uniform(bbox[0], bbox[2]), rng.uniform(bbox[1], bbox[3]), ) if _contains(hull, x, y): result[i_replication, i_observation] = (x, y) i_observation += 1 return result.squeeze()
[docs] def normal(hull, center=None, cov=None, size=None, rng=None): """ Simulate a multivariate random normal point cluster Parameters ---------- hull : A geometry-like object This encodes the "space" in which to simulate the normal pattern. All points will lie within this hull. Supported values are: - a bounding box encoded in a numpy array as numpy.array([xmin, ymin, xmax, ymax]) - an (N,2) array of points for which the bounding box will be computed & used - a shapely polygon/multipolygon - a scipy convexh hull center : iterable of shape (2, ) A point where the simulations will be centered. cov : float or a numpy array of shape (2,2) either the standard deviation of an independent and identically distributed normal distribution, or a 2 by 2 covariance matrix expressing the covariance of the x and y for the distribution. Default is half of the width or height of the hull's bounding box, whichever is larger. size : tuple or int a tuple of (n_observations, n_replications), where the first number is the number of points to simulate in each replication and the second number is the number of total replications. So, (10, 4) indicates 10 points, 4 times. If an integer is provided, n_replications is assumed to be 1. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. Returns ------- : numpy.ndarray either an (n_replications, n_observations, 2) or (n_observations,2) array containing the simulated realizations. """ if isinstance(hull, numpy.ndarray): if hull.shape == (4,): hull = hull else: hull = _prepare_hull(hull) if center is None: center = _centroid(hull) n_observations, n_simulations, intensity = parse_size_and_intensity( hull, intensity=None, size=size ) if cov is None: bbox = _bbox(hull) width = bbox[2] - bbox[0] height = bbox[3] - bbox[1] cov = numpy.maximum(width / 2, height / 2) ** 2 if isinstance(cov, (int, float)): sd = cov cov = numpy.eye(2) * sd elif isinstance(cov, numpy.ndarray): if cov.ndim == 2: assert cov.shape == (2, 2), "Bivariate covariance matrices must be 2 by 2" elif cov.ndim == 3: assert cov.shape[1:] == ( 2, 2, ), "3-dimensional covariance matrices must have shape (n_simulations, 2,2)" assert ( cov.shape[0] == n_simulations ), "3-dimensional covariance matrices must have shape (n_simulations, 2,2)" else: raise ValueError( "`cov` argument must be a float (signifying a standard deviation)" " or a 2 by 2 array expressing the covariance matrix of the " " bivariate normal distribution." ) result = numpy.empty((n_simulations, n_observations, 2)) bbox = _bbox(hull) rng = numpy.random.default_rng(rng) for i_replication in range(n_simulations): generating = True i_observation = 0 replication_cov = cov[i] if cov.ndim == 3 else cov replication_sd = numpy.diagonal(replication_cov) ** 0.5 replication_cor = (1 / replication_sd) * replication_cov * (1 / replication_sd) while i_observation < n_observations: # candidate = numpy.random.multivariate_normal((0, 0), replication_cor) candidate = rng.multivariate_normal((0, 0), replication_cor) x, y = center + candidate * replication_sd if _contains(hull, x, y): result[i_replication, i_observation] = (x, y) i_observation += 1 return result.squeeze()
[docs] def cluster_poisson( hull, intensity=None, size=None, n_seeds=2, cluster_radius=None, rng=None ): """ Simulate a cluster poisson random point process with a specified intensity & number of seeds. A cluster poisson process is a poisson process where the center of each "cluster" is itself distributed according to a spatial poisson process. Parameters ---------- hull : A geometry-like object This encodes the "space" in which to simulate the normal pattern. All points will lie within this hull. Supported values are: - a bounding box encoded in a numpy array as numpy.array([xmin, ymin, xmax, ymax]) - an (N,2) array of points for which the bounding box will be computed & used - a shapely polygon/multipolygon - a scipy convexh hull intensity : float the number of observations per unit area in the hull to use. If provided, then size must be an integer describing the number of replications to use. size : tuple or int a tuple of (n_observations, n_replications), where the first number is the number of points to simulate in each replication and the second number is the number of total replications. So, (10, 4) indicates 10 points, 4 times. If an integer is provided and intensity is None, n_replications is assumed to be 1. If size is an integer and intensity is also provided, then size indicates n_replications, and the number of observations is computed from the intensity. n_seeds : int the number of sub-clusters to use. cluster_radius : float or iterable the radius of each cluster. If a float, the same radius is used for all clusters. If an array, then there must be the same number of radii as clusters. If None, 50% of the minimum inter-point distance is used, which may fluctuate across replications. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. Returns ------- : numpy.ndarray either an (n_replications, n_observations, 2) or (n_observations,2) array containing the simulated realizations. """ if isinstance(hull, numpy.ndarray): if hull.shape == (4,): hull = hull else: hull = _prepare_hull(hull) if isinstance(cluster_radius, numpy.ndarray): cluster_radii = cluster_radius.flatten() assert len(cluster_radii) == n_seeds, ( f"number of radii provided ({len(cluster_radii)})" f"does not match number of clusters requested" f" ({n_seeds})." ) elif isinstance(cluster_radius, (int, float)): cluster_radii = [cluster_radius] * n_seeds n_observations, n_simulations, intensity = parse_size_and_intensity( hull, intensity=intensity, size=size ) result = numpy.empty((n_simulations, n_observations, 2)) hull_area = _area(hull) rng = numpy.random.default_rng(rng) center_seeds = rng.integers(100_000, size=n_simulations) for i_replication in range(n_simulations): seeds = poisson(hull, size=n_seeds, rng=[center_seeds[i_replication]]) if cluster_radius is None: # default cluster radius is one half the minimum distance between seeds cluster_radii = [spatial.distance.pdist(seeds).min() * 0.5] * n_seeds clusters = numpy.array_split(result[i_replication], n_seeds) for i_cluster, radius in enumerate(cluster_radii): seed = seeds[i_cluster] cluster_points = clusters[i_cluster] n_in_cluster = len(cluster_points) if n_in_cluster == 1: clusters[i_cluster] = seed continue if n_in_cluster < 1: raise Exception( "There are too many clusters requested for the " " inputted number of samples. Reduce `n_seeds` or" " increase the number of sampled points." ) candidates = _uniform_circle( n_in_cluster - 1, radius=radius, center=seed, hull=hull, rng=rng, ) clusters[i_cluster] = numpy.row_stack((seed, candidates)) result[i_replication] = numpy.row_stack(clusters) return result.squeeze()
[docs] def cluster_normal(hull, cov=None, size=None, n_seeds=2, rng=None): """ Simulate a cluster poisson random point process with a specified intensity & number of seeds. A cluster poisson process is a poisson process where the center of each "cluster" is itself distributed according to a spatial poisson process. Parameters ---------- hull : A geometry-like object This encodes the "space" in which to simulate the normal pattern. All points will lie within this hull. Supported values are: - a bounding box encoded in a numpy array as numpy.array([xmin, ymin, xmax, ymax]) - an (N,2) array of points for which the bounding box will be computed & used - a shapely polygon/multipolygon - a scipy convexh hull cov : float, int, or numpy.ndarray of shape (2,2) The covariance structure for clusters. By default, this is the squared average distance between cluster seeds. size : tuple or int a tuple of (n_observations, n_replications), where the first number is the number of points to simulate in each replication and the second number is the number of total replications. So, (10, 4) indicates 10 points, 4 times. If an integer is provided and intensity is None, n_replications is assumed to be 1. If size is an integer and intensity is also provided, then size indicates n_replications, and the number of observations is computed from the intensity. n_seeds : int the number of sub-clusters to use. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. Returns ------- : numpy.ndarray either an (n_replications, n_observations, 2) or (n_observations,2) array containing the simulated realizations. """ if isinstance(hull, numpy.ndarray): if hull.shape == (4,): hull = hull else: hull = _prepare_hull(hull) n_observations, n_simulations, intensity = parse_size_and_intensity( hull, intensity=None, size=size ) result = numpy.empty((n_simulations, n_observations, 2)) rng = numpy.random.default_rng(rng) seeds = rng.integers(100_000, size=n_simulations) for i_replication in range(n_simulations): centers = poisson(hull, size=n_seeds, rng=seeds[i_replication]) if cov is None: cov = spatial.distance.pdist(centers).mean() ** 2 clusters = numpy.array_split(result[i_replication], n_seeds) candidate_seeds = rng.integers(100_000, size=n_seeds) for i_cluster, center in enumerate(centers): cluster_points = clusters[i_cluster] n_in_cluster = len(cluster_points) if n_in_cluster == 1: clusters[i_cluster] = center continue if n_in_cluster < 1: raise Exception( "There are too many clusters requested for the " " inputted number of samples. Reduce `n_seeds` or" " increase the number of sampled points." ) candidates = normal( hull, center=center, cov=cov, size=n_in_cluster - 1, rng=candidate_seeds[i_cluster], ) clusters[i_cluster] = numpy.row_stack((center, candidates)) result[i_replication] = numpy.row_stack(clusters) return result.squeeze()
def _uniform_circle( n, radius=1.0, center=(0.0, 0.0), burn=2, verbose=False, hull=None, seed=None, rng=None, ): """ Generate n points within a circle of given radius. Parameters ---------- n : int Number of points. radius : float Radius of the circle. center : tuple Coordinates of the center. burn : int number of coordinates to simulate at a time. This is the "chunk" size sent to numpy.random.uniform each iteration of the rejection sampler seed : int or None, optional A seed to initialize the NumPy default random number generator (`numpy.random.default_rng`). If `None` (the default), the generator is initialized with entropy from the operating system, producing different sequences each time. Setting a specific integer seed ensures that the sequence of random numbers is reproducible. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. Returns ------- : array (n, 2), coordinates of generated points """ good = numpy.zeros((n, 2), float) c = 0 center_x, center_y = center r = radius r2 = r * r it = 0 if rng is None: rng = numpy.random.default_rng(seed) while c < n: x = rng.uniform(-r, r, (burn * n, 1)) y = rng.uniform(-r, r, (burn * n, 1)) if hull is None: in_hull = True else: in_hull = numpy.asarray( [ _contains(hull, xi + center_x, yi + center_y) for xi, yi in numpy.column_stack((x, y)) ] ).reshape(-1, 1) ids, *_ = numpy.where(((x * x + y * y) <= r2) & in_hull) candidates = numpy.hstack((x, y))[ids] nc = candidates.shape[0] need = n - c if nc > need: # more than we need good[c:] = candidates[:need] else: # use them all and keep going good[c : c + nc] = candidates c += nc it += 1 if verbose: print("Iterations: {}".format(it)) return good + numpy.asarray(center) def _pairwise_count_kdtree(points, r): """Count number of pairs within distance r using KDTree""" tree = cKDTree(points) pairs = tree.query_pairs(r) return len(pairs) def strauss( hull, intensity=None, size=None, gamma=0.1, r=0.05, n_iter=5000, rng=None, max_iter=10, ): """ Simulate a realization of the Strauss spatial point process using Metropolis-Hastings within a Gibbs sampler. Parameters ---------- hull : array-like or geometry-like The spatial domain in which to simulate the point pattern. Supported formats include: - A bounding box as a NumPy array: np.array([xmin, ymin, xmax, ymax]) - An (N, 2) array of 2D points (bounding box is inferred) - A Shapely Polygon or MultiPolygon - A SciPy ConvexHull intensity : float, optional Target number of points per unit area. Used to derive the number of points if `size` is not explicitly given. Default is 100. size : int or tuple of (int, int), optional Either the number of points to generate (if `intensity` is None), or a tuple (n_observations, n_replications). If an integer and `intensity` is provided, it is interpreted as the number of replications. gamma : float, optional Strauss interaction parameter: - `gamma < 1`: inhibition (repulsion) - `gamma = 1`: homogeneous Poisson process (no interaction) - `gamma > 1`: clustering Default is 0.1. r : float, optional Interaction radius. Pairs of points closer than this distance contribute to the Strauss interaction term. Default is 0.05. n_iter : int, optional Number of iterations for the Metropolis-Hastings sampler per replication. Default is 5000. rng : int, numpy.random.Generator, or None, optional A source of randomness. This can be: - A `numpy.random.Generator` instance (recommended) - An `int` seed, used to initialize a new Generator - `None` (default), which uses a new `numpy.random.default_rng()` instance This interface follows Scientific Python SPEC 7, ensuring consistent and reproducible random number generation across libraries. max_iter : int, optional Maximum number of regeneration attempts per replication if a valid point pattern of the desired size is not achieved. Default is 10. Returns ------- numpy.ndarray A simulated realization of the Strauss point process: - If `n_replications` > 1: shape (n_replications, n_observations, 2) - If `n_replications` = 1: shape (n_observations, 2) Raises ------ RuntimeError If a valid pattern with the desired number of points cannot be generated within `max_iter` attempts. This may indicate that the parameter combination (especially low `gamma` and large `r`) makes the configuration too restrictive. Notes ----- The Strauss process models spatial inhibition or clustering via pairwise interactions. This implementation uses a Metropolis-Hastings sampler with biased birth-death moves to encourage convergence toward the desired number of points. """ if isinstance(hull, numpy.ndarray): if hull.shape != (4,): hull = _prepare_hull(hull) n_observations, n_simulations, intensity = parse_size_and_intensity( hull, intensity=intensity, size=size ) result = numpy.empty((n_simulations, n_observations, 2)) rng = numpy.random.default_rng(rng) for i_replication in range(n_simulations): found = False gen_iter = 0 while not found and gen_iter < max_iter: # Over-sample initially to improve convergence initial_n = int(n_observations * 1.5) points = poisson(hull, intensity=initial_n, rng=rng) for _ in range(n_iter): if len(points) < n_observations or rng.random() < 0.5: new_point = poisson(hull, 1, rng=rng) trial_points = numpy.vstack([points, new_point]) k_old = _pairwise_count_kdtree(points, r) k_new = _pairwise_count_kdtree(trial_points, r) ratio = n_observations * (gamma ** (k_new - k_old)) ratio /= len(points) + 1 if rng.random() < min(1, ratio): points = trial_points else: # death move if len(points) > n_observations: idx = rng.integers(0, len(points)) trial_points = numpy.delete(points, idx, axis=0) k_old = _pairwise_count_kdtree(points, r) k_new = _pairwise_count_kdtree(trial_points, r) den = n_observations * (gamma ** (k_old - k_new)) if den == 0: raise RuntimeError( "The number of points requested is too large for the specified " "values of gamma and radius. Try reducing `intensity` or increasing " "`gamma`, `radius`, or `max_iter`." ) ratio = len(points) / den if rng.random() < min(1, ratio): points = trial_points if points.shape[0] >= n_observations: result[i_replication] = points[:n_observations] found = True gen_iter += 1 if not found: raise RuntimeError( "The number of points requested is too large for the specified " "values of gamma and radius. Try reducing `intensity` or increasing " "`gamma`, `radius`, or `max_iter`." ) return result.squeeze()