Source code for utils.region_manager

#!/usr/bin/env python3
"""
Region Manager - Spatial region handling for EEPAS.

This module provides unified management of testing region and neighborhood region,
supporting both grid and polygon region types.

Region Types:
    - 'grid': Rectangular grid (e.g., CELLE_ter format)
    - 'polygon': Polygon boundary (e.g., CPTI15 format)

Regional Functions:
    - Testing Region (R): Used for likelihood calculation and forecast output
    - Neighborhood Region (N): Used for edge effect compensation

Examples:
    Single grid region (both regions use same grid)::

        mgr = RegionManager(celle_data, celle_data, 'grid', 'grid')

    Testing grid with neighborhood polygon::

        mgr = RegionManager(celle_data, cpti15_polygon, 'grid', 'polygon')
"""

import numpy as np



[docs]
class RegionManager:
    """
    Region Manager - Handles spatial determination for testing and neighborhood regions

    Attributes:
        testing_region: Testing region data (grid or polygon)
        neighborhood_region: Neighborhood region data (grid or polygon)
        testing_type: 'grid' or 'polygon'
        neighborhood_type: 'grid' or 'polygon'
    """


[docs]
    def __init__(self, testing_region_data, neighborhood_region_data=None,
                 testing_type='grid', neighborhood_type='grid'):
        """
        Initialize region manager.

        Args:
            testing_region_data: Testing region data

                - If type='grid': numpy array (N×10), column format is [x_min, x_max, y_min, y_max, ..., cell_id, ...]
                - If type='polygon': numpy array (M×2 or M×4), vertex coordinates

            neighborhood_region_data: Neighborhood region data (optional)

                - If None, then neighborhood = testing (same region for both)
                - Same format as testing_region_data

            testing_type: 'grid' or 'polygon'
            neighborhood_type: 'grid' or 'polygon'
        """
        self.testing_region = testing_region_data
        self.testing_type = testing_type

        # Backward compatibility: if neighborhood region not provided, use testing region
        if neighborhood_region_data is None:
            self.neighborhood_region = testing_region_data
            self.neighborhood_type = testing_type
        else:
            self.neighborhood_region = neighborhood_region_data
            self.neighborhood_type = neighborhood_type

        # Preprocessing: extract polygon vertices (if polygon)
        if self.testing_type == 'polygon':
            self.testing_polygon_vertices = self._extract_polygon_vertices(
                self.testing_region
            )

        if self.neighborhood_type == 'polygon':
            self.neighborhood_polygon_vertices = self._extract_polygon_vertices(
                self.neighborhood_region
            )

        # Preprocessing: extract grid boundaries (if grid)
        if self.testing_type == 'grid':
            self.testing_grid_bounds = self._extract_grid_bounds(
                self.testing_region
            )

        if self.neighborhood_type == 'grid':
            self.neighborhood_grid_bounds = self._extract_grid_bounds(
                self.neighborhood_region
            )



    def _extract_polygon_vertices(self, polygon_data):
        """
        Extract vertex coordinates from polygon data.

        Args:
            polygon_data: numpy array
                - If (N×2): directly [x, y] vertices
                - If (N×4): [lon, lat, x, y], use last two columns

        Returns:
            numpy array (N×2): [x, y] vertex coordinates
        """
        if polygon_data.shape[1] == 2:
            # Already in [x, y] format
            return polygon_data
        elif polygon_data.shape[1] == 4:
            # [lon, lat, x, y] format, use projected coordinates x, y
            return polygon_data[:, 2:4]
        else:
            raise ValueError(
                f"Unsupported polygon data format, number of columns: {polygon_data.shape[1]}"
            )


    def _extract_grid_bounds(self, grid_data):
        """
        Extract boundary information from grid data.

        Args:
            grid_data: numpy array (N×10 or more)
                Column format: [x_min, x_max, y_min, y_max, ...]

        Returns:
            numpy array (N×4): [x_min, x_max, y_min, y_max]
        """
        if grid_data.shape[1] < 4:
            raise ValueError(
                f"Insufficient grid data columns, need at least 4, current: {grid_data.shape[1]}"
            )

        return grid_data[:, 0:4]



[docs]
    def is_in_testing_region(self, x, y):
        """
        Determine if point (x, y) is within testing region.

        Args:
            x: X coordinate (can be scalar or array)
            y: Y coordinate (can be scalar or array)

        Returns:
            bool or numpy array of bool
        """
        if self.testing_type == 'grid':
            return self._is_in_grid(x, y, self.testing_grid_bounds)
        elif self.testing_type == 'polygon':
            return self._is_in_polygon(x, y, self.testing_polygon_vertices)
        else:
            raise ValueError(f"Unsupported testing region type: {self.testing_type}")




[docs]
    def is_in_neighborhood_region(self, x, y):
        """
        Determine if point (x, y) is within neighborhood region.

        Args:
            x: X coordinate (can be scalar or array)
            y: Y coordinate (can be scalar or array)

        Returns:
            bool or numpy array of bool
        """
        if self.neighborhood_type == 'grid':
            return self._is_in_grid(x, y, self.neighborhood_grid_bounds)
        elif self.neighborhood_type == 'polygon':
            return self._is_in_polygon(x, y, self.neighborhood_polygon_vertices)
        else:
            raise ValueError(
                f"Unsupported neighborhood region type: {self.neighborhood_type}"
            )



    def _is_in_grid(self, x, y, grid_bounds):
        """
        Determine if point is within rectangular grid (any cell).

        Args:
            x, y: Coordinates (scalar or array)
            grid_bounds: numpy array (N×4) [x_min, x_max, y_min, y_max]

        Returns:
            bool or numpy array of bool
        """
        x = np.atleast_1d(x)
        y = np.atleast_1d(y)

        # Vectorized determination: check if each point is in any grid cell
        in_grid = np.zeros(len(x), dtype=bool)

        for i in range(len(x)):
            # For each point, check if it's in any grid cell
            in_any_cell = np.any(
                (x[i] >= grid_bounds[:, 0]) &
                (x[i] <= grid_bounds[:, 1]) &
                (y[i] >= grid_bounds[:, 2]) &
                (y[i] <= grid_bounds[:, 3])
            )
            in_grid[i] = in_any_cell

        # If input is scalar, return scalar
        if len(in_grid) == 1:
            return bool(in_grid[0])

        return in_grid


    def _is_in_polygon(self, x, y, polygon_vertices):
        """
        Determine if point is within polygon - Ray Casting Algorithm.

        Principle:

            - Cast a ray from the point to the right, count intersections with polygon boundary
            - Odd number of intersections → point inside polygon
            - Even number of intersections → point outside polygon

        Boundary handling:

            - Point exactly on vertex → treat as outside
            - Point exactly on edge → treat as outside
            - This conservative approach avoids incorrectly including boundary earthquakes

        Args:
            x, y: Coordinates (scalar or array)
            polygon_vertices: numpy array (N×2) polygon vertices [x, y]

        Returns:
            bool or numpy.ndarray: Boolean indicating if point(s) inside polygon

        Reference:
            https://en.wikipedia.org/wiki/Point_in_polygon
        """
        x = np.atleast_1d(x)
        y = np.atleast_1d(y)

        n_vertices = len(polygon_vertices)
        inside = np.zeros(len(x), dtype=bool)

        eps = 1e-10  # Numerical tolerance for boundary detection

        for i in range(len(x)):
            xi, yi = x[i], y[i]

            # Check if on vertex (treat as outside)
            is_on_vertex = False
            for vx, vy in polygon_vertices:
                if abs(xi - vx) < eps and abs(yi - vy) < eps:
                    is_on_vertex = True
                    break

            if is_on_vertex:
                inside[i] = False
                continue

            # Ray casting algorithm
            crosses = 0
            for j in range(n_vertices):
                # Two vertices of current edge
                x1, y1 = polygon_vertices[j]
                x2, y2 = polygon_vertices[(j + 1) % n_vertices]

                # Check if ray intersects with edge
                # Condition: point's y coordinate is within edge's y range
                if ((y1 > yi) != (y2 > yi)):
                    # Calculate x coordinate of ray-edge intersection
                    x_intersect = (x2 - x1) * (yi - y1) / (y2 - y1) + x1

                    # If intersection is to the right of point, count+1
                    if xi < x_intersect:
                        crosses += 1

            # Odd number of intersections → inside polygon
            inside[i] = (crosses % 2 == 1)

        # If input is scalar, return scalar
        if len(inside) == 1:
            return bool(inside[0])

        return inside



[docs]
    def filter_events_for_learning(self, catalog, x_col=7, y_col=6):
        """
        Filter events for learning: events within neighborhood region.

        During PPE/EEPAS learning, use all events within neighborhood as source
        events to avoid edge effects.

        Args:
            catalog: numpy array (N×M) earthquake catalog
            x_col: X coordinate column (default 7, HORUS format)
            y_col: Y coordinate column (default 6, HORUS format)

        Returns:
            numpy array: Filtered catalog
        """
        x = catalog[:, x_col]
        y = catalog[:, y_col]

        mask = self.is_in_neighborhood_region(x, y)

        return catalog[mask]




[docs]
    def filter_events_for_targets(self, catalog, x_col=7, y_col=6):
        """
        Filter target events: events within testing region.

        During likelihood calculation and forecasting, only use/output events
        within the testing region.

        Args:
            catalog: numpy array (N×M) earthquake catalog
            x_col: X coordinate column (default 7, HORUS format)
            y_col: Y coordinate column (default 6, HORUS format)

        Returns:
            numpy array: Filtered catalog
        """
        x = catalog[:, x_col]
        y = catalog[:, y_col]

        mask = self.is_in_testing_region(x, y)

        return catalog[mask]




[docs]
    def get_active_cells(self):
        """
        Get active cell list (only when testing region is grid type).

        Returns:
            numpy array (N×4): [x_min, x_max, y_min, y_max] for each cell

        Raises:
            ValueError: If testing region is not grid type
        """
        if self.testing_type != 'grid':
            raise ValueError(
                f"get_active_cells() only supports grid-type testing region, "
                f"current type: {self.testing_type}"
            )

        return self.testing_grid_bounds




[docs]
    def get_n_active_cells(self):
        """
        Get number of active cells.

        Returns:
            int: Number of cells (returns number of vertices if polygon)
        """
        if self.testing_type == 'grid':
            return len(self.testing_grid_bounds)
        elif self.testing_type == 'polygon':
            return len(self.testing_polygon_vertices)
        else:
            raise ValueError(f"Unknown type: {self.testing_type}")




[docs]
    def get_region_summary(self):
        """
        Get region manager summary information (for debugging).

        Returns:
            dict: Dictionary containing region types, cell counts, and other metadata
        """
        summary = {
            'testing_type': self.testing_type,
            'neighborhood_type': self.neighborhood_type,
            'n_testing_cells': self.get_n_active_cells(),
        }

        if self.neighborhood_type == 'grid':
            summary['n_neighborhood_cells'] = len(self.neighborhood_grid_bounds)
        elif self.neighborhood_type == 'polygon':
            summary['n_neighborhood_vertices'] = len(self.neighborhood_polygon_vertices)

        return summary