Source code for eepas_learning_auto_boundary

#!/usr/bin/env python3
"""
EEPAS Parameter Learning with Automatic Boundary Adjustment.

Learn EEPAS model parameters with automatic boundary adjustment to avoid
local optima caused by parameters hitting bounds during optimization.

Defaults: Three-stage + Multistart (3 starts) + fminsearchcon optimizer

Optimization Modes (in priority order):
    1. Custom Stages (if customStages enabled in config)
    2. Single-Stage (if --single-stage specified)
    3. Three-Stage (default, or if --three-stage specified)

Examples:
    Default (three-stage)::

        python3 eepas_learning_auto_boundary.py --config config.json

    Single-stage optimization::

        python3 eepas_learning_auto_boundary.py --config config.json --single-stage

    Custom stages (enable in config)::

        # Set "customStages": {"enable": true, "stages": [...]} in config.json
        python3 eepas_learning_auto_boundary.py --config config_custom.json
"""

import argparse
import json
import sys
from pathlib import Path

from eepas_learning import eepas_learning
from utils.auto_boundary_adjustment import check_boundary_hits, suggest_boundary_adjustment, save_adjusted_config
import numpy as np


[docs] def parse_result_csv(csv_path: str): """ Parse EEPAS result CSV file. Args: csv_path: Path to CSV file Returns: dict: Dictionary containing 9 EEPAS parameters (am, bm, Sm, at, bt, St, ba, Sa, u) and ln_likelihood """ import pandas as pd df = pd.read_csv(csv_path) if len(df) == 0: return None return df.iloc[0].to_dict()
[docs] def eepas_with_auto_boundary( config_path: str, m0: float = None, max_adjustment_rounds: int = 3, disable_boundary_adjustment: bool = False, tolerance: float = 0.01, expansion_factor: float = 2.0, nll_convergence_threshold: float = 0.1, single_stage: bool = True, multi_start: bool = True, n_starts: int = 3, use_basinhopping: bool = False, basinhopping_niter: int = 20, optimizer: str = 'fminsearchcon', ppe_ref_mag: str = None, use_fast_mode: bool = False, magnitude_samples: int = 50, lead_time_days: float = None ): """ EEPAS parameter learning with automatic boundary adjustment. Workflow: 1. Execute EEPAS optimization with current boundaries (three-stage or single-stage) 2. Check if any result parameters hit boundaries 3. If hit, relax boundaries and save new configuration 4. Check if NLL has converged 5. Repeat until convergence or maximum rounds reached Args: config_path: Configuration file path m0: Completeness magnitude (if not specified, uses value from config file) max_adjustment_rounds: Maximum boundary adjustment rounds (default: 3), usually converges in 2-3 rounds tolerance: Boundary hit detection tolerance (default: 0.01 = 1%), parameter distance to boundary < 1% is considered hitting expansion_factor: Boundary expansion factor (default: 2.0), lower bound: new = old / 2.0, upper bound: new = old * 2.0 nll_convergence_threshold: NLL convergence threshold (default: 0.1), two consecutive rounds with improvement < 0.1 → automatic stop single_stage: Whether to use single-stage full parameter optimization (default: True, uses single-stage) multi_start: Whether to use multi-start optimization (default: True) n_starts: Number of multi-start points (default: 3) use_basinhopping: Whether to use basinhopping global optimization (default: False) basinhopping_niter: Number of basinhopping iterations (default: 20) optimizer: Optimizer to use: 'fminsearchcon', 'SLSQP', 'L-BFGS-B' (default: 'fminsearchcon') ppe_ref_mag: PPE reference magnitude: 'mT' or 'm0' (default: None, uses config) use_fast_mode: Whether to use fast magnitude integration (default: False) magnitude_samples: Number of magnitude samples for fast mode (default: 50) lead_time_days: Fixed lead time L in days for FLEEPAS (default: None) - None: Read from config's timeComp.lead_time_days, or use all historical precursors if not set - float: Only use earthquakes within [t-L, t-delay] as precursors (FLEEPAS mode) Returns: dict: Final parameter dictionary containing optimized EEPAS parameters """ print('='*80) if single_stage: print('EEPAS Parameter Learning - Single-Stage Full Optimization + Automatic Boundary Adjustment') else: print('EEPAS Parameter Learning - Three-Stage Optimization + Automatic Boundary Adjustment') print('='*80) print(f'Configuration: {config_path}') print(f'Optimization mode: {"Single-stage full parameter optimization" if single_stage else "Three-stage optimization"}') print(f'Maximum adjustment rounds: {max_adjustment_rounds}') print(f'Boundary tolerance: {tolerance} (parameter distance to boundary <{tolerance*100:.1f}% is considered hitting)') print(f'Expansion factor: {expansion_factor}x') print(f'NLL convergence threshold: {nll_convergence_threshold}') print('='*80) # ===== Load initial configuration ===== original_config_path = config_path # save original config path current_config_path = config_path # current config path in use with open(current_config_path, 'r') as f: config = json.load(f) results_dir = config.get('resultsDir', 'results_decluster') learn_start = config['learnStartYear'] learn_end = config['learnEndYear'] # Construct EEPAS result file path base_dir = Path(results_dir) if not base_dir.is_absolute(): base_dir = Path(current_config_path).parent / base_dir eepas_result_path = base_dir / f'Fitted_par_EEPAS_{learn_start}_{learn_end}.csv' # Record NLL from each round to determine convergence nll_history = [] # ===== Start iterative optimization ===== for round_num in range(1, max_adjustment_rounds + 1): print(f'\n{"="*80}') print(f'Round {round_num} Optimization') print(f'{"="*80}') print(f'Using configuration: {current_config_path}') print(f'{"="*80}') # Reload current config (might be newly adjusted) with open(current_config_path, 'r') as f: config = json.load(f) # Execute EEPAS optimization (call eepas_learning) try: eepas_learning(current_config_path, m0=m0, multi_start=multi_start, n_starts=n_starts, single_stage=single_stage, use_basinhopping=use_basinhopping, basinhopping_niter=basinhopping_niter, optimizer=optimizer, ppe_ref_mag=ppe_ref_mag, use_fast_mode=use_fast_mode, magnitude_samples=magnitude_samples, lead_time_days=lead_time_days) except Exception as e: print(f'❌ Optimization failed: {e}') return None # Check if result file was generated if not eepas_result_path.exists(): print(f'❌ Result file not found: {eepas_result_path}') return None # Parse optimization results (CSV file) result_params = parse_result_csv(str(eepas_result_path)) if result_params is None: print('❌ Failed to parse result file') return None final_nll = result_params.get('ln_likelihood', None) print(f'\n✅ This round optimization complete') print(f' Final NLL = {final_nll:.6f}') # ===== Check NLL convergence ===== nll_history.append(final_nll) # If we have results from two or more rounds, check if NLL improvement is below threshold if len(nll_history) > 1: nll_improvement = abs(nll_history[-2] - nll_history[-1]) print(f' NLL improvement: {nll_improvement:.6f}') # NLL convergence criterion: two consecutive rounds with improvement < 0.1 if nll_improvement < nll_convergence_threshold: print(f'\n{"="*80}') print(f'✅ NLL has converged! Improvement ({nll_improvement:.6f}) < threshold ({nll_convergence_threshold})') print(f' Round {len(nll_history)-1}: NLL = {nll_history[-2]:.6f}') print(f' Round {len(nll_history)}: NLL = {nll_history[-1]:.6f}') print(f' Stopping further adjustments.') print('='*80) return result_params # ===== If boundary adjustment is disabled, return result directly ===== if disable_boundary_adjustment: print('\n' + '='*80) print('✅ Optimization complete (boundary adjustment disabled)') print(f' Final NLL = {final_nll:.6f}') print('='*80) return result_params # ===== Check boundary hits ===== # Load boundary settings (Stage3 for three-stage, Stage1 for single-stage) if 'stage3' in config['optimization']: # Three-stage mode: use stage3 boundaries boundary_config = config['optimization']['stage3'] else: # Single-stage mode: use stage1 boundaries boundary_config = config['optimization']['stage1'] param_names = boundary_config['parameters'] lower_bounds = np.array(boundary_config['lowerBounds']) upper_bounds = np.array(boundary_config['upperBounds']) # Extract 8 EEPAS parameter values from results params_values = [] for name in param_names: if name in result_params: params_values.append(result_params[name]) else: print(f'⚠️ Warning: Parameter {name} not found in results') params_values.append(0.0) params = np.array(params_values) # Check if parameters hit boundaries (using configured tolerance) print(f'\nChecking if Stage3 parameters hit boundaries...') hits = check_boundary_hits(params, lower_bounds, upper_bounds, param_names, tolerance) # If no parameters hit boundaries → optimization successfully completed if not hits: print('\n' + '='*80) print('✅ Optimization successfully completed! No parameters hit boundaries') print(f' Final NLL = {final_nll:.6f}') print('='*80) return result_params # Suggest and apply adjustments (using unified adjustment logic, respecting physical constraints) print(f'\n💡 Suggesting boundary adjustments (expansion factor = {expansion_factor}x):') adjusted = False for param_name, bound_type in hits.items(): idx = param_names.index(param_name) current_value = params[idx] if bound_type == 'lower': old_bound = lower_bounds[idx] # Use unified boundary adjustment function (respecting physical constraints) new_bound = suggest_boundary_adjustment( param_name, current_value, old_bound, 'lower', expansion_factor ) boundary_config['lowerBounds'][idx] = new_bound print(f' {param_name} lower bound: {old_bound:.6f}{new_bound:.6f}') adjusted = True else: # upper old_bound = upper_bounds[idx] # Use unified boundary adjustment function (respecting physical constraints) new_bound = suggest_boundary_adjustment( param_name, current_value, old_bound, 'upper', expansion_factor ) boundary_config['upperBounds'][idx] = new_bound print(f' {param_name} upper bound: {old_bound:.6f}{new_bound:.6f}') adjusted = True if adjusted: # Create new configuration file (do not overwrite original) import shutil # Generate new config file name original_path = Path(original_config_path) stem = original_path.stem # filename without extension suffix = original_path.suffix # file extension new_config_name = f'{stem}_autoadjusted_round{round_num}{suffix}' new_config_path = original_path.parent / new_config_name # Save adjusted config to new file with open(new_config_path, 'w') as f: json.dump(config, f, indent=2) print(f' ✅ Created new config file: {new_config_path}') print(f' 📌 Original config file remains unchanged: {original_config_path}') # Update current config path (next round will use the new config) current_config_path = str(new_config_path) if round_num == max_adjustment_rounds: print(f'\n{"="*80}') print(f'⚠️ Maximum adjustment rounds ({max_adjustment_rounds}) reached, stopping') print(f' Current NLL = {final_nll:.6f}') print(' Recommendation: Check if parameters still hit boundaries, may need manual further adjustment') print('='*80) else: print(f'\n🔄 Boundary issue detected, preparing Round {round_num + 1} optimization...') import time time.sleep(2) # Give user 2 seconds to view information, but no interaction needed return result_params
[docs] def main(): parser = argparse.ArgumentParser( description='EEPAS Parameter Learning - Automatic Boundary Adjustment Version' ) parser.add_argument( '--config', type=str, required=True, help='Configuration file path (JSON format)' ) parser.add_argument( '--m0', type=float, default=None, help='Completeness magnitude (optional, overrides value in config file)' ) parser.add_argument( '--max-rounds', type=int, default=3, help='Maximum boundary adjustment rounds (default: 3)' ) parser.add_argument( '--no-boundary-adjustment', action='store_true', help='Disable automatic boundary adjustment, use fixed boundaries from config' ) parser.add_argument( '--tolerance', type=float, default=0.01, help='Boundary hit tolerance, relative proportion (default: 0.01 = 1%%)' ) parser.add_argument( '--expansion', type=float, default=2.0, help='Boundary expansion factor (default: 2.0)' ) parser.add_argument( '--nll-threshold', type=float, default=0.1, help='NLL convergence threshold (default: 0.1)' ) parser.add_argument( '--single-stage', action='store_true', help='Use single-stage full parameter optimization (8 parameters jointly)' ) parser.add_argument( '--three-stage', action='store_true', help='Use three-stage optimization (stage 1: am,at,Sa,u → stage 2: Sm,bt,St,ba,u → stage 3: all)' ) parser.add_argument( '--no-multistart', action='store_true', help='Disable multi-start search (default: enabled with 3 starting points)' ) parser.add_argument( '--n-starts', type=int, default=3, help='Number of multi-start points (default: 3)' ) parser.add_argument( '--basinhopping', action='store_true', help='Use Basin-Hopping global optimization (escape local optima)' ) parser.add_argument( '--basinhopping-niter', type=int, default=20, help='Basin-hopping iteration count (default: 20)' ) parser.add_argument( '--optimizer', type=str, default='fminsearchcon', choices=['fminsearchcon', 'L-BFGS-B', 'TNC', 'SLSQP', 'Powell'], help='Optimizer selection (default: fminsearchcon)' ) parser.add_argument( '--ppe-ref-mag', type=str, choices=['m0', 'mT'], default='mT', help='PPE reference magnitude: m0 or mT (default: mT, paper version)' ) parser.add_argument( '--accurate', action='store_true', help='Use accurate mode (quad_vec integration, slower but more precise)' ) parser.add_argument( '--magnitude-samples', type=int, default=50, help='Number of sampling points for fast mode (default: 50)' ) parser.add_argument( '--lead-time-days', type=float, default=None, help='Fixed lead time L in days for FLEEPAS mode (default: None, read from config)' ) args = parser.parse_args() # Check for conflicting arguments if args.single_stage and args.three_stage: print('❌ Error: Cannot specify both --single-stage and --three-stage') sys.exit(1) # Determine optimization mode # Priority: # 1. If config has customStages enabled → use custom stages # 2. If --single-stage or --three-stage specified → use explicit mode # 3. Auto-detect from stage1 configuration: # - If stage1 has all 8 parameters → single-stage # - Otherwise → three-stage from utils.data_loader import DataLoader custom_config = DataLoader.load_custom_stages(args.config) if custom_config is not None: # Custom stages enabled in config optimization_mode = 'custom' single_stage_flag = False # Not used for custom stages print(f'🎯 Optimization mode: Custom Stages ({len(custom_config["stages"])} stages)', flush=True) elif args.single_stage: # User explicitly requested single-stage optimization_mode = 'single' single_stage_flag = True print('🎯 Optimization mode: Single-Stage (8 parameters jointly)', flush=True) elif args.three_stage: # User explicitly requested three-stage optimization_mode = 'three' single_stage_flag = False print('🎯 Optimization mode: Three-Stage (am,at,Sa,u → Sm,bt,St,ba,u → all)', flush=True) else: # Auto-detect from stage1 configuration stage1_config_type = DataLoader.detect_stage1_config_type(args.config) if stage1_config_type == 'single': optimization_mode = 'single' single_stage_flag = True print('🎯 Optimization mode: Single-Stage (auto-detected from stage1 config)', flush=True) print(' stage1 contains all 8 parameters → using single-stage optimization', flush=True) else: optimization_mode = 'three' single_stage_flag = False print('🎯 Optimization mode: Three-Stage (auto-detected from stage1 config)', flush=True) print(' stage1 contains partial parameters → using three-stage optimization', flush=True) # Determine which mode to use use_fast_mode = not args.accurate if args.accurate: print('⚠️ Using accurate mode (quad_vec), slower but higher precision', flush=True) else: print(f'✅ Using fast mode (magnitude_samples={args.magnitude_samples})', flush=True) print(f'Using PPE reference magnitude: {args.ppe_ref_mag}', flush=True) result = eepas_with_auto_boundary( config_path=args.config, m0=args.m0, max_adjustment_rounds=1 if args.no_boundary_adjustment else args.max_rounds, disable_boundary_adjustment=args.no_boundary_adjustment, tolerance=args.tolerance, expansion_factor=args.expansion, nll_convergence_threshold=args.nll_threshold, single_stage=single_stage_flag, multi_start=not args.no_multistart, # Default enable multistart, unless --no-multistart specified n_starts=args.n_starts, use_basinhopping=args.basinhopping, basinhopping_niter=args.basinhopping_niter, optimizer=args.optimizer, ppe_ref_mag=args.ppe_ref_mag, use_fast_mode=use_fast_mode, magnitude_samples=args.magnitude_samples, lead_time_days=args.lead_time_days ) if result is not None: print('\nFinal Results Summary:') for key, value in result.items(): if key != 'ln_likelihood': print(f' {key} = {value:.6f}') print(f' NLL = {result["ln_likelihood"]:.6f}') sys.exit(0) else: sys.exit(1)
if __name__ == '__main__': main()