Files
2025AsianB/org/chatgpt2/q1_anaylis.py
2025-11-20 20:27:39 +08:00

329 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import CubicSpline
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
import warnings
from sklearn.metrics.pairwise import cosine_similarity
from org.chatgpt2.q1_2 import wl_max, wl_min
warnings.filterwarnings('ignore')
# -----------------------------
# 1. 通用工具函数(保持不变)
# -----------------------------
def make_strictly_increasing(wl, n, k):
    """Filter (wl, n, k) so the wavelength axis is strictly increasing.

    Duplicate wavelengths keep only their first occurrence, and any sample
    not strictly above its predecessor is dropped. CubicSpline requires a
    strictly increasing abscissa, which this guarantees for sorted input.
    """
    deduped, first_occurrence = np.unique(wl, return_index=True)
    if len(deduped) != len(wl):
        print(f"Removed {len(wl) - len(deduped)} duplicate wavelength points")
        wl, n, k = wl[first_occurrence], n[first_occurrence], k[first_occurrence]
    # Always keep the first sample, then every sample strictly above its predecessor.
    keep = np.concatenate([[True], np.diff(wl) > 0])
    if not keep.all():
        wl, n, k = wl[keep], n[keep], k[keep]
    return wl, n, k
def read_split_data(file_path):
    """Read a two-section optical-constants file.

    Expected layout: a 'wl n' header line, then whitespace-separated
    wavelength/n pairs, then a literal 'wl k' marker line, then
    wavelength/k pairs. Blank lines and lines starting with '#' are skipped.

    Args:
        file_path: path to the UTF-8 text file.

    Returns:
        (wl, n, k) as numpy arrays sorted by ascending wavelength.

    Raises:
        ValueError: if the 'wl k' section marker is missing.
        AssertionError: if the n and k sections list different wavelengths.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = [line.strip() for line in f if line.strip() and not line.startswith('#')]
    split_idx = None
    for i, line in enumerate(lines):
        if line == 'wl k':
            split_idx = i
            break
    if split_idx is None:
        # Previously split_idx stayed None, mis-slicing the n section and
        # raising an opaque TypeError on the k section; fail loudly instead.
        raise ValueError(f"Section marker 'wl k' not found in {file_path}")

    def _parse_pairs(pair_lines):
        # Each line is "<wavelength> <value>"; returns two float arrays.
        wl_vals, y_vals = [], []
        for entry in pair_lines:
            wl_str, y_str = entry.split()
            wl_vals.append(float(wl_str))
            y_vals.append(float(y_str))
        return np.array(wl_vals), np.array(y_vals)

    wl_n, n_list = _parse_pairs(lines[1:split_idx])      # skip the 'wl n' header
    wl_k, k_list = _parse_pairs(lines[split_idx + 1:])
    assert np.allclose(wl_n, wl_k), "n和k的波长列表不一致"
    sorted_idx = np.argsort(wl_n)
    return wl_n[sorted_idx], n_list[sorted_idx], k_list[sorted_idx]
def fresnel_reflectance(n1, k1, n2, k2):
    """Normal-incidence Fresnel power reflectance between two media.

    (n1, k1) and (n2, k2) are the real/imaginary parts of each medium's
    complex refractive index; inputs may be scalars or numpy arrays.
    """
    index_a = n1 + 1j * k1
    index_b = n2 + 1j * k2
    amplitude = (index_a - index_b) / (index_a + index_b)
    return np.abs(amplitude) ** 2
def thin_film_emissivity(n_film, k_film, d, wl, n_air=1.0, k_air=0.0, n_sub=1.5, k_sub=0.0, r=0.0):
    """Spectral emissivity of a single absorbing film on a substrate.

    Air/film/substrate stack at normal incidence: specular reflectance from
    the two-interface interference formula with absorption, mixed with a
    fixed diffuse reflectance (0.05 * r) according to roughness fraction r.

    Args:
        n_film, k_film: film refractive index parts (scalars or arrays).
        d: film thickness (same length unit as wl).
        wl: wavelength(s).
        n_air, k_air: ambient-medium index (defaults to air/vacuum).
        n_sub, k_sub: substrate index.
        r: roughness/diffuse mixing fraction in [0, 1].

    Returns:
        Emissivity 1 - R_total - T_total, clipped to [0, 1].
    """
    m_top = n_air + 1j * k_air
    m_mid = n_film + 1j * k_film
    m_bot = n_sub + 1j * k_sub
    refl_top = np.abs((m_top - m_mid) / (m_top + m_mid)) ** 2  # air/film interface
    refl_bot = np.abs((m_mid - m_bot) / (m_mid + m_bot)) ** 2  # film/substrate interface
    phase = 2 * np.pi * m_mid / wl * d      # complex single-pass phase thickness
    absorb = 2 * np.imag(phase)             # round-trip absorption exponent
    attn = np.exp(-absorb)
    # Interference cross term shared by numerator and denominator.
    cross = 2 * np.sqrt(refl_top * refl_bot * attn) * np.cos(2 * np.real(phase))
    denom = 1 + refl_top * refl_bot * attn + cross
    refl_specular = (refl_top + refl_bot * attn + cross) / denom
    refl_diffuse = 0.05 * r
    refl_total = (1 - r) * refl_specular + r * refl_diffuse
    trans_total = (1 - refl_top) * (1 - refl_bot) * attn / denom
    return np.clip(1 - refl_total - trans_total, 0, 1)
# -----------------------------
# 2. 新增:局部相似度计算函数(滑动窗口法)
# -----------------------------
def calculate_local_similarity(eps1, eps2, wl_common, window_size=0.5):
    """Per-wavelength local similarity via a sliding window.

    For each wavelength point, statistics are computed over all samples
    within +/- window_size/2 of that point.

    Args:
        eps1, eps2: emissivity arrays on the common wavelength grid.
        wl_common: common wavelength array (μm).
        window_size: sliding-window width in μm (default 0.5).

    Returns:
        local_corr: per-point Pearson correlation mapped from [-1, 1] to [0, 1].
        local_cos_sim: per-point cosine similarity.
        local_mae: per-point mean absolute error within the window.
        Points whose window contains fewer than 3 samples are NaN.
    """
    n_points = len(wl_common)
    # Prefill with NaN so under-populated windows need no per-branch writes.
    local_corr = np.full(n_points, np.nan)
    local_cos_sim = np.full(n_points, np.nan)
    local_mae = np.full(n_points, np.nan)
    half = window_size / 2
    for i in range(n_points):
        center = wl_common[i]
        mask = (wl_common >= center - half) & (wl_common <= center + half)
        if np.sum(mask) < 3:  # too few samples for meaningful statistics
            continue
        a = eps1[mask]
        b = eps2[mask]
        corr, _ = pearsonr(a, b)
        # Cosine similarity computed directly with numpy instead of calling
        # sklearn's cosine_similarity per window: same value, without the
        # per-call reshape + input-validation overhead in this hot loop.
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        cos_sim = float(np.dot(a, b) / denom) if denom > 0 else 0.0
        local_corr[i] = (corr + 1) / 2  # map raw correlation [-1, 1] -> [0, 1]
        local_cos_sim[i] = cos_sim
        local_mae[i] = np.mean(np.abs(a - b))
    return local_corr, local_cos_sim, local_mae
# -----------------------------
# 3. 核心相似性分析函数(新增局部相似度计算)
# -----------------------------
def analyze_spectral_similarity(file1, file2, thicknesses=(1.0,), n_sub=1.5, k_sub=0.0, r=0.0, window_size=0.5):
    """Full emissivity-similarity pipeline for two optical-constants files.

    Steps: read both files, enforce strictly increasing wavelengths,
    intersect the two wavelength ranges, cubic-spline both (n, k) sets onto
    a common 1000-point grid, compute thin-film emissivity per thickness,
    derive local (sliding-window) and global similarity metrics, and plot.

    Args:
        file1, file2: paths to the split 'wl n' / 'wl k' data files.
        thicknesses: iterable of film thicknesses in μm. The default was a
            mutable list ([1.0]); an equivalent tuple avoids the shared
            mutable-default pitfall while remaining backward-compatible.
        n_sub, k_sub: substrate complex-index parts.
        r: roughness fraction forwarded to thin_film_emissivity.
        window_size: sliding-window width (μm) for local similarity.

    Returns:
        (similarity_results, wl_common, emissivity_dict, local_similarity_dict)
        where similarity_results[d] maps metric names to values (the
        atmospheric-window entries are None when 8-13 μm is not covered).

    Raises:
        ValueError: if the two files' wavelength ranges do not overlap.
    """
    # Step 1: read and sanitize both data sets.
    wl1, n1, k1 = read_split_data(file1)
    wl2, n2, k2 = read_split_data(file2)
    wl1, n1, k1 = make_strictly_increasing(wl1, n1, k1)
    wl2, n2, k2 = make_strictly_increasing(wl2, n2, k2)
    # Step 2: common wavelength range (intersection of the two files).
    # Named wl_lo/wl_hi so they no longer shadow the module-level
    # wl_min/wl_max imported from q1_2.
    wl_lo = max(wl1.min(), wl2.min())
    wl_hi = min(wl1.max(), wl2.max())
    if wl_lo >= wl_hi:
        raise ValueError("两个文件的波长范围无交集,无法进行相似性分析!")
    wl_common = np.linspace(wl_lo, wl_hi, 1000)
    # Step 3: cubic-spline interpolation of n and k onto the common grid.
    n1_common = CubicSpline(wl1, n1)(wl_common)
    k1_common = CubicSpline(wl1, k1)(wl_common)
    n2_common = CubicSpline(wl2, n2)(wl_common)
    k2_common = CubicSpline(wl2, k2)(wl_common)
    # Steps 4-5: per-thickness emissivity, local similarity, global metrics
    # (single loop; the original iterated thicknesses twice).
    emissivity_dict = {}
    local_similarity_dict = {}
    similarity_results = {}
    for d in thicknesses:
        eps1 = thin_film_emissivity(n1_common, k1_common, d, wl_common, n_sub=n_sub, k_sub=k_sub, r=r)
        eps2 = thin_film_emissivity(n2_common, k2_common, d, wl_common, n_sub=n_sub, k_sub=k_sub, r=r)
        emissivity_dict[d] = (eps1, eps2)
        local_similarity_dict[d] = calculate_local_similarity(eps1, eps2, wl_common, window_size)
        pearson_corr, _ = pearsonr(eps1, eps2)
        cos_sim = cosine_similarity(eps1.reshape(1, -1), eps2.reshape(1, -1))[0][0]
        mse = mean_squared_error(eps1, eps2)
        # Normalize MSE by the larger variance; epsilon guards flat spectra.
        norm_mse = mse / (np.max([np.var(eps1), np.var(eps2)]) + 1e-8)
        mae = np.mean(np.abs(eps1 - eps2))
        # Metrics restricted to the 8-13 μm atmospheric window, when covered.
        window_corr, window_mae = None, None
        if wl_lo <= 13 and wl_hi >= 8:
            window_mask = (wl_common >= 8) & (wl_common <= 13)
            eps1_window = eps1[window_mask]
            eps2_window = eps2[window_mask]
            window_corr, _ = pearsonr(eps1_window, eps2_window)
            window_mae = np.mean(np.abs(eps1_window - eps2_window))
        similarity_results[d] = {
            "pearson_correlation": pearson_corr,
            "cosine_similarity": cos_sim,
            "normalized_mse": norm_mse,
            "mae": mae,
            "window_pearson_correlation": window_corr,
            "window_mae": window_mae
        }
    # Step 6: visualization (spectra, differences, similarity curves).
    plot_spectral_comparison(wl_common, emissivity_dict, local_similarity_dict, thicknesses, wl_lo, wl_hi)
    return similarity_results, wl_common, emissivity_dict, local_similarity_dict
# -----------------------------
# 4. 可视化函数(新增相似度曲线子图)
# -----------------------------
def plot_spectral_comparison(wl_common, emissivity_dict, local_similarity_dict, thicknesses, wl_min, wl_max):
    """Render a 3-column comparison figure, one row per film thickness.

    Columns: (1) both emissivity spectra, (2) their absolute difference,
    (3) the sliding-window similarity curves with 0.8/0.9 threshold guides.
    Saves 'spectral_similarity_complete.png' at 300 dpi, then shows it.
    wl_min/wl_max are the analysis range supplied by the caller.
    """
    n_plots = len(thicknesses)
    fig, axes = plt.subplots(n_plots, 3, figsize=(18, 5 * n_plots))  # third column added for similarity curves
    plt.rcParams['font.sans-serif'] = ['Arial']
    for idx, d in enumerate(thicknesses):
        eps1, eps2 = emissivity_dict[d]
        local_corr, local_cos_sim, local_mae = local_similarity_dict[d]
        diff = np.abs(eps1 - eps2)
        # Panel 1: emissivity spectra of both files.
        # With a single thickness, plt.subplots returns a 1-D axes array,
        # hence the n_plots > 1 indexing switch (same for ax2/ax3 below).
        ax1 = axes[idx, 0] if n_plots > 1 else axes[0]
        ax1.plot(wl_common, eps1, linewidth=2, label='data.txt', color='darkblue')
        ax1.plot(wl_common, eps2, linewidth=2, label='data2.txt', color='darkred', linestyle='--')
        if wl_min <= 13 and wl_max >= 8:
            ax1.axvspan(8, 13, alpha=0.15, color='orange', label='Atmospheric Window (8-13 μm)')
        ax1.set_xlabel('Wavelength (μm)', fontsize=12)
        ax1.set_ylabel('Emissivity ε(λ)', fontsize=12)
        ax1.set_title(f'Emissivity Spectrum Comparison (Thickness = {d} μm)', fontsize=14, fontweight='bold')
        ax1.grid(True, alpha=0.3)
        ax1.legend(fontsize=10)
        ax1.set_ylim(0, 1.05)
        # Panel 2: absolute emissivity difference.
        ax2 = axes[idx, 1] if n_plots > 1 else axes[1]
        ax2.plot(wl_common, diff, linewidth=2, color='darkgreen', label='Absolute Difference |ε1 - ε2|')
        ax2.fill_between(wl_common, 0, diff, alpha=0.3, color='darkgreen')
        if wl_min <= 13 and wl_max >= 8:
            ax2.axvspan(8, 13, alpha=0.15, color='orange', label='Atmospheric Window (8-13 μm)')
        ax2.set_xlabel('Wavelength (μm)', fontsize=12)
        ax2.set_ylabel('Absolute Difference', fontsize=12)
        ax2.set_title(f'Emissivity Difference (Thickness = {d} μm)', fontsize=14, fontweight='bold')
        ax2.grid(True, alpha=0.3)
        ax2.legend(fontsize=10)
        ax2.set_ylim(0, np.nanmax(diff) * 1.2)
        # Panel 3: wavelength-dependent similarity curves.
        ax3 = axes[idx, 2] if n_plots > 1 else axes[2]
        # Local Pearson correlation, already mapped to the 0-1 range.
        ax3.plot(wl_common, local_corr, linewidth=2.5, color='#4B0082', label='Local Pearson Correlation (0-1)')
        # Local cosine similarity (0-1).
        ax3.plot(wl_common, local_cos_sim, linewidth=2.5, color='orange', label='Local Cosine Similarity (0-1)',
                 linestyle='--')
        # Threshold guides: 0.9 = high similarity, 0.8 = medium similarity.
        ax3.axhline(y=0.9, color='red', linestyle=':', linewidth=1.5, label='High Similarity Threshold (0.9)')
        ax3.axhline(y=0.8, color='orange', linestyle=':', linewidth=1.5, label='Medium Similarity Threshold (0.8)')
        # Shade the atmospheric window when it lies within range.
        if wl_min <= 13 and wl_max >= 8:
            ax3.axvspan(8, 13, alpha=0.15, color='orange', label='Atmospheric Window (8-13 μm)')
        ax3.set_xlabel('Wavelength (μm)', fontsize=12)
        ax3.set_ylabel('Local Similarity (0-1)', fontsize=12)
        ax3.set_title(f'Wavelength-Dependent Similarity Curve (Thickness = {d} μm)', fontsize=14, fontweight='bold')
        ax3.grid(True, alpha=0.3)
        ax3.legend(fontsize=10)
        ax3.set_ylim(0, 1.05)  # similarity values live in [0, 1]
        ax3.set_xlim(wl_min, wl_max)
    plt.tight_layout()
    plt.savefig('spectral_similarity_complete.png', dpi=300, bbox_inches='tight')
    plt.show()
# -----------------------------
# 5. 主程序执行
# -----------------------------
if __name__ == "__main__":
    # Input files and sweep configuration.
    file1 = '/Users/spasolreisa/IdeaProjects/asiaMath/data.txt'
    file2 = '/Users/spasolreisa/IdeaProjects/asiaMath/data2.txt'
    target_thicknesses = [0.5, 1.0, 1.5, 2.0]
    base_params = {
        'n_sub': 1.5,
        'k_sub': 0.0,
        'r': 0.05
    }
    window_size = 0.5  # sliding-window width; 0.3-0.8 μm is a reasonable range
    print("=== 开始光谱相似性分析(含相似度曲线) ===")
    print(f"文件1: {file1}")
    print(f"文件2: {file2}")
    print(f"分析厚度: {target_thicknesses} μm")
    print(f"滑动窗口宽度: {window_size} μm")
    print("-" * 50)
    results, wl_common, emissivity_dict, local_similarity_dict = analyze_spectral_similarity(
        file1, file2,
        thicknesses=target_thicknesses,
        n_sub=base_params['n_sub'],
        k_sub=base_params['k_sub'],
        r=base_params['r'],
        window_size=window_size
    )
    # Report the global metrics per thickness.
    print("\n=== 全局相似性指标 ===")
    for d in target_thicknesses:
        res = results[d]
        print(f"\n【厚度 {d} μm】")
        print(f"全局皮尔逊相关系数: {res['pearson_correlation']:.4f}")
        print(f"全局余弦相似度: {res['cosine_similarity']:.4f}")
        print(f"归一化均方误差: {res['normalized_mse']:.4f}")
        print(f"平均绝对误差: {res['mae']:.4f}")
        if res['window_pearson_correlation'] is not None:
            print(f"大气窗口全局相关系数: {res['window_pearson_correlation']:.4f}")
    # Local-similarity statistics inside the atmospheric window.
    # BUG FIX: the original tested `wl_min`/`wl_max` here, which at module
    # scope are the values imported from org.chatgpt2.q1_2 (the locals of
    # analyze_spectral_similarity never escape) — i.e. a different dataset's
    # range. The actual analysis range is the span of the returned wl_common.
    wl_lo, wl_hi = wl_common.min(), wl_common.max()
    print("\n=== 大气窗口8-13μm局部相似度统计 ===")
    for d in target_thicknesses:
        local_corr, local_cos_sim, _ = local_similarity_dict[d]
        if wl_lo <= 13 and wl_hi >= 8:
            window_mask = (wl_common >= 8) & (wl_common <= 13)
            window_local_corr = local_corr[window_mask]
            window_local_cos = local_cos_sim[window_mask]
            # Drop NaN entries (windows that held fewer than 3 samples).
            window_local_corr = window_local_corr[~np.isnan(window_local_corr)]
            window_local_cos = window_local_cos[~np.isnan(window_local_cos)]
            if len(window_local_corr) > 0:
                print(f"\n【厚度 {d} μm】")
                print(f"大气窗口局部相关系数均值: {np.mean(window_local_corr):.4f}")
                print(f"大气窗口局部相关系数最小值: {np.min(window_local_corr):.4f}")
                print(f"大气窗口局部余弦相似度均值: {np.mean(window_local_cos):.4f}")
    # Interpretation notes for the operator.
    print("\n=== 结果解读 ===")
    print("1. 相似度曲线子图3值越接近1对应波长下的发射率越相似")
    print("2. 高相似区域≥0.9):两文件在该波段的辐射冷却性能几乎一致;")
    print("3. 低相似区域(<0.8):需关注该波段的材料差异对冷却效果的影响;")
    print("4. 大气窗口内相似度:优先关注该区域,直接决定辐射冷却核心性能是否一致。")