4. API 参考#
警告
常见错误: AttributeError: 'numpy.ndarray' object has no attribute 'columns'
当您向 Bgolearn 方法传递 numpy 数组而不是 pandas DataFrames 时会发生此错误。
解决方案: 始终使用 pandas DataFrames 和 Series:
data_matrix → pd.DataFrame
Measured_response → pd.Series
virtual_samples → pd.DataFrame
永远不要使用 .values - 它会将 DataFrames 转换为 numpy 数组!
4.1. 核心类#
4.1.1. BGOsampling.Bgolearn#
提供贝叶斯全局优化功能的主要优化类。
from Bgolearn import BGOsampling
class Bgolearn:
"""
Bayesian Global Optimization for materials discovery and scientific research.
This class implements various acquisition functions and provides tools for
efficient experimental design and optimization.
"""
4.1.1.1. 构造函数#
def __init__(self):
"""
Initialize Bgolearn optimizer.
Creates necessary directories and displays welcome information.
The optimizer is configured with default settings and can be customized
through the fit() method parameters.
"""
4.1.1.2. 核心方法#
4.1.1.2.1. fit()#
def fit(self, data_matrix, Measured_response, virtual_samples,
Mission='Regression', Classifier='GaussianProcess', noise_std=None,
Kriging_model=None, opt_num=1, min_search=True, CV_test=False,
Dynamic_W=False, seed=42, Normalize=True):
"""
Fit the Bayesian optimization model and recommend next experiments.
Parameters
----------
data_matrix : pandas.DataFrame
训练输入特征 (n_samples, n_features)
**必须是 DataFrame** - numpy 数组会导致 AttributeError
Measured_response : pandas.Series
训练目标值 (n_samples,)
**必须是 Series** - numpy 数组可能导致问题
virtual_samples : pandas.DataFrame
优化的候选点 (n_candidates, n_features)
**必须是 DataFrame** - numpy 数组会导致 AttributeError
Mission : str, default='Regression'
任务类型:'Regression' 或 'Classification'
Classifier : str, default='GaussianProcess'
用于分类:'GaussianProcess', 'LogisticRegression',
'NaiveBayes', 'SVM', 'RandomForest'
noise_std : float or ndarray, default=None
高斯过程的噪声水平。如果为 None,则自动估计
Kriging_model : str or callable, default=None
代理模型:'SVM', 'RF', 'AdaB', 'MLP' 或自定义模型
opt_num : int, default=1
下一次迭代推荐的候选数量
min_search : bool, default=True
True 表示最小化,False 表示最大化
CV_test : False, int, or 'LOOCV', default=False
交叉验证设置:
- False:无交叉验证(默认)
- int:k 折交叉验证(例如,10 表示 10 折)
- 'LOOCV':留一交叉验证
Dynamic_W : bool, default=False
是否应用动态重采样
seed : int, default=42
随机种子以确保可重复性
Normalize : bool, default=True
是否归一化输入数据
Returns
-------
Global_min or Global_max : optimization model
优化模型对象,包含以下方法和属性:
- EI():期望提升采集函数
- UCB():上置信界采集函数
- PoI():改进概率采集函数
- virtual_samples_mean:所有候选点的预测值
- virtual_samples_std:预测不确定性
- data_matrix:训练特征
- Measured_response:训练目标
Examples
--------
>>> from Bgolearn import BGOsampling
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample data
>>> X = pd.DataFrame(np.random.randn(20, 3), columns=['x1', 'x2', 'x3'])
>>> y = pd.Series(np.random.randn(20))
>>> candidates = pd.DataFrame(np.random.randn(100, 3), columns=['x1', 'x2', 'x3'])
>>>
>>> # Fit model
>>> optimizer = BGOsampling.Bgolearn()
>>> model = optimizer.fit(
... data_matrix=X, # Pass DataFrame directly
... Measured_response=y, # Pass Series directly
... virtual_samples=candidates, # Pass DataFrame directly
... opt_num=1,
... min_search=True
... )
>>>
>>> # Get recommendation using Expected Improvement
>>> ei_values, recommended_points = model.EI()
>>> next_experiment = recommended_points[0] # First recommendation
"""
4.2. 可用的采集函数(单目标)#
Bgolearn 支持多种采集函数用于不同的优化场景:
4.2.1. 核心采集函数#
4.2.1.1. 期望提升 (EI)#
EI:期望提升
Logarithmic EI:对 EI 应用对数变换
EI_plugin:代入式(plug-in)期望提升,以模型预测值而非观测值作为当前最优基准
Augmented_EI:带有附加参数的增强 EI
EQI:期望分位数提升
Reinterpolation_EI:基于重插值的 EI
4.2.1.2. 基于探索的函数#
UCB:上置信界
PoI:改进概率
4.2.1.3. 基于信息的函数#
PES:预测熵搜索
Knowledge_G:知识梯度
4.2.2. 在 fit() 方法中的使用#
fit() 方法完成代理模型的拟合并返回优化模型对象;随后在该对象上调用相应的采集函数方法(例如 model.EI())即可获得推荐的下一个实验点。
# Example: Basic optimization with automatic acquisition function selection
from Bgolearn import BGOsampling
optimizer = BGOsampling.Bgolearn()
model = optimizer.fit(
data_matrix=X_train, # Pass DataFrame directly
Measured_response=y_train, # Pass Series directly
virtual_samples=X_candidates, # Pass DataFrame directly
opt_num=1, # Number of recommendations
min_search=True # Minimize objective, min_search=False for maximization
)
# Get the recommended point using EI
ei_values, recommended_points = model.EI()
next_experiment = recommended_points[0]
4.2.3. 使用 BGO_Efficient 的高级用法#
对于想要显式控制采集函数的高级用户:
from Bgolearn.BgolearnFuns.BGO_eval import BGO_Efficient
# Create BGO_Efficient instance (advanced usage)
# This requires more setup and is typically used for research purposes
next_point : numpy.ndarray
具有最大 UCB 的点的坐标
Examples
--------
>>> # Conservative exploration
>>> ucb_values, next_point = model.UCB(alpha=1.0)
>>>
>>> # Aggressive exploration
>>> ucb_values, next_point = model.UCB(alpha=3.0)
"""
4.2.3.1. 改进概率 (PoI)#
def PoI(self, tao=0.01, T=None):
"""
Probability of Improvement acquisition function.
Parameters
----------
tao : float, default=0.01
改进的容差参数
T : float, optional
阈值。如果为 None,则使用最佳观测值。
Returns
-------
poi_values : numpy.ndarray
所有候选点的 PoI 值
next_point : numpy.ndarray
具有最大 PoI 的点的坐标
Examples
--------
>>> # Strict improvement required
>>> poi_values, next_point = model.PoI(tao=0.0)
>>>
>>> # Allow small degradation
>>> poi_values, next_point = model.PoI(tao=0.1)
"""
4.2.3.2. 增强期望提升 (AEI)#
def Augmented_EI(self, alpha=1.0, tao=0.0):
"""
Augmented Expected Improvement for noisy functions.
Parameters
----------
alpha : float, default=1.0
基线选择的权衡系数
tao : float, default=0.0
噪声标准差估计
Returns
-------
aei_values : numpy.ndarray
所有候选点的 AEI 值
next_point : numpy.ndarray
具有最大 AEI 的点的坐标
Examples
--------
>>> # For noisy experiments
>>> aei_values, next_point = model.Augmented_EI(alpha=1.5, tao=0.1)
"""
4.2.3.3. 期望分位数提升 (EQI)#
def EQI(self, beta=0.5):
"""
Expected Quantile Improvement for robust optimization.
Parameters
----------
beta : float, default=0.5
分位数水平 (0 < beta < 1)
0.5 = 中位数,0.25 = 保守,0.75 = 乐观
Returns
-------
eqi_values : numpy.ndarray
所有候选点的 EQI 值
next_point : numpy.ndarray
具有最大 EQI 的点的坐标
Examples
--------
>>> # Optimize median performance
>>> eqi_values, next_point = model.EQI(beta=0.5)
>>>
>>> # Conservative optimization (25th percentile)
>>> eqi_values, next_point = model.EQI(beta=0.25)
"""
4.2.3.4. 预测熵搜索 (PES)#
def PES(self, sam_num=100):
"""
Predictive Entropy Search for information-theoretic optimization.
Parameters
----------
sam_num : int, default=100
用于熵估计的蒙特卡洛样本数
Returns
-------
pes_values : numpy.ndarray
所有候选点的 PES 值
next_point : numpy.ndarray
具有最大 PES 的点的坐标
Examples
--------
>>> # Standard precision
>>> pes_values, next_point = model.PES(sam_num=100)
>>>
>>> # High precision (slower)
>>> pes_values, next_point = model.PES(sam_num=500)
"""
4.2.3.5. 知识梯度 (KG)#
def Knowledge_G(self, MC_num=1, Proc_num=1):
"""
Knowledge Gradient for value of information optimization.
Parameters
----------
MC_num : int, default=1
蒙特卡洛样本数
Proc_num : int, default=1
并行进程数(如果支持)
Returns
-------
kg_values : numpy.ndarray
所有候选点的 KG 值
next_point : numpy.ndarray
具有最大 KG 的点的坐标
Examples
--------
>>> # Fast approximation
>>> kg_values, next_point = model.Knowledge_G(MC_num=1)
>>>
>>> # More accurate estimation
>>> kg_values, next_point = model.Knowledge_G(MC_num=5)
"""
4.3. 基本可视化#
Bgolearn 不包含内置的可视化方法。但是,您可以使用 matplotlib 轻松创建可视化:
import matplotlib.pyplot as plt
import numpy as np
# Example: Plot Expected Improvement values
ei_values, recommended_points = model.EI()
plt.figure(figsize=(10, 6))
plt.plot(ei_values)
plt.title('Expected Improvement Values')
plt.xlabel('Candidate Index')
plt.ylabel('EI Value')
plt.axvline(x=np.argmax(ei_values), color='red', linestyle='--',
label=f'Best EI (idx={np.argmax(ei_values)})')
plt.legend()
plt.grid(True)
plt.show()
# Example: Plot predictions vs uncertainties
predictions = model.virtual_samples_mean
uncertainties = model.virtual_samples_std
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(predictions, uncertainties, alpha=0.6)
plt.xlabel('Predicted Value')
plt.ylabel('Prediction Uncertainty')
plt.title('Prediction vs Uncertainty')
plt.grid(True)
plt.subplot(1, 2, 2)
plt.hist(predictions, bins=20, alpha=0.7)
plt.xlabel('Predicted Value')
plt.ylabel('Frequency')
plt.title('Distribution of Predictions')
plt.grid(True)
plt.tight_layout()
plt.show()
4.4. 可视化注意事项#
备注
Bgolearn 专注于优化算法而不是可视化。对于高级绘图:
使用 matplotlib 进行基本绘图(如上所示)
使用 seaborn 进行统计可视化
使用 plotly 进行交互式绘图
使用 BgoFace GUI 进行可视化优化工作流
上面的示例展示了如何从 Bgolearn 模型中提取数据并创建自定义可视化。
----------
figsize : tuple, default=(10, 6)
图形大小(宽度,高度),单位为英寸
dpi : int, default=100
图形分辨率
style : str, default='seaborn'
要使用的 Matplotlib 样式
Examples
--------
>>> from bgolearn.visualization import BgolearnVisualizer
>>> visualizer = BgolearnVisualizer(figsize=(12, 8), dpi=150)
"""
#### 可视化方法
```python
def plot_optimization_history(self, y_history, y_true_optimum=None,
title="Optimization History", save_path=None):
"""
Plot optimization convergence history.
Parameters
----------
y_history : list or numpy.ndarray
迭代过程中的最佳目标值
y_true_optimum : float, optional
用于比较的真实最优值
title : str, default="Optimization History"
图表标题
save_path : str, optional
保存图形的路径
Returns
-------
fig : matplotlib.figure.Figure
生成的图形
Examples
--------
>>> history = [1.0, 0.8, 0.5, 0.3, 0.1]
>>> fig = visualizer.plot_optimization_history(
... y_history=history,
... y_true_optimum=0.0,
... title="My Optimization"
... )
"""
def plot_acquisition_function_2d(self, X_candidates, acquisition_values,
X_train, y_train, next_point=None,
title="2D Acquisition Function", save_path=None):
"""
Plot 2D acquisition function heatmap.
Parameters
----------
X_candidates : numpy.ndarray
候选点 (n_candidates, 2)
acquisition_values : numpy.ndarray
采集函数值
X_train : numpy.ndarray
训练输入点
y_train : numpy.ndarray
训练目标值
next_point : numpy.ndarray, optional
要突出显示的下一个推荐点
title : str, default="2D Acquisition Function"
图表标题
save_path : str, optional
保存图形的路径
Returns
-------
fig : matplotlib.figure.Figure
生成的图形
Examples
--------
>>> ei_values, next_point = model.EI()
>>> fig = visualizer.plot_acquisition_function_2d(
... X_candidates, ei_values,
... X_train, y_train,
... next_point=next_point
... )
"""
def plot_pareto_front(self, objectives, labels=None,
title="Pareto Front", save_path=None):
"""
Plot Pareto front for multi-objective optimization.
Parameters
----------
objectives : numpy.ndarray
目标值 (n_points, n_objectives)
labels : list, optional
目标标签
title : str, default="Pareto Front"
图表标题
save_path : str, optional
保存图形的路径
Returns
-------
fig : matplotlib.figure.Figure
生成的图形
Examples
--------
>>> # For 2-objective optimization
>>> objectives = np.column_stack([strength, ductility])
>>> fig = visualizer.plot_pareto_front(
... objectives,
... labels=['Strength', 'Ductility']
... )
"""
```
4.5. 快速参考#
4.5.1. 常见工作流#
# Basic optimization workflow
from Bgolearn import BGOsampling
# 1. Create the optimizer and fit the model (inputs must be pandas DataFrame / Series)
optimizer = BGOsampling.Bgolearn()
model = optimizer.fit(X_train, y_train, X_candidates)
# 2. Single-point optimization
ei_values, next_point = model.EI()
4.5.2. 参数指南#
| 函数 | 关键参数 | 推荐值 |
|---|---|---|
| EI | T | None(自动)或自定义阈值 |
| UCB | alpha | 1.0-3.0(越高 = 更多探索) |
| PoI | tao | 0.0-0.1(改进容差) |
| AEI | alpha, tao | 1.0-2.0, 0.0-0.2 |
| EQI | beta | 0.25(保守),0.5(中位数),0.75(乐观) |
| PES | sam_num | 100-500(越高 = 更准确) |
| KG | MC_num | 1-5(越高 = 更准确) |