Skip to content

Commit

Permalink
repo-sync-2024-06-19T10:45:51+0800 (#72)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhongtianq authored Jun 20, 2024
1 parent a10a98d commit 0215a9a
Show file tree
Hide file tree
Showing 9 changed files with 923 additions and 86 deletions.
2 changes: 2 additions & 0 deletions docs/architecture/apps/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ TrustedFlow内置了多种可信APP,每一个可信APP在执行计算逻辑之
lr_train
xgb_predict
lr_predict
lgbm_train
lgbm_predict
binary_evaluation
prediction_bias_eval

Expand Down
123 changes: 123 additions & 0 deletions docs/architecture/apps/lgbm_predict.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# LightGBM预测

使用给定的LightGBM模型对数据进行预测。

## 组件定义

1. 参数
(1) pred_name: 预测值的列名,默认为“pred”。
(2) save_label: 输出结果是否包含标签列,true表示保存;为true时输入数据必须包含标签列。
(3) label_name: 标签列的名称,默认为“label”。
(4) save_id: 输出结果是否包含ID列,true表示保存;为true时输入数据必须包含ID列。
(5) id_name: ID列的名称,默认为“id”。
(6) col_names: 可选,需要额外输出到结果中的列名列表,默认为空。
2. 输入:待预测的数据以及LightGBM模型。
3. 输出:预测结果。

```json
{
"domain": "ml.predict",
"name": "lgbm_predict",
"desc": "Predict using the lgbm model.",
"version": "0.0.1",
"attrs": [
{
"name": "pred_name",
"desc": "Column name for predictions.",
"type": "AT_STRING",
"atomic": {
"is_optional": true,
"default_value": {
"s": "pred"
}
}
},
{
"name": "save_label",
"desc": "Whether or not to save real label column into output pred table. If true, input feature_dataset must contain label column.",
"type": "AT_BOOL",
"atomic": {
"is_optional": true,
"default_value": {}
}
},
{
"name": "label_name",
"desc": "Column name for label.",
"type": "AT_STRING",
"atomic": {
"is_optional": true,
"default_value": {
"s": "label"
}
}
},
{
"name": "save_id",
"desc": "Whether to save id column into output pred table. If true, input feature_dataset must contain id column.",
"type": "AT_BOOL",
"atomic": {
"is_optional": true,
"default_value": {}
}
},
{
"name": "id_name",
"desc": "Column name for id.",
"type": "AT_STRING",
"atomic": {
"is_optional": true,
"default_value": {
"s": "id"
}
}
},
{
"name": "col_names",
"desc": "Extra column names into output pred table.",
"type": "AT_STRINGS",
"atomic": {
"list_max_length_inclusive": "-1",
"is_optional": true
}
}
],
"inputs": [
{
"name": "feature_dataset",
"desc": "Input feature dataset.",
"types": [
"sf.table.individual"
],
"attrs": [
{
"name": "ids",
"desc": "Id columns.",
"col_max_cnt_inclusive": "1"
},
{
"name": "label",
"desc": "Label column.",
"col_max_cnt_inclusive": "1"
}
]
},
{
"name": "model",
"desc": "Input model.",
"types": [
"sf.model.lgbm"
]
}
],
"outputs": [
{
"name": "pred",
"desc": "Output prediction.",
"types": [
"sf.table.individual"
]
}
]
}
```
141 changes: 141 additions & 0 deletions docs/architecture/apps/lgbm_train.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# LightGBM训练

使用LightGBM对数据集进行训练,得到LightGBM模型,支持二分类和回归任务。

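## 组件定义

1. 参数
(1) n_estimators: 提升树的数量,取值范围为[1, 1024],默认为10。
(2) objective: 学习目标,可选“binary”(二分类)或“regression”(回归),默认为“binary”。
(3) boosting_type: 提升类型,可选“gbdt”、“rf”或“dart”,默认为“gbdt”。
(4) learning_rate: 学习率,取值范围为(0, 1],默认为0.1。
(5) num_leaves: 单棵树的最大叶子数,取值范围为[2, 1024],默认为31。
2. 输入:训练数据集,必须有且仅有一列标签列;ID列不参与训练。
3. 输出:LightGBM模型。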

```json
{
"domain": "ml.train",
"name": "lgbm_train",
"desc": "LightGBM train component for individual dataset.",
"version": "0.0.1",
"attrs": [
{
"name": "n_estimators",
"desc": "Number of boosted trees to fit.",
"type": "AT_INT",
"atomic": {
"is_optional": true,
"default_value": {
"i64": "10"
},
"lower_bound_enabled": true,
"lower_bound": {
"i64": "1"
},
"lower_bound_inclusive": true,
"upper_bound_enabled": true,
"upper_bound": {
"i64": "1024"
},
"upper_bound_inclusive": true
}
},
{
"name": "objective",
"desc": "Specify the learning objective.",
"type": "AT_STRING",
"atomic": {
"is_optional": true,
"default_value": {
"s": "binary"
},
"allowed_values": {
"ss": [
"regression",
"binary"
]
}
}
},
{
"name": "boosting_type",
"desc": "Boosting type.",
"type": "AT_STRING",
"atomic": {
"is_optional": true,
"default_value": {
"s": "gbdt"
},
"allowed_values": {
"ss": [
"gbdt",
"rf",
"dart"
]
}
}
},
{
"name": "learning_rate",
"desc": "Learning rate.",
"type": "AT_FLOAT",
"atomic": {
"is_optional": true,
"default_value": {
"f": 0.1
},
"lower_bound_enabled": true,
"lower_bound": {},
"upper_bound_enabled": true,
"upper_bound": {
"f": 1
},
"upper_bound_inclusive": true
}
},
{
"name": "num_leaves",
"desc": "Max number of leaves in one tree.",
"type": "AT_INT",
"atomic": {
"is_optional": true,
"default_value": {
"i64": "31"
},
"lower_bound_enabled": true,
"lower_bound": {
"i64": "2"
},
"lower_bound_inclusive": true,
"upper_bound_enabled": true,
"upper_bound": {
"i64": "1024"
},
"upper_bound_inclusive": true
}
}
],
"inputs": [
{
"name": "train_dataset",
"desc": "Input table.",
"types": [
"sf.table.individual"
],
"attrs": [
{
"name": "ids",
"desc": "Id columns will not be trained."
},
{
"name": "label",
"desc": "Label column.",
"col_min_cnt_inclusive": "1",
"col_max_cnt_inclusive": "1"
}
]
}
],
"outputs": [
{
"name": "output_model",
"desc": "Output model.",
"types": [
"sf.model.lgbm"
]
}
]
}
```
2 changes: 1 addition & 1 deletion docs/architecture/index.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
核心功能
架构设计
========================
想了解TrustedFlow原理和功能,欢迎阅读下列文章!

Expand Down
Loading

0 comments on commit 0215a9a

Please sign in to comment.