在svm中,训练是一个十分重要的步骤,下面我们来看看svm的train部分。
在libsvm中的svm_train中分别有回归和分类两部分,我只对其中分类做介绍。
分类的步骤如下:
- 统计类别总数,同时记录类别的标号,统计每个类的样本数目
- 将属于相同类的样本分组,连续存放
- 计算权重C
- 训练 n(n-1)/2 个二分类模型(n 为类别总数,每两个类别之间训练一个)
- 初始化nonzero数组,便于统计SV(支持向量)
- //初始化概率数组
- 训练过程中,需要重建子数据集,样本的特征不变,但样本的类别要改为+1/-1
- //如有必要,先调用svm_binary_svc_probability
- 训练子数据集svm_train_one
- 更新nonzero数组:若某个样本的nonzero已经为真,则保持不变;否则当该样本在本次训练中的alpha绝对值大于0时,将其改为真
- 输出模型
- 主要是填充svm_model
- 清除内存
函数中调用过程如下:
svm_train-->svm_train_one-->solve_c_svc(for example)-->s.Solve
- //
- // Interface functions
- // svm_train: key entry point that trains an SVM model from the samples in `prob` using the settings in `param`, and returns a newly allocated svm_model.
- // Depending on param->svm_type it either fits one decision function on the whole problem (one-class / regression), or groups the samples by class, trains one binary model per pair of classes and stores the results in the model. The model's SV entries are pointers taken from prob->x (not copies). None of the Malloc results are checked in this function.
- svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
- {
- svm_model *model = Malloc(svm_model,1); // for reference: #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
- model->param = *param; // the model keeps its own copy of the parameter struct
- model->free_sv = 0; // XXX (presumably signals that the SV data is not owned by the model, since SV[] below aliases prob->x; confirm against the model-freeing code)
- if(param->svm_type == ONE_CLASS ||
- param->svm_type == EPSILON_SVR ||
- param->svm_type == NU_SVR)
- {
- // regression or one-class-svm
- model->nr_class = 2;
- model->label = NULL;
- model->nSV = NULL;
- model->probA = NULL; model->probB = NULL;
- model->sv_coef = Malloc(double *,1); // a single row of coefficients in this branch
- if(param->probability &&
- (param->svm_type == EPSILON_SVR ||
- param->svm_type == NU_SVR))
- {
- model->probA = Malloc(double,1);
- model->probA[0] = svm_svr_probability(prob,param);
- }
- decision_function f = svm_train_one(prob,param,0,0); // last two arguments (the per-class weighted C values in the classification branch) are passed as 0 here
- model->rho = Malloc(double,1);
- model->rho[0] = f.rho;
- int nSV = 0;
- int i;
- for(i=0;i<prob->l;i++) // first pass: count samples whose alpha is non-zero
- if(fabs(f.alpha[i]) > 0) ++nSV;
- model->l = nSV;
- model->SV = Malloc(svm_node *,nSV);
- model->sv_coef[0] = Malloc(double,nSV);
- model->sv_indices = Malloc(int,nSV);
- int j = 0;
- for(i=0;i<prob->l;i++) // second pass: copy out the samples counted above
- if(fabs(f.alpha[i]) > 0)
- {
- model->SV[j] = prob->x[i]; // stores the pointer from prob->x, not a copy
- model->sv_coef[0][j] = f.alpha[i];
- model->sv_indices[j] = i+1; // 1-based position of the sample in prob
- ++j;
- }
- free(f.alpha);
- }
- else
- {
- // classification
- int l = prob->l;
- int nr_class;
- int *label = NULL;
- int *start = NULL;
- int *count = NULL;
- int *perm = Malloc(int,l);
- // group training data of the same class: samples of the same class are gathered together
- svm_group_classes(prob,&nr_class,&label,&start,&count,perm); // fills nr_class, label, start, count and perm; they are used below as per-class ranges [start[c], start[c]+count[c]) over the perm-ordered samples
- if(nr_class == 1)
- info("WARNING: training data in only one class. See README for details.\n");
- svm_node **x = Malloc(svm_node *,l);
- int i;
- for(i=0;i<l;i++)
- x[i] = prob->x[perm[i]]; // x is prob->x reordered by perm
- // calculate weighted C
- double *weighted_C = Malloc(double, nr_class);
- for(i=0;i<nr_class;i++)
- weighted_C[i] = param->C; // every class starts from the common C
- for(i=0;i<param->nr_weight;i++)
- {
- int j;
- for(j=0;j<nr_class;j++) // look up which class index carries this weight's label
- if(param->weight_label[i] == label[j])
- break;
- if(j == nr_class)
- fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
- else
- weighted_C[j] *= param->weight[i]; // multiplicative, so repeated entries for one label compound
- }
- // train k*(k-1)/2 models
- bool *nonzero = Malloc(bool,l); // nonzero[i]: sample i (in grouped order) got a non-zero alpha in at least one pairwise model
- for(i=0;i<l;i++)
- nonzero[i] = false;
- decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2); // one decision function per class pair
- double *probA=NULL,*probB=NULL;
- if (param->probability) // probability arrays are only allocated when probability output is requested
- {
- probA=Malloc(double,nr_class*(nr_class-1)/2);
- probB=Malloc(double,nr_class*(nr_class-1)/2);
- }
- int p = 0; // running index of the current class pair (i,j)
- for(i=0;i<nr_class;i++)
- for(int j=i+1;j<nr_class;j++)
- {
- svm_problem sub_prob; // two-class sub-problem: class i samples first, then class j samples
- int si = start[i], sj = start[j];
- int ci = count[i], cj = count[j];
- sub_prob.l = ci+cj;
- sub_prob.x = Malloc(svm_node *,sub_prob.l);
- sub_prob.y = Malloc(double,sub_prob.l);
- int k;
- for(k=0;k<ci;k++)
- {
- sub_prob.x[k] = x[si+k];
- sub_prob.y[k] = +1; // class i is relabelled +1
- }
- for(k=0;k<cj;k++)
- {
- sub_prob.x[ci+k] = x[sj+k];
- sub_prob.y[ci+k] = -1; // class j is relabelled -1
- }
- if(param->probability)
- svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]); // presumably writes probA[p]/probB[p] through reference parameters, since they are read later without any other assignment; confirm against the callee's signature
- f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
- for(k=0;k<ci;k++) // mark class-i samples with a non-zero alpha; flags already set stay set
- if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
- nonzero[si+k] = true;
- for(k=0;k<cj;k++) // same for class-j samples, whose alphas start at offset ci
- if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
- nonzero[sj+k] = true;
- free(sub_prob.x); // frees only the sub-problem's arrays, not the samples they point to
- free(sub_prob.y);
- ++p;
- }
- // build output
- model->nr_class = nr_class;
- model->label = Malloc(int,nr_class);
- for(i=0;i<nr_class;i++)
- model->label[i] = label[i];
- model->rho = Malloc(double,nr_class*(nr_class-1)/2);
- for(i=0;i<nr_class*(nr_class-1)/2;i++)
- model->rho[i] = f[i].rho;
- if(param->probability)
- {
- model->probA = Malloc(double,nr_class*(nr_class-1)/2);
- model->probB = Malloc(double,nr_class*(nr_class-1)/2);
- for(i=0;i<nr_class*(nr_class-1)/2;i++)
- {
- model->probA[i] = probA[i];
- model->probB[i] = probB[i];
- }
- }
- else
- {
- model->probA=NULL;
- model->probB=NULL;
- }
- int total_sv = 0;
- int *nz_count = Malloc(int,nr_class); // nz_count[c]: number of flagged samples in class c
- model->nSV = Malloc(int,nr_class);
- for(i=0;i<nr_class;i++)
- {
- int nSV = 0;
- for(int j=0;j<count[i];j++)
- if(nonzero[start[i]+j])
- {
- ++nSV;
- ++total_sv;
- }
- model->nSV[i] = nSV;
- nz_count[i] = nSV;
- }
- info("Total nSV = %d\n",total_sv);
- model->l = total_sv;
- model->SV = Malloc(svm_node *,total_sv);
- model->sv_indices = Malloc(int,total_sv);
- p = 0; // p is reused here as the write position into model->SV
- for(i=0;i<l;i++)
- if(nonzero[i])
- {
- model->SV[p] = x[i]; // stores the pointer that came from prob->x, not a copy
- model->sv_indices[p++] = perm[i] + 1; // 1-based position in the original prob ordering (x[i] is prob->x[perm[i]])
- }
- int *nz_start = Malloc(int,nr_class); // nz_start[c]: offset of class c's first flagged sample within model->SV
- nz_start[0] = 0; // NOTE(review): writes past a zero-length allocation if nr_class is 0; confirm svm_group_classes never yields 0 classes here
- for(i=1;i<nr_class;i++)
- nz_start[i] = nz_start[i-1]+nz_count[i-1];
- model->sv_coef = Malloc(double *,nr_class-1); // nr_class-1 rows, each total_sv long
- for(i=0;i<nr_class-1;i++)
- model->sv_coef[i] = Malloc(double,total_sv);
- p = 0; // p is reused again as the class-pair index, same (i,j) order as during training
- for(i=0;i<nr_class;i++)
- for(int j=i+1;j<nr_class;j++)
- {
- // classifier (i,j): coefficients with
- // i are in sv_coef[j-1][nz_start[i]...],
- // j are in sv_coef[i][nz_start[j]...]
- int si = start[i];
- int sj = start[j];
- int ci = count[i];
- int cj = count[j];
- int q = nz_start[i];
- int k;
- for(k=0;k<ci;k++)
- if(nonzero[si+k]) // copied for every flagged sample, even if its alpha in this particular pair is 0
- model->sv_coef[j-1][q++] = f[p].alpha[k];
- q = nz_start[j];
- for(k=0;k<cj;k++)
- if(nonzero[sj+k])
- model->sv_coef[i][q++] = f[p].alpha[ci+k];
- ++p;
- }
- free(label); // release all temporaries; only what was stored in model survives
- free(probA);
- free(probB);
- free(count);
- free(perm);
- free(start);
- free(x); // frees the reordered pointer array only; the samples themselves are still referenced by model->SV
- free(weighted_C);
- free(nonzero);
- for(i=0;i<nr_class*(nr_class-1)/2;i++)
- free(f[i].alpha);
- free(f);
- free(nz_count);
- free(nz_start);
- }
- return model;
- }