Skip to content

Commit

Permalink
1
Browse files Browse the repository at this point in the history
  • Loading branch information
Aplysia committed May 29, 2016
1 parent 8dc9005 commit a1864c9
Show file tree
Hide file tree
Showing 13 changed files with 56 additions and 33 deletions.
5 changes: 5 additions & 0 deletions src/main/scala/classification/LogisticRegression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package libble.classification
import libble.generalizedLinear.{GeneralizedLinearModel, L2Reg, LogisticLoss}

/**
* This class is the model of LogisticRegression with default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand All @@ -22,6 +23,10 @@ class LogisticRegression(stepSize: Double,

setLossFunc(new LogisticLoss())
setRegularizer(new L2Reg())

/**
* default threshold is 0.5.
*/
setThreshold(0.5)


Expand Down
5 changes: 5 additions & 0 deletions src/main/scala/classification/SVM.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package libble.classification
import libble.generalizedLinear.{GeneralizedLinearModel, HingeLoss, L2Reg}

/**
 * This class is the model of SVM with default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand All @@ -21,6 +22,10 @@ class SVM(stepSize: Double,
partsNum: Int) extends GeneralizedLinearModel(stepSize, regParam, factor, iters, partsNum){
setLossFunc(new HingeLoss)
setRegularizer(new L2Reg)

/**
* default threshold is 0.0.
*/
setThreshold(0.0)

}
11 changes: 7 additions & 4 deletions src/main/scala/context/implicits.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ import libble.linalg.{DenseVector, SparseVector}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD


/**
 * Here we define the implicit conversion functions.
*/
object implicits {
implicit def sc2LibContext(sc: SparkContext) = new LibContext(sc)

implicit def RDD2LibRDD(data: RDD[Instance]) = new libbleRDD(data)
}

/**
* This class includes the methods of load libbleFILE from the file system
 * This class includes the methods for loading libbleFILE data from the file system.
*
* @param sc
*/
Expand Down Expand Up @@ -52,7 +54,7 @@ class LibContext(val sc: SparkContext) {
val temp = item.split(":")
(temp.head.toInt - 1, temp.last.toDouble)
}.unzip
(label, term._1.toArray,term._2.toArray)
(label, term._1.toArray, term._2.toArray)
}.cache()
val d = terms.map(_._2.lastOption.getOrElse(0))
.reduce(math.max) + 1
Expand Down Expand Up @@ -94,7 +96,7 @@ class LibContext(val sc: SparkContext) {
val temp = item.split(':')
(temp.head.toInt - 1, temp.last.toDouble)
}.unzip
(label, term._1.toArray,term._2.toArray)
(label, term._1.toArray, term._2.toArray)
}.cache()

val d = terms.map(_._2.lastOption.getOrElse(0)).reduce(math.max) + 1
Expand All @@ -118,6 +120,7 @@ class LibContext(val sc: SparkContext) {

/**
 * With this class, we add the save-data method to the RDD[Instance].
*
* @param data
*/
class libbleRDD(val data: RDD[Instance]) {
Expand Down
6 changes: 0 additions & 6 deletions src/main/scala/dimReduction/PCA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package libble.dimReduction

import scala.collection.mutable.ArrayBuffer
import java.util.Calendar

import libble.context.Instance
import libble.linalg.{DenseVector, Vector}
import libble.linalg.implicits._
Expand All @@ -31,11 +30,6 @@ class PCA(var K: Int,
s"data dimension size is ${training.first().features.size}, it must be greater than K=$K")

val centerData = centralize(training)
// centerData.collect().map(x => println(x.features))
// val statTraining = centerData.map(x => Vectors.dense(x.features.toArray))
// val summary: MultivariateStatisticalSummary = Statistics.colStats(statTraining)
// for (i <- 1 to 3) println(summary.mean.apply(i) + ", " + summary.variance.apply(i))
// println(s"center data size: ${centerData.count()}")

val st = Calendar.getInstance().getTimeInMillis
val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K)
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/examples/testLR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

/**
 * Here is the example of using LogisticRegression.
*/
object testLR {
def main(args: Array[String]) {

Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/examples/testPCA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package libble.examples
import libble.dimReduction.PCA
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable


/**
* This is the example of using PCA.
*/
object testPCA {
def main(args: Array[String]): Unit = {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Expand Down
18 changes: 9 additions & 9 deletions src/main/scala/examples/testScaller.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ package libble.examples
import libble.features.Scaller
import org.apache.spark.{SparkConf, SparkContext}

/**
 * This is the example of using Scaller.
*/
object testScaller {
def main(args: Array[String]) {
System.setProperty("hadoop.home.dir", "D:\\Program Files\\hadoop-2.6.0")
Expand All @@ -17,25 +20,22 @@ object testScaller {


import libble.context.implicits.sc2LibContext
val training=sc.loadlibbleFile("sparse.data")
val training = sc.loadlibbleFile("sparse.data")

val scaller=new Scaller(true,true)
val features= training.map(_.features)
val scaller = new Scaller(true, true)
val features = training.map(_.features)
scaller.computeFactor(features)



println("center:"+scaller.getCenter.get)
println("std:"+scaller.getStd.get)
println("center:" + scaller.getCenter.get)
println("std:" + scaller.getStd.get)


val result=scaller.transform(features).collect()
val result = scaller.transform(features).collect()
println(result.mkString(", "))





}

}
8 changes: 3 additions & 5 deletions src/main/scala/generalizedLinear/GeneralizedLinearModel.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import scala.math._
import scala.util.Random

/**
* This class is the model of Generalized Linear Algorithms with default lossfunc LogisticLoss and default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand Down Expand Up @@ -41,7 +42,6 @@ class GeneralizedLinearModel(var stepSize: Double,
private[this] var numPredictor: Int = 1



var threshold: Option[Double] = Some(0.5)

def setThreshold(value: Double): this.type = {
Expand Down Expand Up @@ -157,7 +157,7 @@ class GeneralizedLinearModel(var stepSize: Double,
*/
def setClassNum(classNum: Int): this.type = {
numPredictor = classNum - 1
lossfunc=new LogisticLoss(classNum)
lossfunc = new LogisticLoss(classNum)
this
}

Expand Down Expand Up @@ -305,7 +305,7 @@ class GeneralizedLinearModel(var stepSize: Double,
* @param v
* @return Double
*/
def predict(v: Vector): Double = {
def predict(v: Vector): Double = {
if (threshold == None) {
predictT(v)
}
Expand All @@ -326,7 +326,6 @@ class GeneralizedLinearModel(var stepSize: Double,
}



private def predictT(v: Vector): Double = weights match {

case Some(w) => {
Expand All @@ -350,5 +349,4 @@ class GeneralizedLinearModel(var stepSize: Double,
}



}
6 changes: 3 additions & 3 deletions src/main/scala/linalg/Vector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.linalg
import java.util

/**
*
* This is the trait of Vector.
*/
sealed trait Vector extends Serializable {

Expand Down Expand Up @@ -58,7 +58,7 @@ sealed trait Vector extends Serializable {
}

/**
*
 * This is the class of the Dense Vector.
* @param values
*/
case class DenseVector(val values: Array[Double]) extends Vector {
Expand Down Expand Up @@ -171,7 +171,7 @@ case class DenseVector(val values: Array[Double]) extends Vector {
}

/**
*
 * This is the class of the Sparse Vector.
* @param indices
* @param values
* @param dim
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/linalg/VectorsOp.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.linalg
import scala.collection.mutable.ArrayBuffer

/**
*
 * Used with the implicit method to add functions to the Vectors.
* @param vec
*/
class VectorsOp(val vec: Vector) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/linalg/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
package libble.linalg

/**
*
 * Here we define the implicit method for converting a Vector to a VectorsOp.
*/
package object implicits {
implicit def vectorAdOps(vec: Vector) = new VectorsOp(vec)
Expand Down
11 changes: 11 additions & 0 deletions src/main/scala/matrixFactorization/SVD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ import org.apache.spark.Logging
import org.apache.spark.rdd.RDD


/**
* This is the model of SVD.
*
* @param K
* @param bound
* @param stepSize
* @param iteration
* @param parts
* @param batchSize
*/

class SVD(var K: Int,
var bound: Double,
var stepSize: Double,
Expand Down
6 changes: 5 additions & 1 deletion src/main/scala/regression/LinearRegression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.regression
import libble.generalizedLinear.{GeneralizedLinearModel, L1Reg, LeastSquareLoss}

/**
*
* This is the model of LinearRegression with default regularization L1Reg.
* @param stepSize
* @param regParam
* @param factor
Expand All @@ -22,6 +22,10 @@ class LinearRegression(stepSize: Double,

setLossFunc(new LeastSquareLoss)
setRegularizer(new L1Reg())

/**
* output the predict value.
*/
clearThreshold


Expand Down

0 comments on commit a1864c9

Please sign in to comment.