Skip to content

Commit

Permalink
1
Browse files Browse the repository at this point in the history
  • Loading branch information
Aplysia committed May 29, 2016
1 parent 8dc9005 commit a1864c9
Show file tree
Hide file tree
Showing 13 changed files with 56 additions and 33 deletions.
5 changes: 5 additions & 0 deletions src/main/scala/classification/LogisticRegression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package libble.classification
import libble.generalizedLinear.{GeneralizedLinearModel, L2Reg, LogisticLoss}

/**
* This class is the model of LogisticRegression with default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand All @@ -22,6 +23,10 @@ class LogisticRegression(stepSize: Double,

setLossFunc(new LogisticLoss())
setRegularizer(new L2Reg())

/**
* default threshold is 0.5.
*/
setThreshold(0.5)


Expand Down
5 changes: 5 additions & 0 deletions src/main/scala/classification/SVM.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package libble.classification
import libble.generalizedLinear.{GeneralizedLinearModel, HingeLoss, L2Reg}

/**
 * This class is the model of SVM with default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand All @@ -21,6 +22,10 @@ class SVM(stepSize: Double,
partsNum: Int) extends GeneralizedLinearModel(stepSize, regParam, factor, iters, partsNum){
setLossFunc(new HingeLoss)
setRegularizer(new L2Reg)

/**
* default threshold is 0.0.
*/
setThreshold(0.0)

}
11 changes: 7 additions & 4 deletions src/main/scala/context/implicits.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ import libble.linalg.{DenseVector, SparseVector}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD


/**
 * Here we define the implicit conversion functions.
*/
object implicits {
implicit def sc2LibContext(sc: SparkContext) = new LibContext(sc)

implicit def RDD2LibRDD(data: RDD[Instance]) = new libbleRDD(data)
}

/**
* This class includes the methods of load libbleFILE from the file system
 * This class includes the methods for loading libbleFILE data from the file system.
*
* @param sc
*/
Expand Down Expand Up @@ -52,7 +54,7 @@ class LibContext(val sc: SparkContext) {
val temp = item.split(":")
(temp.head.toInt - 1, temp.last.toDouble)
}.unzip
(label, term._1.toArray,term._2.toArray)
(label, term._1.toArray, term._2.toArray)
}.cache()
val d = terms.map(_._2.lastOption.getOrElse(0))
.reduce(math.max) + 1
Expand Down Expand Up @@ -94,7 +96,7 @@ class LibContext(val sc: SparkContext) {
val temp = item.split(':')
(temp.head.toInt - 1, temp.last.toDouble)
}.unzip
(label, term._1.toArray,term._2.toArray)
(label, term._1.toArray, term._2.toArray)
}.cache()

val d = terms.map(_._2.lastOption.getOrElse(0)).reduce(math.max) + 1
Expand All @@ -118,6 +120,7 @@ class LibContext(val sc: SparkContext) {

/**
 * With this class, we add the save-data method to the RDD[Instance].
*
* @param data
*/
class libbleRDD(val data: RDD[Instance]) {
Expand Down
6 changes: 0 additions & 6 deletions src/main/scala/dimReduction/PCA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package libble.dimReduction

import scala.collection.mutable.ArrayBuffer
import java.util.Calendar

import libble.context.Instance
import libble.linalg.{DenseVector, Vector}
import libble.linalg.implicits._
Expand All @@ -31,11 +30,6 @@ class PCA(var K: Int,
s"data dimension size is ${training.first().features.size}, it must be greater than K=$K")

val centerData = centralize(training)
// centerData.collect().map(x => println(x.features))
// val statTraining = centerData.map(x => Vectors.dense(x.features.toArray))
// val summary: MultivariateStatisticalSummary = Statistics.colStats(statTraining)
// for (i <- 1 to 3) println(summary.mean.apply(i) + ", " + summary.variance.apply(i))
// println(s"center data size: ${centerData.count()}")

val st = Calendar.getInstance().getTimeInMillis
val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K)
Expand Down
4 changes: 3 additions & 1 deletion src/main/scala/examples/testLR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable

/**
 * Here is the example of using LogisticRegression.
*/
object testLR {
def main(args: Array[String]) {

Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/examples/testPCA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ package libble.examples
import libble.dimReduction.PCA
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable


/**
* This is the example of using PCA.
*/
object testPCA {
def main(args: Array[String]): Unit = {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Expand Down
18 changes: 9 additions & 9 deletions src/main/scala/examples/testScaller.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ package libble.examples
import libble.features.Scaller
import org.apache.spark.{SparkConf, SparkContext}

/**
 * This is the example of using Scaller.
*/
object testScaller {
def main(args: Array[String]) {
System.setProperty("hadoop.home.dir", "D:\\Program Files\\hadoop-2.6.0")
Expand All @@ -17,25 +20,22 @@ object testScaller {


import libble.context.implicits.sc2LibContext
val training=sc.loadlibbleFile("sparse.data")
val training = sc.loadlibbleFile("sparse.data")

val scaller=new Scaller(true,true)
val features= training.map(_.features)
val scaller = new Scaller(true, true)
val features = training.map(_.features)
scaller.computeFactor(features)



println("center:"+scaller.getCenter.get)
println("std:"+scaller.getStd.get)
println("center:" + scaller.getCenter.get)
println("std:" + scaller.getStd.get)


val result=scaller.transform(features).collect()
val result = scaller.transform(features).collect()
println(result.mkString(", "))





}

}
8 changes: 3 additions & 5 deletions src/main/scala/generalizedLinear/GeneralizedLinearModel.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import scala.math._
import scala.util.Random

/**
* This class is the model of Generalized Linear Algorithms with default lossfunc LogisticLoss and default regularization L2Reg.
*
* @param stepSize
* @param regParam
Expand Down Expand Up @@ -41,7 +42,6 @@ class GeneralizedLinearModel(var stepSize: Double,
private[this] var numPredictor: Int = 1



var threshold: Option[Double] = Some(0.5)

def setThreshold(value: Double): this.type = {
Expand Down Expand Up @@ -157,7 +157,7 @@ class GeneralizedLinearModel(var stepSize: Double,
*/
def setClassNum(classNum: Int): this.type = {
numPredictor = classNum - 1
lossfunc=new LogisticLoss(classNum)
lossfunc = new LogisticLoss(classNum)
this
}

Expand Down Expand Up @@ -305,7 +305,7 @@ class GeneralizedLinearModel(var stepSize: Double,
* @param v
* @return Double
*/
def predict(v: Vector): Double = {
def predict(v: Vector): Double = {
if (threshold == None) {
predictT(v)
}
Expand All @@ -326,7 +326,6 @@ class GeneralizedLinearModel(var stepSize: Double,
}



private def predictT(v: Vector): Double = weights match {

case Some(w) => {
Expand All @@ -350,5 +349,4 @@ class GeneralizedLinearModel(var stepSize: Double,
}



}
6 changes: 3 additions & 3 deletions src/main/scala/linalg/Vector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.linalg
import java.util

/**
*
* This is the trait of Vector.
*/
sealed trait Vector extends Serializable {

Expand Down Expand Up @@ -58,7 +58,7 @@ sealed trait Vector extends Serializable {
}

/**
*
 * This is the class of the Dense Vector.
* @param values
*/
case class DenseVector(val values: Array[Double]) extends Vector {
Expand Down Expand Up @@ -171,7 +171,7 @@ case class DenseVector(val values: Array[Double]) extends Vector {
}

/**
*
 * This is the class of the Sparse Vector.
* @param indices
* @param values
* @param dim
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/linalg/VectorsOp.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.linalg
import scala.collection.mutable.ArrayBuffer

/**
*
 * Used with the implicit method to add functions to the Vectors.
* @param vec
*/
class VectorsOp(val vec: Vector) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/linalg/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
package libble.linalg

/**
*
 * Here we define the implicit method for converting a Vector to a VectorsOp.
*/
package object implicits {
implicit def vectorAdOps(vec: Vector) = new VectorsOp(vec)
Expand Down
11 changes: 11 additions & 0 deletions src/main/scala/matrixFactorization/SVD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ import org.apache.spark.Logging
import org.apache.spark.rdd.RDD


/**
* This is the model of SVD.
*
* @param K
* @param bound
* @param stepSize
* @param iteration
* @param parts
* @param batchSize
*/

class SVD(var K: Int,
var bound: Double,
var stepSize: Double,
Expand Down
6 changes: 5 additions & 1 deletion src/main/scala/regression/LinearRegression.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ package libble.regression
import libble.generalizedLinear.{GeneralizedLinearModel, L1Reg, LeastSquareLoss}

/**
*
* This is the model of LinearRegression with default regularization L1Reg.
* @param stepSize
* @param regParam
* @param factor
Expand All @@ -22,6 +22,10 @@ class LinearRegression(stepSize: Double,

setLossFunc(new LeastSquareLoss)
setRegularizer(new L1Reg())

/**
* output the predict value.
*/
clearThreshold


Expand Down

0 comments on commit a1864c9

Please sign in to comment.