svm.jl
Sequential minimal optimization (SMO) for support vector machines.
Support vector machine for binary and multi-class (one-vs-one strategy) classification.
Only the radial basis function (RBF) kernel is implemented for now.
The goal is to implement everything from scratch.
If you aren't an expert in Lagrangian optimization (I'm not), this is a great resource to get started with SMO:
http://cs229.stanford.edu/materials/smo.pdf
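For reference, the RBF kernel is k(x, z) = exp(-γ ‖x - z‖²), where γ is the kernel parameter passed to the model. A minimal sketch of how one kernel column against every training row could be computed (the actual kernel function in svm.jl may be organised differently):

# Sketch only: RBF kernel between one sample z and every row of x.
# γ controls the kernel width; a larger γ means a narrower kernel.
function rbf_column(x::Array{Float64,2}, z::Vector{Float64}, γ::Float64)
    m = size(x, 1)
    col = Vector{Float64}(undef, m)
    for i in 1:m
        d = x[i, :] .- z
        col[i] = exp(-γ * sum(abs2, d))   # exp(-γ ‖xᵢ - z‖²)
    end
    return col
end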
How to use :
1. Binary classification
# β = binaryβ(x::Array{Float64,2}, y::Vector{Int64}, classpos::Int64, classneg::Int64, splitα::Float64, mi::Int64, mp::Int64, k::String, c::Float64, γ::Float64)
# x = training features
# y = training labels
# classpos = positive-class label (used by the one-vs-one wrapper; use 1 for plain binary classification)
# classneg = negative-class label (used by the one-vs-one wrapper; use -1 for plain binary classification)
# splitα = Ntraining / (Ntraining + Ntesting) -> fraction of the samples used for training
# mi = maximum number of SMO iterations
# mp = maximum number of consecutive SMO passes without an α change (within tolerance) before stopping
# k = kernel type. For now, only "rbf" (radial basis function kernel)
# c = regularization parameter
# γ = RBF kernel parameter
binaryModel = binaryβ(xx, yy, 1, -1, 0.5, 1000, 1000, "rbf", 0.6, 0.1)
# try the model on new feature samples ( myfeatures )
predictions = predict(myfeatures, binaryModel.β)
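predict isn't reproduced in this README. In spirit it returns the sign of the SVM decision function f(x) = Σᵢ αᵢ yᵢ K(xᵢ, x) + b; a sketch under the assumption that the trained model carries the training data, the multipliers α, the bias b and γ (argument names below are illustrative, not the actual fields of svm.jl):

# Sketch only: SVM prediction from training data, multipliers α, bias b and RBF parameter γ.
function predict_sketch(x_new::Array{Float64,2}, x_train, y_train, α, b, γ)
    m_new, m_train = size(x_new, 1), size(x_train, 1)
    preds = Vector{Int64}(undef, m_new)
    for i in 1:m_new
        f = b
        for j in 1:m_train
            k = exp(-γ * sum(abs2, x_train[j, :] .- x_new[i, :]))
            f += α[j] * y_train[j] * k      # Σⱼ αⱼ yⱼ K(xⱼ, x) + b
        end
        preds[i] = f >= 0 ? 1 : -1          # sign of the decision function
    end
    return preds
end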
Using a dataset from Kaggle :
https://www.kaggle.com/ronitf/heart-disease-uci
using CSV, DataFrames

pwd()                        # check the current working directory
cd("your file path")         # move to the directory that contains heart.csv
kaggle_file = "heart.csv"

function loadNpreprocess()
    data = CSV.File(kaggle_file) |> DataFrame
    x = Matrix{Float64}(data[:, 1:13])   # the 13 feature columns
    y = Vector{Int64}(data[:, 14])       # the target column
    replace!(y, 0 => -1)                 # SMO expects labels in {-1, 1}
    return x, y
end
# get features and labels
xx, yy = loadNpreprocess()
# try the classifier with and without normalisation and see results
#
# xx[:,1] = normalise(xx[:,1])
# xx[:,4] = normalise(xx[:,4])
# xx[:,5] = normalise(xx[:,5])
# xx[:,8] = normalise(xx[:,8])
#
# feed features and labels into binary classifier
binaryModel = binaryβ(xx, yy, 1, -1, 0.5, 1000, 1000, "rbf", 0.6, 0.1)
print(binaryModel.accuracy)
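The normalise helper called in the commented-out lines above isn't shown in this README; a minimal min-max scaling sketch that matches how it is called (the actual helper in svm.jl may scale differently, e.g. z-score):

# Sketch only: min-max scaling of a single feature column to [0, 1].
function normalise(col::Vector{Float64})
    lo, hi = minimum(col), maximum(col)
    return (col .- lo) ./ (hi - lo)
end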
2. Multi-class classification
using RDatasets
iris = dataset("datasets", "iris")
mapping = multiClassPreprocess(iris) # some preprocessing of the data
y = vec(convert(Array, mapping[:,1])) # get labels
x = convert(Array, mapping[:,2:5]) # get features
x_train, y_train, x_test, y_test = splitTestTrain(x, y, 0.5) # first split, keeping fresh unseen data for final testing
models, labels = βbattleground(x_train, y_train, 0.9, 1000, 1000, "rbf", 0.6, 0.001) # train the one-vs-one binary models on x_train
print(labels) # check our labels
predictions = kaloskagathing(models, x_test, labels) # vote on x_test to get predictions
accu = computeAccuracy(predictions, y_test)
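splitTestTrain and computeAccuracy aren't reproduced in this README either; plausible sketches, assuming a random shuffle followed by a split at the given training ratio, and a simple matching-rate accuracy:

using Random: shuffle

# Sketch only: shuffle the samples, then split at the given training ratio.
function splitTestTrain_sketch(x::Array{Float64,2}, y::Vector{Int64}, ratio::Float64)
    idx = shuffle(1:size(x, 1))
    ntrain = round(Int, ratio * length(idx))
    tr, te = idx[1:ntrain], idx[ntrain+1:end]
    return x[tr, :], y[tr], x[te, :], y[te]
end

# Sketch only: fraction of predictions matching the true labels.
computeAccuracy_sketch(predictions, y_test) = count(predictions .== y_test) / length(y_test)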
3. Grid search
Find the best hyperparameter (c, γ) combination over a range of values.
# xx are our features, yy our labels
binaryModel = binaryβ(xx, yy, 1, -1, 0.5, 1000, 1000, "rbf", 0.6, 0.1)
models = gridSearch(xx, yy, 0.5, 1000, 1000, "rbf")
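gridSearch itself isn't reproduced here; a minimal sketch of what such a search could look like, reusing binaryβ for every (c, γ) pair (the grids and the printed summary are assumptions, not the actual ranges used by svm.jl):

# Sketch only: exhaustive search over assumed (c, γ) grids, tracking the best accuracy.
function gridSearch_sketch(x, y, splitα, mi, mp, k)
    cs = [0.1, 0.3, 0.6, 1.0, 3.0]       # assumed grid for the regularization parameter c
    γs = [0.001, 0.01, 0.1, 0.6, 1.0]    # assumed grid for the RBF parameter γ
    models = []
    best = (accuracy = -Inf, c = NaN, γ = NaN)
    for c in cs, γ in γs
        model = binaryβ(x, y, 1, -1, splitα, mi, mp, k, c, γ)
        push!(models, model)
        if model.accuracy > best.accuracy
            best = (accuracy = model.accuracy, c = c, γ = γ)
        end
    end
    println("best accuracy $(best.accuracy) with c = $(best.c), γ = $(best.γ)")
    return models
end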
Classification results on randomly clustered data points :
White dots = training data
Blue markers = testing data failures
Orange markers = testing data successes
How it works :
1. Sequential minimal optimization
http://cs229.stanford.edu/materials/smo.pdf
function smo!(β::SVM)
    m = size(β.x, 1)
    for i = 1:m
        # compute the RBF kernel between sample i and every training row,
        # stored as column i of the kernel matrix K
        β.k[:,i] = kernel(β.x, β.x[i,:], β)
    end
    passes = 0
    while passes < β.max_passes
        Δα = 0
        for i = 1:m
            Ei = computeError(i, β)
            # only optimize α[i] if it violates the KKT conditions beyond tol
            if (β.y[i] * Ei < -β.tol && β.α[i] < β.c) || (β.y[i] * Ei > β.tol && β.α[i] > 0)
                # pick a second index j ≠ i at random
                j = rand(1:m)
                while j == i
                    j = rand(1:m)
                end
                L, H = computeBounds(i, j, β)
                L == H && continue
                η = 2.0 * β.k[i, j] - β.k[i, i] - β.k[j, j]
                η >= 0 && continue
                Ej = computeError(j, β)
                α_io, α_jo = β.α[i], β.α[j]
                # update α[j], clip it to [L, H], then update α[i] accordingly
                β.α[j] -= (β.y[j] * (Ei - Ej)) / η
                β.α[j] = clamp(β.α[j], L, H)
                abs(β.α[j] - α_jo) < β.tol && continue
                β.α[i] = β.α[i] + β.y[i] * β.y[j] * (α_jo - β.α[j])
                # recompute the bias term b
                b1 = β.b - Ei - β.y[i] * (β.α[i] - α_io) * β.k[i, i] -
                     β.y[j] * (β.α[j] - α_jo) * β.k[i, j]
                b2 = β.b - Ej - β.y[j] * (β.α[j] - α_jo) * β.k[j, j] -
                     β.y[i] * (β.α[i] - α_io) * β.k[i, j]
                if 0 < β.α[i] < β.c
                    β.b = b1
                elseif 0 < β.α[j] < β.c
                    β.b = b2
                else
                    β.b = 0.5 * (b1 + b2)
                end
                Δα += 1
            end
        end
        # count a pass only when a full sweep changed no α
        if Δα == 0
            passes += 1
        else
            passes = 0
        end
    end
    # indices of the support vectors (α > 0)
    β.sv_pos = findall(β.α .> 0)
end
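computeError and computeBounds aren't reproduced in this README; they follow the cs229 notes directly. A sketch, assuming the SVM struct carries α, y, b, c and the kernel matrix k as used in smo! above:

# Sketch only: prediction error for sample i, Eᵢ = f(xᵢ) - yᵢ,
# where f(xᵢ) = Σⱼ αⱼ yⱼ K(xⱼ, xᵢ) + b.
function computeError_sketch(i, β)
    return sum(β.α .* β.y .* β.k[:, i]) + β.b - β.y[i]
end

# Sketch only: bounds L and H that keep α[j] inside the box [0, C]
# while respecting the linear constraint, as in the cs229 notes.
function computeBounds_sketch(i, j, β)
    if β.y[i] != β.y[j]
        L = max(0.0, β.α[j] - β.α[i])
        H = min(β.c, β.c + β.α[j] - β.α[i])
    else
        L = max(0.0, β.α[i] + β.α[j] - β.c)
        H = min(β.c, β.α[i] + β.α[j])
    end
    return L, H
end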
2. One-vs-one strategy for multi-class classification
Converts the multi-class classification task into several binary classification tasks.
There are n * (n - 1) / 2 binary matchups for n labels.
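For example, the 3 iris classes give 3 * (3 - 1) / 2 = 3 matchups; combinations from Combinatorics.jl (used by βbattleground below) enumerates them:

using Combinatorics

collect(combinations(1:3, 2))   # 3-element Vector: [1, 2], [1, 3], [2, 3]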
function βbattleground(x::Array{Float64,2}, y::Vector{Int64}, splitα::Float64, mi::Int64, mp::Int64, k::String, c::Float64, γ::Float64)
    n = length(unique(y))
    labels = [i for i in unique(y)]
    binaries = combinations(1:n, 2)                      # all n * (n - 1) / 2 class pairs
    βs = Array{BinaryModel, 1}(undef, length(binaries))
    idx = 1
    for binary in binaries
        c1 = binary[1]
        c2 = binary[2]
        # keep only the samples belonging to the two classes of this matchup
        sidx = findall(v -> v ∈ (labels[c1], labels[c2]), y)
        xi = x[sidx,:]
        # remap the two labels to +1 / -1 for the binary classifier
        d = Dict([ labels[c1] => 1, labels[c2] => -1])
        yi = getindex.(Ref(d), y[sidx])
        βs[idx] = binaryβ(xi, yi, labels[c1], labels[c2], splitα, mi, mp, k, c, γ)
        idx += 1
    end
    return βs, labels
end
3. Multi-class voting following the one-vs-one strategy:
The class with the most votes wins.
'Kalos kagathos or kalokagathos (Ancient Greek: καλὸς κἀγαθός [kalòs kaːɡatʰós]),
of which kalokagathia (καλοκαγαθία) is the derived noun, is a phrase used
by classical Greek writers to describe an ideal of gentlemanly personal conduct,
especially in a military context.'
https://en.wikipedia.org/wiki/Kalos_kagathos
function kaloskagathing(βs::Array{BinaryModel, 1}, x_test::Array{Float64, 2}, classIdx::Array{Int64, 1})
    # one column of votes per binary model, one row per test sample
    kaloskagathos = Array{Int64, 2}(undef, size(x_test, 1), length(βs))
    for i in 1:length(βs)
        p = predict(x_test, βs[i].β)
        # map the binary outputs back to the two classes this model was trained on
        replace!(p, 1 => βs[i].classpos)
        replace!(p, -1 => βs[i].classneg)
        kaloskagathos[:,i] = p
    end
    # count the votes per class for each test sample (countmemb returns a Dict class => votes)
    votes = mapslices(countmemb, kaloskagathos, dims = 2)
    # the class with the most votes wins
    return [findmax(votes[i, 1])[2] for i in 1:size(votes, 1)]
end
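countmemb isn't reproduced in this README; a minimal counting helper consistent with how it is used above, mapping each class to its number of votes:

# Sketch only: count occurrences of each value in an iterable.
function countmemb_sketch(itr)
    counts = Dict{eltype(itr), Int64}()
    for v in itr
        counts[v] = get(counts, v, 0) + 1
    end
    return counts
end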