Skip to content

Commit 3e6983e

Browse files
author
Andrey Oskin
committed
half-baked implementation do not merge
1 parent 6854197 commit 3e6983e

File tree

6 files changed

+401
-52
lines changed

6 files changed

+401
-52
lines changed

src/ParallelKMeans.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@ include("kmeans.jl")
1212
include("lloyd.jl")
1313
include("hamerly.jl")
1414
include("elkan.jl")
15+
include("yingyang.jl")
1516
include("mlj_interface.jl")
1617

1718
export kmeans
18-
export Lloyd, Hamerly, Elkan
19+
export Lloyd, Hamerly, Elkan, YingYang
1920

2021
end # module

src/elkan.jl

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -236,21 +236,6 @@ function chunk_update_centroids(::Elkan, containers, centroids, X, r, idx)
236236
end
237237
end
238238

239-
function collect_containers(alg::Elkan, containers, n_threads)
240-
if n_threads == 1
241-
@inbounds containers.centroids_new[end] .= containers.centroids_new[1] ./ containers.centroids_cnt[1]'
242-
else
243-
@inbounds containers.centroids_new[end] .= containers.centroids_new[1]
244-
@inbounds containers.centroids_cnt[end] .= containers.centroids_cnt[1]
245-
@inbounds for i in 2:n_threads
246-
containers.centroids_new[end] .+= containers.centroids_new[i]
247-
containers.centroids_cnt[end] .+= containers.centroids_cnt[i]
248-
end
249-
250-
@inbounds containers.centroids_new[end] .= containers.centroids_new[end] ./ containers.centroids_cnt[end]'
251-
end
252-
end
253-
254239
function calculate_centroids_movement(alg::Elkan, containers, centroids)
255240
p = containers.p
256241
centroids_new = containers.centroids_new[end]
@@ -260,7 +245,6 @@ function calculate_centroids_movement(alg::Elkan, containers, centroids)
260245
end
261246
end
262247

263-
264248
function chunk_update_bounds(alg, containers, centroids, r, idx)
265249
p = containers.p
266250
lb = containers.lb

src/hamerly.jl

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,23 +73,6 @@ function kmeans!(alg::Hamerly, containers, X, k;
7373
return KmeansResult(centroids, containers.labels, Float64[], Int[], Float64[], totalcost, niters, converged)
7474
end
7575

76-
77-
function collect_containers(alg::Hamerly, containers, n_threads)
78-
if n_threads == 1
79-
@inbounds containers.centroids_new[end] .= containers.centroids_new[1] ./ containers.centroids_cnt[1]'
80-
else
81-
@inbounds containers.centroids_new[end] .= containers.centroids_new[1]
82-
@inbounds containers.centroids_cnt[end] .= containers.centroids_cnt[1]
83-
@inbounds for i in 2:n_threads
84-
containers.centroids_new[end] .+= containers.centroids_new[i]
85-
containers.centroids_cnt[end] .+= containers.centroids_cnt[i]
86-
end
87-
88-
@inbounds containers.centroids_new[end] .= containers.centroids_new[end] ./ containers.centroids_cnt[end]'
89-
end
90-
end
91-
92-
9376
function create_containers(alg::Hamerly, k, nrow, ncol, n_threads)
9477
lng = n_threads + 1
9578
centroids_new = Vector{Array{Float64,2}}(undef, lng)
@@ -108,7 +91,7 @@ function create_containers(alg::Hamerly, k, nrow, ncol, n_threads)
10891

10992
labels = zeros(Int, ncol)
11093

111-
# distance that centroid moved
94+
# distance that centroid has moved
11295
p = Vector{Float64}(undef, k)
11396

11497
# distance from the center to the closest other center
@@ -289,9 +272,9 @@ function chunk_update_bounds(alg::Hamerly, containers, r1, r2, pr1, pr2, r, idx)
289272
label = labels[i]
290273
ub[i] += 2*sqrt(abs(ub[i] * p[label])) + p[label]
291274
if r1 == label
292-
lb[i] += pr2 - 2*sqrt(abs(pr2*lb[i]))
275+
lb[i] = lb[i] <= pr2 ? 0.0 : lb[i] + pr2 - 2*sqrt(abs(pr2*lb[i]))
293276
else
294-
lb[i] += pr1 - 2*sqrt(abs(pr1*lb[i]))
277+
lb[i] = lb[i] <= pr1 ? 0.0 : lb[i] + pr1 - 2*sqrt(abs(pr1*lb[i]))
295278
end
296279
end
297280
end

src/kmeans.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,19 @@ function kmeans(alg, design_matrix, k;
159159
k_init = k_init, max_iters = max_iters, tol = tol,
160160
verbose = verbose, init = init)
161161
end
162+
163+
164+
"""
    collect_containers(alg, containers, n_threads)

Combine the first `n_threads` slots of `containers.centroids_new` and
`containers.centroids_cnt` and write the result into the last slot of
`containers.centroids_new`.

Each column of the combined `centroids_new` is divided by the matching entry of
the combined `centroids_cnt` (the counts are transposed before broadcasting).
When `n_threads == 1` the first slot is divided directly and
`containers.centroids_cnt[end]` is left untouched; otherwise the last slots of
both fields are used as accumulators and overwritten.

Returns the updated `containers.centroids_new[end]`.

NOTE(review): presumably slots `1:n_threads` hold per-thread coordinate sums and
point counts, so the result is the per-cluster mean — confirm against the
`chunk_update_centroids` methods.
"""
function collect_containers(::AbstractKMeansAlg, containers, n_threads)
    partial_sums = containers.centroids_new
    partial_cnts = containers.centroids_cnt

    # Single chunk: no reduction needed, normalise slot 1 straight into the last slot.
    if n_threads == 1
        return @inbounds partial_sums[end] .= partial_sums[1] ./ partial_cnts[1]'
    end

    # Seed the accumulators (last slots) with the first chunk ...
    @inbounds partial_sums[end] .= partial_sums[1]
    @inbounds partial_cnts[end] .= partial_cnts[1]

    # ... then fold in the remaining chunks.
    @inbounds for chunk in 2:n_threads
        partial_sums[end] .+= partial_sums[chunk]
        partial_cnts[end] .+= partial_cnts[chunk]
    end

    # Normalise the accumulated values by the accumulated counts, in place.
    return @inbounds partial_sums[end] .= partial_sums[end] ./ partial_cnts[end]'
end

src/lloyd.jl

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -124,18 +124,3 @@ function chunk_update_centroids(::Lloyd, containers, centroids, X, r, idx)
124124

125125
containers.J[idx] = J
126126
end
127-
128-
function collect_containers(alg::Lloyd, containers, centroids, n_threads)
129-
if n_threads == 1
130-
@inbounds centroids .= containers.centroids_new[1] ./ containers.centroids_cnt[1]'
131-
else
132-
@inbounds containers.centroids_new[end] .= containers.centroids_new[1]
133-
@inbounds containers.centroids_cnt[end] .= containers.centroids_cnt[1]
134-
@inbounds for i in 2:n_threads
135-
containers.centroids_new[end] .+= containers.centroids_new[i]
136-
containers.centroids_cnt[end] .+= containers.centroids_cnt[i]
137-
end
138-
139-
@inbounds centroids .= containers.centroids_new[end] ./ containers.centroids_cnt[end]'
140-
end
141-
end

0 commit comments

Comments
 (0)