23 ostr =
'#### Configuration ###########################\n' 25 ostr +=
'Grid size = %d\n' % (self.
grid_size)
26 ostr +=
'Dimension = %d\n' % (self.
dim)
27 ostr +=
'Number of clusters = %d\n' % (self.
n_means)
28 ostr +=
'Number of points = %d\n' % (self.
n_points)
29 ostr +=
'##############################################' 36 def init_points(self):
40 def _fill_randoms(self, tlst, count):
41 for _
in range(0, count):
43 for _
in range(0, self.
dim):
44 lst.append(random.randint(0, self.
grid_size - 1))
47 def calc_sq_dist(v1, v2):
49 for (x, y)
in zip(v1, v2):
50 sum_ += (x - y) * (x - y)
54 def load_points(kv, kvi, kvo, i):
56 for (idp, point)
in enumerate(kmeans.points):
60 def calc_cluster((k, v), kvi, kvo, i):
63 min_dst = kmeans.grid_size * kmeans.grid_size
64 for (idm, mean)
in enumerate(kmeans.means):
65 dst = calc_sq_dist(v, mean)
72 def copy_center((k, v), kvi, kvo, i):
78 def update_cluster(kvvec, kvi, kvo):
81 for d
in range(0, kmeans.dim):
84 for d
in range(0, kmeans.dim):
86 avg = [x / (len(kvvec))
for x
in sum_]
87 kvo.add_kv(kvvec[0][0], avg)
92 kmr = kmr4py.KMR(
"world")
97 print 'Number of processes = %d' % (comm.size)
100 kmeans.means = comm.bcast(kmeans.means, root=0)
103 for _
in range(0, kmeans.n_iteration):
104 kvs0 = kmr.emptykvs.map_once(
False, load_points, key=
"integer")
105 kvs1 = kvs0.map(calc_cluster, key=
"integer")
106 kvs2 = kvs1.shuffle()
107 kvs3 = kvs2.reduce(update_cluster, key=
"integer")
108 kvs4 = kvs3.replicate()
109 kvs4.map(copy_center)
112 print 'Cluster coordinates' 113 for m
in kmeans.means:
def _fill_randoms(self, tlst, count)