KMR
kmrisort.c
Go to the documentation of this file.
1 /* kmrisort.c (2014-02-04) */
2 /* Copyright (C) 2012-2016 RIKEN AICS */
3 /* Copyright (c) 1992, 1993 The Regents of the University of California. */
4 
5 /* $NetBSD: qsort.c,v 1.15 2008/03/11 18:04:59 rmind Exp $ */
6 
7 /*-
8  * Copyright (c) 1992, 1993
9  * The Regents of the University of California. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  * may be used to endorse or promote products derived from this software
21  * without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /** \file kmrisort.c Sorter on Long Integers. This sorting is
37  NOT-STABLE. This file is a copy of "qsort.c" from NetBSD-5.1, and
38  copyrighted by The Regents of the University of California. It is
39  deoptimized by removing the code to gather equal keys, due to
40  not-fast 3-way comparison (that returns -1/0/1) on K. */
41 
42 #include <mpi.h>
43 #include <sys/types.h>
44 #include <assert.h>
45 #include <errno.h>
46 #ifdef _OPENMP
47 #include <omp.h>
48 #endif
49 #include "kmr.h"
50 #include "kmrimpl.h"
51 
52 #define MAX(a,b) (((a)>(b))?(a):(b))
53 
54 #if defined(_OPENMP) && (_OPENMP >= 201106) && !defined(KMROMP30)
55 /* OK. */
56 #elif defined(_OPENMP)
57 #pragma message ("Compiler seems not support OMP 3.1")
58 #else
59 #pragma message ("Compiler seems not support OMP")
60 #endif
61 
62 /* kmr_isort only looks at the first entry (long v). It should match
63  the type of struct kmr_sort_record. */
64 
66  long v;
67  char _[1];
68 };
69 
70 static inline ptrdiff_t
71 kmr_min(ptrdiff_t a, intptr_t b) {return ((a < b) ? a : b);}
72 
73 /* It returns 0 for long, 1 for long-aligned, or 2 for unaligned. */
74 
75 static inline int
76 kmr_swaptype(void *a, size_t es)
77 {
78  return ((((size_t)(intptr_t)a % sizeof(long) != 0)
79  || (es % sizeof(long) != 0))
80  ? 2 : (es == sizeof(long) ? 0 : 1));
81 }
82 
83 static inline void
84 kmr_swapbytype(void *a, void *b, size_t sz, int swaptype)
85 {
86  if (swaptype <= 1) {
87  long *p = a;
88  long *q = b;
89  size_t i = (sz / sizeof(long));
90  do {
91  long t = *p; *p = *q; *q = t;
92  p++; q++;
93  } while (--i > 0);
94  } else {
95  char *p = a;
96  char *q = b;
97  size_t i = (sz / sizeof(char));
98  do {
99  char t = *p; *p = *q; *q = t;
100  p++; q++;
101  } while (--i > 0);
102  }
103 }
104 
105 static inline void
106 kmr_vecswap(void *a, void *b, size_t sz, int swaptype)
107 {
108  if (sz > 0) {
109  kmr_swapbytype(a, b, sz, swaptype);
110  }
111 }
112 
113 static inline void
114 kmr_swap(void *a, void *b, size_t ES, int swaptype)
115 {
116  if (swaptype == 0) {
117  long *p = a;
118  long *q = b;
119  long t = *p; *p = *q; *q = t;
120  } else {
121  kmr_swapbytype(a, b, ES, swaptype);
122  }
123 }
124 
125 /* Compares in two-way; it never returns 0. This variation returns
126  POSITIVE on equals. */
127 
128 static inline int
129 KMR_CMP2P(void *A, void *B)
130 {
131  struct kmr_isort_entry *a = A;
132  struct kmr_isort_entry *b = B;
133  /*return ((a->v == b->v) ? 0 : ((a->v < b->v) ? -1 : 1));*/
134  return ((a->v < b->v) ? -1 : 1);
135 }
136 
137 /* Compares in two-way; it never returns 0. This variation returns
138  NEGATIVE on equals. */
139 
140 static inline int
141 KMR_CMP2N(void *A, void *B)
142 {
143  struct kmr_isort_entry *a = A;
144  struct kmr_isort_entry *b = B;
145  /*return ((a->v == b->v) ? 0 : ((a->v < b->v) ? -1 : 1));*/
146  return ((a->v <= b->v) ? -1 : 1);
147 }
148 
149 static inline void *
150 kmr_meddleof(void *a, void *b, void *c)
151 {
152  return (KMR_CMP2P(a, b) < 0
153  ? (KMR_CMP2P(b, c) < 0 ? b : (KMR_CMP2P(a, c) < 0 ? c : a))
154  : (KMR_CMP2P(b, c) > 0 ? b : (KMR_CMP2P(a, c) < 0 ? a : c)));
155 }
156 
157 static inline char *
158 kmr_medianof(char *A, const size_t N, const size_t ES)
159 {
160  char *pm = A + (N / 2) * ES;
161  if (N > 7) {
162  char *pl = A;
163  char *pn = A + (N - 1) * ES;
164  if (N > 40) {
165  size_t d = (N / 8) * ES;
166  pl = kmr_meddleof(pl, (pl + d), (pl + 2 * d));
167  pm = kmr_meddleof((pm - d), pm, (pm + d));
168  pn = kmr_meddleof((pn - 2 * d), (pn - d), pn);
169  }
170  pm = kmr_meddleof(pl, pm, pn);
171  }
172  return pm;
173 }
174 
175 static inline void
176 kmr_subsort(char *A, size_t N, const size_t ES, int swaptype)
177 {
178  for (char *p0 = A + ES; p0 < A + N * ES; p0 += ES) {
179  for (char *p1 = p0; p1 > A && KMR_CMP2N((p1 - ES), p1) > 0; p1 -= ES) {
180  kmr_swap(p1, (p1 - ES), ES, swaptype);
181  }
182  }
183 }
184 
185 static void
186 kmr_isort0(void *A0, size_t N, const size_t ES, int depth)
187 {
188  char *A = A0;
189  /*LOOP:*/
190  assert(A != 0);
191  int swaptype = kmr_swaptype(A, ES);
192  int swapoccur = 0;
193  if (N < 10) {
194  kmr_subsort(A, N, ES, swaptype);
195  return;
196  }
197  char *pm = kmr_medianof(A, N, ES);
198  kmr_swap(A, pm, ES, swaptype);
199 
200  char *pa = A + ES;
201  char *pb = pa;
202  char *pc = A + (N - 1) * ES;
203  char *pd = pc;
204  for (;;) {
205  while (pb <= pc && KMR_CMP2P(pb, A) <= 0) {
206  if (0) {
207  /* EQUAL CASE HANDLING. */
208  swapoccur = 1;
209  kmr_swap(pa, pb, ES, swaptype);
210  pa += ES;
211  }
212  pb += ES;
213  }
214  while (pb <= pc && KMR_CMP2N(pc, A) >= 0) {
215  if (0) {
216  /* EQUAL CASE HANDLING. */
217  swapoccur = 1;
218  kmr_swap(pc, pd, ES, swaptype);
219  pd -= ES;
220  }
221  pc -= ES;
222  }
223  if (pb > pc) {
224  break;
225  }
226  kmr_swap(pb, pc, ES, swaptype);
227  swapoccur = 1;
228  pb += ES;
229  pc -= ES;
230  }
231  if (swapoccur == 0) {
232  kmr_subsort(A, N, ES, swaptype);
233  return;
234  }
235 
236  char *pn = A + N * ES;
237 
238  if (0) {
239  /* EQUAL CASE HANDLING. */
240  size_t r0 = (size_t)kmr_min(pa - A, pb - pa);
241  kmr_vecswap(A, (pb - r0), r0, swaptype);
242  size_t r1 = (size_t)kmr_min((pd - pc), (pn - pd - (ptrdiff_t)ES));
243  kmr_vecswap(pb, (pn - r1), r1, swaptype);
244  assert(r0 == ES);
245  assert(r1 == 0);
246  } else {
247  kmr_swap(A, (pb - ES), ES, swaptype);
248  }
249 
250  size_t r2 = (size_t)(pb - pa);
251  if (r2 > ES) {
252 #if defined(_OPENMP) && (_OPENMP >= 201106) && !defined(KMROMP30)
253  KMR_OMP_TASK_FINAL_(depth == 0)
254  kmr_isort0(A, (r2 / ES), ES, MAX((depth - 1), 0));
255 #else
256  if (depth > 0) {
257  KMR_OMP_TASK_
258  kmr_isort0(A, (r2 / ES), ES, MAX((depth - 1), 0));
259  } else {
260  kmr_isort0(A, (r2 / ES), ES, MAX((depth - 1), 0));
261  }
262 #endif
263  }
264  size_t r3 = (size_t)(pd - pc);
265  if (r3 > ES) {
266 #if defined(_OPENMP) && (_OPENMP >= 201106) && !defined(KMROMP30)
267  KMR_OMP_TASK_FINAL_(depth == 0)
268  kmr_isort0((pn - r3), (r3 / ES), ES, MAX((depth - 1), 0));
269 #else
270  if (depth > 0) {
271  KMR_OMP_TASK_
272  kmr_isort0((pn - r3), (r3 / ES), ES, MAX((depth - 1), 0));
273  } else {
274  kmr_isort0((pn - r3), (r3 / ES), ES, MAX((depth - 1), 0));
275  }
276 #endif
277  }
278 #if defined(_OPENMP) && (_OPENMP >= 201106) && !defined(KMROMP30)
279  /* (ONLY INTEL-CC ACTUALLY NEEDS TASKWAIT (ICC 13 and 14)). */
280  KMR_OMP_TASKWAIT_;
281 #else
282  /*empty*/
283 #endif
284 }
285 
286 /** Sorts by comparator on long integers. The target array is of
287  "struct kmr_isort_entry". DEPTH tells how deep OMP threading is
288  enabled (using tasks), and it is decremented by one each
289  recursion. Zero depth tells a sequential run. */
290 
291 void
292 kmr_isort(void *A, const size_t N, const size_t ES, int depth)
293 {
294  KMR_OMP_PARALLEL_
295  {
296  KMR_OMP_SINGLE_NOWAIT_
297  {
298  kmr_isort0(A, N, ES, depth);
299  }
300  }
301 }
302 
303 /*
304 Copyright (C) 2012-2016 RIKEN AICS
305 This library is distributed WITHOUT ANY WARRANTY. This library can be
306 redistributed and/or modified under the terms of the BSD 2-Clause License.
307 */
Utilities Private Part (do not include from applications).
Definition: kmrisort.c:65
KMR Interface.
void kmr_isort(void *A, const size_t N, const size_t ES, int depth)
Sorts by comparator on long integers.
Definition: kmrisort.c:292