// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.

#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined (__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> will drag in all the windows32 names.  Since
// that can cause user code portability problems, we just declare the
// one needed function here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
#endif
namespace __gnu_parallel
{
#if defined(__ICC)
  // 32-bit atomic fetch-and-add for the Intel compiler, implemented
  // directly with the x86 "lock xadd" instruction: operand %0 carries
  // the increment in and the previous value of *x out.
  template<typename must_be_int = int>
    int32 faa32(int32* x, int32 inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (inc), "=m" (*x)
                   : "0" (inc)
                   : "memory");
      return inc;
    }

#if defined(__x86_64)
  // 64-bit variant; only available when targeting x86_64.
  template<typename must_be_int = int>
    int64 faa64(int64* x, int64 inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (inc), "=m" (*x)
                   : "0" (inc)
                   : "memory");
      return inc;
    }
#endif
#endif
  // Atomic functions only work on integers.

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 32-bit signed integer.
   *  @param addend Value to add. */
  inline int32
  fetch_and_add_32(volatile int32* ptr, int32 addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
                                   addend);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32 before, after;
    do
      {
        before = *ptr;
        after = before + addend;
      } while (atomic_cas_32((volatile unsigned int*)ptr, before,
                             after) != before);
    return before;
#else   //fallback, slow
#pragma message("slow fetch_and_add_32")
    int32 res;
#pragma omp critical
    {
      res = *ptr;
      *(ptr) += addend;
    }
    return res;
#endif
  }
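
  // Illustrative usage sketch (not from the original header; names are
  // hypothetical): each caller atomically claims the previous counter
  // value, so concurrent callers obtain distinct slots.
  //
  //   volatile int32 counter = 0;
  //   int32 my_slot = fetch_and_add_32(&counter, 1);
  //   // my_slot holds the value *before* the addition.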
  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a 64-bit signed integer.
   *  @param addend Value to add. */
  inline int64
  fetch_and_add_64(volatile int64* ptr, int64 addend)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return faa64<int>((int64*)ptr, addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return 0;
#else
    return _InterlockedExchangeAdd64(ptr, addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64 before, after;
    do
      {
        before = *ptr;
        after = before + addend;
      } while (atomic_cas_64((volatile unsigned long long*)ptr, before,
                             after) != before);
    return before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow fetch_and_add_64")
    int64 res;
#pragma omp critical
    {
      res = *ptr;
      *(ptr) += addend;
    }
    return res;
#endif
  }
  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to a signed integer.
   *  @param addend Value to add. */
  template<typename T>
  inline T
  fetch_and_add(volatile T* ptr, T addend)
  {
    if (sizeof(T) == sizeof(int32))
      return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
    else if (sizeof(T) == sizeof(int64))
      return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
    else
      {
        // Only 32-bit and 64-bit integers are supported.
        _GLIBCXX_PARALLEL_ASSERT(false);
        return T(0);    // Unreachable; avoids falling off a non-void function.
      }
  }
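
  // Illustrative usage sketch (hypothetical names): threads reserve
  // disjoint index ranges of a shared output array by atomically
  // advancing a cursor, then write into their range without further
  // synchronization.
  //
  //   volatile int64 next_free = 0;
  //   int64 begin = fetch_and_add(&next_free, (int64)chunk_size);
  //   // This thread now owns slots [begin, begin + chunk_size).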
#if defined(__ICC)
  // 32-bit compare-and-swap for the Intel compiler, via "lock cmpxchgl":
  // register EAX carries the expected value in and the previous value out.
  template<typename must_be_int = int>
    inline int32
    cas32(volatile int32* ptr, int32 old, int32 nw)
    {
      int32 before;
      __asm__ __volatile__("lock; cmpxchgl %1,%2"
                           : "=a"(before)
                           : "q"(nw), "m"(*(volatile int32*)(ptr)), "0"(old)
                           : "memory");
      return before;
    }

#if defined(__x86_64)
  // 64-bit variant using "lock cmpxchgq"; x86_64 only.
  template<typename must_be_int = int>
    inline int64
    cas64(volatile int64 *ptr, int64 old, int64 nw)
    {
      int64 before;
      __asm__ __volatile__("lock; cmpxchgq %1,%2"
                           : "=a"(before)
                           : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                           : "memory");
      return before;
    }
#endif
#endif
  /** @brief Compare @c *ptr and @c comparand.  If equal, let @c
   *  *ptr=replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to 32-bit signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value. */
  inline bool
  compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
                                       replacement, comparand) == comparand;
#elif defined(__GNUC__)
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)ptr, comparand,
                         replacement) == comparand;
#else
#pragma message("slow compare_and_swap_32")
    bool res = false;
#pragma omp critical
    {
      if (*ptr == comparand)
        {
          *ptr = replacement;
          res = true;
        }
    }
    return res;
#endif
  }
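
  // Illustrative usage sketch (hypothetical names): a one-shot try-lock.
  // Exactly one concurrent caller sees the 0 -> 1 transition succeed.
  //
  //   volatile int32 lock_flag = 0;
  //   if (compare_and_swap_32(&lock_flag, 0, 1))
  //     {
  //       // ... we won the race; do the protected work ...
  //       lock_flag = 0;    // naive release, for illustration only
  //     }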
  /** @brief Compare @c *ptr and @c comparand.  If equal, let @c
   *  *ptr=replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to 64-bit signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value. */
  inline bool
  compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return cas64<int>(ptr, comparand, replacement) == comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)ptr, replacement,
                                         comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return false;
#else
    return _InterlockedCompareExchange64(ptr, replacement,
                                         comparand) == comparand;
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)ptr,
                         comparand, replacement) == comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow compare_and_swap_64")
    bool res = false;
#pragma omp critical
    {
      if (*ptr == comparand)
        {
          *ptr = replacement;
          res = true;
        }
    }
    return res;
#endif
  }
  /** @brief Compare @c *ptr and @c comparand.  If equal, let @c
   *  *ptr=replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param ptr Pointer to signed integer.
   *  @param comparand Compare value.
   *  @param replacement Replacement value. */
  template<typename T>
  inline bool
  compare_and_swap(volatile T* ptr, T comparand, T replacement)
  {
    if (sizeof(T) == sizeof(int32))
      return compare_and_swap_32((volatile int32*) ptr, (int32)comparand,
                                 (int32)replacement);
    else if (sizeof(T) == sizeof(int64))
      return compare_and_swap_64((volatile int64*) ptr, (int64)comparand,
                                 (int64)replacement);
    else
      {
        // Only 32-bit and 64-bit integers are supported.
        _GLIBCXX_PARALLEL_ASSERT(false);
        return false;   // Unreachable; avoids falling off a non-void function.
      }
  }
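
  // Illustrative usage sketch (hypothetical names): the classic CAS retry
  // loop, here maintaining a shared maximum without a lock.  The swap fails
  // and the loop retries whenever another thread updated global_max between
  // the read and the compare_and_swap.
  //
  //   volatile int64 global_max = 0;
  //   int64 observed;
  //   do
  //     observed = global_max;
  //   while (candidate > observed
  //          && !compare_and_swap(&global_max, observed, candidate));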
  /** @brief Yield control to another thread, without waiting for
   *  the end of the time slice. */
  inline void
  yield()
  {
#if defined (_WIN32) && !defined (__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
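
  // Illustrative usage sketch (hypothetical names): polite busy-waiting on
  // a flag set by another thread, giving up the processor between polls
  // instead of burning the whole time slice.
  //
  //   while (!done_flag)
  //     yield();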
} // end namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */