loongson.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690
  /* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.

     Copyright (C) 2008-2015 Free Software Foundation, Inc.
     Contributed by CodeSourcery.

     This file is part of GCC.

     GCC is free software; you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 3, or (at your
     option) any later version.

     GCC is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     License for more details.

     Under Section 7 of GPL version 3, you are granted additional
     permissions described in the GCC Runtime Library Exception, version
     3.1, as published by the Free Software Foundation.

     You should have received a copy of the GNU General Public License and
     a copy of the GCC Runtime Library Exception along with this program;
     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     <http://www.gnu.org/licenses/>.  */
  20. #ifndef _GCC_LOONGSON_H
  21. #define _GCC_LOONGSON_H
  22. #if !defined(__mips_loongson_vector_rev)
  23. # error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
  24. #endif
  25. #ifdef __cplusplus
  26. extern "C" {
  27. #endif
  28. #include <stdint.h>
  /* 64-bit (8-byte) vectors of unsigned bytes, halfwords and words.
     The attribute is spelled __vector_size__ (reserved namespace) so that a
     user macro named vector_size cannot interfere with this header; this
     matches the __always_inline__ spelling used by the intrinsics.  */
  typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
  typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
  typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));

  /* 64-bit (8-byte) vectors of signed bytes, halfwords and words.  */
  typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
  typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
  typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
  /* SIMD intrinsics.
     Unless otherwise noted, calls to the functions below will expand into
     precisely one machine instruction, modulo any moves required to
     satisfy register allocation constraints.  */
  41. /* Pack with signed saturation. */
  42. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  43. packsswh (int32x2_t s, int32x2_t t)
  44. {
  45. return __builtin_loongson_packsswh (s, t);
  46. }
  47. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  48. packsshb (int16x4_t s, int16x4_t t)
  49. {
  50. return __builtin_loongson_packsshb (s, t);
  51. }
  52. /* Pack with unsigned saturation. */
  53. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  54. packushb (uint16x4_t s, uint16x4_t t)
  55. {
  56. return __builtin_loongson_packushb (s, t);
  57. }
  58. /* Vector addition, treating overflow by wraparound. */
  59. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  60. paddw_u (uint32x2_t s, uint32x2_t t)
  61. {
  62. return __builtin_loongson_paddw_u (s, t);
  63. }
  64. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  65. paddh_u (uint16x4_t s, uint16x4_t t)
  66. {
  67. return __builtin_loongson_paddh_u (s, t);
  68. }
  69. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  70. paddb_u (uint8x8_t s, uint8x8_t t)
  71. {
  72. return __builtin_loongson_paddb_u (s, t);
  73. }
  74. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  75. paddw_s (int32x2_t s, int32x2_t t)
  76. {
  77. return __builtin_loongson_paddw_s (s, t);
  78. }
  79. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  80. paddh_s (int16x4_t s, int16x4_t t)
  81. {
  82. return __builtin_loongson_paddh_s (s, t);
  83. }
  84. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  85. paddb_s (int8x8_t s, int8x8_t t)
  86. {
  87. return __builtin_loongson_paddb_s (s, t);
  88. }
  /* Doubleword integer addition with wraparound on overflow.  */

  /* Add the unsigned 64-bit integers S and T.  */
  __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
  paddd_u (uint64_t s, uint64_t t)
  {
    const uint64_t sum = __builtin_loongson_paddd_u (s, t);
    return sum;
  }

  /* Add the signed 64-bit integers S and T.  */
  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
  paddd_s (int64_t s, int64_t t)
  {
    const int64_t sum = __builtin_loongson_paddd_s (s, t);
    return sum;
  }
  100. /* Vector addition, treating overflow by signed saturation. */
  101. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  102. paddsh (int16x4_t s, int16x4_t t)
  103. {
  104. return __builtin_loongson_paddsh (s, t);
  105. }
  106. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  107. paddsb (int8x8_t s, int8x8_t t)
  108. {
  109. return __builtin_loongson_paddsb (s, t);
  110. }
  111. /* Vector addition, treating overflow by unsigned saturation. */
  112. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  113. paddush (uint16x4_t s, uint16x4_t t)
  114. {
  115. return __builtin_loongson_paddush (s, t);
  116. }
  117. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  118. paddusb (uint8x8_t s, uint8x8_t t)
  119. {
  120. return __builtin_loongson_paddusb (s, t);
  121. }
  122. /* Logical AND NOT. */
  123. __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
  124. pandn_ud (uint64_t s, uint64_t t)
  125. {
  126. return __builtin_loongson_pandn_ud (s, t);
  127. }
  128. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  129. pandn_uw (uint32x2_t s, uint32x2_t t)
  130. {
  131. return __builtin_loongson_pandn_uw (s, t);
  132. }
  133. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  134. pandn_uh (uint16x4_t s, uint16x4_t t)
  135. {
  136. return __builtin_loongson_pandn_uh (s, t);
  137. }
  138. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  139. pandn_ub (uint8x8_t s, uint8x8_t t)
  140. {
  141. return __builtin_loongson_pandn_ub (s, t);
  142. }
  143. __extension__ static __inline int64_t __attribute__ ((__always_inline__))
  144. pandn_sd (int64_t s, int64_t t)
  145. {
  146. return __builtin_loongson_pandn_sd (s, t);
  147. }
  148. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  149. pandn_sw (int32x2_t s, int32x2_t t)
  150. {
  151. return __builtin_loongson_pandn_sw (s, t);
  152. }
  153. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  154. pandn_sh (int16x4_t s, int16x4_t t)
  155. {
  156. return __builtin_loongson_pandn_sh (s, t);
  157. }
  158. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  159. pandn_sb (int8x8_t s, int8x8_t t)
  160. {
  161. return __builtin_loongson_pandn_sb (s, t);
  162. }
  163. /* Average. */
  164. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  165. pavgh (uint16x4_t s, uint16x4_t t)
  166. {
  167. return __builtin_loongson_pavgh (s, t);
  168. }
  169. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  170. pavgb (uint8x8_t s, uint8x8_t t)
  171. {
  172. return __builtin_loongson_pavgb (s, t);
  173. }
  174. /* Equality test. */
  175. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  176. pcmpeqw_u (uint32x2_t s, uint32x2_t t)
  177. {
  178. return __builtin_loongson_pcmpeqw_u (s, t);
  179. }
  180. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  181. pcmpeqh_u (uint16x4_t s, uint16x4_t t)
  182. {
  183. return __builtin_loongson_pcmpeqh_u (s, t);
  184. }
  185. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  186. pcmpeqb_u (uint8x8_t s, uint8x8_t t)
  187. {
  188. return __builtin_loongson_pcmpeqb_u (s, t);
  189. }
  190. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  191. pcmpeqw_s (int32x2_t s, int32x2_t t)
  192. {
  193. return __builtin_loongson_pcmpeqw_s (s, t);
  194. }
  195. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  196. pcmpeqh_s (int16x4_t s, int16x4_t t)
  197. {
  198. return __builtin_loongson_pcmpeqh_s (s, t);
  199. }
  200. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  201. pcmpeqb_s (int8x8_t s, int8x8_t t)
  202. {
  203. return __builtin_loongson_pcmpeqb_s (s, t);
  204. }
  205. /* Greater-than test. */
  206. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  207. pcmpgtw_u (uint32x2_t s, uint32x2_t t)
  208. {
  209. return __builtin_loongson_pcmpgtw_u (s, t);
  210. }
  211. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  212. pcmpgth_u (uint16x4_t s, uint16x4_t t)
  213. {
  214. return __builtin_loongson_pcmpgth_u (s, t);
  215. }
  216. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  217. pcmpgtb_u (uint8x8_t s, uint8x8_t t)
  218. {
  219. return __builtin_loongson_pcmpgtb_u (s, t);
  220. }
  221. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  222. pcmpgtw_s (int32x2_t s, int32x2_t t)
  223. {
  224. return __builtin_loongson_pcmpgtw_s (s, t);
  225. }
  226. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  227. pcmpgth_s (int16x4_t s, int16x4_t t)
  228. {
  229. return __builtin_loongson_pcmpgth_s (s, t);
  230. }
  231. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  232. pcmpgtb_s (int8x8_t s, int8x8_t t)
  233. {
  234. return __builtin_loongson_pcmpgtb_s (s, t);
  235. }
  236. /* Extract halfword. */
  237. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  238. pextrh_u (uint16x4_t s, int field /* 0--3 */)
  239. {
  240. return __builtin_loongson_pextrh_u (s, field);
  241. }
  242. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  243. pextrh_s (int16x4_t s, int field /* 0--3 */)
  244. {
  245. return __builtin_loongson_pextrh_s (s, field);
  246. }
  247. /* Insert halfword. */
  248. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  249. pinsrh_0_u (uint16x4_t s, uint16x4_t t)
  250. {
  251. return __builtin_loongson_pinsrh_0_u (s, t);
  252. }
  253. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  254. pinsrh_1_u (uint16x4_t s, uint16x4_t t)
  255. {
  256. return __builtin_loongson_pinsrh_1_u (s, t);
  257. }
  258. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  259. pinsrh_2_u (uint16x4_t s, uint16x4_t t)
  260. {
  261. return __builtin_loongson_pinsrh_2_u (s, t);
  262. }
  263. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  264. pinsrh_3_u (uint16x4_t s, uint16x4_t t)
  265. {
  266. return __builtin_loongson_pinsrh_3_u (s, t);
  267. }
  268. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  269. pinsrh_0_s (int16x4_t s, int16x4_t t)
  270. {
  271. return __builtin_loongson_pinsrh_0_s (s, t);
  272. }
  273. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  274. pinsrh_1_s (int16x4_t s, int16x4_t t)
  275. {
  276. return __builtin_loongson_pinsrh_1_s (s, t);
  277. }
  278. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  279. pinsrh_2_s (int16x4_t s, int16x4_t t)
  280. {
  281. return __builtin_loongson_pinsrh_2_s (s, t);
  282. }
  283. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  284. pinsrh_3_s (int16x4_t s, int16x4_t t)
  285. {
  286. return __builtin_loongson_pinsrh_3_s (s, t);
  287. }
  288. /* Multiply and add. */
  289. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  290. pmaddhw (int16x4_t s, int16x4_t t)
  291. {
  292. return __builtin_loongson_pmaddhw (s, t);
  293. }
  294. /* Maximum of signed halfwords. */
  295. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  296. pmaxsh (int16x4_t s, int16x4_t t)
  297. {
  298. return __builtin_loongson_pmaxsh (s, t);
  299. }
  300. /* Maximum of unsigned bytes. */
  301. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  302. pmaxub (uint8x8_t s, uint8x8_t t)
  303. {
  304. return __builtin_loongson_pmaxub (s, t);
  305. }
  306. /* Minimum of signed halfwords. */
  307. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  308. pminsh (int16x4_t s, int16x4_t t)
  309. {
  310. return __builtin_loongson_pminsh (s, t);
  311. }
  312. /* Minimum of unsigned bytes. */
  313. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  314. pminub (uint8x8_t s, uint8x8_t t)
  315. {
  316. return __builtin_loongson_pminub (s, t);
  317. }
  318. /* Move byte mask. */
  319. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  320. pmovmskb_u (uint8x8_t s)
  321. {
  322. return __builtin_loongson_pmovmskb_u (s);
  323. }
  324. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  325. pmovmskb_s (int8x8_t s)
  326. {
  327. return __builtin_loongson_pmovmskb_s (s);
  328. }
  329. /* Multiply unsigned integers and store high result. */
  330. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  331. pmulhuh (uint16x4_t s, uint16x4_t t)
  332. {
  333. return __builtin_loongson_pmulhuh (s, t);
  334. }
  335. /* Multiply signed integers and store high result. */
  336. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  337. pmulhh (int16x4_t s, int16x4_t t)
  338. {
  339. return __builtin_loongson_pmulhh (s, t);
  340. }
  341. /* Multiply signed integers and store low result. */
  342. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  343. pmullh (int16x4_t s, int16x4_t t)
  344. {
  345. return __builtin_loongson_pmullh (s, t);
  346. }
  347. /* Multiply unsigned word integers. */
  348. __extension__ static __inline int64_t __attribute__ ((__always_inline__))
  349. pmuluw (uint32x2_t s, uint32x2_t t)
  350. {
  351. return __builtin_loongson_pmuluw (s, t);
  352. }
  353. /* Absolute difference. */
  354. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  355. pasubub (uint8x8_t s, uint8x8_t t)
  356. {
  357. return __builtin_loongson_pasubub (s, t);
  358. }
  359. /* Sum of unsigned byte integers. */
  360. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  361. biadd (uint8x8_t s)
  362. {
  363. return __builtin_loongson_biadd (s);
  364. }
  365. /* Sum of absolute differences.
  366. Note that this intrinsic expands into two machine instructions:
  367. PASUBUB followed by BIADD. */
  368. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  369. psadbh (uint8x8_t s, uint8x8_t t)
  370. {
  371. return __builtin_loongson_psadbh (s, t);
  372. }
  373. /* Shuffle halfwords. */
  374. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  375. pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
  376. {
  377. return __builtin_loongson_pshufh_u (s, order);
  378. }
  379. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  380. pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
  381. {
  382. return __builtin_loongson_pshufh_s (s, order);
  383. }
  384. /* Shift left logical. */
  385. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  386. psllh_u (uint16x4_t s, uint8_t amount)
  387. {
  388. return __builtin_loongson_psllh_u (s, amount);
  389. }
  390. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  391. psllh_s (int16x4_t s, uint8_t amount)
  392. {
  393. return __builtin_loongson_psllh_s (s, amount);
  394. }
  395. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  396. psllw_u (uint32x2_t s, uint8_t amount)
  397. {
  398. return __builtin_loongson_psllw_u (s, amount);
  399. }
  400. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  401. psllw_s (int32x2_t s, uint8_t amount)
  402. {
  403. return __builtin_loongson_psllw_s (s, amount);
  404. }
  405. /* Shift right logical. */
  406. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  407. psrlh_u (uint16x4_t s, uint8_t amount)
  408. {
  409. return __builtin_loongson_psrlh_u (s, amount);
  410. }
  411. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  412. psrlh_s (int16x4_t s, uint8_t amount)
  413. {
  414. return __builtin_loongson_psrlh_s (s, amount);
  415. }
  416. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  417. psrlw_u (uint32x2_t s, uint8_t amount)
  418. {
  419. return __builtin_loongson_psrlw_u (s, amount);
  420. }
  421. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  422. psrlw_s (int32x2_t s, uint8_t amount)
  423. {
  424. return __builtin_loongson_psrlw_s (s, amount);
  425. }
  426. /* Shift right arithmetic. */
  427. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  428. psrah_u (uint16x4_t s, uint8_t amount)
  429. {
  430. return __builtin_loongson_psrah_u (s, amount);
  431. }
  432. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  433. psrah_s (int16x4_t s, uint8_t amount)
  434. {
  435. return __builtin_loongson_psrah_s (s, amount);
  436. }
  437. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  438. psraw_u (uint32x2_t s, uint8_t amount)
  439. {
  440. return __builtin_loongson_psraw_u (s, amount);
  441. }
  442. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  443. psraw_s (int32x2_t s, uint8_t amount)
  444. {
  445. return __builtin_loongson_psraw_s (s, amount);
  446. }
  447. /* Vector subtraction, treating overflow by wraparound. */
  448. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  449. psubw_u (uint32x2_t s, uint32x2_t t)
  450. {
  451. return __builtin_loongson_psubw_u (s, t);
  452. }
  453. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  454. psubh_u (uint16x4_t s, uint16x4_t t)
  455. {
  456. return __builtin_loongson_psubh_u (s, t);
  457. }
  458. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  459. psubb_u (uint8x8_t s, uint8x8_t t)
  460. {
  461. return __builtin_loongson_psubb_u (s, t);
  462. }
  463. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  464. psubw_s (int32x2_t s, int32x2_t t)
  465. {
  466. return __builtin_loongson_psubw_s (s, t);
  467. }
  468. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  469. psubh_s (int16x4_t s, int16x4_t t)
  470. {
  471. return __builtin_loongson_psubh_s (s, t);
  472. }
  473. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  474. psubb_s (int8x8_t s, int8x8_t t)
  475. {
  476. return __builtin_loongson_psubb_s (s, t);
  477. }
  /* Doubleword integer subtraction with wraparound on overflow.  */

  /* Subtract on the unsigned 64-bit integers S and T.  */
  __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
  psubd_u (uint64_t s, uint64_t t)
  {
    const uint64_t diff = __builtin_loongson_psubd_u (s, t);
    return diff;
  }

  /* Subtract on the signed 64-bit integers S and T.  */
  __extension__ static __inline int64_t __attribute__ ((__always_inline__))
  psubd_s (int64_t s, int64_t t)
  {
    const int64_t diff = __builtin_loongson_psubd_s (s, t);
    return diff;
  }
  489. /* Vector subtraction, treating overflow by signed saturation. */
  490. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  491. psubsh (int16x4_t s, int16x4_t t)
  492. {
  493. return __builtin_loongson_psubsh (s, t);
  494. }
  495. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  496. psubsb (int8x8_t s, int8x8_t t)
  497. {
  498. return __builtin_loongson_psubsb (s, t);
  499. }
  500. /* Vector subtraction, treating overflow by unsigned saturation. */
  501. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  502. psubush (uint16x4_t s, uint16x4_t t)
  503. {
  504. return __builtin_loongson_psubush (s, t);
  505. }
  506. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  507. psubusb (uint8x8_t s, uint8x8_t t)
  508. {
  509. return __builtin_loongson_psubusb (s, t);
  510. }
  511. /* Unpack high data. */
  512. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  513. punpckhwd_u (uint32x2_t s, uint32x2_t t)
  514. {
  515. return __builtin_loongson_punpckhwd_u (s, t);
  516. }
  517. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  518. punpckhhw_u (uint16x4_t s, uint16x4_t t)
  519. {
  520. return __builtin_loongson_punpckhhw_u (s, t);
  521. }
  522. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  523. punpckhbh_u (uint8x8_t s, uint8x8_t t)
  524. {
  525. return __builtin_loongson_punpckhbh_u (s, t);
  526. }
  527. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  528. punpckhwd_s (int32x2_t s, int32x2_t t)
  529. {
  530. return __builtin_loongson_punpckhwd_s (s, t);
  531. }
  532. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  533. punpckhhw_s (int16x4_t s, int16x4_t t)
  534. {
  535. return __builtin_loongson_punpckhhw_s (s, t);
  536. }
  537. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  538. punpckhbh_s (int8x8_t s, int8x8_t t)
  539. {
  540. return __builtin_loongson_punpckhbh_s (s, t);
  541. }
  542. /* Unpack low data. */
  543. __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
  544. punpcklwd_u (uint32x2_t s, uint32x2_t t)
  545. {
  546. return __builtin_loongson_punpcklwd_u (s, t);
  547. }
  548. __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
  549. punpcklhw_u (uint16x4_t s, uint16x4_t t)
  550. {
  551. return __builtin_loongson_punpcklhw_u (s, t);
  552. }
  553. __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
  554. punpcklbh_u (uint8x8_t s, uint8x8_t t)
  555. {
  556. return __builtin_loongson_punpcklbh_u (s, t);
  557. }
  558. __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
  559. punpcklwd_s (int32x2_t s, int32x2_t t)
  560. {
  561. return __builtin_loongson_punpcklwd_s (s, t);
  562. }
  563. __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
  564. punpcklhw_s (int16x4_t s, int16x4_t t)
  565. {
  566. return __builtin_loongson_punpcklhw_s (s, t);
  567. }
  568. __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
  569. punpcklbh_s (int8x8_t s, int8x8_t t)
  570. {
  571. return __builtin_loongson_punpcklbh_s (s, t);
  572. }
  573. #ifdef __cplusplus
  574. }
  575. #endif
  576. #endif