Add tools
This commit is contained in:
BIN
tools/bin/x86_64-unknown-redox/toolchain/lib/bfd-plugins/libdep.so
Executable file
BIN
tools/bin/x86_64-unknown-redox/toolchain/lib/bfd-plugins/libdep.so
Executable file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,14 @@
|
||||
This README file is copied into the directory for GCC-only header files
|
||||
when fixincludes is run by the makefile for GCC.
|
||||
|
||||
Many of the files in this directory were automatically edited from the
|
||||
standard system header files by the fixincludes process. They are
|
||||
system-specific, and will not work on any other kind of system. They
|
||||
are also not part of GCC. The reason we have to do this is because
|
||||
GCC requires ANSI C headers and many vendors supply ANSI-incompatible
|
||||
headers.
|
||||
|
||||
Because this is an automated process, sometimes headers get "fixed"
|
||||
that do not, strictly speaking, need a fix. As long as nothing is broken
|
||||
by the process, it is just an unfortunate collateral inconvenience.
|
||||
We would like to rectify it, if it is not "too inconvenient".
|
@ -0,0 +1,81 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _ADXINTRIN_H_INCLUDED
|
||||
#define _ADXINTRIN_H_INCLUDED
|
||||
|
||||
/* 32-bit subtract with borrow.  Forwards all four arguments, in the order
   received, to __builtin_ia32_sbb_u32 and returns the builtin's flag
   result; the difference is written through __P by the builtin.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_subborrow_u32 (unsigned char __CF, unsigned int __X,
                unsigned int __Y, unsigned int *__P)
{
  unsigned char __flag = __builtin_ia32_sbb_u32 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
|
||||
/* 32-bit add with carry.  Forwards all four arguments, in the order
   received, to __builtin_ia32_addcarryx_u32 and returns the builtin's flag
   result; the sum is written through __P by the builtin.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_addcarry_u32 (unsigned char __CF, unsigned int __X,
               unsigned int __Y, unsigned int *__P)
{
  unsigned char __flag = __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
|
||||
/* 32-bit add with carry, "x" spelling.  Uses the same builtin as
   _addcarry_u32: all four arguments go to __builtin_ia32_addcarryx_u32
   unchanged and its flag result is returned.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_addcarryx_u32 (unsigned char __CF, unsigned int __X,
                unsigned int __Y, unsigned int *__P)
{
  unsigned char __flag = __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* 64-bit subtract with borrow.  Forwards all four arguments, in the order
   received, to __builtin_ia32_sbb_u64 and returns the builtin's flag
   result; the difference is written through __P by the builtin.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_subborrow_u64 (unsigned char __CF, unsigned long long __X,
                unsigned long long __Y, unsigned long long *__P)
{
  unsigned char __flag = __builtin_ia32_sbb_u64 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
|
||||
/* 64-bit add with carry.  Forwards all four arguments, in the order
   received, to __builtin_ia32_addcarryx_u64 and returns the builtin's flag
   result; the sum is written through __P by the builtin.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_addcarry_u64 (unsigned char __CF, unsigned long long __X,
               unsigned long long __Y, unsigned long long *__P)
{
  unsigned char __flag = __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
|
||||
/* 64-bit add with carry, "x" spelling.  Uses the same builtin as
   _addcarry_u64: all four arguments go to __builtin_ia32_addcarryx_u64
   unchanged and its flag result is returned.  */
extern __inline unsigned char
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_addcarryx_u64 (unsigned char __CF, unsigned long long __X,
                unsigned long long __Y, unsigned long long *__P)
{
  unsigned char __flag = __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
  return __flag;
}
|
||||
#endif
|
||||
|
||||
#endif /* _ADXINTRIN_H_INCLUDED */
|
@ -0,0 +1,93 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the specification included in the AMD Programmers
|
||||
Manual Update, version 2.x */
|
||||
|
||||
#ifndef _AMMINTRIN_H_INCLUDED
|
||||
#define _AMMINTRIN_H_INCLUDED
|
||||
|
||||
/* We need definitions from the SSE3, SSE2 and SSE header files*/
|
||||
#include <pmmintrin.h>
|
||||
|
||||
#ifndef __SSE4A__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4a")
|
||||
#define __DISABLE_SSE4A__
|
||||
#endif /* __SSE4A__ */
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_sd (double * __P, __m128d __Y)
|
||||
{
|
||||
__builtin_ia32_movntsd (__P, (__v2df) __Y);
|
||||
}
|
||||
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_ss (float * __P, __m128 __Y)
|
||||
{
|
||||
__builtin_ia32_movntss (__P, (__v4sf) __Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_si64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
|
||||
}
|
||||
|
||||
/* _mm_extracti_si64: call __builtin_ia32_extrqi on X (viewed as __v2di)
   with the two unsigned operands I and L.  Without __OPTIMIZE__ a macro is
   used instead of the inline function -- presumably because the builtin
   needs I and L as compile-time constants; confirm.  */
#ifndef __OPTIMIZE__
#define _mm_extracti_si64(X, I, L)                                      \
  ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X),               \
                                    (unsigned int)(I), (unsigned int)(L)))
#else
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
  return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
}
#endif
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_si64 (__m128i __X,__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
/* _mm_inserti_si64: call __builtin_ia32_insertqi on X and Y (both viewed
   as __v2di) with the two unsigned operands I and L.  Without __OPTIMIZE__
   a macro is used instead of the inline function -- presumably because the
   builtin needs I and L as compile-time constants; confirm.  */
#ifndef __OPTIMIZE__
#define _mm_inserti_si64(X, Y, I, L)                                    \
  ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X),             \
                                      (__v2di)(__m128i)(Y),             \
                                      (unsigned int)(I), (unsigned int)(L)))
#else
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
{
  return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
}
#endif
|
||||
|
||||
#ifdef __DISABLE_SSE4A__
|
||||
#undef __DISABLE_SSE4A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4A__ */
|
||||
|
||||
#endif /* _AMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,52 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXBF16INTRIN_H_INCLUDED
|
||||
#define _AMXBF16INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-bf16")
|
||||
#define __DISABLE_AMX_BF16__
|
||||
#endif /* __AMX_BF16__ */
|
||||
|
||||
#if defined (__x86_64__)

/* _tile_dpbf16ps (dst, src1, src2) emits one tdpbf16ps instruction.  The
   arguments are stringified into the register names (%tmm<N>), so they must
   be literal tile numbers rather than run-time values.  The public macro
   goes through the _internal one so that arguments which are themselves
   macros are expanded before '#' is applied.  The template carries both
   assembler dialects in {att|intel} form.  */
#define _tile_dpbf16ps(dst,src1,src2)                                   \
  _tile_dpbf16ps_internal (dst, src1, src2)

#define _tile_dpbf16ps_internal(dst,src1,src2)                          \
  __asm__ volatile                                                      \
  ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)

#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_BF16__
|
||||
#undef __DISABLE_AMX_BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_BF16__ */
|
||||
|
||||
#endif /* _AMXBF16INTRIN_H_INCLUDED */
|
@ -0,0 +1,59 @@
|
||||
/* Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxcomplexintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXCOMPLEXINTRIN_H_INCLUDED
|
||||
#define _AMXCOMPLEXINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_COMPLEX__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-complex")
|
||||
#define __DISABLE_AMX_COMPLEX__
|
||||
#endif /* __AMX_COMPLEX__ */
|
||||
|
||||
#if defined (__x86_64__)

/* _tile_cmmimfp16ps and _tile_cmmrlfp16ps each emit one instruction of the
   same name.  The arguments are stringified into the register names
   (%tmm<N>), so they must be literal tile numbers.  Each public macro goes
   through its _internal twin so that arguments which are themselves macros
   are expanded before '#' is applied.  The templates carry both assembler
   dialects in {att|intel} form.  */
#define _tile_cmmimfp16ps(src1_dst,src2,src3)                           \
  _tile_cmmimfp16ps_internal (src1_dst, src2, src3)

#define _tile_cmmrlfp16ps(src1_dst,src2,src3)                           \
  _tile_cmmrlfp16ps_internal (src1_dst, src2, src3)

#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3)                  \
  __asm__ volatile                                                      \
  ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)

#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3)                  \
  __asm__ volatile                                                      \
  ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)

#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_COMPLEX__
|
||||
#undef __DISABLE_AMX_COMPLEX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_COMPLEX__ */
|
||||
|
||||
#endif /* _AMXCOMPLEXINTRIN_H_INCLUDED */
|
@ -0,0 +1,46 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxfp16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXFP16INTRIN_H_INCLUDED
|
||||
#define _AMXFP16INTRIN_H_INCLUDED
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#define _tile_dpfp16ps_internal(dst,src1,src2) \
|
||||
__asm__ volatile \
|
||||
("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
|
||||
#define _tile_dpfp16ps(dst,src1,src2) \
|
||||
_tile_dpfp16ps_internal (dst,src1,src2)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_FP16__
|
||||
#undef __DISABLE_AMX_FP16__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_FP16__ */
|
||||
|
||||
#endif /* _AMXFP16INTRIN_H_INCLUDED */
|
@ -0,0 +1,61 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXINT8INTRIN_H_INCLUDED
|
||||
#define _AMXINT8INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_INT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-int8")
|
||||
#define __DISABLE_AMX_INT8__
|
||||
#endif /* __AMX_INT8__ */
|
||||
|
||||
#if defined (__x86_64__)

/* Shared emitter for the four int8 dot-product macros below.  NAME is
   stringified into the mnemonic and DST/SRC1/SRC2 into the register names
   (%tmm<N>), so the tile arguments must be literal tile numbers.  Because
   the public macros pass their arguments through this one, arguments that
   are themselves macros are expanded before '#' is applied.  The template
   carries both assembler dialects in {att|intel} form.  */
#define _tile_int8_dp_internal(name,dst,src1,src2)                      \
  __asm__ volatile                                                      \
  ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)

/* Each public macro selects one mnemonic.  */
#define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
#define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
#define _tile_dpbusd(dst,src1,src2) _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
#define _tile_dpbuud(dst,src1,src2) _tile_int8_dp_internal (tdpbuud, dst, src1, src2)

#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_INT8__
|
||||
#undef __DISABLE_AMX_INT8__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_INT8__ */
|
||||
|
||||
#endif /* _AMXINT8INTRIN_H_INCLUDED */
|
@ -0,0 +1,98 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AMXTILEINTRIN_H_INCLUDED
|
||||
#define _AMXTILEINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AMX_TILE__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-tile")
|
||||
#define __DISABLE_AMX_TILE__
|
||||
#endif /* __AMX_TILE__ */
|
||||
|
||||
#if defined(__x86_64__)
|
||||
/* Emit ldtilecfg with the memory at __config as its operand.
   NOTE(review): the "m" operand is typed as a single pointer-sized object
   at __config; whether that tells the compiler enough about the memory the
   instruction reads should be confirmed.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
  const void **__cfg = (const void **) __config;
  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*__cfg));
}
|
||||
|
||||
/* Emit sttilecfg with the memory at __config as its output operand.
   NOTE(review): the "=m" operand is typed as a single pointer-sized object
   at __config; whether that tells the compiler enough about the memory the
   instruction writes should be confirmed.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_tile_storeconfig (void *__config)
{
  void **__cfg = (void **) __config;
  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*__cfg));
}
|
||||
|
||||
/* Emit a bare tilerelease instruction; it has no operands and no declared
   clobbers.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_tile_release (void)
{
  __asm__ volatile ("tilerelease"
                    : /* no outputs */
                    : /* no inputs */);
}
|
||||
|
||||
/* _tile_loadd (dst, base, stride) emits tileloadd.  DST is stringified into
   the register name (%tmm<N>), so it must be a literal tile number; BASE
   and STRIDE are ordinary expressions passed in registers as the base and
   index of the address.  The public macro goes through the _internal one
   so that a DST that is itself a macro is expanded before '#' is
   applied.  */
#define _tile_loadd_internal(dst,base,stride)                           \
  __asm__ volatile                                                      \
  ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \
   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))

#define _tile_loadd(dst,base,stride)                                    \
  _tile_loadd_internal (dst, base, stride)
|
||||
|
||||
/* _tile_stream_loadd (dst, base, stride) emits tileloaddt1.  DST is
   stringified into the register name (%tmm<N>), so it must be a literal
   tile number; BASE and STRIDE are ordinary expressions passed in registers
   as the base and index of the address.  The public macro goes through the
   _internal one so that a DST that is itself a macro is expanded before
   '#' is applied.  */
#define _tile_stream_loadd_internal(dst,base,stride)                    \
  __asm__ volatile                                                      \
  ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \
   :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)))

#define _tile_stream_loadd(dst,base,stride)                             \
  _tile_stream_loadd_internal (dst, base, stride)
|
||||
|
||||
/* _tile_stored (src, base, stride) emits tilestored.  SRC is stringified
   into the register name (%tmm<N>), so it must be a literal tile number;
   BASE and STRIDE are ordinary expressions passed in registers as the base
   and index of the address.  The asm declares a "memory" clobber.  The
   public macro goes through the _internal one so that a SRC that is itself
   a macro is expanded before '#' is applied.  */
#define _tile_stored_internal(src,base,stride)                          \
  __asm__ volatile                                                      \
  ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \
   :: "r" ((void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))           \
   : "memory")

#define _tile_stored(src,base,stride)                                   \
  _tile_stored_internal (src, base, stride)
|
||||
|
||||
/* _tile_zero (dst) emits tilezero on %tmm<dst>.  DST is stringified, so it
   must be a literal tile number; the public macro goes through the
   _internal one so that a DST that is itself a macro is expanded before
   '#' is applied.  */
#define _tile_zero_internal(dst)                                        \
  __asm__ volatile                                                      \
  ("tilezero\t%%tmm"#dst ::)

#define _tile_zero(dst)                                                 \
  _tile_zero_internal (dst)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AMX_TILE__
|
||||
#undef __DISABLE_AMX_TILE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AMX_TILE__ */
|
||||
|
||||
#endif /* _AMXTILEINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,216 @@
|
||||
/* Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
#define _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124FMAPS__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124fmaps")
|
||||
#define __DISABLE_AVX5124FMAPS__
|
||||
#endif /* __AVX5124FMAPS__ */
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124FMAPS__
|
||||
#undef __DISABLE_AVX5124FMAPS__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124FMAPS__ */
|
||||
|
||||
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */
|
@ -0,0 +1,132 @@
|
||||
/* Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
#define _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124VNNIW__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124vnniw")
|
||||
#define __DISABLE_AVX5124VNNIW__
|
||||
#endif /* __AVX5124VNNIW__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124VNNIW__
|
||||
#undef __DISABLE_AVX5124VNNIW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124VNNIW__ */
|
||||
|
||||
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */
|
@ -0,0 +1,152 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512BF16INTRIN_H_INCLUDED
|
||||
#define _AVX512BF16INTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512BF16__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16")
|
||||
#define __DISABLE_AVX512BF16__
|
||||
#endif /* __AVX512BF16__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
/* 64-byte vector of __bf16 elements (declared without __may_alias__).  */
typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
/* 64-byte vector of __bf16 elements, declared with __may_alias__.  */
typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
/* Convert One BF16 Data to One Single Float Data. */
|
||||
extern __inline float
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsbh_ss (__bf16 __A)
|
||||
{
|
||||
return __builtin_ia32_cvtbf2sf (__A);
|
||||
}
|
||||
|
||||
/* vcvtne2ps2bf16 */
|
||||
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A);
|
||||
}
|
||||
|
||||
/* vcvtneps2bf16 */
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtneps_pbh (__m512 __A)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
|
||||
}
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
|
||||
}
|
||||
|
||||
/* vdpbf16ps */
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtpbh_ps (__m256bh __A)
|
||||
{
|
||||
return (__m512)_mm512_castsi512_ps ((__m512i)_mm512_slli_epi32 (
|
||||
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
|
||||
{
|
||||
return (__m512)_mm512_castsi512_ps ((__m512i) _mm512_slli_epi32 (
|
||||
(__m512i)_mm512_maskz_cvtepi16_epi32 (
|
||||
(__mmask16)__U, (__m256i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
|
||||
{
|
||||
return (__m512)_mm512_castsi512_ps ((__m512i)(_mm512_mask_slli_epi32 (
|
||||
(__m512i)__S, (__mmask16)__U,
|
||||
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BF16__
|
||||
#undef __DISABLE_AVX512BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BF16__ */
|
||||
|
||||
#endif /* _AVX512BF16INTRIN_H_INCLUDED */
|
@ -0,0 +1,238 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
#define _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16,avx512vl")
|
||||
#define __DISABLE_AVX512BF16VL__
|
||||
#endif /* !__AVX512VL__ || !__AVX512BF16__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
/* 32-byte vector of __bf16 elements (declared without __may_alias__).  */
typedef __bf16 __v16bf __attribute__ ((__vector_size__ (32)));
|
||||
/* 16-byte vector of __bf16 elements (declared without __may_alias__).  */
typedef __bf16 __v8bf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
/* 32-byte vector of __bf16 elements, declared with __may_alias__.  */
typedef __bf16 __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
|
||||
/* 16-byte vector of __bf16 elements, declared with __may_alias__.  */
typedef __bf16 __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
|
||||
/* Alternative spelling of the scalar __bf16 type.  */
typedef __bf16 __bfloat16;
|
||||
|
||||
/* Unmasked cvtneps2bf16 wrappers, provided as macros.  The whole
   expansion is parenthesised so that the cast and the builtin call stay
   together as one operand whatever operator surrounds the macro at the
   use site (for example a subscript applied to the result).  */
#define _mm256_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v8sf (A))
#define _mm_cvtneps_pbh(A) \
  ((__m128bh) __builtin_ia32_cvtneps2bf16_v4sf (A))
|
||||
|
||||
/* vcvtne2ps2bf16 */
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf(__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_mask(__C, __D, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16bf_maskz(__B, __C, __A);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf(__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_mask(__C, __D, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8bf_maskz(__B, __C, __A);
|
||||
}
|
||||
|
||||
/* vcvtneps2bf16 */
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
|
||||
}
|
||||
|
||||
/* vdpbf16ps */
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
|
||||
extern __inline __bf16
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtness_sbh (float __A)
|
||||
{
|
||||
__v4sf __V = {__A, 0, 0, 0};
|
||||
__v8bf __R = __builtin_ia32_cvtneps2bf16_v4sf_mask ((__v4sf)__V,
|
||||
(__v8bf)_mm_undefined_si128 (), (__mmask8)-1);
|
||||
return __R[0];
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtpbh_ps (__m128bh __A)
|
||||
{
|
||||
return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
|
||||
(__m128i)_mm_cvtepi16_epi32 ((__m128i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtpbh_ps (__m128bh __A)
|
||||
{
|
||||
return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
|
||||
(__m256i)_mm256_cvtepi16_epi32 ((__m128i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
|
||||
{
|
||||
return (__m128)_mm_castsi128_ps ((__m128i)_mm_slli_epi32 (
|
||||
(__m128i)_mm_maskz_cvtepi16_epi32 (
|
||||
(__mmask8)__U, (__m128i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A)
|
||||
{
|
||||
return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_slli_epi32 (
|
||||
(__m256i)_mm256_maskz_cvtepi16_epi32 (
|
||||
(__mmask8)__U, (__m128i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtpbh_ps (__m128 __S, __mmask8 __U, __m128bh __A)
|
||||
{
|
||||
return (__m128)_mm_castsi128_ps ((__m128i)_mm_mask_slli_epi32 (
|
||||
(__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32 (
|
||||
(__m128i)__A), 16));
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A)
|
||||
{
|
||||
return (__m256)_mm256_castsi256_ps ((__m256i)_mm256_mask_slli_epi32 (
|
||||
(__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32 (
|
||||
(__m128i)__A), 16));
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BF16VL__
|
||||
#undef __DISABLE_AVX512BF16VL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BF16VL__ */
|
||||
|
||||
#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
|
@ -0,0 +1,283 @@
|
||||
/* Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
|
||||
#define _AVX512BITALGINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512BITALG__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg")
|
||||
#define __DISABLE_AVX512BITALG__
|
||||
#endif /* __AVX512BITALG__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi8 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi16 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALG__
|
||||
#undef __DISABLE_AVX512BITALG__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALG__ */
|
||||
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGBW__
|
||||
#endif /* !__AVX512BITALG__ || !__AVX512BW__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __U);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGBW__
|
||||
#undef __DISABLE_AVX512BITALGBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGBW__ */
|
||||
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGVLBW__
|
||||
#endif /* !__AVX512BITALG__ || !__AVX512VL__ || !__AVX512BW__ */
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGVLBW__
|
||||
#undef __DISABLE_AVX512BITALGVLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGVLBW__ */
|
||||
|
||||
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl")
|
||||
#define __DISABLE_AVX512BITALGVL__
|
||||
#endif /* !__AVX512BITALG__ || !__AVX512VL__ */
|
||||
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi8 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi16 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi8 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi16 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BITALGVL__
|
||||
#undef __DISABLE_AVX512BITALGVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGVL__ */
|
||||
|
||||
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,184 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512CDINTRIN_H_INCLUDED
|
||||
#define _AVX512CDINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512CD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512cd")
|
||||
#define __DISABLE_AVX512CD__
|
||||
#endif /* __AVX512CD__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
/* 64-byte vector of long long elements.  */
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
/* 64-byte vector of int elements.  */
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
/* 64-byte vector of long long elements, declared with __may_alias__.  */
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
/* 64-byte vector of double elements, declared with __may_alias__.  */
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
/* Mask type used with the __v8di builtins in this header.  */
typedef unsigned char __mmask8;
|
||||
/* Mask type used with the __v16si builtins in this header.  */
typedef unsigned short __mmask16;
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512CD__
|
||||
#undef __DISABLE_AVX512CD__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512CD__ */
|
||||
|
||||
#endif /* _AVX512CDINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,536 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512ERINTRIN_H_INCLUDED
|
||||
#define _AVX512ERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512ER__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512er")
|
||||
#define __DISABLE_AVX512ER__
|
||||
#endif /* __AVX512ER__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef double __v8df __attribute__ ((__vector_size__ (64)));
|
||||
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_undefined_pd (),
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_undefined_ps (),
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_exp2a23_round_pd(A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_exp2a23_round_ps(A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_pd(A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_ps(A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_pd(A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_ps(A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
/* Macro form of _mm_rcp28_round_sd used when __OPTIMIZE__ is not defined.
   The builtin takes B first and A second, exactly as the always-inline
   definition of this intrinsic and the _mm_rcp28_sd macro pass them; the
   previous (A, B) order swapped the two source operands in unoptimized
   builds.  Arguments are parenthesized so arbitrary expressions expand
   safely.  */
#define _mm_rcp28_round_sd(A, B, R) \
    __builtin_ia32_rcp28sd_round ((B), (A), (R))
|
||||
|
||||
/* Macro form of _mm_mask_rcp28_round_sd used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  */
#define _mm_mask_rcp28_round_sd(W, U, A, B, R) \
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (W), (U), (R))
|
||||
|
||||
/* Macro form of _mm_maskz_rcp28_round_sd used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  The third
   operand is the vector returned by _mm_setzero_pd ().  */
#define _mm_maskz_rcp28_round_sd(U, A, B, R) \
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
                                       (U), (R))
|
||||
|
||||
/* Macro form of _mm_rcp28_round_ss used when __OPTIMIZE__ is not defined.
   The builtin takes B first and A second, exactly as the always-inline
   definition of this intrinsic and the _mm_rcp28_ss macro pass them; the
   previous (A, B) order swapped the two source operands in unoptimized
   builds.  Arguments are parenthesized so arbitrary expressions expand
   safely.  */
#define _mm_rcp28_round_ss(A, B, R) \
    __builtin_ia32_rcp28ss_round ((B), (A), (R))
|
||||
|
||||
/* Macro form of _mm_mask_rcp28_round_ss used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  */
#define _mm_mask_rcp28_round_ss(W, U, A, B, R) \
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (W), (U), (R))
|
||||
|
||||
/* Macro form of _mm_maskz_rcp28_round_ss used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  The third
   operand is the vector returned by _mm_setzero_ps ().  */
#define _mm_maskz_rcp28_round_ss(U, A, B, R) \
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
                                       (U), (R))
|
||||
|
||||
/* Macro form of _mm_rsqrt28_round_sd used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, exactly as the
   always-inline definition of this intrinsic and the _mm_rsqrt28_sd macro
   pass them; the previous (A, B) order swapped the two source operands in
   unoptimized builds.  Arguments are parenthesized so arbitrary
   expressions expand safely.  */
#define _mm_rsqrt28_round_sd(A, B, R) \
    __builtin_ia32_rsqrt28sd_round ((B), (A), (R))
|
||||
|
||||
/* Macro form of _mm_mask_rsqrt28_round_sd used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  */
#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (W), (U), (R))
|
||||
|
||||
/* Macro form of _mm_maskz_rsqrt28_round_sd used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  The third
   operand is the vector returned by _mm_setzero_pd ().  */
#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
                                         (U), (R))
|
||||
|
||||
/* Macro form of _mm_rsqrt28_round_ss used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, exactly as the
   always-inline definition of this intrinsic and the _mm_rsqrt28_ss macro
   pass them; the previous (A, B) order swapped the two source operands in
   unoptimized builds.  Arguments are parenthesized so arbitrary
   expressions expand safely.  */
#define _mm_rsqrt28_round_ss(A, B, R) \
    __builtin_ia32_rsqrt28ss_round ((B), (A), (R))
|
||||
|
||||
/* Macro form of _mm_mask_rsqrt28_round_ss used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  */
#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (W), (U), (R))
|
||||
|
||||
/* Macro form of _mm_maskz_rsqrt28_round_ss used when __OPTIMIZE__ is not
   defined.  The builtin takes B first and A second, matching the
   always-inline definition of this intrinsic; the previous (A), (B) order
   swapped the two source operands in unoptimized builds.  The third
   operand is the vector returned by _mm_setzero_ps ().  */
#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
                                         (U), (R))
|
||||
|
||||
#endif
|
||||
|
||||
#define _mm_mask_rcp28_sd(W, U, A, B)\
|
||||
_mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rcp28_sd(U, A, B)\
|
||||
_mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rcp28_ss(W, U, A, B)\
|
||||
_mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rcp28_ss(U, A, B)\
|
||||
_mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rsqrt28_sd(W, U, A, B)\
|
||||
_mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rsqrt28_sd(U, A, B)\
|
||||
_mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rsqrt28_ss(W, U, A, B)\
|
||||
_mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rsqrt28_ss(U, A, B)\
|
||||
_mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_pd(A) \
|
||||
_mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_pd(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_pd(U, A) \
|
||||
_mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_ps(A) \
|
||||
_mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_ps(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_ps(U, A) \
|
||||
_mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rcp28_pd(A) \
|
||||
_mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(W, U, A) \
|
||||
_mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_pd(U, A) \
|
||||
_mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rcp28_ps(A) \
|
||||
_mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(W, U, A) \
|
||||
_mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_ps(U, A) \
|
||||
_mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_pd(A) \
|
||||
_mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_pd(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_pd(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_ps(A) \
|
||||
_mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_ps(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_ps(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_sd(A, B) \
|
||||
__builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_ss(A, B) \
|
||||
__builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_sd(A, B) \
|
||||
__builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_ss(A, B) \
|
||||
__builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#ifdef __DISABLE_AVX512ER__
|
||||
#undef __DISABLE_AVX512ER__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512ER__ */
|
||||
|
||||
#endif /* _AVX512ERINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,104 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512IFMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma")
|
||||
#define __DISABLE_AVX512IFMA__
|
||||
#endif /* __AVX512IFMA__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMA__
|
||||
#undef __DISABLE_AVX512IFMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMA__ */
|
||||
|
||||
#endif /* _AVX512IFMAINTRIN_H_INCLUDED */
|
@ -0,0 +1,145 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma,avx512vl")
|
||||
#define __DISABLE_AVX512IFMAVL__
|
||||
#endif /* __AVX512IFMAVL__ */
|
||||
|
||||
#define _mm_madd52lo_epu64(A, B, C) \
|
||||
((__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) (A), \
|
||||
(__v2di) (B), \
|
||||
(__v2di) (C)))
|
||||
|
||||
#define _mm_madd52hi_epu64(A, B, C) \
|
||||
((__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) (A), \
|
||||
(__v2di) (B), \
|
||||
(__v2di) (C)))
|
||||
|
||||
#define _mm256_madd52lo_epu64(A, B, C) \
|
||||
((__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) (A), \
|
||||
(__v4di) (B), \
|
||||
(__v4di) (C)))
|
||||
|
||||
|
||||
#define _mm256_madd52hi_epu64(A, B, C) \
|
||||
((__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) (A), \
|
||||
(__v4di) (B), \
|
||||
(__v4di) (C)))
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMAVL__
|
||||
#undef __DISABLE_AVX512IFMAVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMAVL__ */
|
||||
|
||||
#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */
|
@ -0,0 +1,269 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512PFINTRIN_H_INCLUDED
|
||||
#define _AVX512PFINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512PF__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512pf")
|
||||
#define __DISABLE_AVX512PF__
|
||||
#endif /* __AVX512PF__ */
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
#else
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfdpd with an all-ones 8-bit mask and every argument
   cast explicitly.  */
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, \
                              (__v8si) (__m256i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), \
                              (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfdps with an all-ones 16-bit mask and every
   argument cast explicitly.  */
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, \
                              (__v16si) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), \
                              (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfdpd with MASK moved to the first argument.  */
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), \
                              (__v8si) (__m256i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), \
                              (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfdps with MASK moved to the first argument.  */
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfdps ((__mmask16) (MASK), \
                              (__v16si) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), \
                              (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfqpd with an all-ones 8-bit mask.  ADDR is cast to
   `void const *' so the cast does not strip a const qualifier; this matches
   the `void const *__addr' parameter of the inline definition and the cast
   used by the _mm512_*prefetch_i32gather_* macros.  */
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, \
                              (__v8di) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfqps with an all-ones 8-bit mask.  ADDR is cast to
   `void const *' so the cast does not strip a const qualifier; this matches
   the `void const *__addr' parameter of the inline definition and the cast
   used by the _mm512_*prefetch_i32gather_* macros.  */
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, \
                              (__v8di) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfqpd with MASK moved to the first argument.  ADDR
   is cast to `void const *' so the cast does not strip a const qualifier;
   this matches the `void const *__addr' parameter of the inline definition
   and the cast used by the _mm512_*prefetch_i32gather_* macros.  */
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), \
                              (__v8di) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_gatherpfqps with MASK moved to the first argument.  ADDR
   is cast to `void const *' so the cast does not strip a const qualifier;
   this matches the `void const *__addr' parameter of the inline definition
   and the cast used by the _mm512_*prefetch_i32gather_* macros.  */
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), \
                              (__v8di) (__m512i) (INDEX), \
                              (void const *) (ADDR), \
                              (int) (SCALE), (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfdpd with an all-ones 8-bit mask.  */
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, \
                               (__v8si) (__m256i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfdps with an all-ones 16-bit mask.  */
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, \
                               (__v16si) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfdpd in (mask, index, address, scale, hint)
   order.  */
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), \
                               (__v8si) (__m256i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfdps in (mask, index, address, scale, hint)
   order.  */
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfdps ((__mmask16) (MASK), (__v16si) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfqpd with an all-ones 8-bit mask.  */
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, \
                               (__v8di) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfqps with an all-ones 8-bit mask.  */
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, \
                               (__v8di) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfqpd in (mask, index, address, scale, hint)
   order.  */
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), \
                               (__v8di) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
|
||||
/* Macro form for the non-__OPTIMIZE__ branch: expands to
   __builtin_ia32_scatterpfqps in (mask, index, address, scale, hint)
   order.  */
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
  __builtin_ia32_scatterpfqps ((__mmask8) (MASK), \
                               (__v8di) (__m512i) (INDEX), \
                               (void *) (ADDR), \
                               (int) (SCALE), \
                               (int) (HINT))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AVX512PF__
|
||||
#undef __DISABLE_AVX512PF__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512PF__ */
|
||||
|
||||
#endif /* _AVX512PFINTRIN_H_INCLUDED */
|
@ -0,0 +1,557 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
#define __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VBMI2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2")
|
||||
#define __DISABLE_AVX512VBMI2__
|
||||
#endif /* __AVX512VBMI2__ */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
#else
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v32hi with A and B viewed as __v32hi.  */
#define _mm512_shrdi_epi16(A, B, C) \
  ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi) (__m512i) (A), \
                                          (__v32hi) (__m512i) (B), \
                                          (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v16si with A and B viewed as __v16si.  */
#define _mm512_shrdi_epi32(A, B, C) \
  ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si) (__m512i) (A), \
                                          (__v16si) (__m512i) (B), \
                                          (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v16si_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si) (__m512i) (C), \
                                               (__v16si) (__m512i) (D), (int) (E), \
                                               (__v16si) (__m512i) (A), (__mmask16) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v16si_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shrdi_epi32(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si) (__m512i) (B), \
                                               (__v16si) (__m512i) (C), (int) (D), \
                                               (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                               (__mmask16) (A)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v8di with A and B viewed as __v8di.  */
#define _mm512_shrdi_epi64(A, B, C) \
  ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di) (__m512i) (A), \
                                         (__v8di) (__m512i) (B), \
                                         (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v8di_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di) (__m512i) (C), \
                                              (__v8di) (__m512i) (D), \
                                              (int) (E), \
                                              (__v8di) (__m512i) (A), \
                                              (__mmask8) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v8di_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di) (__m512i) (B), \
                                              (__v8di) (__m512i) (C), (int) (D), \
                                              (__v8di) (__m512i) _mm512_setzero_si512 (), \
                                              (__mmask8) (A)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v32hi with A and B viewed as __v32hi.  */
#define _mm512_shldi_epi16(A, B, C) \
  ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi) (__m512i) (A), \
                                          (__v32hi) (__m512i) (B), \
                                          (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v16si with A and B viewed as __v16si.  */
#define _mm512_shldi_epi32(A, B, C) \
  ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si) (__m512i) (A), \
                                          (__v16si) (__m512i) (B), \
                                          (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v16si_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shldi_epi32(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si) (__m512i) (C), \
                                               (__v16si) (__m512i) (D), (int) (E), \
                                               (__v16si) (__m512i) (A), (__mmask16) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v16si_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shldi_epi32(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si) (__m512i) (B), \
                                               (__v16si) (__m512i) (C), (int) (D), \
                                               (__v16si) (__m512i) _mm512_setzero_si512 (), \
                                               (__mmask16) (A)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v8di with A and B viewed as __v8di.  */
#define _mm512_shldi_epi64(A, B, C) \
  ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di) (__m512i) (A), \
                                         (__v8di) (__m512i) (B), \
                                         (int) (C)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v8di_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shldi_epi64(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di) (__m512i) (C), \
                                              (__v8di) (__m512i) (D), \
                                              (int) (E), \
                                              (__v8di) (__m512i) (A), \
                                              (__mmask8) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v8di_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shldi_epi64(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di) (__m512i) (B), \
                                              (__v8di) (__m512i) (C), (int) (D), \
                                              (__v8di) (__m512i) _mm512_setzero_si512 (), \
                                              (__mmask8) (A)))
|
||||
#endif
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2__
|
||||
#undef __DISABLE_AVX512VBMI2__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2__ */
|
||||
|
||||
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2,avx512bw")
|
||||
#define __DISABLE_AVX512VBMI2BW__
|
||||
#endif /* !__AVX512VBMI2__ || !__AVX512BW__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C,
|
||||
(__v64qi)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B,
|
||||
(__v64qi)_mm512_setzero_si512 (), (__mmask64)__A);
|
||||
}
|
||||
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C,
|
||||
(__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B,
|
||||
(__v32hi)_mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C,
|
||||
(__v64qi) __A,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C,
|
||||
(__v64qi) __A, (__mmask64) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C,
|
||||
(__v32hi) __A,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C,
|
||||
(__v32hi) __A, (__mmask32) __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
|
||||
#else
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v32hi_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi) (__m512i) (C), \
                                               (__v32hi) (__m512i) (D), (int) (E), \
                                               (__v32hi) (__m512i) (A), (__mmask32) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshrd_v32hi_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shrdi_epi16(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi) (__m512i) (B), \
                                               (__v32hi) (__m512i) (C), (int) (D), \
                                               (__v32hi) (__m512i) _mm512_setzero_si512 (), \
                                               (__mmask32) (A)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v32hi_mask in (C, D, E, A, B) order.  */
#define _mm512_mask_shldi_epi16(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi) (__m512i) (C), \
                                               (__v32hi) (__m512i) (D), (int) (E), \
                                               (__v32hi) (__m512i) (A), (__mmask32) (B)))
|
||||
/* Macro form (non-__OPTIMIZE__ branch): expands to
   __builtin_ia32_vpshld_v32hi_mask with _mm512_setzero_si512 () as the
   fourth operand and A as the mask.  */
#define _mm512_maskz_shldi_epi16(A, B, C, D) \
  ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi) (__m512i) (B), \
                                               (__v32hi) (__m512i) (C), (int) (D), \
                                               (__v32hi) (__m512i) _mm512_setzero_si512 (), \
                                               (__mmask32) (A)))
|
||||
#endif
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2BW__
|
||||
#undef __DISABLE_AVX512VBMI2BW__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2BW__ */
|
||||
|
||||
#endif /* __AVX512VBMI2INTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,158 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VBMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi")
|
||||
#define __DISABLE_AVX512VBMI__
|
||||
#endif /* __AVX512VBMI__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512(),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
|
||||
(__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
|
||||
/* idx */ ,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI__
|
||||
#undef __DISABLE_AVX512VBMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI__ */
|
||||
|
||||
#endif /* _AVX512VBMIINTRIN_H_INCLUDED */
|
@ -0,0 +1,273 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi,avx512vl")
|
||||
#define __DISABLE_AVX512VBMIVL__
|
||||
#endif /* !__AVX512VL__ || !__AVX512VBMI__ */
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
|
||||
(__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
|
||||
/* idx */ ,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
|
||||
(__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
|
||||
/* idx */ ,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMIVL__
|
||||
#undef __DISABLE_AVX512VBMIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMIVL__ */
|
||||
|
||||
#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,144 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
|
||||
#define __AVX512VNNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni")
|
||||
#define __DISABLE_AVX512VNNI__
|
||||
#endif /* __AVX512VNNI__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VNNI__
|
||||
#undef __DISABLE_AVX512VNNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNI__ */
|
||||
|
||||
#endif /* __AVX512VNNIINTRIN_H_INCLUDED */
|
@ -0,0 +1,210 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni,avx512vl")
|
||||
#define __DISABLE_AVX512VNNIVL__
|
||||
#endif /* !__AVX512VL__ || !__AVX512VNNI__ */
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v8si and handed to
   __builtin_ia32_vpdpbusd_v8si; the result is cast to __m256i.  */
#define _mm256_dpbusd_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), (__v8si) (B), \
                                           (__v8si) (C)))
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v4si and handed to
   __builtin_ia32_vpdpbusd_v4si; the result is cast to __m128i.  */
#define _mm_dpbusd_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), (__v4si) (B), \
                                           (__v4si) (C)))
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v8si and handed to
   __builtin_ia32_vpdpbusds_v8si; the result is cast to __m256i.  */
#define _mm256_dpbusds_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), (__v8si) (B), \
                                            (__v8si) (C)))
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v4si and handed to
   __builtin_ia32_vpdpbusds_v4si; the result is cast to __m128i.  */
#define _mm_dpbusds_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), (__v4si) (B), \
                                            (__v4si) (C)))
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v8si and handed to
   __builtin_ia32_vpdpwssd_v8si; the result is cast to __m256i.  */
#define _mm256_dpwssd_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), (__v8si) (B), \
                                           (__v8si) (C)))
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v4si and handed to
   __builtin_ia32_vpdpwssd_v4si; the result is cast to __m128i.  */
#define _mm_dpwssd_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), (__v4si) (B), \
                                           (__v4si) (C)))
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v8si and handed to
   __builtin_ia32_vpdpwssds_v8si; the result is cast to __m256i.  */
#define _mm256_dpwssds_epi32(A, B, C) \
  ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), (__v8si) (B), \
                                            (__v8si) (C)))
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
/* Macro form: A, B and C are each cast to __v4si and handed to
   __builtin_ia32_vpdpwssds_v4si; the result is cast to __m128i.  */
#define _mm_dpwssds_epi32(A, B, C) \
  ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), (__v4si) (B), \
                                            (__v4si) (C)))
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VNNIVL__
|
||||
#undef __DISABLE_AVX512VNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNIVL__ */
|
||||
#endif /* _AVX512VNNIVLINTRIN_H_INCLUDED */
|
@ -0,0 +1,58 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VP2INTERSECT__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect")
|
||||
#define __DISABLE_AVX512VP2INTERSECT__
|
||||
#endif /* __AVX512VP2INTERSECT__ */
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U,
|
||||
__mmask16 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECT__
|
||||
#undef __DISABLE_AVX512VP2INTERSECT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VP2INTERSECT__ */
|
||||
|
||||
#endif /* _AVX512VP2INTERSECTINTRIN_H_INCLUDED */
|
@ -0,0 +1,72 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect,avx512vl")
|
||||
#define __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#endif /* __AVX512VP2INTERSECTVL__ */
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
|
||||
}
|
||||
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#undef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VP2INTERSECTVL__ */
|
||||
|
||||
#endif /* _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED */
|
@ -0,0 +1,94 @@
|
||||
/* Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VPOPCNTDQ__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq")
|
||||
#define __DISABLE_AVX512VPOPCNTDQ__
|
||||
#endif /* __AVX512VPOPCNTDQ__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQ__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQINTRIN_H_INCLUDED */
|
@ -0,0 +1,146 @@
|
||||
/* Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq,avx512vl")
|
||||
#define __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#endif /* __AVX512VPOPCNTDQVL__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi32 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi32 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi64 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi64 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
|
@ -0,0 +1,78 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVXIFMAINTRIN_H_INCLUDED
|
||||
#define _AVXIFMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVXIFMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxifma")
|
||||
#define __DISABLE_AVXIFMA__
|
||||
#endif /* __AVXIFMA__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128 ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128 ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256 ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256 ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVXIFMA__
|
||||
#undef __DISABLE_AVXIFMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVXIFMA__ */
|
||||
|
||||
#endif /* _AVXIFMAINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,140 @@
|
||||
/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxneconvertintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVXNECONVERTINTRIN_H_INCLUDED
|
||||
#define _AVXNECONVERTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVXNECONVERT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("avxneconvert")
|
||||
#define __DISABLE_AVXNECONVERT__
|
||||
#endif /* __AVXNECONVERT__ */
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_bcstnebf16_ps (const void *__P)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vbcstnebf162ps128 ((const __bf16 *) __P);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_bcstnebf16_ps (const void *__P)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vbcstnebf162ps256 ((const __bf16 *) __P);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_bcstnesh_ps (const void *__P)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vbcstnesh2ps128 ((const _Float16 *) __P);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_bcstnesh_ps (const void *__P)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vbcstnesh2ps256 ((const _Float16 *) __P);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneebf16_ps (const __m128bh *__A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtneebf162ps128 ((const __v8bf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneebf16_ps (const __m256bh *__A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtneebf162ps256 ((const __v16bf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneeph_ps (const __m128h *__A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtneeph2ps128 ((const __v8hf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneeph_ps (const __m256h *__A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtneeph2ps256 ((const __v16hf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneobf16_ps (const __m128bh *__A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtneobf162ps128 ((const __v8bf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneobf16_ps (const __m256bh *__A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtneobf162ps256 ((const __v16bf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneoph_ps (const __m128h *__A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtneoph2ps128 ((const __v8hf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneoph_ps (const __m256h *__A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtneoph2ps256 ((const __v16hf *) __A);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneps_avx_pbh (__m128 __A)
|
||||
{
|
||||
return (__m128bh) __builtin_ia32_cvtneps2bf16_v4sf (__A);
|
||||
}
|
||||
|
||||
extern __inline __m128bh
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneps_avx_pbh (__m256 __A)
|
||||
{
|
||||
return (__m128bh) __builtin_ia32_cvtneps2bf16_v8sf (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVXNECONVERT__
|
||||
#undef __DISABLE_AVXNECONVERT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVXNECONVERT__ */
|
||||
|
||||
#endif /* _AVXNECONVERTINTRIN_H_INCLUDED */
|
@ -0,0 +1,138 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED
|
||||
#define _AVXVNNIINT8INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVXVNNIINT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxvnniint8")
|
||||
#define __DISABLE_AVXVNNIINT8__
|
||||
#endif /* __AVXVNNIINT8__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVXVNNIINT8__
|
||||
#undef __DISABLE_AVXVNNIINT8__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVXVNNIINT8__ */
|
||||
|
||||
#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
|
@ -0,0 +1,113 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVXVNNIINTRIN_H_INCLUDED
|
||||
#define _AVXVNNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVXVNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxvnni")
|
||||
#define __DISABLE_AVXVNNIVL__
|
||||
#endif /* __AVXVNNIVL__ */
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVXVNNIVL__
|
||||
#undef __DISABLE_AVXVNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVXVNNIVL__ */
|
||||
#endif /* _AVXVNNIINTRIN_H_INCLUDED */
|
@ -0,0 +1,109 @@
|
||||
/* Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMI2INTRIN_H_INCLUDED
|
||||
#define _BMI2INTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi2")
|
||||
#define __DISABLE_BMI2__
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
/* 32-bit wrapper around the bzhi builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y)
{
  const unsigned int __result = __builtin_ia32_bzhi_si (__X, __Y);
  return __result;
}
|
||||
|
||||
/* 32-bit wrapper around the pdep builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32 (unsigned int __X, unsigned int __Y)
{
  const unsigned int __result = __builtin_ia32_pdep_si (__X, __Y);
  return __result;
}
|
||||
|
||||
/* 32-bit wrapper around the pext builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32 (unsigned int __X, unsigned int __Y)
{
  const unsigned int __result = __builtin_ia32_pext_si (__X, __Y);
  return __result;
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
/* 64-bit wrapper around the bzhi builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
{
  const unsigned long long __result = __builtin_ia32_bzhi_di (__X, __Y);
  return __result;
}
|
||||
|
||||
/* 64-bit wrapper around the pdep builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u64 (unsigned long long __X, unsigned long long __Y)
{
  const unsigned long long __result = __builtin_ia32_pdep_di (__X, __Y);
  return __result;
}
|
||||
|
||||
/* 64-bit wrapper around the pext builtin: forwards __X and __Y unchanged and
   returns the builtin's result.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u64 (unsigned long long __X, unsigned long long __Y)
{
  const unsigned long long __result = __builtin_ia32_pext_di (__X, __Y);
  return __result;
}
|
||||
|
||||
/* Full 64x64 -> 128-bit unsigned multiply of __X and __Y.
   The upper 64 bits of the product are stored through __P; the lower 64 bits
   are the return value.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
           unsigned long long *__P)
{
  /* Widen one operand first so the multiplication is done in 128 bits.  */
  const unsigned __int128 __wide_x = (unsigned __int128) __X;
  const unsigned __int128 __product = __wide_x * __Y;
  const unsigned long long __low = (unsigned long long) __product;

  *__P = (unsigned long long) (__product >> 64);
  return __low;
}
|
||||
|
||||
#else /* !__x86_64__ */
|
||||
|
||||
/* Full 32x32 -> 64-bit unsigned multiply of __X and __Y.
   The upper 32 bits of the product are stored through __P; the lower 32 bits
   are the return value.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
{
  /* Widen one operand first so the multiplication is done in 64 bits.  */
  const unsigned long long __wide_x = (unsigned long long) __X;
  const unsigned long long __product = __wide_x * __Y;
  const unsigned int __low = (unsigned int) __product;

  *__P = (unsigned int) (__product >> 32);
  return __low;
}
|
||||
|
||||
#endif /* !__x86_64__ */
|
||||
|
||||
#ifdef __DISABLE_BMI2__
|
||||
#undef __DISABLE_BMI2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI2__ */
|
||||
|
||||
#endif /* _BMI2INTRIN_H_INCLUDED */
|
@ -0,0 +1,202 @@
|
||||
/* Copyright (C) 2010-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMIINTRIN_H_INCLUDED
|
||||
#define _BMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi")
|
||||
#define __DISABLE_BMI__
|
||||
#endif /* __BMI__ */
|
||||
|
||||
/* Return the result of the 16-bit tzcnt builtin applied to __X.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u16 (unsigned short __X)
{
  const unsigned short __count = __builtin_ia32_tzcnt_u16 (__X);
  return __count;
}
|
||||
|
||||
/* Single-underscore spelling: likewise returns the result of the 16-bit tzcnt
   builtin applied to __X.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u16 (unsigned short __X)
{
  const unsigned short __count = __builtin_ia32_tzcnt_u16 (__X);
  return __count;
}
|
||||
|
||||
/* Return __Y with every bit that is set in __X cleared, i.e. (~__X) & __Y.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u32 (unsigned int __X, unsigned int __Y)
{
  const unsigned int __inverted = ~__X;
  return __inverted & __Y;
}
|
||||
|
||||
/* Single-underscore spelling of the 32-bit and-not: returns (~__X) & __Y.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_andn_u32 (unsigned int __X, unsigned int __Y)
{
  return __Y & ~__X;
}
|
||||
|
||||
/* 32-bit wrapper around the bextr builtin: __X and __Y are forwarded as-is
   (__Y is the already-packed second operand) and the builtin's result is
   returned.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u32 (unsigned int __X, unsigned int __Y)
{
  const unsigned int __field = __builtin_ia32_bextr_u32 (__X, __Y);
  return __field;
}
|
||||
|
||||
/* Three-operand form of the 32-bit bextr wrapper.  The builtin's second
   operand is assembled here: the low byte of __Y goes into bits 0-7 and the
   low byte of __Z into bits 8-15.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
{
  const unsigned int __byte0 = __Y & 0xff;
  const unsigned int __byte1 = (__Z & 0xff) << 8;

  return __builtin_ia32_bextr_u32 (__X, __byte0 | __byte1);
}
|
||||
|
||||
/* Return __X ANDed with its own (modular) negation.  For unsigned operands
   this keeps only the lowest set bit; the result is 0 when __X is 0.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u32 (unsigned int __X)
{
  const unsigned int __negated = 0u - __X;
  return __negated & __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X & -__X, which keeps only the lowest
   set bit of __X (0 when __X is 0).  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u32 (unsigned int __X)
{
  return (0u - __X) & __X;
}
|
||||
|
||||
/* Return __X XOR (__X - 1): a mask covering the lowest set bit of __X and
   every bit below it.  With __X == 0 the subtraction wraps, so all bits are
   set.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u32 (unsigned int __X)
{
  const unsigned int __minus_one = __X - 1u;
  return __minus_one ^ __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X ^ (__X - 1), the mask up to and
   including the lowest set bit (all ones when __X is 0).  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u32 (unsigned int __X)
{
  return (__X - 1u) ^ __X;
}
|
||||
|
||||
/* Return __X AND (__X - 1), i.e. __X with its lowest set bit cleared
   (0 stays 0).  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u32 (unsigned int __X)
{
  const unsigned int __minus_one = __X - 1u;
  return __minus_one & __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X & (__X - 1), clearing the lowest
   set bit of __X (0 stays 0).  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u32 (unsigned int __X)
{
  return (__X - 1u) & __X;
}
|
||||
|
||||
/* Return the result of the 32-bit tzcnt builtin applied to __X.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u32 (unsigned int __X)
{
  const unsigned int __count = __builtin_ia32_tzcnt_u32 (__X);
  return __count;
}
|
||||
|
||||
/* Single-underscore spelling: likewise returns the result of the 32-bit tzcnt
   builtin applied to __X.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u32 (unsigned int __X)
{
  const unsigned int __count = __builtin_ia32_tzcnt_u32 (__X);
  return __count;
}
|
||||
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Return __Y with every bit that is set in __X cleared, i.e. (~__X) & __Y.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
  const unsigned long long __inverted = ~__X;
  return __inverted & __Y;
}
|
||||
|
||||
/* Single-underscore spelling of the 64-bit and-not: returns (~__X) & __Y.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_andn_u64 (unsigned long long __X, unsigned long long __Y)
{
  return __Y & ~__X;
}
|
||||
|
||||
/* 64-bit wrapper around the bextr builtin: __X and __Y are forwarded as-is
   (__Y is the already-packed second operand) and the builtin's result is
   returned.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u64 (unsigned long long __X, unsigned long long __Y)
{
  const unsigned long long __field = __builtin_ia32_bextr_u64 (__X, __Y);
  return __field;
}
|
||||
|
||||
/* Three-operand form of the 64-bit bextr wrapper.  The builtin's second
   operand is assembled here: the low byte of __Y goes into bits 0-7 and the
   low byte of __Z into bits 8-15.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
{
  const unsigned int __byte0 = __Y & 0xff;
  const unsigned int __byte1 = (__Z & 0xff) << 8;

  return __builtin_ia32_bextr_u64 (__X, __byte0 | __byte1);
}
|
||||
|
||||
/* Return __X ANDed with its own (modular) negation.  For unsigned operands
   this keeps only the lowest set bit; the result is 0 when __X is 0.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u64 (unsigned long long __X)
{
  const unsigned long long __negated = 0ULL - __X;
  return __negated & __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X & -__X, which keeps only the lowest
   set bit of __X (0 when __X is 0).  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u64 (unsigned long long __X)
{
  return (0ULL - __X) & __X;
}
|
||||
|
||||
/* Return __X XOR (__X - 1): a mask covering the lowest set bit of __X and
   every bit below it.  With __X == 0 the subtraction wraps, so all bits are
   set.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u64 (unsigned long long __X)
{
  const unsigned long long __minus_one = __X - 1ULL;
  return __minus_one ^ __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X ^ (__X - 1), the mask up to and
   including the lowest set bit (all ones when __X is 0).  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u64 (unsigned long long __X)
{
  return (__X - 1ULL) ^ __X;
}
|
||||
|
||||
/* Return __X AND (__X - 1), i.e. __X with its lowest set bit cleared
   (0 stays 0).  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u64 (unsigned long long __X)
{
  const unsigned long long __minus_one = __X - 1ULL;
  return __minus_one & __X;
}
|
||||
|
||||
/* Single-underscore spelling: returns __X & (__X - 1), clearing the lowest
   set bit of __X (0 stays 0).  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u64 (unsigned long long __X)
{
  return (__X - 1ULL) & __X;
}
|
||||
|
||||
/* Return the result of the 64-bit tzcnt builtin applied to __X.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u64 (unsigned long long __X)
{
  const unsigned long long __count = __builtin_ia32_tzcnt_u64 (__X);
  return __count;
}
|
||||
|
||||
/* Single-underscore spelling: likewise returns the result of the 64-bit tzcnt
   builtin applied to __X.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u64 (unsigned long long __X)
{
  const unsigned long long __count = __builtin_ia32_tzcnt_u64 (__X);
  return __count;
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __DISABLE_BMI__
|
||||
#undef __DISABLE_BMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI__ */
|
||||
|
||||
#endif /* _BMIINTRIN_H_INCLUDED */
|
@ -0,0 +1,29 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _BMMINTRIN_H_INCLUDED
|
||||
#define _BMMINTRIN_H_INCLUDED
|
||||
|
||||
# error "SSE5 instruction set removed from compiler"
|
||||
|
||||
#endif /* _BMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,93 @@
|
||||
/* ELF program property for Intel CET.
|
||||
Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation; either version 3, or (at your option) any
|
||||
later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* Add x86 feature with IBT and/or SHSTK bits to ELF program property
|
||||
if they are enabled. Otherwise, contents in this header file are
|
||||
unused. Define _CET_ENDBR for assembly codes. _CET_ENDBR should be
|
||||
placed unconditionally at the entrance of a function whose address
|
||||
may be taken. */
|
||||
|
||||
#ifndef _CET_H_INCLUDED
|
||||
#define _CET_H_INCLUDED
|
||||
|
||||
#ifdef __ASSEMBLER__
|
||||
|
||||
# if defined __CET__ && (__CET__ & 1) != 0
|
||||
# ifdef __x86_64__
|
||||
# define _CET_ENDBR endbr64
|
||||
# else
|
||||
# define _CET_ENDBR endbr32
|
||||
# endif
|
||||
# else
|
||||
# define _CET_ENDBR
|
||||
# endif
|
||||
|
||||
# ifdef __ELF__
|
||||
# ifdef __CET__
|
||||
# if (__CET__ & 1) != 0
|
||||
/* GNU_PROPERTY_X86_FEATURE_1_IBT. */
|
||||
# define __PROPERTY_IBT 0x1
|
||||
# else
|
||||
# define __PROPERTY_IBT 0x0
|
||||
# endif
|
||||
|
||||
# if (__CET__ & 2) != 0
|
||||
/* GNU_PROPERTY_X86_FEATURE_1_SHSTK. */
|
||||
# define __PROPERTY_SHSTK 0x2
|
||||
# else
|
||||
# define __PROPERTY_SHSTK 0x0
|
||||
# endif
|
||||
|
||||
# define __PROPERTY_BITS (__PROPERTY_IBT | __PROPERTY_SHSTK)
|
||||
|
||||
# ifdef __LP64__
|
||||
# define __PROPERTY_ALIGN 3
|
||||
# else
|
||||
# define __PROPERTY_ALIGN 2
|
||||
# endif
|
||||
|
||||
.pushsection ".note.gnu.property", "a"
|
||||
.p2align __PROPERTY_ALIGN
|
||||
.long 1f - 0f /* name length. */
|
||||
.long 4f - 1f /* data length. */
|
||||
/* NT_GNU_PROPERTY_TYPE_0. */
|
||||
.long 5 /* note type. */
|
||||
0:
|
||||
.asciz "GNU" /* vendor name. */
|
||||
1:
|
||||
.p2align __PROPERTY_ALIGN
|
||||
/* GNU_PROPERTY_X86_FEATURE_1_AND. */
|
||||
.long 0xc0000002 /* pr_type. */
|
||||
.long 3f - 2f /* pr_datasz. */
|
||||
2:
|
||||
/* GNU_PROPERTY_X86_FEATURE_1_XXX. */
|
||||
.long __PROPERTY_BITS
|
||||
3:
|
||||
.p2align __PROPERTY_ALIGN
|
||||
4:
|
||||
.popsection
|
||||
# endif /* __CET__ */
|
||||
# endif /* __ELF__ */
|
||||
#endif /* __ASSEMBLER__ */
|
||||
|
||||
#endif /* _CET_H_INCLUDED */
|
@ -0,0 +1,129 @@
|
||||
/* Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CETINTRIN_H_INCLUDED
|
||||
#define _CETINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SHSTK__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("shstk")
|
||||
#define __DISABLE_SHSTK__
|
||||
#endif /* __SHSTK__ */
|
||||
|
||||
/* _get_ssp returns whatever the rdssp builtin yields.  Both the builtin
   (rdsspd / rdsspq) and the return width follow __x86_64__.
   NOTE(review): presumably this reads the CET shadow-stack pointer -- confirm
   against the Intel SDM.  */
#ifndef __x86_64__
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
  const unsigned int __ssp = __builtin_ia32_rdsspd ();
  return __ssp;
}
#else
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
  const unsigned long long __ssp = __builtin_ia32_rdsspq ();
  return __ssp;
}
#endif
|
||||
|
||||
/* Pass __B to the incssp builtin.  On x86-64 the value is first widened to
   unsigned long long for the q-form builtin; otherwise the d-form takes it
   directly.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_inc_ssp (unsigned int __B)
{
#ifndef __x86_64__
  __builtin_ia32_incsspd (__B);
#else
  const unsigned long long __count = (unsigned long long) __B;
  __builtin_ia32_incsspq (__count);
#endif
}
|
||||
|
||||
/* Invoke the saveprevssp builtin; takes no arguments and returns nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_saveprevssp (void)
{
  (void) (__builtin_ia32_saveprevssp (), 0);
}
|
||||
|
||||
/* Invoke the rstorssp builtin on the pointer __B, passed through
   unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rstorssp (void *__B)
{
  void *const __token = __B;
  __builtin_ia32_rstorssp (__token);
}
|
||||
|
||||
/* Invoke the wrssd builtin with the 32-bit value __B and the pointer __C, in
   that order.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssd (unsigned int __B, void *__C)
{
  void *const __dest = __C;
  __builtin_ia32_wrssd (__B, __dest);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Invoke the wrssq builtin with the 64-bit value __B and the pointer __C, in
   that order.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssq (unsigned long long __B, void *__C)
{
  void *const __dest = __C;
  __builtin_ia32_wrssq (__B, __dest);
}
|
||||
#endif
|
||||
|
||||
/* Invoke the wrussd builtin with the 32-bit value __B and the pointer __C, in
   that order.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussd (unsigned int __B, void *__C)
{
  void *const __dest = __C;
  __builtin_ia32_wrussd (__B, __dest);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Invoke the wrussq builtin with the 64-bit value __B and the pointer __C, in
   that order.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussq (unsigned long long __B, void *__C)
{
  void *const __dest = __C;
  __builtin_ia32_wrussq (__B, __dest);
}
|
||||
#endif
|
||||
|
||||
/* Invoke the setssbsy builtin; takes no arguments and returns nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_setssbsy (void)
{
  (void) (__builtin_ia32_setssbsy (), 0);
}
|
||||
|
||||
/* Invoke the clrssbsy builtin on the pointer __B, passed through
   unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_clrssbsy (void *__B)
{
  void *const __token = __B;
  __builtin_ia32_clrssbsy (__token);
}
|
||||
|
||||
#ifdef __DISABLE_SHSTK__
|
||||
#undef __DISABLE_SHSTK__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SHSTK__ */
|
||||
|
||||
#endif /* _CETINTRIN_H_INCLUDED. */
|
@ -0,0 +1,47 @@
|
||||
/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLDEMOTE_H_INCLUDED
|
||||
#define _CLDEMOTE_H_INCLUDED
|
||||
|
||||
#ifndef __CLDEMOTE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("cldemote")
|
||||
#define __DISABLE_CLDEMOTE__
|
||||
#endif /* __CLDEMOTE__ */
|
||||
/* Invoke the cldemote builtin on the address __A, passed through
   unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cldemote (void *__A)
{
  void *const __addr = __A;
  __builtin_ia32_cldemote (__addr);
}
|
||||
#ifdef __DISABLE_CLDEMOTE__
|
||||
#undef __DISABLE_CLDEMOTE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLDEMOTE__ */
|
||||
|
||||
#endif /* _CLDEMOTE_H_INCLUDED */
|
@ -0,0 +1,49 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
#define _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLFLUSHOPT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clflushopt")
|
||||
#define __DISABLE_CLFLUSHOPT__
|
||||
#endif /* __CLFLUSHOPT__ */
|
||||
|
||||
/* Invoke the clflushopt builtin on the address __A, passed through
   unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflushopt (void *__A)
{
  void *const __addr = __A;
  __builtin_ia32_clflushopt (__addr);
}
|
||||
|
||||
#ifdef __DISABLE_CLFLUSHOPT__
|
||||
#undef __DISABLE_CLFLUSHOPT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLFLUSHOPT__ */
|
||||
|
||||
#endif /* _CLFLUSHOPTINTRIN_H_INCLUDED */
|
@ -0,0 +1,49 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLWBINTRIN_H_INCLUDED
|
||||
#define _CLWBINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLWB__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clwb")
|
||||
#define __DISABLE_CLWB__
|
||||
#endif /* __CLWB__ */
|
||||
|
||||
/* Invoke the clwb builtin on the address __A, passed through unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clwb (void *__A)
{
  void *const __addr = __A;
  __builtin_ia32_clwb (__addr);
}
|
||||
|
||||
#ifdef __DISABLE_CLWB__
|
||||
#undef __DISABLE_CLWB__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLWB__ */
|
||||
|
||||
#endif /* _CLWBINTRIN_H_INCLUDED */
|
@ -0,0 +1,44 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _CLZEROINTRIN_H_INCLUDED
|
||||
#define _CLZEROINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLZERO__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clzero")
|
||||
#define __DISABLE_CLZERO__
|
||||
#endif /* __CLZERO__ */
|
||||
|
||||
/* Invoke the clzero builtin on the address __I, passed through unchanged.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clzero (void * __I)
{
  void *const __addr = __I;
  __builtin_ia32_clzero (__addr);
}
|
||||
|
||||
#ifdef __DISABLE_CLZERO__
|
||||
#undef __DISABLE_CLZERO__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLZERO__ */
|
||||
|
||||
#endif /* _CLZEROINTRIN_H_INCLUDED */
|
@ -0,0 +1,89 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
#error "Never use <cmpccxaddintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CMPCCXADDINTRIN_H_INCLUDED
|
||||
#define _CMPCCXADDINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __CMPCCXADD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("cmpccxadd")
|
||||
#define __DISABLE_CMPCCXADD__
|
||||
#endif /* __CMPCCXADD__ */
|
||||
|
||||
/* Condition selectors taken as the last argument of _cmpccxadd_epi32 and
   _cmpccxadd_epi64.  The values are consecutive, starting at 0 for
   _CMPCCX_O; they are spelled out here so the numbering is visible.  */
typedef enum {
  _CMPCCX_O   = 0,   /* Overflow.  */
  _CMPCCX_NO  = 1,   /* No overflow.  */
  _CMPCCX_B   = 2,   /* Below.  */
  _CMPCCX_NB  = 3,   /* Not below.  */
  _CMPCCX_Z   = 4,   /* Zero.  */
  _CMPCCX_NZ  = 5,   /* Not zero.  */
  _CMPCCX_BE  = 6,   /* Below or equal.  */
  _CMPCCX_NBE = 7,   /* Neither below nor equal.  */
  _CMPCCX_S   = 8,   /* Sign.  */
  _CMPCCX_NS  = 9,   /* No sign.  */
  _CMPCCX_P   = 10,  /* Parity.  */
  _CMPCCX_NP  = 11,  /* No parity.  */
  _CMPCCX_L   = 12,  /* Less.  */
  _CMPCCX_NL  = 13,  /* Not less.  */
  _CMPCCX_LE  = 14,  /* Less or equal.  */
  _CMPCCX_NLE = 15   /* Neither less nor equal.  */
} _CMPCCX_ENUM;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline int
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cmpccxadd_epi32 (int *__A, int __B, int __C, const _CMPCCX_ENUM __D)
|
||||
{
|
||||
return __builtin_ia32_cmpccxadd (__A, __B, __C, __D);
|
||||
}
|
||||
|
||||
extern __inline long long
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cmpccxadd_epi64 (long long *__A, long long __B, long long __C,
|
||||
const _CMPCCX_ENUM __D)
|
||||
{
|
||||
return __builtin_ia32_cmpccxadd64 (__A, __B, __C, __D);
|
||||
}
|
||||
#else
|
||||
/* Macro forms used when __OPTIMIZE__ is not defined.  Each argument is cast
   to the type the corresponding builtin parameter expects and then passed
   straight through.  */
#define _cmpccxadd_epi32(A,B,C,D)                                       \
  __builtin_ia32_cmpccxadd ((int *) (A),                                \
                            (int) (B),                                  \
                            (int) (C),                                  \
                            (_CMPCCX_ENUM) (D))

#define _cmpccxadd_epi64(A,B,C,D)                                       \
  __builtin_ia32_cmpccxadd64 ((long long *) (A),                        \
                              (long long) (B),                          \
                              (long long) (C),                          \
                              (_CMPCCX_ENUM) (D))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_CMPCCXADD__
|
||||
#undef __DISABLE_CMPCCXADD__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CMPCCXADD__ */
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _CMPCCXADDINTRIN_H_INCLUDED */
|
@ -0,0 +1,348 @@
|
||||
/*
|
||||
* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 3, or (at your option) any
|
||||
* later version.
|
||||
*
|
||||
* This file is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Under Section 7 of GPL version 3, you are granted additional
|
||||
* permissions described in the GCC Runtime Library Exception, version
|
||||
* 3.1, as published by the Free Software Foundation.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License and
|
||||
* a copy of the GCC Runtime Library Exception along with this program;
|
||||
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
* <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef _CPUID_H_INCLUDED
|
||||
#define _CPUID_H_INCLUDED
|
||||
|
||||
/* %eax */
|
||||
#define bit_RAOINT (1 << 3)
|
||||
#define bit_AVXVNNI (1 << 4)
|
||||
#define bit_AVX512BF16 (1 << 5)
|
||||
#define bit_CMPCCXADD (1 << 7)
|
||||
#define bit_AMX_FP16 (1 << 21)
|
||||
#define bit_HRESET (1 << 22)
|
||||
#define bit_AVXIFMA (1 << 23)
|
||||
|
||||
/* %ecx */
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_LZCNT (1 << 5)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_FMA (1 << 12)
|
||||
#define bit_CMPXCHG16B (1 << 13)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_MOVBE (1 << 22)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
#define bit_AES (1 << 25)
|
||||
#define bit_XSAVE (1 << 26)
|
||||
#define bit_OSXSAVE (1 << 27)
|
||||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
/* %edx */
|
||||
#define bit_AVXVNNIINT8 (1 << 4)
|
||||
#define bit_AVXNECONVERT (1 << 5)
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_PREFETCHI (1 << 14)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
|
||||
/* Extended Features (%eax == 0x80000001) */
|
||||
/* %ecx */
|
||||
#define bit_LAHF_LM (1 << 0)
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
#define bit_PRFCHW (1 << 8)
|
||||
#define bit_XOP (1 << 11)
|
||||
#define bit_LWP (1 << 15)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
#define bit_MWAITX (1 << 29)
|
||||
|
||||
/* %edx */
|
||||
#define bit_MMXEXT (1 << 22)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1u << 31)
|
||||
|
||||
/* %ebx */
|
||||
#define bit_CLZERO (1 << 0)
|
||||
#define bit_WBNOINVD (1 << 9)
|
||||
|
||||
/* Extended Features (%eax == 7) */
|
||||
/* %ebx */
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_SGX (1 << 2)
|
||||
#define bit_BMI (1 << 3)
|
||||
#define bit_HLE (1 << 4)
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#define bit_BMI2 (1 << 8)
|
||||
#define bit_RTM (1 << 11)
|
||||
#define bit_AVX512F (1 << 16)
|
||||
#define bit_AVX512DQ (1 << 17)
|
||||
#define bit_RDSEED (1 << 18)
|
||||
#define bit_ADX (1 << 19)
|
||||
#define bit_AVX512IFMA (1 << 21)
|
||||
#define bit_CLFLUSHOPT (1 << 23)
|
||||
#define bit_CLWB (1 << 24)
|
||||
#define bit_AVX512PF (1 << 26)
|
||||
#define bit_AVX512ER (1 << 27)
|
||||
#define bit_AVX512CD (1 << 28)
|
||||
#define bit_SHA (1 << 29)
|
||||
#define bit_AVX512BW (1 << 30)
|
||||
#define bit_AVX512VL (1u << 31)
|
||||
|
||||
/* %ecx */
|
||||
#define bit_PREFETCHWT1 (1 << 0)
|
||||
#define bit_AVX512VBMI (1 << 1)
|
||||
#define bit_PKU (1 << 3)
|
||||
#define bit_OSPKE (1 << 4)
|
||||
#define bit_WAITPKG (1 << 5)
|
||||
#define bit_AVX512VBMI2 (1 << 6)
|
||||
#define bit_SHSTK (1 << 7)
|
||||
#define bit_GFNI (1 << 8)
|
||||
#define bit_VAES (1 << 9)
|
||||
#define bit_AVX512VNNI (1 << 11)
|
||||
#define bit_VPCLMULQDQ (1 << 10)
|
||||
#define bit_AVX512BITALG (1 << 12)
|
||||
#define bit_AVX512VPOPCNTDQ (1 << 14)
|
||||
#define bit_RDPID (1 << 22)
|
||||
#define bit_MOVDIRI (1 << 27)
|
||||
#define bit_MOVDIR64B (1 << 28)
|
||||
#define bit_ENQCMD (1 << 29)
|
||||
#define bit_CLDEMOTE (1 << 25)
|
||||
#define bit_KL (1 << 23)
|
||||
|
||||
/* %edx */
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
#define bit_AVX5124FMAPS (1 << 3)
|
||||
#define bit_AVX512VP2INTERSECT (1 << 8)
|
||||
#define bit_AVX512FP16 (1 << 23)
|
||||
#define bit_IBT (1 << 20)
|
||||
#define bit_UINTR (1 << 5)
|
||||
#define bit_PCONFIG (1 << 18)
|
||||
#define bit_SERIALIZE (1 << 14)
|
||||
#define bit_TSXLDTRK (1 << 16)
|
||||
#define bit_AMX_BF16 (1 << 22)
|
||||
#define bit_AMX_TILE (1 << 24)
|
||||
#define bit_AMX_INT8 (1 << 25)
|
||||
/* NOTE(review): this uses the same bit position (1 << 8) as
   bit_AVX512VP2INTERSECT, which is listed under the same %edx heading.
   Presumably this flag is reported by a different cpuid sub-leaf than
   the other entries of this group -- confirm against the Intel SDM
   before testing both bits on the same register value.  */
#define bit_AMX_COMPLEX (1 << 8)
|
||||
|
||||
/* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */
|
||||
#define bit_XSAVEOPT (1 << 0)
|
||||
#define bit_XSAVEC (1 << 1)
|
||||
#define bit_XSAVES (1 << 3)
|
||||
|
||||
/* PT sub leaf (%eax == 0x14, %ecx == 0) */
|
||||
/* %ebx */
|
||||
#define bit_PTWRITE (1 << 4)
|
||||
|
||||
/* Keylocker leaf (%eax == 0x19) */
|
||||
/* %ebx */
|
||||
#define bit_AESKLE ( 1<<0 )
|
||||
#define bit_WIDEKL ( 1<<2 )
|
||||
|
||||
|
||||
/* Signatures of different CPU implementations, as returned by
|
||||
cpuid with level 0. */
|
||||
#define signature_AMD_ebx 0x68747541
|
||||
#define signature_AMD_ecx 0x444d4163
|
||||
#define signature_AMD_edx 0x69746e65
|
||||
|
||||
#define signature_CENTAUR_ebx 0x746e6543
|
||||
#define signature_CENTAUR_ecx 0x736c7561
|
||||
#define signature_CENTAUR_edx 0x48727561
|
||||
|
||||
#define signature_CYRIX_ebx 0x69727943
|
||||
#define signature_CYRIX_ecx 0x64616574
|
||||
#define signature_CYRIX_edx 0x736e4978
|
||||
|
||||
#define signature_INTEL_ebx 0x756e6547
|
||||
#define signature_INTEL_ecx 0x6c65746e
|
||||
#define signature_INTEL_edx 0x49656e69
|
||||
|
||||
#define signature_TM1_ebx 0x6e617254
|
||||
#define signature_TM1_ecx 0x55504361
|
||||
#define signature_TM1_edx 0x74656d73
|
||||
|
||||
#define signature_TM2_ebx 0x756e6547
|
||||
#define signature_TM2_ecx 0x3638784d
|
||||
#define signature_TM2_edx 0x54656e69
|
||||
|
||||
#define signature_NSC_ebx 0x646f6547
|
||||
#define signature_NSC_ecx 0x43534e20
|
||||
#define signature_NSC_edx 0x79622065
|
||||
|
||||
#define signature_NEXGEN_ebx 0x4778654e
|
||||
#define signature_NEXGEN_ecx 0x6e657669
|
||||
#define signature_NEXGEN_edx 0x72446e65
|
||||
|
||||
#define signature_RISE_ebx 0x65736952
|
||||
#define signature_RISE_ecx 0x65736952
|
||||
#define signature_RISE_edx 0x65736952
|
||||
|
||||
#define signature_SIS_ebx 0x20536953
|
||||
#define signature_SIS_ecx 0x20536953
|
||||
#define signature_SIS_edx 0x20536953
|
||||
|
||||
#define signature_UMC_ebx 0x20434d55
|
||||
#define signature_UMC_ecx 0x20434d55
|
||||
#define signature_UMC_edx 0x20434d55
|
||||
|
||||
#define signature_VIA_ebx 0x20414956
|
||||
#define signature_VIA_ecx 0x20414956
|
||||
#define signature_VIA_edx 0x20414956
|
||||
|
||||
#define signature_VORTEX_ebx 0x74726f56
|
||||
#define signature_VORTEX_ecx 0x436f5320
|
||||
#define signature_VORTEX_edx 0x36387865
|
||||
|
||||
#define signature_SHANGHAI_ebx 0x68532020
|
||||
#define signature_SHANGHAI_ecx 0x20206961
|
||||
#define signature_SHANGHAI_edx 0x68676e61
|
||||
|
||||
/* __cpuid (level, a, b, c, d): execute the cpuid instruction with LEVEL
   as the %eax input and store the resulting %eax, %ebx, %ecx and %edx
   values in A, B, C and D.  A, B, C and D are used as asm output
   operands, so they must be lvalues.  */
#ifndef __x86_64__
|
||||
/* At least one cpu (Winchip 2) does not set %ebx and %ecx
|
||||
for cpuid leaf 1. Forcibly zero the two registers before
|
||||
calling cpuid as a precaution. */
|
||||
/* The zeroing is skipped only when LEVEL is a compile-time constant
   other than 1; in every other case %ebx and %ecx are preloaded with 0
   through the matching constraints "1" (0) and "2" (0).  */
#define __cpuid(level, a, b, c, d) \
|
||||
do { \
|
||||
if (__builtin_constant_p (level) && (level) != 1) \
|
||||
__asm__ __volatile__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level)); \
|
||||
else \
|
||||
__asm__ __volatile__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "1" (0), "2" (0)); \
|
||||
} while (0)
|
||||
#else
|
||||
/* When __x86_64__ is defined no registers are pre-zeroed; LEVEL in %eax
   is the only input operand.  */
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ __volatile__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
#endif
|
||||
|
||||
/* __cpuid_count (level, count, a, b, c, d): execute cpuid with LEVEL in
   %eax and COUNT in %ecx (COUNT is tied to the "=c" output through the
   matching constraint "2"), then store the resulting %eax, %ebx, %ecx
   and %edx in the lvalues A, B, C and D.  */
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ __volatile__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
|
||||
|
||||
/* Return highest supported input value for cpuid instruction. ext can
|
||||
be either 0x0 or 0x80000000 to return highest supported value for
|
||||
basic or extended cpuid information. Function returns 0 if cpuid
|
||||
is not supported or whatever cpuid returns in eax register. If sig
|
||||
pointer is non-null, then first four bytes of the signature
|
||||
(as found in ebx register) are returned in location pointed by sig. */
|
||||
|
||||
static __inline unsigned int
|
||||
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
|
||||
{
|
||||
/* __ecx and __edx only receive cpuid outputs below; their values are
   not used by this function.  */
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
|
||||
#ifndef __x86_64__
|
||||
/* See if we can use cpuid. On AMD64 we always can. */
|
||||
/* Both asm variants below perform the same probe:
     1. push EFLAGS twice; the first copy is restored by the final popf;
     2. pop the second copy into %0 and duplicate it into %1;
     3. toggle bit 0x00200000 (bit 21) in %0 and load %0 into EFLAGS;
     4. read EFLAGS back into %0.
   Afterwards __eax holds the flags as read back and __ebx the flags
   as they were before the toggle.  */
#if __GNUC__ >= 3
|
||||
__asm__ ("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
"xor{l}\t{%2, %0|%0, %2}\n\t"
|
||||
"push{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#else
|
||||
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
|
||||
nor alternatives in i386 code. */
|
||||
__asm__ ("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
"xorl\t%2, %0\n\t"
|
||||
"pushl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#endif
|
||||
|
||||
/* If bit 0x00200000 reads back unchanged, the toggle did not stick;
   report cpuid as unavailable.  */
if (!((__eax ^ __ebx) & 0x00200000))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
/* Host supports cpuid. Return highest supported cpuid input value. */
|
||||
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
|
||||
|
||||
/* The %ebx value is handed back only when the caller supplied a
   non-null __sig.  */
if (__sig)
|
||||
*__sig = __ebx;
|
||||
|
||||
return __eax;
|
||||
}
|
||||
|
||||
/* Return cpuid data for requested cpuid leaf, as found in returned
|
||||
eax, ebx, ecx and edx registers. The function checks if cpuid is
|
||||
supported and returns 1 for valid cpuid information or 0 for
|
||||
unsupported cpuid leaf. All pointers are required to be non-null. */
|
||||
|
||||
static __inline int
|
||||
__get_cpuid (unsigned int __leaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
/* Keep only the top bit of the leaf: the result is 0 or 0x80000000,
   and that value is used as the cpuid level for the maximum-leaf
   query.  */
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
|
||||
/* Fail when the query reported 0 or a maximum below the requested
   leaf; the output pointers are not written in that case.  */
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
|
||||
/* All four pointers are dereferenced unconditionally here.  */
__cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Same as above, but sub-leaf can be specified. */
|
||||
|
||||
static __inline int
|
||||
__get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
/* Keep only the top bit of the leaf: the result is 0 or 0x80000000,
   and that value is used as the cpuid level for the maximum-leaf
   query.  */
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
|
||||
/* Only the leaf is checked against the reported maximum; __subleaf is
   passed to cpuid without any validation.  The output pointers are
   not written when 0 is returned.  */
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
|
||||
/* All four pointers are dereferenced unconditionally here.  */
__cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Execute cpuid for __leaf / __subleaf and store the resulting %eax,
   %ebx, %ecx and %edx in __cpuid_info[0] through __cpuid_info[3], in
   that order.  No check against the highest supported leaf is made
   here, and nothing is returned.  */
static __inline void
|
||||
__cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
|
||||
{
|
||||
__cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
|
||||
__cpuid_info[2], __cpuid_info[3]);
|
||||
}
|
||||
|
||||
#endif /* _CPUID_H_INCLUDED */
|
@ -0,0 +1,72 @@
|
||||
/* Copyright (C) 2002-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef __CROSS_STDARG_H_INCLUDED
|
||||
#define __CROSS_STDARG_H_INCLUDED
|
||||
|
||||
/* Make sure that for non x64 targets cross builtins are defined. */
|
||||
#ifndef __x86_64__
|
||||
/* Call abi ms_abi. */
|
||||
#define __builtin_ms_va_list __builtin_va_list
|
||||
#define __builtin_ms_va_copy __builtin_va_copy
|
||||
#define __builtin_ms_va_start __builtin_va_start
|
||||
#define __builtin_ms_va_end __builtin_va_end
|
||||
|
||||
/* Call abi sysv_abi. */
|
||||
#define __builtin_sysv_va_list __builtin_va_list
|
||||
#define __builtin_sysv_va_copy __builtin_va_copy
|
||||
#define __builtin_sysv_va_start __builtin_va_start
|
||||
#define __builtin_sysv_va_end __builtin_va_end
|
||||
#endif
|
||||
|
||||
/* ms_abi wrappers.  copy/start/end forward to the __builtin_ms_va_*
   names; __ms_va_arg forwards to the generic __builtin_va_arg.  */
#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
|
||||
#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
|
||||
#define __ms_va_arg(__v,__l) __builtin_va_arg(__v,__l)
|
||||
#define __ms_va_end(__v) __builtin_ms_va_end(__v)
|
||||
|
||||
/* sysv_abi wrappers.  copy/start/end forward to the
   __builtin_sysv_va_* names; __sysv_va_arg forwards to the generic
   __builtin_va_arg.  */
#define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s)
|
||||
#define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l)
|
||||
#define __sysv_va_arg(__v,__l) __builtin_va_arg(__v,__l)
|
||||
#define __sysv_va_end(__v) __builtin_sysv_va_end(__v)
|
||||
|
||||
#ifndef __GNUC_SYSV_VA_LIST
|
||||
#define __GNUC_SYSV_VA_LIST
|
||||
typedef __builtin_sysv_va_list __gnuc_sysv_va_list;
|
||||
#endif
|
||||
|
||||
#ifndef _SYSV_VA_LIST_DEFINED
|
||||
#define _SYSV_VA_LIST_DEFINED
|
||||
typedef __gnuc_sysv_va_list sysv_va_list;
|
||||
#endif
|
||||
|
||||
#ifndef __GNUC_MS_VA_LIST
|
||||
#define __GNUC_MS_VA_LIST
|
||||
typedef __builtin_ms_va_list __gnuc_ms_va_list;
|
||||
#endif
|
||||
|
||||
#ifndef _MS_VA_LIST_DEFINED
|
||||
#define _MS_VA_LIST_DEFINED
|
||||
typedef __gnuc_ms_va_list ms_va_list;
|
||||
#endif
|
||||
|
||||
#endif /* __CROSS_STDARG_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,55 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _ENQCMDINTRIN_H_INCLUDED
|
||||
#define _ENQCMDINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __ENQCMD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("enqcmd")
|
||||
#define __DISABLE_ENQCMD__
|
||||
#endif /* __ENQCMD__ */
|
||||
|
||||
/* Pass __P and __Q unchanged to __builtin_ia32_enqcmd and return its
   int result.
   NOTE(review): presumably __P is the destination and __Q the source
   command data of the ENQCMD instruction, and the result is a status
   flag -- confirm against the GCC x86 built-in documentation.  */
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmd (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmd (__P, __Q);
|
||||
}
|
||||
|
||||
/* Pass __P and __Q unchanged to __builtin_ia32_enqcmds and return its
   int result.
   NOTE(review): presumably __P is the destination and __Q the source
   command data of the ENQCMDS instruction, and the result is a status
   flag -- confirm against the GCC x86 built-in documentation.  */
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmds (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmds (__P, __Q);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_ENQCMD__
|
||||
#undef __DISABLE_ENQCMD__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_ENQCMD__ */
|
||||
#endif /* _ENQCMDINTRIN_H_INCLUDED. */
|
@ -0,0 +1,98 @@
|
||||
/* Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _F16CINTRIN_H_INCLUDED
|
||||
#define _F16CINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __F16C__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("f16c")
|
||||
#define __DISABLE_F16C__
|
||||
#endif /* __F16C__ */
|
||||
|
||||
/* Put __S (cast to short) in element 0 of an 8 x short vector whose
   other elements are zero, run __builtin_ia32_vcvtph2ps on it and
   return element 0 of the resulting 4 x float vector.
   NOTE(review): presumably __S carries the bit pattern of a
   half-precision value -- confirm against the intrinsic
   documentation.  */
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtsh_ss (unsigned short __S)
|
||||
{
|
||||
__v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
|
||||
__v4sf __A = __builtin_ia32_vcvtph2ps (__H);
|
||||
return __builtin_ia32_vec_ext_v4sf (__A, 0);
|
||||
}
|
||||
|
||||
/* Reinterpret __A as an 8 x short vector, pass it to
   __builtin_ia32_vcvtph2ps and return the result as __m128.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
|
||||
}
|
||||
|
||||
/* Reinterpret __A as an 8 x short vector, pass it to
   __builtin_ia32_vcvtph2ps256 and return the result as __m256.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
|
||||
}
|
||||
|
||||
/* _cvtss_sh, _mm_cvtps_ph and _mm256_cvtps_ph are provided as inline
   functions when __OPTIMIZE__ is defined and as macros otherwise.
   NOTE(review): presumably the builtins need __I as a compile-time
   constant, which a function parameter only becomes after inlining --
   confirm against the GCC built-in documentation.  */
#ifdef __OPTIMIZE__
|
||||
/* Put __F in element 0 of a 4 x float vector (other elements zero),
   run __builtin_ia32_vcvtps2ph with __I as second argument and return
   element 0 of the 8 x short result as unsigned short.  */
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtss_sh (float __F, const int __I)
|
||||
{
|
||||
__v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
|
||||
return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
|
||||
}
|
||||
|
||||
/* Pass __A (as 4 x float) and __I to __builtin_ia32_vcvtps2ph and
   return the result as __m128i.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtps_ph (__m128 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
|
||||
}
|
||||
|
||||
/* Pass __A (as 8 x float) and __I to __builtin_ia32_vcvtps2ph256 and
   return the result as __m128i.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtps_ph (__m256 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
|
||||
}
|
||||
#else
|
||||
/* Macro forms of the three conversions above.  _cvtss_sh is a
   statement expression with the same three steps as the inline
   function; the other two cast their arguments and call the builtin
   directly.  */
#define _cvtss_sh(__F, __I) \
|
||||
(__extension__ \
|
||||
({ \
|
||||
__v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; \
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); \
|
||||
(unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); \
|
||||
}))
|
||||
|
||||
#define _mm_cvtps_ph(A, I) \
|
||||
((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
|
||||
|
||||
#define _mm256_cvtps_ph(A, I) \
|
||||
((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
|
||||
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_F16C__
|
||||
#undef __DISABLE_F16C__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_F16C__ */
|
||||
|
||||
#endif /* _F16CINTRIN_H_INCLUDED */
|
@ -0,0 +1,631 @@
|
||||
/* Copyright (C) 2002-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* ISO C Standard: 5.2.4.2.2 Characteristics of floating types <float.h>
|
||||
*/
|
||||
|
||||
#ifndef _FLOAT_H___
|
||||
#define _FLOAT_H___
|
||||
|
||||
/* Radix of exponent representation, b. */
|
||||
#undef FLT_RADIX
|
||||
#define FLT_RADIX __FLT_RADIX__
|
||||
|
||||
/* Number of base-FLT_RADIX digits in the significand, p. */
|
||||
#undef FLT_MANT_DIG
|
||||
#undef DBL_MANT_DIG
|
||||
#undef LDBL_MANT_DIG
|
||||
#define FLT_MANT_DIG __FLT_MANT_DIG__
|
||||
#define DBL_MANT_DIG __DBL_MANT_DIG__
|
||||
#define LDBL_MANT_DIG __LDBL_MANT_DIG__
|
||||
|
||||
/* Number of decimal digits, q, such that any floating-point number with q
|
||||
decimal digits can be rounded into a floating-point number with p radix b
|
||||
digits and back again without change to the q decimal digits,
|
||||
|
||||
p * log10(b) if b is a power of 10
|
||||
floor((p - 1) * log10(b)) otherwise
|
||||
*/
|
||||
#undef FLT_DIG
|
||||
#undef DBL_DIG
|
||||
#undef LDBL_DIG
|
||||
#define FLT_DIG __FLT_DIG__
|
||||
#define DBL_DIG __DBL_DIG__
|
||||
#define LDBL_DIG __LDBL_DIG__
|
||||
|
||||
/* Minimum int x such that FLT_RADIX**(x-1) is a normalized float, emin */
|
||||
#undef FLT_MIN_EXP
|
||||
#undef DBL_MIN_EXP
|
||||
#undef LDBL_MIN_EXP
|
||||
#define FLT_MIN_EXP __FLT_MIN_EXP__
|
||||
#define DBL_MIN_EXP __DBL_MIN_EXP__
|
||||
#define LDBL_MIN_EXP __LDBL_MIN_EXP__
|
||||
|
||||
/* Minimum negative integer such that 10 raised to that power is in the
|
||||
range of normalized floating-point numbers,
|
||||
|
||||
ceil(log10(b) * (emin - 1))
|
||||
*/
|
||||
#undef FLT_MIN_10_EXP
|
||||
#undef DBL_MIN_10_EXP
|
||||
#undef LDBL_MIN_10_EXP
|
||||
#define FLT_MIN_10_EXP __FLT_MIN_10_EXP__
|
||||
#define DBL_MIN_10_EXP __DBL_MIN_10_EXP__
|
||||
#define LDBL_MIN_10_EXP __LDBL_MIN_10_EXP__
|
||||
|
||||
/* Maximum int x such that FLT_RADIX**(x-1) is a representable float, emax. */
|
||||
#undef FLT_MAX_EXP
|
||||
#undef DBL_MAX_EXP
|
||||
#undef LDBL_MAX_EXP
|
||||
#define FLT_MAX_EXP __FLT_MAX_EXP__
|
||||
#define DBL_MAX_EXP __DBL_MAX_EXP__
|
||||
#define LDBL_MAX_EXP __LDBL_MAX_EXP__
|
||||
|
||||
/* Maximum integer such that 10 raised to that power is in the range of
|
||||
representable finite floating-point numbers,
|
||||
|
||||
floor(log10((1 - b**-p) * b**emax))
|
||||
*/
|
||||
#undef FLT_MAX_10_EXP
|
||||
#undef DBL_MAX_10_EXP
|
||||
#undef LDBL_MAX_10_EXP
|
||||
#define FLT_MAX_10_EXP __FLT_MAX_10_EXP__
|
||||
#define DBL_MAX_10_EXP __DBL_MAX_10_EXP__
|
||||
#define LDBL_MAX_10_EXP __LDBL_MAX_10_EXP__
|
||||
|
||||
/* Maximum representable finite floating-point number,
|
||||
|
||||
(1 - b**-p) * b**emax
|
||||
*/
|
||||
#undef FLT_MAX
|
||||
#undef DBL_MAX
|
||||
#undef LDBL_MAX
|
||||
#define FLT_MAX __FLT_MAX__
|
||||
#define DBL_MAX __DBL_MAX__
|
||||
#define LDBL_MAX __LDBL_MAX__
|
||||
|
||||
/* The difference between 1 and the least value greater than 1 that is
|
||||
representable in the given floating-point type, b**(1-p). */
|
||||
#undef FLT_EPSILON
|
||||
#undef DBL_EPSILON
|
||||
#undef LDBL_EPSILON
|
||||
#define FLT_EPSILON __FLT_EPSILON__
|
||||
#define DBL_EPSILON __DBL_EPSILON__
|
||||
#define LDBL_EPSILON __LDBL_EPSILON__
|
||||
|
||||
/* Minimum normalized positive floating-point number, b**(emin - 1). */
|
||||
#undef FLT_MIN
|
||||
#undef DBL_MIN
|
||||
#undef LDBL_MIN
|
||||
#define FLT_MIN __FLT_MIN__
|
||||
#define DBL_MIN __DBL_MIN__
|
||||
#define LDBL_MIN __LDBL_MIN__
|
||||
|
||||
/* Addition rounds to 0: zero, 1: nearest, 2: +inf, 3: -inf, -1: unknown. */
|
||||
/* ??? This is supposed to change with calls to fesetround in <fenv.h>. */
|
||||
#undef FLT_ROUNDS
|
||||
#define FLT_ROUNDS 1
|
||||
|
||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) \
|
||||
|| (defined (__cplusplus) && __cplusplus >= 201103L)
|
||||
/* The floating-point expression evaluation method. The precise
|
||||
definitions of these values are generalised to include support for
|
||||
the interchange and extended types defined in ISO/IEC TS 18661-3.
|
||||
Prior to this (for C99/C11) the definitions were:
|
||||
|
||||
-1 indeterminate
|
||||
0 evaluate all operations and constants just to the range and
|
||||
precision of the type
|
||||
1 evaluate operations and constants of type float and double
|
||||
to the range and precision of the double type, evaluate
|
||||
long double operations and constants to the range and
|
||||
precision of the long double type
|
||||
2 evaluate all operations and constants to the range and
|
||||
precision of the long double type
|
||||
|
||||
The TS 18661-3 definitions are:
|
||||
|
||||
-1 indeterminate
|
||||
0 evaluate all operations and constants, whose semantic type has
|
||||
at most the range and precision of float, to the range and
|
||||
precision of float; evaluate all other operations and constants
|
||||
to the range and precision of the semantic type.
|
||||
1 evaluate all operations and constants, whose semantic type has
|
||||
at most the range and precision of double, to the range and
|
||||
precision of double; evaluate all other operations and constants
|
||||
to the range and precision of the semantic type.
|
||||
2 evaluate all operations and constants, whose semantic type has
|
||||
at most the range and precision of long double, to the range and
|
||||
precision of long double; evaluate all other operations and
|
||||
constants to the range and precision of the semantic type.
|
||||
N where _FloatN is a supported interchange floating type
|
||||
evaluate all operations and constants, whose semantic type has
|
||||
at most the range and precision of the _FloatN type, to the
|
||||
range and precision of the _FloatN type; evaluate all other
|
||||
operations and constants to the range and precision of the
|
||||
semantic type.
|
||||
N + 1, where _FloatNx is a supported extended floating type
|
||||
evaluate operations and constants, whose semantic type has at
|
||||
most the range and precision of the _FloatNx type, to the range
|
||||
and precision of the _FloatNx type; evaluate all other
|
||||
operations and constants to the range and precision of the
|
||||
semantic type.
|
||||
|
||||
The compiler predefines two macros:
|
||||
|
||||
__FLT_EVAL_METHOD__
|
||||
Which, depending on the value given for
|
||||
-fpermitted-flt-eval-methods, may be limited to only those values
|
||||
for FLT_EVAL_METHOD defined in C99/C11.
|
||||
|
||||
__FLT_EVAL_METHOD_TS_18661_3__
|
||||
Which always permits the values for FLT_EVAL_METHOD defined in
|
||||
ISO/IEC TS 18661-3.
|
||||
|
||||
Here we want to use __FLT_EVAL_METHOD__, unless
|
||||
__STDC_WANT_IEC_60559_TYPES_EXT__ is defined, in which case the user
|
||||
is specifically asking for the ISO/IEC TS 18661-3 types, so we use
|
||||
__FLT_EVAL_METHOD_TS_18661_3__.
|
||||
|
||||
??? This ought to change with the setting of the fp control word;
|
||||
the value provided by the compiler assumes the widest setting. */
|
||||
#undef FLT_EVAL_METHOD
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
#define FLT_EVAL_METHOD __FLT_EVAL_METHOD_TS_18661_3__
|
||||
#else
|
||||
#define FLT_EVAL_METHOD __FLT_EVAL_METHOD__
|
||||
#endif
|
||||
|
||||
/* Number of decimal digits, n, such that any floating-point number in the
|
||||
widest supported floating type with pmax radix b digits can be rounded
|
||||
to a floating-point number with n decimal digits and back again without
|
||||
change to the value,
|
||||
|
||||
pmax * log10(b) if b is a power of 10
|
||||
ceil(1 + pmax * log10(b)) otherwise
|
||||
*/
|
||||
#undef DECIMAL_DIG
|
||||
#define DECIMAL_DIG __DECIMAL_DIG__
|
||||
|
||||
#endif /* C99 */
|
||||
|
||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) \
|
||||
|| (defined (__cplusplus) && __cplusplus >= 201703L)
|
||||
/* Versions of DECIMAL_DIG for each floating-point type. */
|
||||
#undef FLT_DECIMAL_DIG
|
||||
#undef DBL_DECIMAL_DIG
|
||||
#undef LDBL_DECIMAL_DIG
|
||||
#define FLT_DECIMAL_DIG __FLT_DECIMAL_DIG__
|
||||
#define DBL_DECIMAL_DIG __DBL_DECIMAL_DIG__
|
||||
#define LDBL_DECIMAL_DIG __LDBL_DECIMAL_DIG__
|
||||
|
||||
/* Whether types support subnormal numbers. */
|
||||
#undef FLT_HAS_SUBNORM
|
||||
#undef DBL_HAS_SUBNORM
|
||||
#undef LDBL_HAS_SUBNORM
|
||||
#define FLT_HAS_SUBNORM __FLT_HAS_DENORM__
|
||||
#define DBL_HAS_SUBNORM __DBL_HAS_DENORM__
|
||||
#define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
|
||||
|
||||
/* Minimum positive values, including subnormals. */
|
||||
#undef FLT_TRUE_MIN
|
||||
#undef DBL_TRUE_MIN
|
||||
#undef LDBL_TRUE_MIN
|
||||
#define FLT_TRUE_MIN __FLT_DENORM_MIN__
|
||||
#define DBL_TRUE_MIN __DBL_DENORM_MIN__
|
||||
#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
|
||||
|
||||
#endif /* C11 */
|
||||
|
||||
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
|
||||
/* Maximum finite positive value with MANT_DIG digits in the
|
||||
significand taking their maximum value. */
|
||||
#undef FLT_NORM_MAX
|
||||
#undef DBL_NORM_MAX
|
||||
#undef LDBL_NORM_MAX
|
||||
#define FLT_NORM_MAX __FLT_NORM_MAX__
|
||||
#define DBL_NORM_MAX __DBL_NORM_MAX__
|
||||
#define LDBL_NORM_MAX __LDBL_NORM_MAX__
|
||||
|
||||
/* Whether each type matches an IEC 60559 format. */
|
||||
#undef FLT_IS_IEC_60559
|
||||
#undef DBL_IS_IEC_60559
|
||||
#undef LDBL_IS_IEC_60559
|
||||
#define FLT_IS_IEC_60559 __FLT_IS_IEC_60559__
|
||||
#define DBL_IS_IEC_60559 __DBL_IS_IEC_60559__
|
||||
#define LDBL_IS_IEC_60559 __LDBL_IS_IEC_60559__
|
||||
|
||||
/* Infinity in type float; not defined if infinity not supported. */
|
||||
#if __FLT_HAS_INFINITY__
|
||||
#undef INFINITY
|
||||
#define INFINITY (__builtin_inff ())
|
||||
#endif
|
||||
|
||||
/* Quiet NaN, if supported for float. */
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
#undef NAN
|
||||
#define NAN (__builtin_nanf (""))
|
||||
#endif
|
||||
|
||||
/* Signaling NaN, if supported for each type. All formats supported
|
||||
by GCC support either both quiet and signaling NaNs, or neither
|
||||
kind of NaN. */
|
||||
#if __FLT_HAS_QUIET_NAN__
|
||||
#undef FLT_SNAN
|
||||
#define FLT_SNAN (__builtin_nansf (""))
|
||||
#endif
|
||||
#if __DBL_HAS_QUIET_NAN__
|
||||
#undef DBL_SNAN
|
||||
#define DBL_SNAN (__builtin_nans (""))
|
||||
#endif
|
||||
#if __LDBL_HAS_QUIET_NAN__
|
||||
#undef LDBL_SNAN
|
||||
#define LDBL_SNAN (__builtin_nansl (""))
|
||||
#endif
|
||||
|
||||
#endif /* C2X */
|
||||
|
||||
#if (defined __STDC_WANT_IEC_60559_BFP_EXT__ \
|
||||
|| defined __STDC_WANT_IEC_60559_EXT__)
|
||||
/* Number of decimal digits for which conversions between decimal
|
||||
character strings and binary formats, in both directions, are
|
||||
correctly rounded. */
|
||||
#define CR_DECIMAL_DIG __UINTMAX_MAX__
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
/* Constants for _FloatN and _FloatNx types from TS 18661-3. See
|
||||
comments above for their semantics. */
|
||||
|
||||
#ifdef __FLT16_MANT_DIG__
|
||||
#undef FLT16_MANT_DIG
|
||||
#define FLT16_MANT_DIG __FLT16_MANT_DIG__
|
||||
#undef FLT16_DIG
|
||||
#define FLT16_DIG __FLT16_DIG__
|
||||
#undef FLT16_MIN_EXP
|
||||
#define FLT16_MIN_EXP __FLT16_MIN_EXP__
|
||||
#undef FLT16_MIN_10_EXP
|
||||
#define FLT16_MIN_10_EXP __FLT16_MIN_10_EXP__
|
||||
#undef FLT16_MAX_EXP
|
||||
#define FLT16_MAX_EXP __FLT16_MAX_EXP__
|
||||
#undef FLT16_MAX_10_EXP
|
||||
#define FLT16_MAX_10_EXP __FLT16_MAX_10_EXP__
|
||||
#undef FLT16_MAX
|
||||
#define FLT16_MAX __FLT16_MAX__
|
||||
#undef FLT16_EPSILON
|
||||
#define FLT16_EPSILON __FLT16_EPSILON__
|
||||
#undef FLT16_MIN
|
||||
#define FLT16_MIN __FLT16_MIN__
|
||||
#undef FLT16_DECIMAL_DIG
|
||||
#define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
|
||||
#undef FLT16_TRUE_MIN
|
||||
#define FLT16_TRUE_MIN __FLT16_DENORM_MIN__
|
||||
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
|
||||
#undef FLT16_SNAN
|
||||
#define FLT16_SNAN (__builtin_nansf16 (""))
|
||||
#endif /* C2X */
|
||||
#endif /* __FLT16_MANT_DIG__. */
|
||||
|
||||
#ifdef __FLT32_MANT_DIG__
/* FLT32_* constants, each mapped onto the matching __FLT32_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT32_MANT_DIG
#undef FLT32_DIG
#undef FLT32_MIN_EXP
#undef FLT32_MIN_10_EXP
#undef FLT32_MAX_EXP
#undef FLT32_MAX_10_EXP
#undef FLT32_MAX
#undef FLT32_EPSILON
#undef FLT32_MIN
#undef FLT32_DECIMAL_DIG
#undef FLT32_TRUE_MIN
#define FLT32_MANT_DIG		__FLT32_MANT_DIG__
#define FLT32_DIG		__FLT32_DIG__
#define FLT32_MIN_EXP		__FLT32_MIN_EXP__
#define FLT32_MIN_10_EXP	__FLT32_MIN_10_EXP__
#define FLT32_MAX_EXP		__FLT32_MAX_EXP__
#define FLT32_MAX_10_EXP	__FLT32_MAX_10_EXP__
#define FLT32_MAX		__FLT32_MAX__
#define FLT32_EPSILON		__FLT32_EPSILON__
#define FLT32_MIN		__FLT32_MIN__
#define FLT32_DECIMAL_DIG	__FLT32_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT32_TRUE_MIN		__FLT32_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT32_SNAN
#define FLT32_SNAN	(__builtin_nansf32 (""))
#endif /* C2X */
#endif /* __FLT32_MANT_DIG__.  */
|
||||
|
||||
#ifdef __FLT64_MANT_DIG__
/* FLT64_* constants, each mapped onto the matching __FLT64_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT64_MANT_DIG
#undef FLT64_DIG
#undef FLT64_MIN_EXP
#undef FLT64_MIN_10_EXP
#undef FLT64_MAX_EXP
#undef FLT64_MAX_10_EXP
#undef FLT64_MAX
#undef FLT64_EPSILON
#undef FLT64_MIN
#undef FLT64_DECIMAL_DIG
#undef FLT64_TRUE_MIN
#define FLT64_MANT_DIG		__FLT64_MANT_DIG__
#define FLT64_DIG		__FLT64_DIG__
#define FLT64_MIN_EXP		__FLT64_MIN_EXP__
#define FLT64_MIN_10_EXP	__FLT64_MIN_10_EXP__
#define FLT64_MAX_EXP		__FLT64_MAX_EXP__
#define FLT64_MAX_10_EXP	__FLT64_MAX_10_EXP__
#define FLT64_MAX		__FLT64_MAX__
#define FLT64_EPSILON		__FLT64_EPSILON__
#define FLT64_MIN		__FLT64_MIN__
#define FLT64_DECIMAL_DIG	__FLT64_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT64_TRUE_MIN		__FLT64_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT64_SNAN
#define FLT64_SNAN	(__builtin_nansf64 (""))
#endif /* C2X */
#endif /* __FLT64_MANT_DIG__.  */
|
||||
|
||||
#ifdef __FLT128_MANT_DIG__
/* FLT128_* constants, each mapped onto the matching __FLT128_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT128_MANT_DIG
#undef FLT128_DIG
#undef FLT128_MIN_EXP
#undef FLT128_MIN_10_EXP
#undef FLT128_MAX_EXP
#undef FLT128_MAX_10_EXP
#undef FLT128_MAX
#undef FLT128_EPSILON
#undef FLT128_MIN
#undef FLT128_DECIMAL_DIG
#undef FLT128_TRUE_MIN
#define FLT128_MANT_DIG		__FLT128_MANT_DIG__
#define FLT128_DIG		__FLT128_DIG__
#define FLT128_MIN_EXP		__FLT128_MIN_EXP__
#define FLT128_MIN_10_EXP	__FLT128_MIN_10_EXP__
#define FLT128_MAX_EXP		__FLT128_MAX_EXP__
#define FLT128_MAX_10_EXP	__FLT128_MAX_10_EXP__
#define FLT128_MAX		__FLT128_MAX__
#define FLT128_EPSILON		__FLT128_EPSILON__
#define FLT128_MIN		__FLT128_MIN__
#define FLT128_DECIMAL_DIG	__FLT128_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT128_TRUE_MIN		__FLT128_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT128_SNAN
#define FLT128_SNAN	(__builtin_nansf128 (""))
#endif /* C2X */
#endif /* __FLT128_MANT_DIG__.  */
|
||||
|
||||
#ifdef __FLT32X_MANT_DIG__
/* FLT32X_* constants, each mapped onto the matching __FLT32X_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT32X_MANT_DIG
#undef FLT32X_DIG
#undef FLT32X_MIN_EXP
#undef FLT32X_MIN_10_EXP
#undef FLT32X_MAX_EXP
#undef FLT32X_MAX_10_EXP
#undef FLT32X_MAX
#undef FLT32X_EPSILON
#undef FLT32X_MIN
#undef FLT32X_DECIMAL_DIG
#undef FLT32X_TRUE_MIN
#define FLT32X_MANT_DIG		__FLT32X_MANT_DIG__
#define FLT32X_DIG		__FLT32X_DIG__
#define FLT32X_MIN_EXP		__FLT32X_MIN_EXP__
#define FLT32X_MIN_10_EXP	__FLT32X_MIN_10_EXP__
#define FLT32X_MAX_EXP		__FLT32X_MAX_EXP__
#define FLT32X_MAX_10_EXP	__FLT32X_MAX_10_EXP__
#define FLT32X_MAX		__FLT32X_MAX__
#define FLT32X_EPSILON		__FLT32X_EPSILON__
#define FLT32X_MIN		__FLT32X_MIN__
#define FLT32X_DECIMAL_DIG	__FLT32X_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT32X_TRUE_MIN		__FLT32X_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT32X_SNAN
#define FLT32X_SNAN	(__builtin_nansf32x (""))
#endif /* C2X */
#endif /* __FLT32X_MANT_DIG__.  */
|
||||
|
||||
#ifdef __FLT64X_MANT_DIG__
/* FLT64X_* constants, each mapped onto the matching __FLT64X_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT64X_MANT_DIG
#undef FLT64X_DIG
#undef FLT64X_MIN_EXP
#undef FLT64X_MIN_10_EXP
#undef FLT64X_MAX_EXP
#undef FLT64X_MAX_10_EXP
#undef FLT64X_MAX
#undef FLT64X_EPSILON
#undef FLT64X_MIN
#undef FLT64X_DECIMAL_DIG
#undef FLT64X_TRUE_MIN
#define FLT64X_MANT_DIG		__FLT64X_MANT_DIG__
#define FLT64X_DIG		__FLT64X_DIG__
#define FLT64X_MIN_EXP		__FLT64X_MIN_EXP__
#define FLT64X_MIN_10_EXP	__FLT64X_MIN_10_EXP__
#define FLT64X_MAX_EXP		__FLT64X_MAX_EXP__
#define FLT64X_MAX_10_EXP	__FLT64X_MAX_10_EXP__
#define FLT64X_MAX		__FLT64X_MAX__
#define FLT64X_EPSILON		__FLT64X_EPSILON__
#define FLT64X_MIN		__FLT64X_MIN__
#define FLT64X_DECIMAL_DIG	__FLT64X_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT64X_TRUE_MIN		__FLT64X_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT64X_SNAN
#define FLT64X_SNAN	(__builtin_nansf64x (""))
#endif /* C2X */
#endif /* __FLT64X_MANT_DIG__.  */
|
||||
|
||||
#ifdef __FLT128X_MANT_DIG__
/* FLT128X_* constants, each mapped onto the matching __FLT128X_* compiler
   predefine.  Any earlier definitions are dropped first.  */
#undef FLT128X_MANT_DIG
#undef FLT128X_DIG
#undef FLT128X_MIN_EXP
#undef FLT128X_MIN_10_EXP
#undef FLT128X_MAX_EXP
#undef FLT128X_MAX_10_EXP
#undef FLT128X_MAX
#undef FLT128X_EPSILON
#undef FLT128X_MIN
#undef FLT128X_DECIMAL_DIG
#undef FLT128X_TRUE_MIN
#define FLT128X_MANT_DIG	__FLT128X_MANT_DIG__
#define FLT128X_DIG		__FLT128X_DIG__
#define FLT128X_MIN_EXP		__FLT128X_MIN_EXP__
#define FLT128X_MIN_10_EXP	__FLT128X_MIN_10_EXP__
#define FLT128X_MAX_EXP		__FLT128X_MAX_EXP__
#define FLT128X_MAX_10_EXP	__FLT128X_MAX_10_EXP__
#define FLT128X_MAX		__FLT128X_MAX__
#define FLT128X_EPSILON		__FLT128X_EPSILON__
#define FLT128X_MIN		__FLT128X_MIN__
#define FLT128X_DECIMAL_DIG	__FLT128X_DECIMAL_DIG__
/* TRUE_MIN is backed by the compiler's DENORM_MIN predefine.  */
#define FLT128X_TRUE_MIN	__FLT128X_DENORM_MIN__
/* The signaling NaN macro exists only for language versions after C17.  */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
#undef FLT128X_SNAN
#define FLT128X_SNAN	(__builtin_nansf128x (""))
#endif /* C2X */
#endif /* __FLT128X_MANT_DIG__.  */
|
||||
|
||||
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__. */
|
||||
|
||||
#ifdef __DEC32_MANT_DIG__
|
||||
#if (defined (__STDC_WANT_DEC_FP__) \
     || defined (__STDC_WANT_IEC_60559_DFP_EXT__) \
     || (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L))
/* C2X; formerly Technical Report 24732, extension for decimal
   floating-point arithmetic: Characteristic of decimal floating types
   <float.h>, and TS 18661-2.

   Every macro below is first undefined and then mapped onto the
   corresponding __DEC* compiler predefine.  */

/* Significand length, p, in digits.  */
#undef DEC32_MANT_DIG
#define DEC32_MANT_DIG	__DEC32_MANT_DIG__
#undef DEC64_MANT_DIG
#define DEC64_MANT_DIG	__DEC64_MANT_DIG__
#undef DEC128_MANT_DIG
#define DEC128_MANT_DIG	__DEC128_MANT_DIG__

/* Minimum exponent.  */
#undef DEC32_MIN_EXP
#define DEC32_MIN_EXP	__DEC32_MIN_EXP__
#undef DEC64_MIN_EXP
#define DEC64_MIN_EXP	__DEC64_MIN_EXP__
#undef DEC128_MIN_EXP
#define DEC128_MIN_EXP	__DEC128_MIN_EXP__

/* Maximum exponent.  */
#undef DEC32_MAX_EXP
#define DEC32_MAX_EXP	__DEC32_MAX_EXP__
#undef DEC64_MAX_EXP
#define DEC64_MAX_EXP	__DEC64_MAX_EXP__
#undef DEC128_MAX_EXP
#define DEC128_MAX_EXP	__DEC128_MAX_EXP__

/* Maximum representable finite decimal floating-point number
   (there are 6, 15, and 33 9s after the decimal points respectively).  */
#undef DEC32_MAX
#define DEC32_MAX	__DEC32_MAX__
#undef DEC64_MAX
#define DEC64_MAX	__DEC64_MAX__
#undef DEC128_MAX
#define DEC128_MAX	__DEC128_MAX__

/* The difference between 1 and the least value greater than 1 that is
   representable in the given floating point type.  */
#undef DEC32_EPSILON
#define DEC32_EPSILON	__DEC32_EPSILON__
#undef DEC64_EPSILON
#define DEC64_EPSILON	__DEC64_EPSILON__
#undef DEC128_EPSILON
#define DEC128_EPSILON	__DEC128_EPSILON__

/* Minimum normalized positive floating-point number.  */
#undef DEC32_MIN
#define DEC32_MIN	__DEC32_MIN__
#undef DEC64_MIN
#define DEC64_MIN	__DEC64_MIN__
#undef DEC128_MIN
#define DEC128_MIN	__DEC128_MIN__

/* The floating-point expression evaluation method.
         -1  indeterminate
         0  evaluate all operations and constants just to the range and
            precision of the type
         1  evaluate operations and constants of type _Decimal32
	    and _Decimal64 to the range and precision of the _Decimal64
            type, evaluate _Decimal128 operations and constants to the
	    range and precision of the _Decimal128 type;
	 2  evaluate all operations and constants to the range and
	    precision of the _Decimal128 type.  */
#undef DEC_EVAL_METHOD
#define DEC_EVAL_METHOD	__DEC_EVAL_METHOD__

#endif /* __STDC_WANT_DEC_FP__ || __STDC_WANT_IEC_60559_DFP_EXT__ || C2X.  */
|
||||
|
||||
#ifdef __STDC_WANT_DEC_FP__

/* Minimum subnormal positive floating-point number, under the
   *_SUBNORMAL_MIN spelling; offered only when __STDC_WANT_DEC_FP__
   is defined.  */
#undef DEC32_SUBNORMAL_MIN
#define DEC32_SUBNORMAL_MIN	__DEC32_SUBNORMAL_MIN__
#undef DEC64_SUBNORMAL_MIN
#define DEC64_SUBNORMAL_MIN	__DEC64_SUBNORMAL_MIN__
#undef DEC128_SUBNORMAL_MIN
#define DEC128_SUBNORMAL_MIN	__DEC128_SUBNORMAL_MIN__

#endif /* __STDC_WANT_DEC_FP__.  */
|
||||
|
||||
#if (defined (__STDC_WANT_IEC_60559_DFP_EXT__) \
     || (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L))

/* Minimum subnormal positive floating-point number, under the
   *_TRUE_MIN spelling.  These expand to the same __DEC*_SUBNORMAL_MIN__
   predefines as the *_SUBNORMAL_MIN macros.  */
#undef DEC32_TRUE_MIN
#define DEC32_TRUE_MIN	__DEC32_SUBNORMAL_MIN__
#undef DEC64_TRUE_MIN
#define DEC64_TRUE_MIN	__DEC64_SUBNORMAL_MIN__
#undef DEC128_TRUE_MIN
#define DEC128_TRUE_MIN	__DEC128_SUBNORMAL_MIN__

#endif /* __STDC_WANT_IEC_60559_DFP_EXT__ || C2X.  */
|
||||
|
||||
#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L

/* Clear any earlier definitions before installing the builtin-based
   ones below.  */
#undef DEC_INFINITY
#undef DEC_NAN
#undef DEC32_SNAN
#undef DEC64_SNAN
#undef DEC128_SNAN

/* Infinity in type _Decimal32.  */
#define DEC_INFINITY	(__builtin_infd32 ())

/* Quiet NaN in type _Decimal32.  */
#define DEC_NAN		(__builtin_nand32 (""))

/* Signaling NaN in each decimal floating-point type.  */
#define DEC32_SNAN	(__builtin_nansd32 (""))
#define DEC64_SNAN	(__builtin_nansd64 (""))
#define DEC128_SNAN	(__builtin_nansd128 (""))

#endif /* C2X */
|
||||
|
||||
#endif /* __DEC32_MANT_DIG__ */
|
||||
|
||||
/* Version macro for this header; defined only for language versions
   newer than C17.  */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ > 201710L)
#define __STDC_VERSION_FLOAT_H__	202311L
#endif
|
||||
|
||||
#endif /* _FLOAT_H___ */
|
@ -0,0 +1,241 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMA4INTRIN_H_INCLUDED
|
||||
#define _FMA4INTRIN_H_INCLUDED
|
||||
|
||||
/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
|
||||
#include <ammintrin.h>
|
||||
|
||||
#ifndef __FMA4__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma4")
|
||||
#define __DISABLE_FMA4__
|
||||
#endif /* __FMA4__ */
|
||||
|
||||
/* 128b Floating point multiply/add type instructions. */
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
/* 256b Floating point multiply/add type instructions. */
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA4__
|
||||
#undef __DISABLE_FMA4__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA4__ */
|
||||
|
||||
#endif
|
@ -0,0 +1,302 @@
|
||||
/* Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMAINTRIN_H_INCLUDED
|
||||
#define _FMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma")
|
||||
#define __DISABLE_FMA__
|
||||
#endif /* __FMA__ */
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
}
|
||||
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA__
|
||||
#undef __DISABLE_FMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA__ */
|
||||
|
||||
#endif
|
@ -0,0 +1,73 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FXSRINTRIN_H_INCLUDED
|
||||
#define _FXSRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FXSR__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fxsr")
|
||||
#define __DISABLE_FXSR__
|
||||
#endif /* __FXSR__ */
|
||||
|
||||
/* Thin wrapper: hands the buffer pointer __P straight to
   __builtin_ia32_fxsave.  Size and alignment requirements of the save
   area are those of the underlying instruction and are not checked
   here.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave (void *__P)
{
  __builtin_ia32_fxsave (__P);
}
|
||||
|
||||
/* Thin wrapper: hands the buffer pointer __P straight to
   __builtin_ia32_fxrstor.  No validation of the buffer is performed
   here.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor (void *__P)
{
  __builtin_ia32_fxrstor (__P);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* 64-bit-only variant (guarded by __x86_64__): forwards __P to
   __builtin_ia32_fxsave64.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave64 (void *__P)
{
  __builtin_ia32_fxsave64 (__P);
}
|
||||
|
||||
/* 64-bit-only variant (guarded by __x86_64__): forwards __P to
   __builtin_ia32_fxrstor64.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor64 (void *__P)
{
  __builtin_ia32_fxrstor64 (__P);
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __DISABLE_FXSR__
|
||||
#undef __DISABLE_FXSR__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FXSR__ */
|
||||
|
||||
|
||||
#endif /* _FXSRINTRIN_H_INCLUDED */
|
@ -0,0 +1,70 @@
|
||||
/* GCOV interface routines.
|
||||
Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GCC_GCOV_H
|
||||
#define GCC_GCOV_H
|
||||
|
||||
struct gcov_info;
|
||||
|
||||
/* Set all counters to zero.  */

extern void __gcov_reset (void);
|
||||
|
||||
/* Write profile information to a file.  */

extern void __gcov_dump (void);
|
||||
|
||||
/* Convert the gcov information referenced by INFO to a gcda data stream.
   The FILENAME_FN callback is called exactly once with the filename
   associated with the gcov information.  The filename may be NULL.
   Afterwards, the DUMP_FN callback is called with chunks of the gcda data
   stream (the begin and length of the chunk are passed as the first two
   callback parameters).  The ALLOCATE_FN callback shall allocate memory
   with a size in characters specified by the first callback parameter.
   The ARG parameter is a user-provided argument passed as the last
   argument to the callback functions.  It is recommended to call
   __gcov_filename_to_gcfn() from the filename callback function.  */

extern void
__gcov_info_to_gcda (const struct gcov_info *__info,
                     void (*__filename_fn) (const char *, void *),
                     void (*__dump_fn) (const void *, unsigned, void *),
                     void *(*__allocate_fn) (unsigned, void *),
                     void *__arg);
|
||||
|
||||
/* Convert the FILENAME to a gcfn data stream.  The DUMP_FN callback is
   subsequently called with chunks (the begin and length of the chunk are
   passed as the first two callback parameters) of the gcfn data stream.
   The ARG parameter is a user-provided argument passed as the last
   argument to the DUMP_FN callback function.  This function is intended
   to be used by the filename callback of __gcov_info_to_gcda().  The gcfn
   data stream is used by the merge-stream subcommand of the gcov-tool to
   get the filename associated with a gcda data stream.  */

extern void
__gcov_filename_to_gcfn (const char *__filename,
                         void (*__dump_fn) (const void *, unsigned, void *),
                         void *__arg);
|
||||
|
||||
#endif /* GCC_GCOV_H */
|
@ -0,0 +1,414 @@
|
||||
/* Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _GFNIINTRIN_H_INCLUDED
|
||||
#define _GFNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__SSE2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,sse2")
|
||||
#define __DISABLE_GFNI__
|
||||
#endif /* !__GFNI__ || !__SSE2__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
|
||||
(__v16qi) __B);
|
||||
}
|
||||
|
||||
/* 128-bit gf2p8affineinv / gf2p8affine with an immediate operand __C.
   With __OPTIMIZE__ defined these are always-inline functions; otherwise
   they are macros that paste the immediate directly into the builtin
   call (presumably because the builtin needs a compile-time constant
   there -- confirm against the GCC x86 builtin documentation).  Both
   forms pass the same operands in the same order.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
                                                           (__v16qi) __B,
                                                           __C);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
                                                        (__v16qi) __B, __C);
}
#else
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                           (__v16qi)(__m128i)(B), (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), \
                                           (__v16qi)(__m128i)(B), (int)(C)))
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_GFNI__
|
||||
#undef __DISABLE_GFNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNI__ */
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__AVX__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx")
|
||||
#define __DISABLE_GFNIAVX__
|
||||
#endif /* !__GFNI__ || !__AVX__ */
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B);
|
||||
}
|
||||
|
||||
/* 256-bit gf2p8affineinv / gf2p8affine with an immediate operand __C.
   Inline functions when __OPTIMIZE__ is defined, equivalent macros
   otherwise; both forms pass the same operands in the same order.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
                                                           (__v32qi) __B,
                                                           __C);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
                                                        (__v32qi) __B, __C);
}
#else
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                    (__v32qi)(__m256i)(B), \
                                                    (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), \
                                        ( __v32qi)(__m256i)(B), (int)(C)))
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX__
|
||||
#undef __DISABLE_GFNIAVX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNIAVX__ */
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl")
|
||||
#define __DISABLE_GFNIAVX512VL__
|
||||
#endif /* !__GFNI__ || !__AVX512VL__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D,
|
||||
(__v16qi)__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
|
||||
}
|
||||
|
||||
/* Masked and zero-masked 128-bit gf2p8affineinv / gf2p8affine with an
   immediate operand.  Inline functions when __OPTIMIZE__ is defined,
   equivalent macros otherwise.  In the _mask_ forms the builtin receives
   (__C, __D, immediate, __A, __B); in the _maskz_ forms it receives
   (__B, __C, immediate, zero vector, __A).  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
                                    __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
                                                                (__v16qi) __D,
                                                                __E,
                                                                (__v16qi)__A,
                                                                __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
                                     const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
                                                (__v16qi) __C, __D,
                                                (__v16qi) _mm_setzero_si128 (),
                                                __A);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
                                 __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
                                        (__v16qi) __D, __E, (__v16qi)__A, __B);
}

extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
                                  const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
                (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
}
#else
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
                        (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), \
                        (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
                        (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), \
                        (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \
                        (__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
      (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
                (__v16qi)(__m128i)(C), (int)(D), \
                (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VL__
|
||||
#undef __DISABLE_GFNIAVX512VL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNIAVX512VL__ */
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512VLBW__
|
||||
#endif /* !__GFNI__ || !__AVX512VL__ || !__AVX512BW__ */
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
(__v32qi)__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
|
||||
}
|
||||
|
||||
/* Masked and zero-masked 256-bit gf2p8affineinv / gf2p8affine with an
   immediate operand.  Inline functions when __OPTIMIZE__ is defined,
   equivalent macros otherwise.  In the _mask_ forms the builtin receives
   (__C, __D, immediate, __A, __B); in the _maskz_ forms it receives
   (__B, __C, immediate, zero vector, __A).  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
                                       __m256i __C, __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
                                                                (__v32qi) __D,
                                                                __E,
                                                                (__v32qi)__A,
                                                                __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
                                        __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
                                      (__v32qi) __C, __D,
                                      (__v32qi) _mm256_setzero_si256 (), __A);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
                                    __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
                                                             (__v32qi) __D,
                                                             __E,
                                                             (__v32qi)__A,
                                                             __B);
}

extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
                                     __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
                (__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
}
#else
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
        (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
        (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
        (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
        (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
      (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
         (__v32qi)(__m256i)(C), (int)(D), \
         (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VLBW__
|
||||
#undef __DISABLE_GFNIAVX512VLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNIAVX512VLBW__ */
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512f,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512FBW__
|
||||
#endif /* !__GFNI__ || !__AVX512F__ || !__AVX512BW__ */
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D, (__v64qi)__A, __B);
|
||||
}
|
||||
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
/* 512-bit gf2p8affineinv / gf2p8affine in plain, masked and zero-masked
   forms, each taking an immediate operand.  Inline functions when
   __OPTIMIZE__ is defined, equivalent macros otherwise.  In the _mask_
   forms the builtin receives (__C, __D, immediate, __A, __B); in the
   _maskz_ forms it receives (__B, __C, immediate, zero vector, __A).  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
                                       __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
                                                                (__v64qi) __D,
                                                                __E,
                                                                (__v64qi)__A,
                                                                __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
                                        __m512i __C, const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
                                (__v64qi) __C, __D,
                                (__v64qi) _mm512_setzero_si512 (), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
                                                           (__v64qi) __B, __C);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
                                    __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
                                        (__v64qi) __D, __E, (__v64qi)__A, __B);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
                                     const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
                (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}

extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
                                                        (__v64qi) __B, __C);
}
#else
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
        (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
        (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
        (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
        (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \
        (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
     (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
         (__v64qi)(__m512i)(C), (int)(D), \
         (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \
         (__v64qi)(__m512i)(B), (int)(C)))
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512FBW__
|
||||
#undef __DISABLE_GFNIAVX512FBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNIAVX512FBW__ */
|
||||
|
||||
#endif /* _GFNIINTRIN_H_INCLUDED */
|
@ -0,0 +1,48 @@
|
||||
/* Copyright (C) 2020-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _HRESETINTRIN_H_INCLUDED
|
||||
#define _HRESETINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __HRESET__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("hreset")
|
||||
#define __DISABLE_HRESET__
|
||||
#endif /* __HRESET__ */
|
||||
|
||||
/* Thin wrapper: passes __EAX unchanged to __builtin_ia32_hreset.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_hreset (unsigned int __EAX)
{
  __builtin_ia32_hreset (__EAX);
}
|
||||
|
||||
#ifdef __DISABLE_HRESET__
|
||||
#undef __DISABLE_HRESET__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_HRESET__ */
|
||||
#endif /* _HRESETINTRIN_H_INCLUDED */
|
@ -0,0 +1,317 @@
|
||||
/* Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
/* 32bit bsf: index of the lowest set bit of __X, computed with
   __builtin_ctz.  The builtin's result is undefined for 0, so callers
   must pass a nonzero value.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfd (int __X)
{
  return __builtin_ctz (__X);
}
|
||||
|
||||
/* 32bit bsr: forwards __X to __builtin_ia32_bsrsi and returns its
   result.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrd (int __X)
{
  return __builtin_ia32_bsrsi (__X);
}
|
||||
|
||||
/* 32bit bswap: returns __X with its four bytes in reverse order
   (__builtin_bswap32).  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapd (int __X)
{
  return __builtin_bswap32 (__X);
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
#ifndef __CRC32__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("crc32")
|
||||
#define __DISABLE_CRC32__
|
||||
#endif /* __CRC32__ */
|
||||
|
||||
/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value: folds the byte
   __V into the running value __C via __builtin_ia32_crc32qi and returns
   the builtin's result.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32b (unsigned int __C, unsigned char __V)
{
  return __builtin_ia32_crc32qi (__C, __V);
}
|
||||
|
||||
/* CRC32 accumulate of a 16-bit value: passes the running value __C and
   the halfword __V to __builtin_ia32_crc32hi.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32w (unsigned int __C, unsigned short __V)
{
  return __builtin_ia32_crc32hi (__C, __V);
}
|
||||
|
||||
/* CRC32 accumulate of a 32-bit value: passes the running value __C and
   the word __V to __builtin_ia32_crc32si.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32d (unsigned int __C, unsigned int __V)
{
  return __builtin_ia32_crc32si (__C, __V);
}
|
||||
|
||||
#ifdef __DISABLE_CRC32__
|
||||
#undef __DISABLE_CRC32__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CRC32__ */
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
/* 32bit popcnt: number of set bits in __X (__builtin_popcount).  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntd (unsigned int __X)
{
  return __builtin_popcount (__X);
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
/* rdpmc: returns the 64-bit value produced by __builtin_ia32_rdpmc for
   the argument __S.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdpmc (int __S)
{
  return __builtin_ia32_rdpmc (__S);
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
/* rdtsc: returns the 64-bit value produced by __builtin_ia32_rdtsc.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtsc (void)
{
  return __builtin_ia32_rdtsc ();
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
/* rdtscp: returns the 64-bit value produced by __builtin_ia32_rdtscp.
   __A is handed to the builtin unchanged; presumably it receives the
   auxiliary value the instruction reports -- confirm against the GCC
   builtin documentation.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdtscp (unsigned int *__A)
{
  return __builtin_ia32_rdtscp (__A);
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
/* 8bit rol: rotates __X left by __C using __builtin_ia32_rolqi.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolb (unsigned char __X, int __C)
{
  return __builtin_ia32_rolqi (__X, __C);
}
|
||||
|
||||
/* 16bit rol: rotates __X left by __C using __builtin_ia32_rolhi.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolw (unsigned short __X, int __C)
{
  return __builtin_ia32_rolhi (__X, __C);
}
|
||||
|
||||
/* 32bit rol: rotates __X left by __C bit positions.  The count is first
   reduced to 0..31, so any int count (negative or >= 32 included) is
   well defined.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold (unsigned int __X, int __C)
{
  __C &= 31;
  /* -__C & 31 keeps the complementary shift below 32 even when __C is 0.  */
  return (__X << __C) | (__X >> (-__C & 31));
}
|
||||
|
||||
/* 8bit ror: rotates __X right by __C using __builtin_ia32_rorqi.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorb (unsigned char __X, int __C)
{
  return __builtin_ia32_rorqi (__X, __C);
}
|
||||
|
||||
/* 16bit ror: rotates __X right by __C using __builtin_ia32_rorhi.  */
extern __inline unsigned short
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorw (unsigned short __X, int __C)
{
  return __builtin_ia32_rorhi (__X, __C);
}
|
||||
|
||||
/* 32bit ror: rotates __X right by __C bit positions.  The count is first
   reduced to 0..31, so any int count (negative or >= 32 included) is
   well defined.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord (unsigned int __X, int __C)
{
  __C &= 31;
  /* -__C & 31 keeps the complementary shift below 32 even when __C is 0.  */
  return (__X >> __C) | (__X << (-__C & 31));
}
|
||||
|
||||
/* Pause: invokes __builtin_ia32_pause; takes no arguments and returns
   nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__pause (void)
{
  __builtin_ia32_pause ();
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* 64bit bsf: index of the lowest set bit of __X, computed with
   __builtin_ctzll.  The builtin's result is undefined for 0, so callers
   must pass a nonzero value.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfq (long long __X)
{
  return __builtin_ctzll (__X);
}
|
||||
|
||||
/* 64bit bsr: forwards __X to __builtin_ia32_bsrdi and returns its
   result.  */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrq (long long __X)
{
  return __builtin_ia32_bsrdi (__X);
}
|
||||
|
||||
/* 64bit bswap: returns __X with its eight bytes in reverse order
   (__builtin_bswap64).  */
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapq (long long __X)
{
  return __builtin_bswap64 (__X);
}
|
||||
|
||||
#ifndef __CRC32__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("crc32")
|
||||
#define __DISABLE_CRC32__
|
||||
#endif /* __CRC32__ */
|
||||
|
||||
/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value: folds the
   64-bit value __V into the running value __C via
   __builtin_ia32_crc32di and returns the builtin's result.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32q (unsigned long long __C, unsigned long long __V)
{
  return __builtin_ia32_crc32di (__C, __V);
}
|
||||
|
||||
#ifdef __DISABLE_CRC32__
|
||||
#undef __DISABLE_CRC32__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CRC32__ */
|
||||
|
||||
/* 64-bit population count ("popcnt").
   Returns __builtin_popcountll (__X), the number of 1 bits in __X, widened
   to long long.  */
extern __inline long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntq (unsigned long long __X)
{
  return __builtin_popcountll (__X);
}
|
||||
|
||||
/* 64-bit rotate left.
   __X is the value to rotate and __C the rotate count.  Only the low six
   bits of __C are used, so every int count maps to a rotation by 0..63
   positions.  Returns the rotated value.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq (unsigned long long __X, int __C)
{
  __C &= 63;
  /* (-__C & 63) is the complementary count and is 0 when __C is 0, so
     neither shift is ever by the full width of the type.  */
  return (__X << __C) | (__X >> (-__C & 63));
}
|
||||
|
||||
/* 64-bit rotate right.
   __X is the value to rotate and __C the rotate count.  Only the low six
   bits of __C are used, so every int count maps to a rotation by 0..63
   positions.  Returns the rotated value.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq (unsigned long long __X, int __C)
{
  __C &= 63;
  /* (-__C & 63) is the complementary count and is 0 when __C is 0, so
     neither shift is ever by the full width of the type.  */
  return (__X >> __C) | (__X << (-__C & 63));
}
|
||||
|
||||
/* Read flags register (variant compiled when __x86_64__ is defined).
   Returns the value produced by the __builtin_ia32_readeflags_u64
   builtin.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
  return __builtin_ia32_readeflags_u64 ();
}
|
||||
|
||||
/* Write flags register (variant compiled when __x86_64__ is defined).
   Passes __X to the __builtin_ia32_writeeflags_u64 builtin; nothing is
   returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned long long __X)
{
  __builtin_ia32_writeeflags_u64 (__X);
}
|
||||
|
||||
/* Single-underscore aliases for the 64-bit byte swap and population count
   functions.  */
#define _bswap64(a) __bswapq(a)
#define _popcnt64(a) __popcntq(a)
|
||||
#else
|
||||
|
||||
/* Read flags register (variant compiled when __x86_64__ is not defined).
   Returns the value produced by the __builtin_ia32_readeflags_u32
   builtin.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
  return __builtin_ia32_readeflags_u32 ();
}
|
||||
|
||||
/* Write flags register (variant compiled when __x86_64__ is not defined).
   Passes __X to the __builtin_ia32_writeeflags_u32 builtin; nothing is
   returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned int __X)
{
  __builtin_ia32_writeeflags_u32 (__X);
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Rotates of `long' values.  On LP64 systems, longs are 64-bit, so the
   64-bit rotate functions are used; otherwise the 32-bit ones are.  */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif

/* Single-underscore aliases for the 32-bit and 16-bit operations.  */
#define _bit_scan_forward(a) __bsfd(a)
#define _bit_scan_reverse(a) __bsrd(a)
#define _bswap(a) __bswapd(a)
#define _popcnt32(a) __popcntd(a)
/* The rdpmc/rdtscp aliases are not provided when __iamcu__ is defined.  */
#ifndef __iamcu__
#define _rdpmc(a) __rdpmc(a)
#define _rdtscp(a) __rdtscp(a)
#endif /* __iamcu__ */
#define _rdtsc() __rdtsc()
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
|
@ -0,0 +1,143 @@
|
||||
/* Copyright (C) 2008-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#define _IMMINTRIN_H_INCLUDED
|
||||
|
||||
#include <x86gprintrin.h>
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <pmmintrin.h>
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
#include <wmmintrin.h>
|
||||
|
||||
#include <avxintrin.h>
|
||||
|
||||
#include <avxvnniintrin.h>
|
||||
|
||||
#include <avxifmaintrin.h>
|
||||
|
||||
#include <avxvnniint8intrin.h>
|
||||
|
||||
#include <avx2intrin.h>
|
||||
|
||||
#include <avx512fintrin.h>
|
||||
|
||||
#include <avx512erintrin.h>
|
||||
|
||||
#include <avx512pfintrin.h>
|
||||
|
||||
#include <avx512cdintrin.h>
|
||||
|
||||
#include <avx512vlintrin.h>
|
||||
|
||||
#include <avx512bwintrin.h>
|
||||
|
||||
#include <avx512dqintrin.h>
|
||||
|
||||
#include <avx512vlbwintrin.h>
|
||||
|
||||
#include <avx512vldqintrin.h>
|
||||
|
||||
#include <avx512ifmaintrin.h>
|
||||
|
||||
#include <avx512ifmavlintrin.h>
|
||||
|
||||
#include <avx512vbmiintrin.h>
|
||||
|
||||
#include <avx512vbmivlintrin.h>
|
||||
|
||||
#include <avx5124fmapsintrin.h>
|
||||
|
||||
#include <avx5124vnniwintrin.h>
|
||||
|
||||
#include <avx512vpopcntdqintrin.h>
|
||||
|
||||
#include <avx512vbmi2intrin.h>
|
||||
|
||||
#include <avx512vbmi2vlintrin.h>
|
||||
|
||||
#include <avx512vnniintrin.h>
|
||||
|
||||
#include <avx512vnnivlintrin.h>
|
||||
|
||||
#include <avx512vpopcntdqvlintrin.h>
|
||||
|
||||
#include <avx512bitalgintrin.h>
|
||||
|
||||
#include <avx512vp2intersectintrin.h>
|
||||
|
||||
#include <avx512vp2intersectvlintrin.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <avx512fp16intrin.h>
|
||||
|
||||
#include <avx512fp16vlintrin.h>
|
||||
#endif
|
||||
|
||||
#include <shaintrin.h>
|
||||
|
||||
#include <fmaintrin.h>
|
||||
|
||||
#include <f16cintrin.h>
|
||||
|
||||
#include <rtmintrin.h>
|
||||
|
||||
#include <gfniintrin.h>
|
||||
|
||||
#include <vaesintrin.h>
|
||||
|
||||
#include <vpclmulqdqintrin.h>
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <avx512bf16vlintrin.h>
|
||||
|
||||
#include <avx512bf16intrin.h>
|
||||
|
||||
#include <avxneconvertintrin.h>
|
||||
#endif
|
||||
|
||||
#include <amxtileintrin.h>
|
||||
|
||||
#include <amxint8intrin.h>
|
||||
|
||||
#include <amxbf16intrin.h>
|
||||
|
||||
#include <amxcomplexintrin.h>
|
||||
|
||||
#include <prfchwintrin.h>
|
||||
|
||||
#include <keylockerintrin.h>
|
||||
|
||||
#include <amxfp16intrin.h>
|
||||
|
||||
#endif /* _IMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,45 @@
|
||||
/* Copyright (C) 1997-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* ISO C Standard: 7.9 Alternative spellings <iso646.h>
|
||||
*/
|
||||
|
||||
#ifndef _ISO646_H
#define _ISO646_H

/* Alternative spellings of the logical, bitwise and compound-assignment
   operators.  They are provided as macros for C only; the definitions are
   skipped when compiling as C++ (__cplusplus defined).  */
#ifndef __cplusplus
#define and &&
#define and_eq &=
#define bitand &
#define bitor |
#define compl ~
#define not !
#define not_eq !=
#define or ||
#define or_eq |=
#define xor ^
#define xor_eq ^=
#endif

#endif
|
@ -0,0 +1,129 @@
|
||||
/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#define _KEYLOCKERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __KL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("kl")
|
||||
#define __DISABLE_KL__
|
||||
#endif /* __KL__ */
|
||||
|
||||
|
||||
extern __inline
|
||||
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_KL__
|
||||
#undef __DISABLE_KL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_KL__ */
|
||||
|
||||
#ifndef __WIDEKL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("widekl")
|
||||
#define __DISABLE_WIDEKL__
|
||||
#endif /* __WIDEKL__ */
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_WIDEKL__
|
||||
#undef __DISABLE_WIDEKL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_WIDEKL__ */
|
||||
#endif /* _KEYLOCKERINTRIN_H_INCLUDED */
|
@ -0,0 +1,208 @@
|
||||
/* Copyright (C) 1992-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* This administrivia gets added to the beginning of limits.h
|
||||
if the system has its own version of limits.h. */
|
||||
|
||||
/* We use _GCC_LIMITS_H_ because we want this not to match
|
||||
any macros that the system's limits.h uses for its own purposes. */
|
||||
#ifndef _GCC_LIMITS_H_ /* Terminated in limity.h. */
|
||||
#define _GCC_LIMITS_H_
|
||||
|
||||
#ifndef _LIBC_LIMITS_H_
|
||||
/* Use "..." so that we find syslimits.h only in this same directory. */
|
||||
#include "syslimits.h"
|
||||
#endif
|
||||
/* Copyright (C) 1991-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _LIMITS_H___
#define _LIMITS_H___

/* Every limit below is derived from a macro predefined by the compiler
   (__CHAR_BIT__, __SCHAR_MAX__, __INT_MAX__, ...).  Each name is #undef'd
   first so that an earlier definition is replaced.  */

/* Number of bits in a `char'.  */
#undef CHAR_BIT
#define CHAR_BIT __CHAR_BIT__

/* Maximum length of a multibyte character.  An existing definition is
   kept; 1 is only the fallback.  */
#ifndef MB_LEN_MAX
#define MB_LEN_MAX 1
#endif

/* Minimum and maximum values a `signed char' can hold.  */
#undef SCHAR_MIN
#define SCHAR_MIN (-SCHAR_MAX - 1)
#undef SCHAR_MAX
#define SCHAR_MAX __SCHAR_MAX__

/* Maximum value an `unsigned char' can hold.  (Minimum is 0).
   When `char' is as wide as `int' the computation is done in unsigned
   arithmetic, because SCHAR_MAX * 2 + 1 would not fit in an int.  */
#undef UCHAR_MAX
#if __SCHAR_MAX__ == __INT_MAX__
# define UCHAR_MAX (SCHAR_MAX * 2U + 1U)
#else
# define UCHAR_MAX (SCHAR_MAX * 2 + 1)
#endif

/* Minimum and maximum values a `char' can hold.  */
#ifdef __CHAR_UNSIGNED__
# undef CHAR_MIN
# if __SCHAR_MAX__ == __INT_MAX__
#  define CHAR_MIN 0U
# else
#  define CHAR_MIN 0
# endif
# undef CHAR_MAX
# define CHAR_MAX UCHAR_MAX
#else
# undef CHAR_MIN
# define CHAR_MIN SCHAR_MIN
# undef CHAR_MAX
# define CHAR_MAX SCHAR_MAX
#endif

/* Minimum and maximum values a `signed short int' can hold.  */
#undef SHRT_MIN
#define SHRT_MIN (-SHRT_MAX - 1)
#undef SHRT_MAX
#define SHRT_MAX __SHRT_MAX__

/* Maximum value an `unsigned short int' can hold.  (Minimum is 0).
   Unsigned arithmetic is used when `short' is as wide as `int', for the
   same reason as for UCHAR_MAX.  */
#undef USHRT_MAX
#if __SHRT_MAX__ == __INT_MAX__
# define USHRT_MAX (SHRT_MAX * 2U + 1U)
#else
# define USHRT_MAX (SHRT_MAX * 2 + 1)
#endif

/* Minimum and maximum values a `signed int' can hold.  */
#undef INT_MIN
#define INT_MIN (-INT_MAX - 1)
#undef INT_MAX
#define INT_MAX __INT_MAX__

/* Maximum value an `unsigned int' can hold.  (Minimum is 0).  */
#undef UINT_MAX
#define UINT_MAX (INT_MAX * 2U + 1U)

/* Minimum and maximum values a `signed long int' can hold.  */
#undef LONG_MIN
#define LONG_MIN (-LONG_MAX - 1L)
#undef LONG_MAX
#define LONG_MAX __LONG_MAX__

/* Maximum value an `unsigned long int' can hold.  (Minimum is 0).  */
#undef ULONG_MAX
#define ULONG_MAX (LONG_MAX * 2UL + 1UL)

#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* Minimum and maximum values a `signed long long int' can hold.  */
# undef LLONG_MIN
# define LLONG_MIN (-LLONG_MAX - 1LL)
# undef LLONG_MAX
# define LLONG_MAX __LONG_LONG_MAX__

/* Maximum value an `unsigned long long int' can hold.  (Minimum is 0).  */
# undef ULLONG_MAX
# define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
#endif

/* LONG_LONG_* spellings: with the GNU C library they are defined only
   when __USE_GNU is defined, otherwise only when __STRICT_ANSI__ is not
   defined.  */
#if defined (__GNU_LIBRARY__) ? defined (__USE_GNU) : !defined (__STRICT_ANSI__)
/* Minimum and maximum values a `signed long long int' can hold.  */
# undef LONG_LONG_MIN
# define LONG_LONG_MIN (-LONG_LONG_MAX - 1LL)
# undef LONG_LONG_MAX
# define LONG_LONG_MAX __LONG_LONG_MAX__

/* Maximum value an `unsigned long long int' can hold.  (Minimum is 0).  */
# undef ULONG_LONG_MAX
# define ULONG_LONG_MAX (LONG_LONG_MAX * 2ULL + 1ULL)
#endif

#if (defined __STDC_WANT_IEC_60559_BFP_EXT__ \
     || (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L))
/* TS 18661-1 / C2X widths of integer types.  The signed and unsigned
   variant of each type share one predefined width macro.  */
# undef CHAR_WIDTH
# define CHAR_WIDTH __SCHAR_WIDTH__
# undef SCHAR_WIDTH
# define SCHAR_WIDTH __SCHAR_WIDTH__
# undef UCHAR_WIDTH
# define UCHAR_WIDTH __SCHAR_WIDTH__
# undef SHRT_WIDTH
# define SHRT_WIDTH __SHRT_WIDTH__
# undef USHRT_WIDTH
# define USHRT_WIDTH __SHRT_WIDTH__
# undef INT_WIDTH
# define INT_WIDTH __INT_WIDTH__
# undef UINT_WIDTH
# define UINT_WIDTH __INT_WIDTH__
# undef LONG_WIDTH
# define LONG_WIDTH __LONG_WIDTH__
# undef ULONG_WIDTH
# define ULONG_WIDTH __LONG_WIDTH__
# undef LLONG_WIDTH
# define LLONG_WIDTH __LONG_LONG_WIDTH__
# undef ULLONG_WIDTH
# define ULLONG_WIDTH __LONG_LONG_WIDTH__
#endif

#if defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L
/* C2X width and limit of _Bool.  */
# undef BOOL_MAX
# define BOOL_MAX 1
# undef BOOL_WIDTH
# define BOOL_WIDTH 1

# define __STDC_VERSION_LIMITS_H__ 202311L
#endif

#endif /* _LIMITS_H___ */
|
||||
/* This administrivia gets added to the end of limits.h
|
||||
if the system has its own version of limits.h. */
|
||||
|
||||
#else /* not _GCC_LIMITS_H_ */
|
||||
|
||||
#ifdef _GCC_NEXT_LIMITS_H
|
||||
#include_next <limits.h> /* recurse down to the real one */
|
||||
#endif
|
||||
|
||||
#endif /* not _GCC_LIMITS_H_ */
|
@ -0,0 +1,107 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _LWPINTRIN_H_INCLUDED
|
||||
#define _LWPINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LWP__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lwp")
|
||||
#define __DISABLE_LWP__
|
||||
#endif /* __LWP__ */
|
||||
|
||||
/* Thin wrapper around the __builtin_ia32_llwpcb builtin: passes the
   pointer __pcbAddress through unchanged and returns nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__llwpcb (void *__pcbAddress)
{
  __builtin_ia32_llwpcb (__pcbAddress);
}
|
||||
|
||||
/* Thin wrapper around the __builtin_ia32_slwpcb builtin: takes no
   arguments and returns the pointer the builtin produces.  */
extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__slwpcb (void)
{
  return __builtin_ia32_slwpcb ();
}
|
||||
|
||||
/* __lwpval32 / __lwpval64: forward (__data2, __data1, __flags) to the
   __builtin_ia32_lwpval32 / __builtin_ia32_lwpval64 builtins.  The 64-bit
   form exists only when __x86_64__ is defined.

   With __OPTIMIZE__ defined they are inline functions; otherwise they are
   macros that cast each argument to the parameter type and call the
   builtin directly.
   NOTE(review): presumably the macro form is needed because the builtin
   requires a compile-time constant argument, which an un-inlined function
   parameter cannot provide -- confirm against the builtin's
   documentation.  */
#ifdef __OPTIMIZE__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{
  __builtin_ia32_lwpval32 (__data2, __data1, __flags);
}

#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval64 (unsigned long long __data2, unsigned int __data1,
	    unsigned int __flags)
{
  __builtin_ia32_lwpval64 (__data2, __data1, __flags);
}
#endif
#else
#define __lwpval32(D2, D1, F) \
  (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpval64(D2, D1, F) \
  (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#endif
#endif
|
||||
|
||||
|
||||
/* __lwpins32 / __lwpins64: forward (__data2, __data1, __flags) to the
   __builtin_ia32_lwpins32 / __builtin_ia32_lwpins64 builtins and yield the
   builtin's result (unsigned char in the function form).  The 64-bit form
   exists only when __x86_64__ is defined.

   With __OPTIMIZE__ defined they are inline functions; otherwise they are
   macros that cast each argument to the parameter type and call the
   builtin directly.
   NOTE(review): presumably the macro form is needed because the builtin
   requires a compile-time constant argument, which an un-inlined function
   parameter cannot provide -- confirm against the builtin's
   documentation.  */
#ifdef __OPTIMIZE__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{
  return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
}

#ifdef __x86_64__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins64 (unsigned long long __data2, unsigned int __data1,
	    unsigned int __flags)
{
  return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
}
#endif
#else
#define __lwpins32(D2, D1, F) \
  (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpins64(D2, D1, F) \
  (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), \
			    (unsigned int) (F)))
#endif
#endif
|
||||
|
||||
#ifdef __DISABLE_LWP__
|
||||
#undef __DISABLE_LWP__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LWP__ */
|
||||
|
||||
#endif /* _LWPINTRIN_H_INCLUDED */
|
@ -0,0 +1,75 @@
|
||||
/* Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef _LZCNTINTRIN_H_INCLUDED
|
||||
#define _LZCNTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LZCNT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lzcnt")
|
||||
#define __DISABLE_LZCNT__
|
||||
#endif /* __LZCNT__ */
|
||||
|
||||
/* 16-bit lzcnt: returns the result of the __builtin_ia32_lzcnt_u16
   builtin applied to __X.  The builtin is only usable inside the
   surrounding `#pragma GCC target("lzcnt")' region (or when __LZCNT__ is
   defined).  */
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt16 (unsigned short __X)
{
  return __builtin_ia32_lzcnt_u16 (__X);
}
|
||||
|
||||
/* 32-bit lzcnt: returns the result of the __builtin_ia32_lzcnt_u32
   builtin applied to __X.  Same operation as _lzcnt_u32.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt32 (unsigned int __X)
{
  return __builtin_ia32_lzcnt_u32 (__X);
}
|
||||
|
||||
/* 32-bit lzcnt under its single-underscore name: returns the result of
   the __builtin_ia32_lzcnt_u32 builtin applied to __X.  Same operation as
   __lzcnt32.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u32 (unsigned int __X)
{
  return __builtin_ia32_lzcnt_u32 (__X);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* 64-bit lzcnt (only compiled when __x86_64__ is defined): returns the
   result of the __builtin_ia32_lzcnt_u64 builtin applied to __X.  Same
   operation as _lzcnt_u64.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt64 (unsigned long long __X)
{
  return __builtin_ia32_lzcnt_u64 (__X);
}
|
||||
|
||||
/* 64-bit lzcnt under its single-underscore name (only compiled when
   __x86_64__ is defined): returns the result of the
   __builtin_ia32_lzcnt_u64 builtin applied to __X.  Same operation as
   __lzcnt64.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u64 (unsigned long long __X)
{
  return __builtin_ia32_lzcnt_u64 (__X);
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_LZCNT__
|
||||
#undef __DISABLE_LZCNT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LZCNT__ */
|
||||
|
||||
#endif /* _LZCNTINTRIN_H_INCLUDED */
|
@ -0,0 +1,233 @@
|
||||
/* Copyright (C) 2004-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
|
||||
MSVC 7.1. */
|
||||
|
||||
#ifndef _MM3DNOW_H_INCLUDED
|
||||
#define _MM3DNOW_H_INCLUDED
|
||||
|
||||
#include <mmintrin.h>
|
||||
#include <prfchwintrin.h>
|
||||
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
#pragma GCC target("sse,3dnow")
|
||||
#else
|
||||
#pragma GCC target("3dnow")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW__
|
||||
#endif /* __3dNOW__ */
|
||||
|
||||
/* Issue the FEMMS builtin; takes no operands and returns nothing.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
  __builtin_ia32_femms ();
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pavgusb (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2id (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfadd (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpeq (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpge (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpgt (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmax (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmin (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmul (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcp (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit2 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqrt (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsub (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsubr (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmulhrw (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
|
||||
}
|
||||
|
||||
/* Request a prefetch of the memory at __P.  The two trailing arguments
   to __builtin_prefetch are the literals 0 and 3; the latter matches
   _MM_HINT_T0.  Nothing is returned and __P is not dereferenced here.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P)
{
  __builtin_prefetch (__P,
		      0,
		      3 /* _MM_HINT_T0 */);
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_from_float (float __A)
|
||||
{
|
||||
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
|
||||
}
|
||||
|
||||
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_to_float (__m64 __A)
|
||||
{
|
||||
union { __v2sf v; float a[2]; } __tmp;
|
||||
__tmp.v = (__v2sf)__A;
|
||||
return __tmp.a[0];
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW__
|
||||
#undef __DISABLE_3dNOW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW__ */
|
||||
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
#pragma GCC target("sse,3dnowa")
|
||||
#else
|
||||
#pragma GCC target("3dnowa")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW_A__
|
||||
#endif /* __3dNOW_A__ */
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2iw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfpnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pswapd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW_A__
|
||||
#undef __DISABLE_3dNOW_A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW_A__ */
|
||||
|
||||
#endif /* _MM3DNOW_H_INCLUDED */
|
@ -0,0 +1,78 @@
|
||||
/* Copyright (C) 2004-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
#if __STDC_HOSTED__
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
/* Allocate __size bytes at an address that is a multiple of __align.

   __align must be zero or a power of two; anything else fails with
   errno set to EINVAL (hosted builds only).  Alignments smaller than
   2 * sizeof (void *) are silently raised to that value.

   Returns a null pointer when __size is zero, when __align is invalid,
   when __size + __align does not fit in size_t (errno = ENOMEM on
   hosted builds), or when malloc fails.

   The pointer returned by malloc is stashed in the word immediately
   below the returned address, so the result must be released with
   _mm_free, never with free.  */
static __inline__ void *
_mm_malloc (size_t __size, size_t __align)
{
  void * __malloc_ptr;
  void * __aligned_ptr;

  /* Error if align is not a power of two.  */
  if (__align & (__align - 1))
    {
#if __STDC_HOSTED__
      errno = EINVAL;
#endif
      return ((void *) 0);
    }

  if (__size == 0)
    return ((void *) 0);

  /* Assume malloc'd pointer is aligned at least to sizeof (void*).
     If necessary, add another sizeof (void*) to store the value
     returned by malloc.  Effectively this enforces a minimum alignment
     of 2 * sizeof (void *).  */
  if (__align < 2 * sizeof (void *))
    __align = 2 * sizeof (void *);

  /* Refuse requests whose padded size would wrap around size_t;
     otherwise malloc would be asked for a tiny block and the caller
     would write far past its end.  */
  if (__size > (size_t) -1 - __align)
    {
#if __STDC_HOSTED__
      errno = ENOMEM;
#endif
      return ((void *) 0);
    }

  __malloc_ptr = malloc (__size + __align);
  if (!__malloc_ptr)
    return ((void *) 0);

  /* Round up to the next multiple of __align strictly above
     __malloc_ptr.  That leaves at least sizeof (void *) bytes of
     slack below the aligned address.  */
  __aligned_ptr = (void *) (((size_t) __malloc_ptr + __align)
			    & ~((size_t) (__align) - 1));

  /* Store the original pointer just before the aligned address so
     that _mm_free can recover it.  */
  ((void **) __aligned_ptr)[-1] = __malloc_ptr;

  return __aligned_ptr;
}
|
||||
|
||||
/* Release a block obtained from _mm_malloc.  A null argument is ignored.
   The pointer that is actually passed to free is read from the word
   stored immediately below __aligned_ptr.  */
static __inline__ void
_mm_free (void *__aligned_ptr)
{
  void **__slot;

  if (!__aligned_ptr)
    return;

  __slot = (void **) __aligned_ptr - 1;
  free (*__slot);
}
|
||||
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
@ -0,0 +1,965 @@
|
||||
/* Copyright (C) 2002-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.0. */
|
||||
|
||||
#ifndef _MMINTRIN_H_INCLUDED
|
||||
#define _MMINTRIN_H_INCLUDED
|
||||
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
|
||||
#pragma GCC push_options
|
||||
#ifdef __MMX_WITH_SSE__
|
||||
#pragma GCC target("sse2")
|
||||
#elif defined __x86_64__
|
||||
#pragma GCC target("sse,mmx")
|
||||
#else
|
||||
#pragma GCC target("mmx")
|
||||
#endif
|
||||
#define __DISABLE_MMX__
|
||||
#endif /* __MMX__ */
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
/* 8-byte vector with int elements; __may_alias__ lets it alias other types. */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
|
||||
/* 4-byte vector with int elements; same aliasing permission. */
typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__));
|
||||
/* 2-byte vector with short elements; same aliasing permission. */
typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__));
|
||||
|
||||
/* Unaligned version of the same type */
|
||||
/* Same layout as __m64, __m32 and __m16 respectively, but declared with
   __aligned__ (1) so objects need only byte alignment. */
typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
|
||||
typedef int __m32_u __attribute__ ((__vector_size__ (4), \
|
||||
__may_alias__, __aligned__ (1)));
|
||||
typedef short __m16_u __attribute__ ((__vector_size__ (2), \
|
||||
__may_alias__, __aligned__ (1)));
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
/* 8-byte vector of int elements. */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
|
||||
/* 8-byte vector of short elements. */
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
|
||||
/* 8-byte vector of char elements. */
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
|
||||
/* 8-byte vector of long long elements. */
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
|
||||
/* 8-byte vector of float elements. */
typedef float __v2sf __attribute__ ((__vector_size__ (8)));
|
||||
|
||||
/* Clear the multimedia state by issuing the EMMS builtin.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}
|
||||
|
||||
/* Alternate name for _mm_empty; simply forwards to it.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}
|
||||
|
||||
/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsi32_si64 (int __i)
|
||||
{
|
||||
return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_from_int (int __i)
|
||||
{
|
||||
return _mm_cvtsi32_si64 (__i);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Convert I to a __m64 object. */
|
||||
|
||||
/* Intel intrinsic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_from_int64 (long long __i)
|
||||
{
|
||||
return (__m64) __i;
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsi64_m64 (long long __i)
|
||||
{
|
||||
return (__m64) __i;
|
||||
}
|
||||
|
||||
/* Microsoft intrinsic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsi64x_si64 (long long __i)
|
||||
{
|
||||
return (__m64) __i;
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set_pi64x (long long __i)
|
||||
{
|
||||
return (__m64) __i;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Convert the lower 32 bits of the __m64 object into an integer. */
|
||||
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsi64_si32 (__m64 __i)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
|
||||
}
|
||||
|
||||
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_to_int (__m64 __i)
|
||||
{
|
||||
return _mm_cvtsi64_si32 (__i);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Convert the __m64 object to a 64bit integer. */
|
||||
|
||||
/* Intel intrinsic. */
|
||||
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_to_int64 (__m64 __i)
|
||||
{
|
||||
return (long long)__i;
|
||||
}
|
||||
|
||||
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtm64_si64 (__m64 __i)
|
||||
{
|
||||
return (long long)__i;
|
||||
}
|
||||
|
||||
/* Microsoft intrinsic. */
|
||||
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtsi64_si64x (__m64 __i)
|
||||
{
|
||||
return (long long)__i;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
||||
the result, and the four 16-bit values from M2 into the upper four 8-bit
|
||||
values of the result, all with signed saturation. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_packsswb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_packs_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
|
||||
the result, and the two 32-bit values from M2 into the upper two 16-bit
|
||||
values of the result, all with signed saturation. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_packssdw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_packs_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
|
||||
the result, and the four 16-bit values from M2 into the upper four 8-bit
|
||||
values of the result, all with unsigned saturation. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_packuswb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_packs_pu16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the four 8-bit values from the high half of M1 with the four
|
||||
8-bit values from the high half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpckhbw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpackhi_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the two 16-bit values from the high half of M1 with the two
|
||||
16-bit values from the high half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpckhwd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpackhi_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the 32-bit value from the high half of M1 with the 32-bit
|
||||
value from the high half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpckhdq (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpackhi_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the four 8-bit values from the low half of M1 with the four
|
||||
8-bit values from the low half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpcklbw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpacklo_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the two 16-bit values from the low half of M1 with the two
|
||||
16-bit values from the low half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpcklwd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpacklo_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Interleave the 32-bit value from the low half of M1 with the 32-bit
|
||||
value from the low half of M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_punpckldq (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_unpacklo_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 8-bit values in M1 to the 8-bit values in M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_add_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_add_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 16-bit values in M1 to the 16-bit values in M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_add_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_add_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 32-bit values in M1 to the 32-bit values in M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_add_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_add_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 64-bit value in M1 to the 64-bit value in M2. */
|
||||
#ifndef __SSE2__
|
||||
#pragma GCC push_options
|
||||
#ifdef __MMX_WITH_SSE__
|
||||
#pragma GCC target("sse2")
|
||||
#else
|
||||
#pragma GCC target("sse2,mmx")
|
||||
#endif
|
||||
#define __DISABLE_SSE2__
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_add_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
#ifdef __DISABLE_SSE2__
|
||||
#undef __DISABLE_SSE2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE2__ */
|
||||
|
||||
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
|
||||
saturated arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddsb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_adds_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
|
||||
saturated arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddsw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_adds_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
|
||||
saturated arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddusb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_adds_pu8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
|
||||
saturated arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_paddusw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_adds_pu16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_sub_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_sub_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_sub_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
|
||||
#ifndef __SSE2__
|
||||
#pragma GCC push_options
|
||||
#ifdef __MMX_WITH_SSE__
|
||||
#pragma GCC target("sse2")
|
||||
#else
|
||||
#pragma GCC target("sse2,mmx")
|
||||
#endif
|
||||
#define __DISABLE_SSE2__
|
||||
#endif /* __SSE2__ */
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sub_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
|
||||
}
|
||||
#ifdef __DISABLE_SSE2__
|
||||
#undef __DISABLE_SSE2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE2__ */
|
||||
|
||||
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
|
||||
saturating arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubsb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_subs_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
||||
signed saturating arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubsw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_subs_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
|
||||
unsigned saturating arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubusb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_subs_pu8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
|
||||
unsigned saturating arithmetic. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psubusw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_subs_pu16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
|
||||
four 32-bit intermediate results, which are then summed by pairs to
|
||||
produce two 32-bit results. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmaddwd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_madd_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
|
||||
M2 and produce the high 16 bits of the 32-bit results. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmulhw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_mulhi_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
|
||||
the low 16 bits of the results. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmullw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_mullo_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Shift four 16-bit values in M left by COUNT. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sll_pi16 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psllw (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_sll_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_slli_pi16 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psllwi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_slli_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift two 32-bit values in M left by COUNT. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sll_pi32 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pslld (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_sll_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_slli_pi32 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pslldi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_slli_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift the 64-bit value in M left by COUNT. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sll_si64 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psllq (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_sll_si64 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_slli_si64 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psllqi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_slli_si64 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sra_pi16 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psraw (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_sra_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srai_pi16 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrawi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_srai_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sra_pi32 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrad (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_sra_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srai_pi32 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psradi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_srai_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srl_pi16 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrlw (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_srl_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srli_pi16 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrlwi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_srli_pi16 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srl_pi32 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrld (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_srl_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srli_pi32 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrldi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_srli_pi32 (__m, __count);
|
||||
}
|
||||
|
||||
/* Shift the 64-bit value in M left by COUNT; shift in zeros. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srl_si64 (__m64 __m, __m64 __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrlq (__m64 __m, __m64 __count)
|
||||
{
|
||||
return _mm_srl_si64 (__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_srli_si64 (__m64 __m, int __count)
|
||||
{
|
||||
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_psrlqi (__m64 __m, int __count)
|
||||
{
|
||||
return _mm_srli_si64 (__m, __count);
|
||||
}
|
||||
|
||||
/* Bit-wise AND the 64-bit values in M1 and M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_and_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return __builtin_ia32_pand (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pand (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_and_si64 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
|
||||
64-bit value in M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return __builtin_ia32_pandn (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pandn (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_andnot_si64 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_or_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return __builtin_ia32_por (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_por (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_or_si64 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_xor_si64 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return __builtin_ia32_pxor (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pxor (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_xor_si64 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
|
||||
test is true and zero if false. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpeqb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpeq_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpgtb (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpgt_pi8 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
|
||||
the test is true and zero if false. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpeqw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpeq_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpgtw (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpgt_pi16 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
|
||||
the test is true and zero if false. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpeqd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpeq_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pcmpgtd (__m64 __m1, __m64 __m2)
|
||||
{
|
||||
return _mm_cmpgt_pi32 (__m1, __m2);
|
||||
}
|
||||
|
||||
/* Creates a 64-bit zero. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_setzero_si64 (void)
|
||||
{
|
||||
return (__m64)0LL;
|
||||
}
|
||||
|
||||
/* Creates a vector of two 32-bit values; I0 is least significant. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set_pi32 (int __i1, int __i0)
|
||||
{
|
||||
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
|
||||
}
|
||||
|
||||
/* Creates a vector of four 16-bit values; W0 is least significant. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
|
||||
{
|
||||
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
|
||||
}
|
||||
|
||||
/* Creates a vector of eight 8-bit values; B0 is least significant. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
|
||||
char __b3, char __b2, char __b1, char __b0)
|
||||
{
|
||||
return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
|
||||
__b4, __b5, __b6, __b7);
|
||||
}
|
||||
|
||||
/* Similar, but with the arguments in reverse order. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_setr_pi32 (int __i0, int __i1)
|
||||
{
|
||||
return _mm_set_pi32 (__i1, __i0);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
|
||||
{
|
||||
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
|
||||
}
|
||||
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
|
||||
char __b4, char __b5, char __b6, char __b7)
|
||||
{
|
||||
return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
||||
}
|
||||
|
||||
/* Creates a vector of two 32-bit values, both elements containing I. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set1_pi32 (int __i)
|
||||
{
|
||||
return _mm_set_pi32 (__i, __i);
|
||||
}
|
||||
|
||||
/* Creates a vector of four 16-bit values, all elements containing W. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set1_pi16 (short __w)
|
||||
{
|
||||
return _mm_set_pi16 (__w, __w, __w, __w);
|
||||
}
|
||||
|
||||
/* Creates a vector of eight 8-bit values, all elements containing B. */
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_set1_pi8 (char __b)
|
||||
{
|
||||
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
|
||||
}
|
||||
#ifdef __DISABLE_MMX__
|
||||
#undef __DISABLE_MMX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MMX__ */
|
||||
|
||||
#endif /* _MMINTRIN_H_INCLUDED */
|
@ -0,0 +1,74 @@
|
||||
/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <movdirintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _MOVDIRINTRIN_H_INCLUDED
|
||||
#define _MOVDIRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __MOVDIRI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("movdiri")
|
||||
#define __DISABLE_MOVDIRI__
|
||||
#endif /* __MOVDIRI__ */
|
||||
|
||||
/* Hand the 32-bit value __A and the destination __P (viewed as a pointer
   to unsigned int) to __builtin_ia32_directstoreu_u32.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_directstoreu_u32 (void * __P, unsigned int __A)
{
  unsigned int *__dst = (unsigned int *) __P;

  __builtin_ia32_directstoreu_u32 (__dst, __A);
}
|
||||
#ifdef __x86_64__
|
||||
/* Hand the 64-bit value __A and the destination __P (viewed as a pointer
   to unsigned long long) to __builtin_ia32_directstoreu_u64.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_directstoreu_u64 (void * __P, unsigned long long __A)
{
  unsigned long long *__dst = (unsigned long long *) __P;

  __builtin_ia32_directstoreu_u64 (__dst, __A);
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_MOVDIRI__
|
||||
#undef __DISABLE_MOVDIRI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MOVDIRI__ */
|
||||
|
||||
#ifndef __MOVDIR64B__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("movdir64b")
|
||||
#define __DISABLE_MOVDIR64B__
|
||||
#endif /* __MOVDIR64B__ */
|
||||
|
||||
/* Pass the destination pointer __P and the source pointer __Q straight to
   __builtin_ia32_movdir64b.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_movdir64b (void * __P, const void * __Q)
{
  void *__dst = __P;
  const void *__src = __Q;

  __builtin_ia32_movdir64b (__dst, __src);
}
|
||||
|
||||
#ifdef __DISABLE_MOVDIR64B__
|
||||
#undef __DISABLE_MOVDIR64B__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MOVDIR64B__ */
|
||||
#endif /* _MOVDIRINTRIN_H_INCLUDED. */
|
@ -0,0 +1,52 @@
|
||||
/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _MWAITINTRIN_H_INCLUDED
|
||||
#define _MWAITINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __MWAIT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("mwait")
|
||||
#define __DISABLE_MWAIT__
|
||||
#endif /* __MWAIT__ */
|
||||
|
||||
/* Forward the address __P and the two unsigned operands __E and __H, in
   that order, to __builtin_ia32_monitor.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
{
  void const *__addr = __P;

  __builtin_ia32_monitor (__addr, __E, __H);
}
|
||||
|
||||
/* Forward the two unsigned operands __E and __H, in that order, to
   __builtin_ia32_mwait.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwait (unsigned int __E, unsigned int __H)
{
  unsigned int __first = __E;
  unsigned int __second = __H;

  __builtin_ia32_mwait (__first, __second);
}
|
||||
|
||||
#ifdef __DISABLE_MWAIT__
|
||||
#undef __DISABLE_MWAIT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MWAIT__ */
|
||||
|
||||
#endif /* _MWAITINTRIN_H_INCLUDED */
|
@ -0,0 +1,50 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _MWAITXINTRIN_H_INCLUDED
|
||||
#define _MWAITXINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __MWAITX__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("mwaitx")
|
||||
#define __DISABLE_MWAITX__
|
||||
#endif /* __MWAITX__ */
|
||||
|
||||
/* Forward the address __P and the two unsigned operands __E and __H, in
   that order, to __builtin_ia32_monitorx.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
{
  void const *__addr = __P;

  __builtin_ia32_monitorx (__addr, __E, __H);
}
|
||||
|
||||
/* Forward the three unsigned operands __E, __H and __C, in that order, to
   __builtin_ia32_mwaitx.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
{
  unsigned int __first = __E;
  unsigned int __second = __H;
  unsigned int __third = __C;

  __builtin_ia32_mwaitx (__first, __second, __third);
}
|
||||
|
||||
#ifdef __DISABLE_MWAITX__
|
||||
#undef __DISABLE_MWAITX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MWAITX__ */
|
||||
|
||||
#endif /* _MWAITXINTRIN_H_INCLUDED */
|
@ -0,0 +1,33 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 10.0. */
|
||||
|
||||
#ifndef _NMMINTRIN_H_INCLUDED
|
||||
#define _NMMINTRIN_H_INCLUDED
|
||||
|
||||
/* We just include SSE4.1 header file. */
|
||||
#include <smmintrin.h>
|
||||
|
||||
#endif /* _NMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,78 @@
|
||||
/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <pconfigintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PCONFIGINTRIN_H_INCLUDED
|
||||
#define _PCONFIGINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __PCONFIG__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("pconfig")
|
||||
#define __DISABLE_PCONFIG__
|
||||
#endif /* __PCONFIG__ */
|
||||
|
||||
/* Emit a volatile "pconfig" with LEAF as the "a" input and B as the "b"
   input.  Whatever the instruction leaves in "a" is stored into RETVAL.
   The condition codes are declared clobbered.  */
#define __pconfig_b(leaf, b, retval)					\
  __asm__ __volatile__ ("pconfig\n\t"					\
			: "=a" (retval) : "a" (leaf), "b" (b) : "cc")
|
||||
|
||||
/* Emit a volatile "pconfig" with LEAF, B, C and D as the "a", "b", "c" and
   "d" inputs.  On return RETVAL receives "a", and B, C and D are
   overwritten with the contents of "b", "c" and "d" (so they must be
   lvalues).  The condition codes are declared clobbered.  */
#define __pconfig_generic(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("pconfig\n\t"					\
			: "=a" (retval), "=b" (b), "=c" (c), "=d" (d)	\
			: "a" (leaf), "b" (b), "c" (c), "d" (d) : "cc")
|
||||
|
||||
/* Issue PCONFIG for leaf __L with the operand array __D and return the
   value the instruction leaves in the "a" register (0 is the initial
   value of the result variable).

   If __L is a compile-time constant equal to __PCONFIG_KEY_PROGRAM (0x01),
   only __D[0] is passed (as the "b" input) and __D is not written.  For
   every other leaf, and whenever __L is not a compile-time constant,
   __D[0], __D[1] and __D[2] are passed as the "b", "c" and "d" inputs and
   are overwritten with the corresponding outputs.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pconfig_u32 (const unsigned int __L, size_t __D[])
{
  enum __pconfig_type
  {
    __PCONFIG_KEY_PROGRAM = 0x01,
  };

  unsigned int __ret = 0;

  /* The single-operand form is only chosen when the leaf is known at
     compile time; everything else takes the generic four-register form.  */
  if (__builtin_constant_p (__L) && __L == __PCONFIG_KEY_PROGRAM)
    __pconfig_b (__L, __D[0], __ret);
  else
    __pconfig_generic (__L, __D[0], __D[1], __D[2], __ret);

  return __ret;
}
|
||||
|
||||
#ifdef __DISABLE_PCONFIG__
|
||||
#undef __DISABLE_PCONFIG__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PCONFIG__ */
|
||||
|
||||
#endif /* _PCONFIGINTRIN_H_INCLUDED */
|
@ -0,0 +1,56 @@
|
||||
/* Copyright (C) 2015-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <pkuintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PKUINTRIN_H_INCLUDED
|
||||
#define _PKUINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __PKU__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("pku")
|
||||
#define __DISABLE_PKU__
|
||||
#endif /* __PKU__ */
|
||||
|
||||
/* Return the value produced by __builtin_ia32_rdpkru.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdpkru_u32 (void)
{
  unsigned int __value = __builtin_ia32_rdpkru ();

  return __value;
}
|
||||
|
||||
/* Pass __key unchanged to __builtin_ia32_wrpkru.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrpkru (unsigned int __key)
{
  unsigned int __value = __key;

  __builtin_ia32_wrpkru (__value);
}
|
||||
|
||||
#ifdef __DISABLE_PKU__
|
||||
#undef __DISABLE_PKU__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PKU__ */
|
||||
|
||||
#endif /* _PKUINTRIN_H_INCLUDED */
|
@ -0,0 +1,121 @@
|
||||
/* Copyright (C) 2003-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 9.0. */
|
||||
|
||||
#ifndef _PMMINTRIN_H_INCLUDED
|
||||
#define _PMMINTRIN_H_INCLUDED
|
||||
|
||||
/* We need definitions from the SSE2 and SSE header files.  */
|
||||
#include <emmintrin.h>
|
||||
#include <mwaitintrin.h>
|
||||
|
||||
#ifndef __SSE3__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse3")
|
||||
#define __DISABLE_SSE3__
|
||||
#endif /* __SSE3__ */
|
||||
|
||||
/* Additional bits in the MXCSR.  */
#define _MM_DENORMALS_ZERO_MASK	0x0040
#define _MM_DENORMALS_ZERO_ON	0x0040
#define _MM_DENORMALS_ZERO_OFF	0x0000

/* Clear the _MM_DENORMALS_ZERO_MASK bit in the value read with
   _mm_getcsr, OR in MODE, and write the result back with _mm_setcsr.  */
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
  _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))

/* Yield the _MM_DENORMALS_ZERO_MASK bit of the value read with
   _mm_getcsr.  */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movehdup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_moveldup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loaddup_pd (double const *__P)
|
||||
{
|
||||
return _mm_load1_pd (__P);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movedup_pd (__m128d __X)
|
||||
{
|
||||
return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_lddqu_si128 (__m128i const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE3__
|
||||
#undef __DISABLE_SSE3__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE3__ */
|
||||
|
||||
#endif /* _PMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,53 @@
|
||||
/* Copyright (C) 2009-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _POPCNTINTRIN_H_INCLUDED
|
||||
#define _POPCNTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __POPCNT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("popcnt")
|
||||
#define __DISABLE_POPCNT__
|
||||
#endif /* __POPCNT__ */
|
||||
|
||||
/* Count the bits of __X that are set to 1 (via __builtin_popcount).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u32 (unsigned int __X)
{
  int __bits = __builtin_popcount (__X);
  return __bits;
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Count the bits of __X that are set to 1 (via __builtin_popcountll);
   the count is widened to long long on return.  */
extern __inline long long
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u64 (unsigned long long __X)
{
  long long __bits = __builtin_popcountll (__X);
  return __bits;
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_POPCNT__
|
||||
#undef __DISABLE_POPCNT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_POPCNT__ */
|
||||
|
||||
#endif /* _POPCNTINTRIN_H_INCLUDED */
|
@ -0,0 +1,61 @@
|
||||
/* Copyright (C) 2022-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <prfchiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PRFCHIINTRIN_H_INCLUDED
|
||||
#define _PRFCHIINTRIN_H_INCLUDED
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
|
||||
#ifndef __PREFETCHI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("prefetchi")
|
||||
#define __DISABLE_PREFETCHI__
|
||||
#endif /* __PREFETCHI__ */
|
||||
|
||||
/* Call __builtin_ia32_prefetchi on __P with the constant hint 3.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchit0 (void *__P)
{
  __builtin_ia32_prefetchi (__P, 3);
  return;
}
|
||||
|
||||
/* Call __builtin_ia32_prefetchi on __P with the constant hint 2.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchit1 (void *__P)
{
  __builtin_ia32_prefetchi (__P, 2);
  return;
}
|
||||
|
||||
#ifdef __DISABLE_PREFETCHI__
|
||||
#undef __DISABLE_PREFETCHI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PREFETCHI__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#endif /* _PRFCHIINTRIN_H_INCLUDED */
|
@ -0,0 +1,37 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED && !defined _MM3DNOW_H_INCLUDED
|
||||
# error "Never use <prfchwintrin.h> directly; include <immintrin.h> or <mm3dnow.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PRFCHWINTRIN_H_INCLUDED
|
||||
#define _PRFCHWINTRIN_H_INCLUDED
|
||||
|
||||
/* Issue __builtin_prefetch on __P with rw = 1 and locality = 3.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
  __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
  return;
}
|
||||
|
||||
#endif /* _PRFCHWINTRIN_H_INCLUDED */
|
@ -0,0 +1,100 @@
|
||||
/* Copyright (C) 2019-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
#error "Never use <raointintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif // _X86GPRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __RAOINTINTRIN_H_INCLUDED
|
||||
#define __RAOINTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __RAOINT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("raoint")
|
||||
#define __DISABLE_RAOINT__
|
||||
#endif /* __RAOINT__ */
|
||||
|
||||
/* Pass the int pointer __A and the value __B to __builtin_ia32_aadd32.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aadd_i32 (int *__A, int __B)
{
  int *__dst = __A;
  __builtin_ia32_aadd32 (__dst, __B);
}
|
||||
|
||||
/* Pass the int pointer __A and the value __B to __builtin_ia32_aand32.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aand_i32 (int *__A, int __B)
{
  int *__dst = __A;
  __builtin_ia32_aand32 (__dst, __B);
}
|
||||
|
||||
/* Pass the int pointer __A and the value __B to __builtin_ia32_aor32.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aor_i32 (int *__A, int __B)
{
  int *__dst = __A;
  __builtin_ia32_aor32 (__dst, __B);
}
|
||||
|
||||
/* Pass the int pointer __A and the value __B to __builtin_ia32_axor32.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_axor_i32 (int *__A, int __B)
{
  int *__dst = __A;
  __builtin_ia32_axor32 (__dst, __B);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Pass the long long pointer __A and the value __B to
   __builtin_ia32_aadd64.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aadd_i64 (long long *__A, long long __B)
{
  long long *__dst = __A;
  __builtin_ia32_aadd64 (__dst, __B);
}
|
||||
|
||||
/* Pass the long long pointer __A and the value __B to
   __builtin_ia32_aand64.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aand_i64 (long long *__A, long long __B)
{
  long long *__dst = __A;
  __builtin_ia32_aand64 (__dst, __B);
}
|
||||
|
||||
/* Pass the long long pointer __A and the value __B to
   __builtin_ia32_aor64.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_aor_i64 (long long *__A, long long __B)
{
  long long *__dst = __A;
  __builtin_ia32_aor64 (__dst, __B);
}
|
||||
|
||||
/* Pass the long long pointer __A and the value __B to
   __builtin_ia32_axor64.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_axor_i64 (long long *__A, long long __B)
{
  long long *__dst = __A;
  __builtin_ia32_axor64 (__dst, __B);
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __DISABLE_RAOINT__
|
||||
#undef __DISABLE_RAOINT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RAOINT__ */
|
||||
|
||||
#endif /* __RAOINTINTRIN_H_INCLUDED */
|
@ -0,0 +1,66 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <rdseedintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _RDSEEDINTRIN_H_INCLUDED
|
||||
#define _RDSEEDINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __RDSEED__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdseed")
|
||||
#define __DISABLE_RDSEED__
|
||||
#endif /* __RDSEED__ */
|
||||
|
||||
|
||||
/* Call __builtin_ia32_rdseed_hi_step with the destination __p and return
   its int result (presumably a success flag -- confirm against the
   builtin's documentation).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_rdseed16_step (unsigned short *__p)
{
  int __status = __builtin_ia32_rdseed_hi_step (__p);
  return __status;
}
|
||||
|
||||
/* Call __builtin_ia32_rdseed_si_step with the destination __p and return
   its int result (presumably a success flag -- confirm against the
   builtin's documentation).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_rdseed32_step (unsigned int *__p)
{
  int __status = __builtin_ia32_rdseed_si_step (__p);
  return __status;
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/* Call __builtin_ia32_rdseed_di_step with the destination __p and return
   its int result (presumably a success flag -- confirm against the
   builtin's documentation).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_rdseed64_step (unsigned long long *__p)
{
  int __status = __builtin_ia32_rdseed_di_step (__p);
  return __status;
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_RDSEED__
|
||||
#undef __DISABLE_RDSEED__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDSEED__ */
|
||||
|
||||
#endif /* _RDSEEDINTRIN_H_INCLUDED */
|
@ -0,0 +1,84 @@
|
||||
/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <rtmintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _RTMINTRIN_H_INCLUDED
|
||||
#define _RTMINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __RTM__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rtm")
|
||||
#define __DISABLE_RTM__
|
||||
#endif /* __RTM__ */
|
||||
|
||||
/* Value _xbegin returns when the RTM region was started.  */
#define _XBEGIN_STARTED		(~0u)

/* Single-bit masks for bits 0 through 5; presumably tested against the
   abort condition that _xbegin returns otherwise (confirm the meaning of
   each bit against the Intel SDM).  */
#define _XABORT_EXPLICIT	(1 << 0)
#define _XABORT_RETRY		(1 << 1)
#define _XABORT_CONFLICT	(1 << 2)
#define _XABORT_CAPACITY	(1 << 3)
#define _XABORT_DEBUG		(1 << 4)
#define _XABORT_NESTED		(1 << 5)

/* Extract bits 31:24 of X.  */
#define _XABORT_CODE(x)		(((x) >> 24) & 0xFF)
|
||||
|
||||
/* Begin an RTM code region.  The result is _XBEGIN_STARTED when the
   region was started; any other value is the abort condition.  */
extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_xbegin (void)
{
  /* Returned directly: no temporary is introduced around the builtin.  */
  return __builtin_ia32_xbegin ();
}
|
||||
|
||||
/* Mark the end of an RTM code region.  When this closes the outermost
   transaction a commit is attempted; if the commit fails, control is
   transferred to the outermost transaction's fallback handler.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_xend (void)
{
  __builtin_ia32_xend ();
  return;
}
|
||||
|
||||
/* Force an RTM abort.  Control is transferred to the outermost
   transaction's fallback handler with the abort condition IMM.
   When __OPTIMIZE__ is defined this is an inline function; otherwise it
   is a macro that hands its argument straight to the builtin.  */
#ifdef __OPTIMIZE__
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_xabort (const unsigned int __imm)
{
  __builtin_ia32_xabort (__imm);
  return;
}
#else
#define _xabort(N)  __builtin_ia32_xabort (N)
#endif /* __OPTIMIZE__ */
|
||||
|
||||
#ifdef __DISABLE_RTM__
|
||||
#undef __DISABLE_RTM__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RTM__ */
|
||||
|
||||
#endif /* _RTMINTRIN_H_INCLUDED */
|
@ -0,0 +1,49 @@
|
||||
/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <serializeintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _SERIALIZE_H_INCLUDED
|
||||
#define _SERIALIZE_H_INCLUDED
|
||||
|
||||
#ifndef __SERIALIZE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("serialize")
|
||||
#define __DISABLE_SERIALIZE__
|
||||
#endif /* __SERIALIZE__ */
|
||||
|
||||
/* Invoke __builtin_ia32_serialize; takes no arguments and returns
   nothing.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_serialize (void)
{
  __builtin_ia32_serialize ();
  return;
}
|
||||
|
||||
#ifdef __DISABLE_SERIALIZE__
|
||||
#undef __DISABLE_SERIALIZE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SERIALIZE__ */
|
||||
|
||||
#endif /* _SERIALIZE_H_INCLUDED. */
|
@ -0,0 +1,253 @@
|
||||
/* Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _SGXINTRIN_H_INCLUDED
|
||||
#define _SGXINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SGX__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sgx")
|
||||
#define __DISABLE_SGX__
|
||||
#endif /* __SGX__ */
|
||||
|
||||
/* Emit `encls' with LEAF in the "a" register and B, C in the "b" and "c"
   registers; the "a" register is stored to RETVAL afterwards.  */
#define __encls_bc(leaf, b, c, retval)				\
  __asm__ __volatile__ ("encls\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "b" (b), "c" (c)		\
			: "cc")
|
||||
|
||||
/* Emit `encls' with LEAF in the "a" register and B, C, D in the "b", "c"
   and "d" registers; the "a" register is stored to RETVAL afterwards.  */
#define __encls_bcd(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("encls\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "b" (b), "c" (c), "d" (d)	\
			: "cc")
|
||||
|
||||
/* Emit `encls' with LEAF in the "a" register and C in the "c" register;
   the "a" register is stored to RETVAL afterwards.  */
#define __encls_c(leaf, c, retval)				\
  __asm__ __volatile__ ("encls\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "c" (c)			\
			: "cc")
|
||||
|
||||
/* Emit `encls' with LEAF in the "a" register and C in the "c" register.
   Afterwards the "a" register is stored to RETVAL and the "b" register
   to B.  No clobber list is given.  */
#define __encls_edbgrd(leaf, b, c, retval)			\
  __asm__ __volatile__ ("encls\n\t"				\
			: "=a" (retval), "=b" (b)		\
			: "a" (leaf), "c" (c))
|
||||
|
||||
/* Emit `encls' with LEAF, B, C, D in the "a", "b", "c", "d" registers.
   Afterwards all four registers are stored back: "a" to RETVAL and
   "b", "c", "d" to B, C, D (which must therefore be lvalues).  */
#define __encls_generic(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("encls\n\t"				\
			: "=a" (retval), "=b" (b), "=c" (c), "=d" (d) \
			: "a" (leaf), "b" (b), "c" (c), "d" (d)	\
			: "cc")
|
||||
|
||||
/* Emit `enclu' with LEAF in the "a" register and B, C in the "b" and "c"
   registers; the "a" register is stored to RETVAL afterwards.  */
#define __enclu_bc(leaf, b, c, retval)				\
  __asm__ __volatile__ ("enclu\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "b" (b), "c" (c)		\
			: "cc")
|
||||
|
||||
/* Emit `enclu' with LEAF in the "a" register and B, C, D in the "b", "c"
   and "d" registers; the "a" register is stored to RETVAL afterwards.  */
#define __enclu_bcd(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("enclu\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "b" (b), "c" (c), "d" (d)	\
			: "cc")
|
||||
|
||||
/* Emit `enclu' with LEAF, B, C in the "a", "b", "c" registers.
   Afterwards the "a" register is stored to RETVAL and the "c" register
   back to C (which must therefore be an lvalue).  */
#define __enclu_eenter(leaf, b, c, retval)			\
  __asm__ __volatile__ ("enclu\n\t"				\
			: "=a" (retval), "=c" (c)		\
			: "a" (leaf), "b" (b), "c" (c)		\
			: "cc")
|
||||
|
||||
/* Emit `enclu' with LEAF and B in the "a" and "b" registers; C is not an
   input.  Afterwards the "a" register is stored to RETVAL and the "c"
   register to C (which must therefore be an lvalue).  */
#define __enclu_eexit(leaf, b, c, retval)			\
  __asm__ __volatile__ ("enclu\n\t"				\
			: "=a" (retval), "=c" (c)		\
			: "a" (leaf), "b" (b)			\
			: "cc")
|
||||
|
||||
/* Emit `enclu' with LEAF, B, C, D in the "a", "b", "c", "d" registers.
   Afterwards all four registers are stored back: "a" to RETVAL and
   "b", "c", "d" to B, C, D (which must therefore be lvalues).  */
#define __enclu_generic(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("enclu\n\t"				\
			: "=a" (retval), "=b" (b), "=c" (c), "=d" (d) \
			: "a" (leaf), "b" (b), "c" (c), "d" (d)	\
			: "cc")
|
||||
|
||||
/* Emit `enclv' with LEAF in the "a" register and B, C in the "b" and "c"
   registers; the "a" register is stored to RETVAL afterwards.  */
#define __enclv_bc(leaf, b, c, retval)				\
  __asm__ __volatile__ ("enclv\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "b" (b), "c" (c)		\
			: "cc")
|
||||
|
||||
/* Emit `enclv' with LEAF in the "a" register and C, D in the "c" and "d"
   registers; the "a" register is stored to RETVAL afterwards.  */
#define __enclv_cd(leaf, c, d, retval)				\
  __asm__ __volatile__ ("enclv\n\t"				\
			: "=a" (retval)				\
			: "a" (leaf), "c" (c), "d" (d)		\
			: "cc")
|
||||
|
||||
/* Emit `enclv' with LEAF, B, C, D in the "a", "b", "c", "d" registers.
   Afterwards all four registers are stored back: "a" to RETVAL and
   "b", "c", "d" to B, C, D (which must therefore be lvalues).

   The "=c" output is bound to C, matching __encls_generic and
   __enclu_generic.  It was previously bound to B, which listed B as an
   output twice and never wrote the "c" register back to C.  */
#define __enclv_generic(leaf, b, c, d, retval)			\
  __asm__ __volatile__ ("enclv\n\t"				\
			: "=a" (retval), "=b" (b), "=c" (c), "=d" (d) \
			: "a" (leaf), "b" (b), "c" (c), "d" (d)	\
			: "cc")
|
||||
|
||||
/* Issue `encls' for leaf __L through one of the __encls_* macros.

   __D[0], __D[1] and __D[2] are the operands for the "b", "c" and "d"
   registers.  When __L is not a compile-time constant, or is not one of
   the leaves listed below, __encls_generic is used, which loads all
   three and stores all three back into __D.  For a known constant leaf
   only the registers listed for that leaf are used.

   Returns the value left in the "a" register (0 is the initial value
   of the result variable).  */
extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_encls_u32 (const unsigned int __L, size_t __D[])
{
  enum __encls_type
  {
    __SGX_ECREATE = 0x00,
    __SGX_EADD    = 0x01,
    __SGX_EINIT   = 0x02,
    __SGX_EREMOVE = 0x03,
    __SGX_EDBGRD  = 0x04,
    __SGX_EDBGWR  = 0x05,
    __SGX_EEXTEND = 0x06,
    __SGX_ELDB    = 0x07,
    __SGX_ELDU    = 0x08,
    __SGX_EBLOCK  = 0x09,
    __SGX_EPA     = 0x0A,
    __SGX_EWB     = 0x0B,
    __SGX_ETRACK  = 0x0C,
    __SGX_EAUG    = 0x0D,
    __SGX_EMODPR  = 0x0E,
    __SGX_EMODT   = 0x0F,
    __SGX_ERDINFO = 0x10,
    __SGX_ETRACKC = 0x11,
    __SGX_ELDBC   = 0x12,
    __SGX_ELDUC   = 0x13
  };
  unsigned int __R = 0;
  enum __encls_type __T = (enum __encls_type) __L;

  /* Leaf unknown at compile time: fall back to the all-registers form.  */
  if (!__builtin_constant_p (__T))
    {
      __encls_generic (__L, __D[0], __D[1], __D[2], __R);
      return __R;
    }

  switch (__T)
    {
    /* Leaves taking "b" and "c".  */
    case __SGX_ECREATE:
    case __SGX_EADD:
    case __SGX_EDBGWR:
    case __SGX_EEXTEND:
    case __SGX_EPA:
    case __SGX_EMODPR:
    case __SGX_EMODT:
    case __SGX_EAUG:
    case __SGX_ERDINFO:
      __encls_bc (__L, __D[0], __D[1], __R);
      break;

    /* Leaves taking "b", "c" and "d".  */
    case __SGX_EINIT:
    case __SGX_ELDB:
    case __SGX_ELDU:
    case __SGX_EWB:
    case __SGX_ELDBC:
    case __SGX_ELDUC:
      __encls_bcd (__L, __D[0], __D[1], __D[2], __R);
      break;

    /* Leaves taking only "c".  */
    case __SGX_EREMOVE:
    case __SGX_EBLOCK:
    case __SGX_ETRACK:
    case __SGX_ETRACKC:
      __encls_c (__L, __D[1], __R);
      break;

    /* Takes "c" and writes "b" back into __D[0].  */
    case __SGX_EDBGRD:
      __encls_edbgrd (__L, __D[0], __D[1], __R);
      break;

    default:
      __encls_generic (__L, __D[0], __D[1], __D[2], __R);
      break;
    }
  return __R;
}
|
||||
|
||||
/* Issue `enclu' for leaf __L through one of the __enclu_* macros.

   __D[0], __D[1] and __D[2] are the operands for the "b", "c" and "d"
   registers.  When __L is not a compile-time constant, or is not one of
   the leaves listed below, __enclu_generic is used, which loads all
   three and stores all three back into __D.  For a known constant leaf
   only the registers listed for that leaf are used.

   Returns the value left in the "a" register (0 is the initial value
   of the result variable).  */
extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_enclu_u32 (const unsigned int __L, size_t __D[])
{
  enum __enclu_type
  {
    __SGX_EREPORT     = 0x00,
    __SGX_EGETKEY     = 0x01,
    __SGX_EENTER      = 0x02,
    __SGX_ERESUME     = 0x03,
    __SGX_EEXIT       = 0x04,
    __SGX_EACCEPT     = 0x05,
    __SGX_EMODPE      = 0x06,
    __SGX_EACCEPTCOPY = 0x07
  };
  unsigned int __R = 0;
  enum __enclu_type __T = (enum __enclu_type) __L;

  /* Leaf unknown at compile time: fall back to the all-registers form.  */
  if (!__builtin_constant_p (__T))
    {
      __enclu_generic (__L, __D[0], __D[1], __D[2], __R);
      return __R;
    }

  switch (__T)
    {
    /* Leaves taking "b", "c" and "d".  */
    case __SGX_EREPORT:
    case __SGX_EACCEPTCOPY:
      __enclu_bcd (__L, __D[0], __D[1], __D[2], __R);
      break;

    /* Leaves taking "b" and "c".  */
    case __SGX_EGETKEY:
    case __SGX_ERESUME:
    case __SGX_EACCEPT:
    case __SGX_EMODPE:
      __enclu_bc (__L, __D[0], __D[1], __R);
      break;

    /* Takes "b" and "c"; "c" is written back into __D[1].  */
    case __SGX_EENTER:
      __enclu_eenter (__L, __D[0], __D[1], __R);
      break;

    /* Takes "b" only; "c" is written into __D[1].  */
    case __SGX_EEXIT:
      __enclu_eexit (__L, __D[0], __D[1], __R);
      break;

    default:
      __enclu_generic (__L, __D[0], __D[1], __D[2], __R);
      break;
    }
  return __R;
}
|
||||
|
||||
/* Issue `enclv' for leaf __L through one of the __enclv_* macros.

   __D[0], __D[1] and __D[2] are the operands for the "b", "c" and "d"
   registers.  When __L is not a compile-time constant, or is not one of
   the leaves listed below, __enclv_generic is used with all three.  For
   a known constant leaf only the registers listed for that leaf are
   used.

   Returns the value left in the "a" register (0 is the initial value
   of the result variable).  */
extern __inline unsigned int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_enclv_u32 (const unsigned int __L, size_t __D[])
{
  enum __enclv_type
  {
    __SGX_EDECVIRTCHILD = 0x00,
    __SGX_EINCVIRTCHILD = 0x01,
    __SGX_ESETCONTEXT   = 0x02
  };
  unsigned int __R = 0;

  /* Leaf unknown at compile time: fall back to the all-registers form.  */
  if (!__builtin_constant_p (__L))
    {
      __enclv_generic (__L, __D[0], __D[1], __D[2], __R);
      return __R;
    }

  switch (__L)
    {
    /* Leaves taking "b" and "c".  */
    case __SGX_EDECVIRTCHILD:
    case __SGX_EINCVIRTCHILD:
      __enclv_bc (__L, __D[0], __D[1], __R);
      break;

    /* Leaf taking "c" and "d".  */
    case __SGX_ESETCONTEXT:
      __enclv_cd (__L, __D[1], __D[2], __R);
      break;

    default:
      __enclv_generic (__L, __D[0], __D[1], __D[2], __R);
      break;
    }
  return __R;
}
|
||||
|
||||
#ifdef __DISABLE_SGX__
|
||||
#undef __DISABLE_SGX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SGX__ */
|
||||
|
||||
#endif /* _SGXINTRIN_H_INCLUDED */
|
@ -0,0 +1,98 @@
|
||||
/* Copyright (C) 2013-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <shaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _SHAINTRIN_H_INCLUDED
|
||||
#define _SHAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SHA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sha")
|
||||
#define __DISABLE_SHA__
|
||||
#endif /* __SHA__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha1msg2_epu32 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha1msg2 ((__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha1nexte_epu32 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha1nexte ((__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
/* _mm_sha1rnds4_epu32 wraps __builtin_ia32_sha1rnds4.  With __OPTIMIZE__
   defined it is an always-inline function that takes the selector as
   `const int __I'; otherwise it is a macro that hands (int)(I) straight
   to the builtin.
   NOTE(review): the macro fallback presumably exists because the builtin
   needs a compile-time constant as its last operand -- confirm against
   the GCC x86 builtin documentation.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sha1rnds4_epu32 (__m128i __A, __m128i __B, const int __I)
{
  return (__m128i) __builtin_ia32_sha1rnds4 ((__v4si) __A, (__v4si) __B, __I);
}
#else
#define _mm_sha1rnds4_epu32(A, B, I)                              \
  ((__m128i) __builtin_ia32_sha1rnds4 ((__v4si)(__m128i)(A),      \
                                       (__v4si)(__m128i)(B), (int)(I)))
#endif
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha256msg1_epu32 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha256msg1 ((__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha256msg2_epu32 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha256msg2 ((__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_sha256rnds2_epu32 (__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_sha256rnds2 ((__v4si) __A, (__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SHA__
|
||||
#undef __DISABLE_SHA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SHA__ */
|
||||
|
||||
#endif /* _SHAINTRIN_H_INCLUDED */
|
@ -0,0 +1,852 @@
|
||||
/* Copyright (C) 2007-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 10.0. */
|
||||
|
||||
#ifndef _SMMINTRIN_H_INCLUDED
|
||||
#define _SMMINTRIN_H_INCLUDED
|
||||
|
||||
/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
|
||||
files. */
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.1")
|
||||
#define __DISABLE_SSE4_1__
|
||||
#endif /* __SSE4_1__ */
|
||||
|
||||
/* Rounding mode macros.  The first group holds the direction selectors
   (0x00-0x04), the second the exception-control flags (0x00 / 0x08),
   and the third the named modes formed by OR-ing one value from each
   of the first two groups.  */
#define _MM_FROUND_TO_NEAREST_INT	0x00
#define _MM_FROUND_TO_NEG_INF		0x01
#define _MM_FROUND_TO_POS_INF		0x02
#define _MM_FROUND_TO_ZERO		0x03
#define _MM_FROUND_CUR_DIRECTION	0x04

#define _MM_FROUND_RAISE_EXC		0x00
#define _MM_FROUND_NO_EXC		0x08

#define _MM_FROUND_NINT		\
  (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR	\
  (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL		\
  (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC	\
  (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT		\
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT	\
  (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
|
||||
|
||||
/* Test Instruction */
|
||||
/* Packed integer 128-bit bitwise comparison.  Return 1 if
   (__V & __M) == 0.  Both operands are cast to __v2di and handed to
   __builtin_ia32_ptestz128 with __M first.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testz_si128 (__m128i __M, __m128i __V)
{
  return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
}
|
||||
|
||||
/* Packed integer 128-bit bitwise comparison.  Return 1 if
   (__V & ~__M) == 0.  Both operands are cast to __v2di and handed to
   __builtin_ia32_ptestc128 with __M first.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testc_si128 (__m128i __M, __m128i __V)
{
  return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
}
|
||||
|
||||
/* Packed integer 128-bit bitwise comparison.  Return 1 if
   (__V & __M) != 0 && (__V & ~__M) != 0.  Both operands are cast to
   __v2di and handed to __builtin_ia32_ptestnzc128 with __M first.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_testnzc_si128 (__m128i __M, __m128i __V)
{
  return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
}
|
||||
|
||||
/* Macros for packed integer 128-bit comparison intrinsics.  Each one is
   a thin alias for one of the _mm_test*_si128 functions.  Note that
   _mm_test_all_ones expands its argument V three times, so V should not
   have side effects.  */
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))

#define _mm_test_all_ones(V)	\
  _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
|
||||
|
||||
/* Packed/scalar double precision floating point rounding.
   _mm_round_pd passes __V and the mode __M to __builtin_ia32_roundpd;
   _mm_round_sd passes __D, __V and __M to __builtin_ia32_roundsd.
   With __OPTIMIZE__ defined these are always-inline functions;
   otherwise they are macros that cast the mode to int and call the
   same builtins directly.  */

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_pd (__m128d __V, const int __M)
{
  return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_sd(__m128d __D, __m128d __V, const int __M)
{
  return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
                                           (__v2df)__V,
                                           __M);
}
#else
#define _mm_round_pd(V, M) \
  ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))

#define _mm_round_sd(D, V, M)                                   \
  ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D),      \
                                     (__v2df)(__m128d)(V), (int)(M)))
#endif
|
||||
|
||||
/* Packed/scalar single precision floating point rounding.
   _mm_round_ps passes __V and the mode __M to __builtin_ia32_roundps;
   _mm_round_ss passes __D, __V and __M to __builtin_ia32_roundss.
   With __OPTIMIZE__ defined these are always-inline functions;
   otherwise they are macros that cast the mode to int and call the
   same builtins directly.  */

#ifdef __OPTIMIZE__
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ps (__m128 __V, const int __M)
{
  return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_round_ss (__m128 __D, __m128 __V, const int __M)
{
  return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
                                          (__v4sf)__V,
                                          __M);
}
#else
#define _mm_round_ps(V, M) \
  ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))

#define _mm_round_ss(D, V, M)                                   \
  ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D),        \
                                    (__v4sf)(__m128)(V), (int)(M)))
#endif
|
||||
|
||||
/* Macros for ceil/floor intrinsics.  Each expands to the matching
   _mm_round_* intrinsic with the mode fixed to _MM_FROUND_CEIL or
   _MM_FROUND_FLOOR.  */
#define _mm_ceil_pd(V)	   _mm_round_pd ((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V)	   _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V)	   _mm_round_ps ((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V)	   _mm_round_ps ((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
|
||||
|
||||
/* SSE4.1 */
|
||||
|
||||
/* Integer blend instructions - select data from 2 sources using
   constant/variable mask.  _mm_blend_epi16 casts __X and __Y to
   __v8hi and passes them with the constant mask __M to
   __builtin_ia32_pblendw128.  It is an always-inline function when
   __OPTIMIZE__ is defined and an equivalent macro otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
{
  return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
                                              (__v8hi)__Y,
                                              __M);
}
#else
#define _mm_blend_epi16(X, Y, M)                                \
  ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X),   \
                                        (__v8hi)(__m128i)(Y), (int)(M)))
#endif
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
(__v16qi)__M);
|
||||
}
|
||||
|
||||
/* Single precision floating point blend instructions - select data
   from 2 sources using constant/variable mask.  _mm_blend_ps casts __X
   and __Y to __v4sf and passes them with the constant mask __M to
   __builtin_ia32_blendps.  It is an always-inline function when
   __OPTIMIZE__ is defined and an equivalent macro otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
{
  return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
                                          (__v4sf)__Y,
                                          __M);
}
#else
#define _mm_blend_ps(X, Y, M)                                   \
  ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X),        \
                                    (__v4sf)(__m128)(Y), (int)(M)))
#endif
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
(__v4sf)__M);
|
||||
}
|
||||
|
||||
/* Double precision floating point blend instructions - select data
   from 2 sources using constant/variable mask.  _mm_blend_pd casts __X
   and __Y to __v2df and passes them with the constant mask __M to
   __builtin_ia32_blendpd.  It is an always-inline function when
   __OPTIMIZE__ is defined and an equivalent macro otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
{
  return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
                                           (__v2df)__Y,
                                           __M);
}
#else
#define _mm_blend_pd(X, Y, M)                                   \
  ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X),      \
                                     (__v2df)(__m128d)(Y), (int)(M)))
#endif
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
(__v2df)__M);
|
||||
}
|
||||
|
||||
/* Dot product instructions with mask-defined summing and zeroing parts
   of result.  _mm_dp_ps forwards to __builtin_ia32_dpps on __v4sf
   operands and _mm_dp_pd to __builtin_ia32_dppd on __v2df operands;
   both take the mask __M as the last argument.  They are always-inline
   functions when __OPTIMIZE__ is defined and equivalent macros
   otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
{
  return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
                                       (__v4sf)__Y,
                                       __M);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
{
  return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
                                        (__v2df)__Y,
                                        __M);
}
#else
#define _mm_dp_ps(X, Y, M)                                      \
  ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X),           \
                                 (__v4sf)(__m128)(Y), (int)(M)))

#define _mm_dp_pd(X, Y, M)                                      \
  ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),         \
                                  (__v2df)(__m128d)(Y), (int)(M)))
#endif
|
||||
|
||||
/* Packed integer 64-bit comparison, zeroing or filling with ones
   corresponding parts of result.  Implemented with the generic vector
   `==' operator on the operands viewed as __v2di rather than through a
   target builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
{
  return (__m128i) ((__v2di)__X == (__v2di)__Y);
}
|
||||
|
||||
/* Min/max packed integer instructions. */
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epu16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epu16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_min_epu32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_max_epu32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Packed integer 32-bit multiplication with truncation of upper
   halves of results.  Implemented with the generic vector `*' operator
   on the operands viewed as __v4su rather than through a target
   builtin.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
{
  return (__m128i) ((__v4su)__X * (__v4su)__Y);
}
|
||||
|
||||
/* Packed integer 32-bit multiplication of 2 pairs of operands
|
||||
with two 64-bit results. */
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mul_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Insert single precision float into packed single precision array
   element selected by index N.  The bits [7-6] of N define S
   index, the bits [5-4] define D index, and bits [3-0] define
   zeroing mask for D.  Forwards to __builtin_ia32_insertps128 on
   __v4sf operands; an always-inline function when __OPTIMIZE__ is
   defined and an equivalent macro otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
{
  return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
                                              (__v4sf)__S,
                                              __N);
}
#else
#define _mm_insert_ps(D, S, N)                                  \
  ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D),    \
                                        (__v4sf)(__m128)(S), (int)(N)))
#endif
|
||||
|
||||
/* Helper macro to create the N value for _mm_insert_ps. */
|
||||
/* Compose an index byte: SRC goes to bits [7-6], DST to bits [5-4] and
   ZMASK is OR-ed in unshifted.  */
#define _MM_MK_INSERTPS_NDX(SRC, DST, ZMASK) \
  (((SRC) << 6) | ((DST) << 4) | (ZMASK))
|
||||
|
||||
/* Extract binary representation of single precision float from packed
   single precision array element of X selected by index N.  The
   element is fetched with __builtin_ia32_vec_ext_v4sf, stored into the
   float member of a union and read back through the int member, so the
   returned int carries the float's bit pattern.  An always-inline
   function when __OPTIMIZE__ is defined; otherwise a statement-
   expression macro doing the same thing.  */

#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_ps (__m128 __X, const int __N)
{
  union { int __i; float __f; } __tmp;
  __tmp.__f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
  return __tmp.__i;
}
#else
#define _mm_extract_ps(X, N)                                            \
  (__extension__                                                        \
   ({                                                                   \
     union { int __i; float __f; } __tmp;                               \
     __tmp.__f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X),      \
                                              (int)(N));                \
     __tmp.__i;                                                         \
   }))
#endif
|
||||
|
||||
/* Extract binary representation of single precision float into
   D from packed single precision array element of S selected
   by index N.
   NOTE(review): the expansion is a brace-enclosed compound statement,
   not an expression, so it cannot be used where a value is required
   and a trailing `;' after it forms a separate empty statement.  */
#define _MM_EXTRACT_FLOAT(D, S, N) \
  { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
|
||||
|
||||
/* Extract specified single precision float element into the lower
   part of __m128.  Expands to _mm_insert_ps on a zeroed destination
   with an index built by _MM_MK_INSERTPS_NDX from source index N,
   destination index 0 and mask 0x0e.  */
#define _MM_PICK_OUT_PS(X, N)                           \
  _mm_insert_ps (_mm_setzero_ps (), (X),                \
                 _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
|
||||
|
||||
/* Insert integer, S, into packed integer array element of D
   selected by index N.  The 8-, 32- and 64-bit variants forward to
   __builtin_ia32_vec_set_v16qi, __builtin_ia32_vec_set_v4si and
   __builtin_ia32_vec_set_v2di respectively; the 64-bit variant is only
   provided when __x86_64__ is defined.  Always-inline functions when
   __OPTIMIZE__ is defined, equivalent macros otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi8 (__m128i __D, int __S, const int __N)
{
  return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
                                                 __S, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi32 (__m128i __D, int __S, const int __N)
{
  return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
                                                __S, __N);
}

#ifdef __x86_64__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
{
  return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
                                                __S, __N);
}
#endif
#else
#define _mm_insert_epi8(D, S, N)                                        \
  ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D),       \
                                           (int)(S), (int)(N)))

#define _mm_insert_epi32(D, S, N)                               \
  ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
                                          (int)(S), (int)(N)))

#ifdef __x86_64__
#define _mm_insert_epi64(D, S, N)                                       \
  ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D),         \
                                          (long long)(S), (int)(N)))
#endif
#endif
|
||||
|
||||
/* Extract integer from packed integer array element of X selected by
   index N.  The 8-, 32- and 64-bit variants forward to
   __builtin_ia32_vec_ext_v16qi, __builtin_ia32_vec_ext_v4si and
   __builtin_ia32_vec_ext_v2di respectively.  The 8-bit result is cast
   to unsigned char before being widened to the int return type.  The
   64-bit variant is only provided when __x86_64__ is defined.
   Always-inline functions when __OPTIMIZE__ is defined, equivalent
   macros otherwise.  */

#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi8 (__m128i __X, const int __N)
{
  return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi32 (__m128i __X, const int __N)
{
  return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
}

#ifdef __x86_64__
extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi64 (__m128i __X, const int __N)
{
  return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
}
#endif
#else
#define _mm_extract_epi8(X, N) \
  ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
#define _mm_extract_epi32(X, N) \
  ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))

#ifdef __x86_64__
#define _mm_extract_epi64(X, N) \
  ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
#endif
#endif
|
||||
|
||||
/* Return horizontal packed word minimum and its index in bits [15:0]
|
||||
and bits [18:16] respectively. */
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_minpos_epu16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
/* Packed integer sign-extension. */
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi16_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi32_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi16_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepi8_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
/* Packed integer zero-extension. */
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu8_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu16_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu8_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu32_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu16_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtepu8_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
/* Pack 8 double words from 2 operands into 8 words of result with
|
||||
unsigned saturation. */
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_packus_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Sum absolute 8-bit integer difference of adjacent groups of 4
   byte integers in the first 2 operands.  Starting offsets within
   operands are determined by the 3rd mask operand.  Forwards to
   __builtin_ia32_mpsadbw128 on __v16qi operands; an always-inline
   function when __OPTIMIZE__ is defined and an equivalent macro
   otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
{
  return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
                                              (__v16qi)__Y, __M);
}
#else
#define _mm_mpsadbw_epu8(X, Y, M)                               \
  ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X),  \
                                        (__v16qi)(__m128i)(Y), (int)(M)))
#endif
|
||||
|
||||
/* Load double quadword using non-temporal aligned hint. */
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_load_si128 (__m128i *__X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
|
||||
}
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.2")
|
||||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
/* These macros specify the source data format.  */
#define _SIDD_UBYTE_OPS			0x00
#define _SIDD_UWORD_OPS			0x01
#define _SIDD_SBYTE_OPS			0x02
#define _SIDD_SWORD_OPS			0x03

/* These macros specify the comparison operation.  */
#define _SIDD_CMP_EQUAL_ANY		0x00
#define _SIDD_CMP_RANGES		0x04
#define _SIDD_CMP_EQUAL_EACH		0x08
#define _SIDD_CMP_EQUAL_ORDERED		0x0c

/* These macros specify the polarity.  */
#define _SIDD_POSITIVE_POLARITY		0x00
#define _SIDD_NEGATIVE_POLARITY		0x10
#define _SIDD_MASKED_POSITIVE_POLARITY	0x20
#define _SIDD_MASKED_NEGATIVE_POLARITY	0x30

/* These macros specify the output selection in _mm_cmpXstri ().  */
#define _SIDD_LEAST_SIGNIFICANT		0x00
#define _SIDD_MOST_SIGNIFICANT		0x40

/* These macros specify the output selection in _mm_cmpXstrm ().
   They share the 0x00 / 0x40 values with the _mm_cmpXstri () pair
   above.  */
#define _SIDD_BIT_MASK			0x00
#define _SIDD_UNIT_MASK			0x40
|
||||
|
||||
/* Intrinsics for text/string processing.  The _mm_cmpistr* forms take
   two __v16qi operands and a mode __M; the _mm_cmpestr* forms
   additionally take the int arguments __LX and __LY after __X and __Y
   respectively.  The ...strm variants return __m128i and the ...stri
   variants return int.  Always-inline functions when __OPTIMIZE__ is
   defined, equivalent macros otherwise.  */

#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
{
  return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
                                                (__v16qi)__Y,
                                                __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
                                      (__v16qi)__Y,
                                      __M);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
                                                (__v16qi)__Y, __LY,
                                                __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
                                      (__v16qi)__Y, __LY,
                                      __M);
}
#else
#define _mm_cmpistrm(X, Y, M)                                           \
  ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X),        \
                                          (__v16qi)(__m128i)(Y), (int)(M)))
#define _mm_cmpistri(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X),            \
                                      (__v16qi)(__m128i)(Y), (int)(M)))

#define _mm_cmpestrm(X, LX, Y, LY, M)                                   \
  ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X),        \
                                          (int)(LX), (__v16qi)(__m128i)(Y), \
                                          (int)(LY), (int)(M)))
#define _mm_cmpestri(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                      (__v16qi)(__m128i)(Y), (int)(LY), \
                                      (int)(M)))
#endif
|
||||
|
||||
/* Intrinsics for text/string processing and reading values of
   EFlags.  Each intrinsic returns int and forwards to the builtin whose
   name ends in the same letter (a, c, o, s or z).  The _mm_cmpistr*
   forms take two __v16qi operands and a mode __M; the _mm_cmpestr*
   forms additionally take the int arguments __LX and __LY.
   Always-inline functions when __OPTIMIZE__ is defined, equivalent
   macros otherwise.  */

#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
                                       (__v16qi)__Y,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
                                       (__v16qi)__Y,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
                                       (__v16qi)__Y,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
                                       (__v16qi)__Y,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
{
  return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
                                       (__v16qi)__Y,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
                                       (__v16qi)__Y, __LY,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
                                       (__v16qi)__Y, __LY,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
                                       (__v16qi)__Y, __LY,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
                                       (__v16qi)__Y, __LY,
                                       __M);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
{
  return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
                                       (__v16qi)__Y, __LY,
                                       __M);
}
#else
#define _mm_cmpistra(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X),           \
                                       (__v16qi)(__m128i)(Y), (int)(M)))
#define _mm_cmpistrc(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X),           \
                                       (__v16qi)(__m128i)(Y), (int)(M)))
#define _mm_cmpistro(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X),           \
                                       (__v16qi)(__m128i)(Y), (int)(M)))
#define _mm_cmpistrs(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X),           \
                                       (__v16qi)(__m128i)(Y), (int)(M)))
#define _mm_cmpistrz(X, Y, M)                                           \
  ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X),           \
                                       (__v16qi)(__m128i)(Y), (int)(M)))

#define _mm_cmpestra(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                       (__v16qi)(__m128i)(Y), (int)(LY), \
                                       (int)(M)))
#define _mm_cmpestrc(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                       (__v16qi)(__m128i)(Y), (int)(LY), \
                                       (int)(M)))
#define _mm_cmpestro(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                       (__v16qi)(__m128i)(Y), (int)(LY), \
                                       (int)(M)))
#define _mm_cmpestrs(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                       (__v16qi)(__m128i)(Y), (int)(LY), \
                                       (int)(M)))
#define _mm_cmpestrz(X, LX, Y, LY, M)                                   \
  ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
                                       (__v16qi)(__m128i)(Y), (int)(LY), \
                                       (int)(M)))
#endif
|
||||
|
||||
/* Packed integer 64-bit comparison, zeroing or filling with ones
|
||||
corresponding parts of result. */
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) ((__v2di)__X > (__v2di)__Y);
|
||||
}
|
||||
|
||||
/* Undo the temporary target-option changes.  Each __DISABLE_*__ marker
   is presumably defined next to a matching `#pragma GCC push_options'
   earlier in this header (outside this excerpt); when a marker is set,
   it is cleared and the saved options are popped.  */
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */

#ifdef __DISABLE_SSE4_1__
#undef __DISABLE_SSE4_1__
#pragma GCC pop_options
#endif /* __DISABLE_SSE4_1__ */
|
||||
|
||||
#include <popcntintrin.h>
|
||||
|
||||
/* If __CRC32__ is not defined, save the current options, switch on the
   "crc32" target for the definitions that follow, and leave
   __DISABLE_CRC32__ as a marker so the saved options are popped again
   once the CRC32 intrinsics have been defined.  */
#ifndef __CRC32__
#pragma GCC push_options
#pragma GCC target("crc32")
#define __DISABLE_CRC32__
#endif /* __CRC32__ */
|
||||
|
||||
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  */

/* 8-bit step: returns __builtin_ia32_crc32qi (__C, __V).  __C is
   presumably the checksum accumulated so far and __V the next byte of
   data to fold into it.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u8 (unsigned int __C, unsigned char __V)
{
  return __builtin_ia32_crc32qi (__C, __V);
}
|
||||
|
||||
/* 16-bit step of the CRC32 accumulation: returns
   __builtin_ia32_crc32hi (__C, __V).  __C is presumably the checksum
   accumulated so far and __V the next 16 bits of data.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u16 (unsigned int __C, unsigned short __V)
{
  return __builtin_ia32_crc32hi (__C, __V);
}
|
||||
|
||||
/* 32-bit step of the CRC32 accumulation: returns
   __builtin_ia32_crc32si (__C, __V).  __C is presumably the checksum
   accumulated so far and __V the next 32 bits of data.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u32 (unsigned int __C, unsigned int __V)
{
  return __builtin_ia32_crc32si (__C, __V);
}
|
||||
|
||||
#ifdef __x86_64__
/* 64-bit step of the CRC32 accumulation, declared only when
   __x86_64__ is defined: returns __builtin_ia32_crc32di (__C, __V).
   Unlike the narrower variants, both the accumulator __C and the
   result are unsigned long long.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
{
  return __builtin_ia32_crc32di (__C, __V);
}
#endif
|
||||
|
||||
/* If the "crc32" target was switched on above just for these
   definitions (marker __DISABLE_CRC32__ set), clear the marker and
   restore the options that were saved with push_options.  */
#ifdef __DISABLE_CRC32__
#undef __DISABLE_CRC32__
#pragma GCC pop_options
#endif /* __DISABLE_CRC32__ */
|
||||
|
||||
#endif /* _SMMINTRIN_H_INCLUDED */
|
@ -0,0 +1,40 @@
|
||||
/* Copyright (C) 2011-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* ISO C11: 7.15 Alignment <stdalign.h>.  */
|
||||
|
||||
#ifndef _STDALIGN_H
#define _STDALIGN_H

/* The convenience macros are supplied only when compiling C (not C++)
   and only while __STDC_VERSION__ is absent or no greater than 201710L;
   presumably C++ and newer C revisions already provide alignas and
   alignof themselves.  */
#if (!defined __cplusplus \
     && !(defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L))

/* Spell the keywords _Alignas/_Alignof in lower case.  */
#define alignas _Alignas
#define alignof _Alignof

/* Feature-test macros telling user code that the two names above are
   available.  */
#define __alignas_is_defined 1
#define __alignof_is_defined 1

#endif

#endif	/* stdalign.h */
|
@ -0,0 +1,135 @@
|
||||
/* Copyright (C) 1989-2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* ISO C Standard: 7.15 Variable arguments <stdarg.h>
|
||||
*/
|
||||
|
||||
/* Two inclusion modes are supported.  A plain #include defines the
   guards _STDARG_H and _ANSI_STDARG_H_ and therefore everything below.
   An inclusion made with __need___va_list defined leaves the guards
   undefined, so only __gnuc_va_list is provided; the request macro is
   consumed (#undef) either way.  */
#ifndef _STDARG_H
#ifndef _ANSI_STDARG_H_
#ifndef __need___va_list
#define _STDARG_H
#define _ANSI_STDARG_H_
#endif /* not __need___va_list */
#undef __need___va_list

/* Define __gnuc_va_list.  */

#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
typedef __builtin_va_list __gnuc_va_list;
#endif

/* Define the standard macros for the user,
   if this invocation was from the user program.  */
#ifdef _STDARG_H

/* For __STDC_VERSION__ greater than 201710L, va_start accepts (and
   ignores) anything after its first argument and always passes 0 as the
   second argument of the builtin; otherwise it takes exactly two
   arguments and forwards both.  */
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
#define va_start(v, ...)	__builtin_va_start(v, 0)
#else
#define va_start(v,l)	__builtin_va_start(v,l)
#endif
#define va_end(v)	__builtin_va_end(v)
#define va_arg(v,l)	__builtin_va_arg(v,l)
/* va_copy is withheld only in strict-ANSI mode for dialects older than
   C99 / C++11; __va_copy is always available.  */
#if !defined(__STRICT_ANSI__) || __STDC_VERSION__ + 0 >= 199900L \
    || __cplusplus + 0 >= 201103L
#define va_copy(d,s)	__builtin_va_copy(d,s)
#endif
#define __va_copy(d,s)	__builtin_va_copy(d,s)

/* Define va_list, if desired, from __gnuc_va_list. */
/* We deliberately do not define va_list when called from
   stdio.h, because ANSI C says that stdio.h is not supposed to define
   va_list.  stdio.h needs to have access to that data type,
   but must not use that name.  It should use the name __gnuc_va_list,
   which is safe because it is reserved for the implementation.  */

#ifdef _BSD_VA_LIST
#undef _BSD_VA_LIST
#endif

#if defined(__svr4__) || (defined(_SCO_DS) && !defined(__VA_LIST))
/* SVR4.2 uses _VA_LIST for an internal alias for va_list,
   so we must avoid testing it and setting it here.
   SVR4 uses _VA_LIST as a flag in stdarg.h, but we should
   have no conflict with that.  */
#ifndef _VA_LIST_
#define _VA_LIST_
#ifdef __i860__
#ifndef _VA_LIST
#define _VA_LIST va_list
#endif
#endif /* __i860__ */
typedef __gnuc_va_list va_list;
#ifdef _SCO_DS
#define __VA_LIST
#endif
#endif /* _VA_LIST_ */
#else /* not __svr4__ || _SCO_DS */

/* The macro _VA_LIST_ is the same thing used by this file in Ultrix.
   But on BSD NET2 we must not test or define or undef it.
   (Note that the comments in NET 2's ansi.h
   are incorrect for _VA_LIST_--see stdio.h!)  */
#if !defined (_VA_LIST_) || defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__) || defined(WINNT)
/* The macro _VA_LIST_DEFINED is used in Windows NT 3.5  */
#ifndef _VA_LIST_DEFINED
/* The macro _VA_LIST is used in SCO Unix 3.2.  */
#ifndef _VA_LIST
/* The macro _VA_LIST_T_H is used in the Bull dpx2  */
#ifndef _VA_LIST_T_H
/* The macro __va_list__ is used by BeOS.  */
#ifndef __va_list__
typedef __gnuc_va_list va_list;
#endif /* not __va_list__ */
#endif /* not _VA_LIST_T_H */
#endif /* not _VA_LIST */
#endif /* not _VA_LIST_DEFINED */
/* Whether or not the typedef above was emitted, set every one of the
   "va_list already defined" markers tested above that is not yet set
   (_VA_LIST_ is left alone on the listed BSD-family systems).  */
#if !(defined (__BSD_NET2__) || defined (____386BSD____) || defined (__bsdi__) || defined (__sequent__) || defined (__FreeBSD__))
#define _VA_LIST_
#endif
#ifndef _VA_LIST
#define _VA_LIST
#endif
#ifndef _VA_LIST_DEFINED
#define _VA_LIST_DEFINED
#endif
#ifndef _VA_LIST_T_H
#define _VA_LIST_T_H
#endif
#ifndef __va_list__
#define __va_list__
#endif

#endif /* not _VA_LIST_, except on certain systems */

#endif /* not __svr4__ */

/* Version macro for this header, defined only when __STDC_VERSION__ is
   greater than 201710L.  */
#if defined __STDC_VERSION__ && __STDC_VERSION__ > 201710L
#define __STDC_VERSION_STDARG_H__	202311L
#endif

#endif /* _STDARG_H */

#endif /* not _ANSI_STDARG_H_ */
#endif /* not _STDARG_H */
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user