|
|
@ -20,6 +20,7 @@ template<class T, template<typename ...> class VT>
|
|
|
|
types::GetLongType<T>
|
|
|
|
types::GetLongType<T>
|
|
|
|
sum(const VT<T>& v) {
|
|
|
|
sum(const VT<T>& v) {
|
|
|
|
types::GetLongType<T> ret = 0;
|
|
|
|
types::GetLongType<T> ret = 0;
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (auto _v : v)
|
|
|
|
for (auto _v : v)
|
|
|
|
ret += _v;
|
|
|
|
ret += _v;
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
@ -32,6 +33,7 @@ double avg(const VT<T>& v) {
|
|
|
|
|
|
|
|
|
|
|
|
// Element-wise square root: for every index i in [0, v.size) stores
// sqrt(v[i]) into ret[i].
//
// v   : input sequence; the element count is read from its `size` member.
// ret : output, written through operator[] at the same indices as v.
//       NOTE(review): ret is assumed to already hold at least v.size
//       elements -- confirm against callers.
//
// The inner call is unqualified, so overload resolution picks whichever
// one-argument sqrt is visible for the element type.
// NOTE(review): presumably the <cmath> overload for scalar T -- confirm.
template<class T, template<typename ...> class VT, class Ret>
void sqrt(const VT<T>& v, Ret& ret) {
    const auto count = v.size;
    // Each iteration touches only its own index, so the loop is marked
    // for SIMD execution.
#pragma omp simd
    for (uint32_t idx = 0; idx < count; ++idx) {
        ret[idx] = sqrt(v[idx]);
    }
}
|
|
|
@ -59,6 +61,7 @@ VT<T> truncate(const VT<T>& v, const uint32_t precision) {
|
|
|
|
auto multiplier = pow(10, precision);
|
|
|
|
auto multiplier = pow(10, precision);
|
|
|
|
auto max_truncate = std::numeric_limits<T>::max()/multiplier;
|
|
|
|
auto max_truncate = std::numeric_limits<T>::max()/multiplier;
|
|
|
|
VT<T> ret(v.size);
|
|
|
|
VT<T> ret(v.size);
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (uint32_t i = 0; i < v.size; ++i) { // round or trunc??
|
|
|
|
for (uint32_t i = 0; i < v.size; ++i) { // round or trunc??
|
|
|
|
ret[i] = v[i] < max_truncate ? round(v[i] * multiplier)/multiplier : v[i];
|
|
|
|
ret[i] = v[i] < max_truncate ? round(v[i] * multiplier)/multiplier : v[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -68,6 +71,7 @@ VT<T> truncate(const VT<T>& v, const uint32_t precision) {
|
|
|
|
template <class T, template<typename ...> class VT>
|
|
|
|
template <class T, template<typename ...> class VT>
|
|
|
|
T max(const VT<T>& v) {
|
|
|
|
T max(const VT<T>& v) {
|
|
|
|
T max_v = std::numeric_limits<T>::min();
|
|
|
|
T max_v = std::numeric_limits<T>::min();
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (const auto& _v : v)
|
|
|
|
for (const auto& _v : v)
|
|
|
|
max_v = max_v > _v ? max_v : _v;
|
|
|
|
max_v = max_v > _v ? max_v : _v;
|
|
|
|
return max_v;
|
|
|
|
return max_v;
|
|
|
@ -75,6 +79,7 @@ T max(const VT<T>& v) {
|
|
|
|
template <class T, template<typename ...> class VT>
|
|
|
|
template <class T, template<typename ...> class VT>
|
|
|
|
T min(const VT<T>& v) {
|
|
|
|
T min(const VT<T>& v) {
|
|
|
|
T min_v = std::numeric_limits<T>::max();
|
|
|
|
T min_v = std::numeric_limits<T>::max();
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (const auto& _v : v)
|
|
|
|
for (const auto& _v : v)
|
|
|
|
min_v = min_v < _v ? min_v : _v;
|
|
|
|
min_v = min_v < _v ? min_v : _v;
|
|
|
|
return min_v;
|
|
|
|
return min_v;
|
|
|
@ -85,6 +90,7 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void mins(const VT<T>& arr, Ret& ret) {
|
|
|
|
void mins(const VT<T>& arr, Ret& ret) {
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
T min = std::numeric_limits<T>::max();
|
|
|
|
T min = std::numeric_limits<T>::max();
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
if (arr[i] < min)
|
|
|
|
if (arr[i] < min)
|
|
|
|
min = arr[i];
|
|
|
|
min = arr[i];
|
|
|
@ -103,6 +109,7 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void maxs(const VT<T>& arr, Ret& ret) {
|
|
|
|
void maxs(const VT<T>& arr, Ret& ret) {
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
T max = std::numeric_limits<T>::min();
|
|
|
|
T max = std::numeric_limits<T>::min();
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
if (arr[i] > max)
|
|
|
|
if (arr[i] > max)
|
|
|
|
max = arr[i];
|
|
|
|
max = arr[i];
|
|
|
@ -121,9 +128,10 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void minw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
void minw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
std::deque<std::pair<T, uint32_t>> cache;
|
|
|
|
std::deque<std::pair<T, uint32_t>> cache;
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
if (!cache.empty() && cache.front().second == i - w) cache.pop_front();
|
|
|
|
if (!cache.empty() && cache.front().second == i - w) cache.pop_front();
|
|
|
|
|
|
|
|
#pragma clang loop vectorize(enable) interleave(enable)
|
|
|
|
while (!cache.empty() && cache.back().first > arr[i]) cache.pop_back();
|
|
|
|
while (!cache.empty() && cache.back().first > arr[i]) cache.pop_back();
|
|
|
|
cache.push_back({ arr[i], i });
|
|
|
|
cache.push_back({ arr[i], i });
|
|
|
|
ret[i] = cache.front().first;
|
|
|
|
ret[i] = cache.front().first;
|
|
|
@ -131,7 +139,7 @@ void minw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<class T, template<typename ...> class VT>
|
|
|
|
template<class T, template<typename ...> class VT>
|
|
|
|
decayed_t<VT, T> minw(uint32_t w, const VT<T>& arr) {
|
|
|
|
inline decayed_t<VT, T> minw(uint32_t w, const VT<T>& arr) {
|
|
|
|
decayed_t<VT, T> ret(arr.size);
|
|
|
|
decayed_t<VT, T> ret(arr.size);
|
|
|
|
minw(w, arr, ret);
|
|
|
|
minw(w, arr, ret);
|
|
|
|
return ret;
|
|
|
|
return ret;
|
|
|
@ -141,8 +149,10 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void maxw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
void maxw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
std::deque<std::pair<T, uint32_t>> cache;
|
|
|
|
std::deque<std::pair<T, uint32_t>> cache;
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
for (int i = 0; i < len; ++i) {
|
|
|
|
if (!cache.empty() && cache.front().second == i - w) cache.pop_front();
|
|
|
|
if (!cache.empty() && cache.front().second == i - w) cache.pop_front();
|
|
|
|
|
|
|
|
#pragma clang loop vectorize(enable) interleave(enable)
|
|
|
|
while (!cache.empty() && cache.back().first < arr[i]) cache.pop_back();
|
|
|
|
while (!cache.empty() && cache.back().first < arr[i]) cache.pop_back();
|
|
|
|
cache.push_back({ arr[i], i });
|
|
|
|
cache.push_back({ arr[i], i });
|
|
|
|
ret[i] = cache.front().first;
|
|
|
|
ret[i] = cache.front().first;
|
|
|
@ -164,8 +174,10 @@ void ratiow(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
len = 1;
|
|
|
|
len = 1;
|
|
|
|
w = w > len ? len : w;
|
|
|
|
w = w > len ? len : w;
|
|
|
|
ret[0] = 0;
|
|
|
|
ret[0] = 0;
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (uint32_t i = 0; i < w; ++i)
|
|
|
|
for (uint32_t i = 0; i < w; ++i)
|
|
|
|
ret[i] = arr[i] / (FPType)arr[0];
|
|
|
|
ret[i] = arr[i] / (FPType)arr[0];
|
|
|
|
|
|
|
|
#pragma omp simd
|
|
|
|
for (uint32_t i = w; i < arr.size; ++i)
|
|
|
|
for (uint32_t i = w; i < arr.size; ++i)
|
|
|
|
ret[i] = arr[i] / (FPType) arr[i - w];
|
|
|
|
ret[i] = arr[i] / (FPType) arr[i - w];
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -191,9 +203,9 @@ inline void ratios(const VT<T>& arr, Ret& ret) {
|
|
|
|
// Running (prefix) sum: ret[i] = arr[0] + arr[1] + ... + arr[i].
//
// arr : input sequence; the element count is read from arr.size.
// ret : output, written through operator[] at indices 0 .. arr.size - 1.
//       Nothing is written when arr.size is 0.
//       NOTE(review): ret is assumed to already hold at least arr.size
//       elements -- confirm against callers.
template<class T, template<typename ...> class VT, class Ret>
void sums(const VT<T>& arr, Ret& ret) {
    const uint32_t& len = arr.size;
    if (len) ret[0] = arr[0];
    // Intentionally NOT annotated with `#pragma omp simd`: every iteration
    // reads the ret[i - 1] produced by the previous one. The simd construct
    // tells the compiler that iterations may run concurrently in SIMD lanes,
    // which is false for this recurrence and can yield wrong sums when
    // OpenMP (or OpenMP-simd) is enabled.
    for (uint32_t i = 1; i < len; ++i)
        ret[i] = ret[i - 1] + arr[i];
}
|
|
|
|
|
|
|
|
|
|
|
@ -208,10 +220,10 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void avgs(const VT<T>& arr, Ret& ret) {
|
|
|
|
void avgs(const VT<T>& arr, Ret& ret) {
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
|
|
|
types::GetLongType<T> s;
|
|
|
|
types::GetLongType<T> s;
|
|
|
|
if (len) s = ret[i++] = arr[0];
|
|
|
|
if (len) s = ret[0] = arr[0];
|
|
|
|
for (; i < len; ++i)
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 1; i < len; ++i)
|
|
|
|
ret[i] = (s += arr[i]) / (FPType)(i + 1);
|
|
|
|
ret[i] = (s += arr[i]) / (FPType)(i + 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -226,12 +238,13 @@ inline decayed_t<VT, types::GetFPType<types::GetLongType<T>>> avgs(const VT<T>&
|
|
|
|
// Sliding-window sum over the trailing w elements.
//
//   i <  w : ret[i] = arr[0] + ... + arr[i]          (window still filling)
//   i >= w : ret[i] = ret[i - 1] + arr[i] - arr[i - w]
//            i.e. the previous window sum plus the entering element minus
//            the element that just left the window.
//
// w   : window length; clamped to arr.size when larger.
// arr : input sequence; the element count is read from arr.size.
// ret : output, written through operator[] at indices 0 .. arr.size - 1.
//       Nothing is written when arr.size is 0.
//       NOTE(review): ret is assumed to already hold at least arr.size
//       elements -- confirm against callers.
template<class T, template<typename ...> class VT, class Ret>
void sumw(uint32_t w, const VT<T>& arr, Ret& ret) {
    const uint32_t& len = arr.size;
    w = w > len ? len : w;
    if (len) ret[0] = arr[0];

    // Neither loop below carries `#pragma omp simd`: both read the
    // ret[i - 1] written by the preceding iteration, so their iterations
    // are not independent and must run in order.
    for (uint32_t i = 1; i < w; ++i)
        ret[i] = ret[i - 1] + arr[i];

    // Index 0 is always handled above, so the steady-state loop must never
    // start below 1. Starting at `w` when w == 0 would read ret[i - 1] with
    // i == 0, which wraps around for an unsigned index.
    for (uint32_t i = (w ? w : 1); i < len; ++i)
        ret[i] = ret[i - 1] + arr[i] - arr[i - w];
}
|
|
|
|
|
|
|
|
|
|
|
@ -246,13 +259,15 @@ template<class T, template<typename ...> class VT, class Ret>
|
|
|
|
void avgw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
void avgw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
w = w > len ? len : w;
|
|
|
|
w = w > len ? len : w;
|
|
|
|
if (len) s = ret[i++] = arr[0];
|
|
|
|
if (len) s = ret[0] = arr[0];
|
|
|
|
for (; i < w; ++i)
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 1; i < w; ++i)
|
|
|
|
ret[i] = (s += arr[i]) / (FPType)(i + 1);
|
|
|
|
ret[i] = (s += arr[i]) / (FPType)(i + 1);
|
|
|
|
for (; i < len; ++i)
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = w; i < len; ++i)
|
|
|
|
ret[i] = ret[i - 1] + (arr[i] - arr[i - w]) / (FPType)w;
|
|
|
|
ret[i] = ret[i - 1] + (arr[i] - arr[i - w]) / (FPType)w;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -270,7 +285,7 @@ void varw(uint32_t w, const VT<T>& arr,
|
|
|
|
Ret& ret) {
|
|
|
|
Ret& ret) {
|
|
|
|
using FPType = types::GetFPType<types::GetLongType<T>>;
|
|
|
|
using FPType = types::GetFPType<types::GetLongType<T>>;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
w = w > len ? len : w;
|
|
|
|
w = w > len ? len : w;
|
|
|
|
FPType EnX {}, MnX{};
|
|
|
|
FPType EnX {}, MnX{};
|
|
|
@ -278,9 +293,10 @@ void varw(uint32_t w, const VT<T>& arr,
|
|
|
|
s = arr[0];
|
|
|
|
s = arr[0];
|
|
|
|
MnX = 0;
|
|
|
|
MnX = 0;
|
|
|
|
EnX = arr[0];
|
|
|
|
EnX = arr[0];
|
|
|
|
ret[i++] = 0;
|
|
|
|
ret[0] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (; i < len; ++i){
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 1; i < w; ++i) {
|
|
|
|
s += arr[i];
|
|
|
|
s += arr[i];
|
|
|
|
FPType _EnX = s / (FPType)(i + 1);
|
|
|
|
FPType _EnX = s / (FPType)(i + 1);
|
|
|
|
MnX += (arr[i] - EnX) * (arr[i] - _EnX);
|
|
|
|
MnX += (arr[i] - EnX) * (arr[i] - _EnX);
|
|
|
@ -290,7 +306,8 @@ void varw(uint32_t w, const VT<T>& arr,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const float rw = 1.f / (float)w;
|
|
|
|
const float rw = 1.f / (float)w;
|
|
|
|
s *= rw;
|
|
|
|
s *= rw;
|
|
|
|
for (; i < len; ++i){
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = w; i < len; ++i) {
|
|
|
|
const auto dw = arr[i] - arr[i - w - 1];
|
|
|
|
const auto dw = arr[i] - arr[i - w - 1];
|
|
|
|
const auto sw = arr[i] + arr[i - w - 1];
|
|
|
|
const auto sw = arr[i] + arr[i - w - 1];
|
|
|
|
const auto dex = dw * rw;
|
|
|
|
const auto dex = dw * rw;
|
|
|
@ -299,8 +316,8 @@ void varw(uint32_t w, const VT<T>& arr,
|
|
|
|
s += dex;
|
|
|
|
s += dex;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if constexpr(sd)
|
|
|
|
if constexpr(sd)
|
|
|
|
if(i)
|
|
|
|
if(len)
|
|
|
|
ret[i-1] = sqrt(ret[i-1]);
|
|
|
|
ret[len-1] = sqrt(ret[len-1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -316,14 +333,14 @@ template<class T, template<typename ...> class VT>
|
|
|
|
types::GetFPType<types::GetLongType<decays<T>>> var(const VT<T>& arr) {
|
|
|
|
types::GetFPType<types::GetLongType<decays<T>>> var(const VT<T>& arr) {
|
|
|
|
typedef types::GetFPType<types::GetLongType<decays<T>>> FPType;
|
|
|
|
typedef types::GetFPType<types::GetLongType<decays<T>>> FPType;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
|
|
|
types::GetLongType<T> s{0};
|
|
|
|
types::GetLongType<T> s{0};
|
|
|
|
types::GetLongType<T> ssq{0};
|
|
|
|
types::GetLongType<T> ssq{0};
|
|
|
|
if (len) {
|
|
|
|
if (len) {
|
|
|
|
s = arr[0];
|
|
|
|
s = arr[0];
|
|
|
|
ssq = arr[0] * arr[0];
|
|
|
|
ssq = arr[0] * arr[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (; i < len; ++i){
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 1; i < len; ++i){
|
|
|
|
s += arr[i];
|
|
|
|
s += arr[i];
|
|
|
|
ssq += arr[i] * arr[i];
|
|
|
|
ssq += arr[i] * arr[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -334,7 +351,6 @@ template<class T, template<typename ...> class VT, class Ret, bool sd = false>
|
|
|
|
void vars(const VT<T>& arr, Ret& ret) {
|
|
|
|
void vars(const VT<T>& arr, Ret& ret) {
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
typedef types::GetFPType<types::GetLongType<T>> FPType;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
const uint32_t& len = arr.size;
|
|
|
|
uint32_t i = 0;
|
|
|
|
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
types::GetLongType<T> s{};
|
|
|
|
FPType MnX{};
|
|
|
|
FPType MnX{};
|
|
|
|
FPType EnX {};
|
|
|
|
FPType EnX {};
|
|
|
@ -342,9 +358,10 @@ void vars(const VT<T>& arr, Ret& ret) {
|
|
|
|
s = arr[0];
|
|
|
|
s = arr[0];
|
|
|
|
MnX = 0;
|
|
|
|
MnX = 0;
|
|
|
|
EnX = arr[0];
|
|
|
|
EnX = arr[0];
|
|
|
|
ret[i++] = 0;
|
|
|
|
ret[0] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (; i < len; ++i){
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 1; i < len; ++i){
|
|
|
|
s += arr[i];
|
|
|
|
s += arr[i];
|
|
|
|
FPType _EnX = s / (FPType)(i + 1);
|
|
|
|
FPType _EnX = s / (FPType)(i + 1);
|
|
|
|
MnX += (arr[i] - EnX) * (arr[i] - _EnX);
|
|
|
|
MnX += (arr[i] - EnX) * (arr[i] - _EnX);
|
|
|
@ -373,7 +390,8 @@ auto corr(const VT<T>& x, const VT2<T2>&y) {
|
|
|
|
// assert(x.size == y.size);
|
|
|
|
// assert(x.size == y.size);
|
|
|
|
const uint32_t& len = x.size;
|
|
|
|
const uint32_t& len = x.size;
|
|
|
|
LongType sx{0}, sy{0}, sxy{0}, sx2{0}, sy2{0};
|
|
|
|
LongType sx{0}, sy{0}, sxy{0}, sx2{0}, sy2{0};
|
|
|
|
for (uint32_t i = 0; i < len; ++i){
|
|
|
|
#pragma omp simd
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < len; ++i) {
|
|
|
|
sx += x[i];
|
|
|
|
sx += x[i];
|
|
|
|
sx2 += x[i] * x[i];
|
|
|
|
sx2 += x[i] * x[i];
|
|
|
|
sy += y[i];
|
|
|
|
sy += y[i];
|
|
|
@ -417,14 +435,13 @@ inline auto stddevw(uint32_t w, const VT<T>& arr, Ret& ret) {
|
|
|
|
return varw<T, VT, Ret, true>(w, arr, ret);
|
|
|
|
return varw<T, VT, Ret, true>(w, arr, ret);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Original note: use getSignedType.
// NOTE(review): presumably meant so that differences of unsigned elements
// do not wrap around -- confirm before changing the result type.
//
// First differences: ret[0] = 0 and ret[i] = arr[i] - arr[i - 1] for i >= 1.
//
// arr : input sequence; the element count is read from arr.size.
// ret : output, written through operator[] at indices 0 .. arr.size - 1.
//       Nothing is written when arr.size is 0.
template<class T, template<typename ...> class VT, class Ret>
void deltas(const VT<T>& arr, Ret& ret) {
    const uint32_t& count = arr.size;
    if (count == 0) return;

    // The first element has no predecessor, so its delta is defined as 0.
    ret[0] = 0;

    // One step per adjacent pair (k, k + 1); each step writes its own slot
    // and reads only from arr.
    const uint32_t steps = count - 1;
#pragma omp simd
    for (uint32_t k = 0; k < steps; ++k) {
        ret[k + 1] = arr[k + 1] - arr[k];
    }
}
|
|
|
|
|
|
|
|
|
|
|
@ -438,9 +455,9 @@ inline decayed_t<VT, T> deltas(const VT<T>& arr) {
|
|
|
|
// Shift-by-one ("previous value"): ret[0] = arr[0] and ret[i] = arr[i - 1]
// for i >= 1.
//
// arr : input sequence; the element count is read from arr.size.
// ret : output, written through operator[] at indices 0 .. arr.size - 1.
//       Nothing is written when arr.size is 0.
template<class T, template<typename ...> class VT, class Ret>
void prev(const VT<T>& arr, Ret& ret) {
    const uint32_t& count = arr.size;
    if (count == 0) return;

    // The first slot has no predecessor; it receives the first element itself.
    ret[0] = arr[0];

    // Every remaining slot src + 1 receives the element at src.
    const uint32_t shifted = count - 1;
#pragma omp simd
    for (uint32_t src = 0; src < shifted; ++src) {
        ret[src + 1] = arr[src];
    }
}
|
|
|
|
|
|
|
|
|
|
|
@ -454,8 +471,8 @@ inline decayed_t<VT, T> prev(const VT<T>& arr) {
|
|
|
|
// Shift-by-one in the other direction ("next value"):
// ret[i] = arr[i + 1] for i < arr.size - 1, and the last slot keeps the
// last element: ret[arr.size - 1] = arr[arr.size - 1].
//
// arr : input sequence; the element count is read from arr.size.
// ret : output, written through operator[] at indices 0 .. arr.size - 1.
//       Nothing is written when arr.size is 0.
template<class T, template<typename ...> class VT, class Ret>
void aggnext(const VT<T>& arr, Ret& ret) {
    const uint32_t& count = arr.size;
    if (count == 0) return;

    const uint32_t last = count - 1;
    // Each destination slot takes the element one position to its right.
#pragma omp simd
    for (uint32_t dst = 0; dst < last; ++dst) {
        ret[dst] = arr[dst + 1];
    }

    // The final element has no successor, so it is copied unchanged.
    ret[last] = arr[last];
}
|
|
|
|