Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unroll and inline strings SSE and AVX #569

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,11 @@ set (SFIZZ_HEADERS
sfizz/effects/impl/ResonantArrayAVX.h
sfizz/effects/impl/ResonantArraySSE.h
sfizz/effects/impl/ResonantString.h
sfizz/effects/impl/ResonantStringInline.h
sfizz/effects/impl/ResonantStringAVX.h
sfizz/effects/impl/ResonantStringAVXInline.h
sfizz/effects/impl/ResonantStringSSE.h
sfizz/effects/impl/ResonantStringSSEInline.h
sfizz/effects/Apan.h
sfizz/effects/CommonLFO.h
sfizz/effects/CommonLFO.hpp
Expand Down
1 change: 1 addition & 0 deletions src/sfizz/effects/impl/ResonantArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "ResonantArray.h"
#include "ResonantString.h"
#include "ResonantStringInline.h"
#include "SIMDHelpers.h"

namespace sfz {
Expand Down
25 changes: 24 additions & 1 deletion src/sfizz/effects/impl/ResonantArrayAVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#include "ResonantArrayAVX.h"
#include "ResonantStringAVXInline.h"
#include "Config.h"
#include <cstring>

Expand Down Expand Up @@ -82,7 +83,29 @@ void ResonantArrayAVX::process(const float *inPtr, float *outPtr, unsigned numFr
__m256* outputs8 = reinterpret_cast<__m256*>(_workBuffer.data());
std::memset(outputs8, 0, numFrames * sizeof(__m256));

for (unsigned p = 0; p < numStringPacks; ++p) {
unsigned p = 0;
for (; p + 7 < numStringPacks; p += 8) {
ResonantStringAVX& rs = reinterpret_cast<ResonantStringAVX&>(stringPacks[p]);
for (unsigned i = 0; i < numFrames; ++i) {
__m256 o1 = rs.process(_mm256_broadcast_ss(&inPtr[i]));
__m256 o2 = rs.process(_mm256_broadcast_ss(&inPtr[i + 1]));
__m256 o3 = rs.process(_mm256_broadcast_ss(&inPtr[i + 2]));
__m256 o4 = rs.process(_mm256_broadcast_ss(&inPtr[i + 3]));
__m256 o5 = rs.process(_mm256_broadcast_ss(&inPtr[i + 4]));
__m256 o6 = rs.process(_mm256_broadcast_ss(&inPtr[i + 5]));
__m256 o7 = rs.process(_mm256_broadcast_ss(&inPtr[i + 6]));
__m256 o8 = rs.process(_mm256_broadcast_ss(&inPtr[i + 7]));
__m256 output8 = outputs8[i];
o1 = _mm256_add_ps(o1, o2);
o2 = _mm256_add_ps(o3, o4);
o3 = _mm256_add_ps(o5, o6);
o4 = _mm256_add_ps(o7, o8);
o1 = _mm256_add_ps(o1, o2);
o2 = _mm256_add_ps(o3, o4);
outputs8[i] = _mm256_add_ps(o1, _mm256_add_ps(output8, o2));
}
}
for (; p < numStringPacks; ++p) {
ResonantStringAVX& rs = reinterpret_cast<ResonantStringAVX&>(stringPacks[p]);
for (unsigned i = 0; i < numFrames; ++i)
outputs8[i] = _mm256_add_ps(
Expand Down
25 changes: 24 additions & 1 deletion src/sfizz/effects/impl/ResonantArraySSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#include "ResonantArraySSE.h"
#include "ResonantStringSSEInline.h"
#include "Config.h"
#include <cstring>

Expand Down Expand Up @@ -82,7 +83,29 @@ void ResonantArraySSE::process(const float *inPtr, float *outPtr, unsigned numFr
__m128* outputs4 = reinterpret_cast<__m128*>(_workBuffer.data());
std::memset(outputs4, 0, numFrames * sizeof(__m128));

for (unsigned p = 0; p < numStringPacks; ++p) {
unsigned p = 0;
for (; p + 7 < numStringPacks; p += 8) {
ResonantStringSSE& rs = reinterpret_cast<ResonantStringSSE&>(stringPacks[p]);
for (unsigned i = 0; i < numFrames; ++i) {
__m128 o1 = rs.process(_mm_load1_ps(&inPtr[i]));
__m128 o2 = rs.process(_mm_load1_ps(&inPtr[i + 1]));
__m128 o3 = rs.process(_mm_load1_ps(&inPtr[i + 2]));
__m128 o4 = rs.process(_mm_load1_ps(&inPtr[i + 3]));
__m128 o5 = rs.process(_mm_load1_ps(&inPtr[i + 4]));
__m128 o6 = rs.process(_mm_load1_ps(&inPtr[i + 5]));
__m128 o7 = rs.process(_mm_load1_ps(&inPtr[i + 6]));
__m128 o8 = rs.process(_mm_load1_ps(&inPtr[i + 7]));
__m128 output4 = outputs4[i];
o1 = _mm_add_ps(o1, o2);
o2 = _mm_add_ps(o3, o4);
o3 = _mm_add_ps(o5, o6);
o4 = _mm_add_ps(o7, o8);
o1 = _mm_add_ps(o1, o2);
o2 = _mm_add_ps(o3, o4);
outputs4[i] = _mm_add_ps(o1, _mm_add_ps(output4, o2));
}
}
for (; p < numStringPacks; ++p) {
ResonantStringSSE& rs = stringPacks[p];
for (unsigned i = 0; i < numFrames; ++i)
outputs4[i] = _mm_add_ps(
Expand Down
14 changes: 0 additions & 14 deletions src/sfizz/effects/impl/ResonantString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,19 +85,5 @@ void ResonantString::setResonanceFrequency(float frequency, float bandwidth)
fControl[17] = (fControl[10] + (4.0f - fControl[11]));
}

/**
 * Advance the resonator by one sample.
 *
 * The three state arrays are updated from the coefficients held in
 * fControl: fRec2 is driven by the input with feedback from its two
 * previous values, while fRec0 and fRec1 are each computed from the
 * previous values of both. Afterwards the state is aged by one step.
 *
 * @param input sample fed into the fRec2 recursion
 * @return the freshly computed fRec0[0], scaled by fControl[0]
 */
float ResonantString::process(float input)
{
    // Snapshot the delayed state first; every update below reads only
    // values belonging to the previous step.
    const auto rec0Prev = fRec0[1];
    const auto rec1Prev = fRec1[1];
    const auto rec2Prev = fRec2[1];
    const auto rec2PrevPrev = fRec2[2];

    const auto coupling = (fControl[4] * rec1Prev) + (fControl[5] * rec0Prev);
    fRec0[0] = (fControl[1] * coupling);

    const auto feedback = (fControl[16] * rec2Prev) + (fControl[17] * rec2PrevPrev);
    fRec2[0] = (input - (fControl[15] * feedback));

    const auto excitation = (fControl[14] * rec2PrevPrev) + ((fControl[5] * rec1Prev) + (fControl[13] * fRec2[0]));
    fRec1[0] = (excitation - (fControl[4] * rec0Prev));

    const float result = float((fControl[0] * fRec0[0]));

    // Age the state: current values become the "one step ago" values.
    fRec0[1] = fRec0[0];
    fRec2[2] = rec2Prev;
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return result;
}

} // namespace sfz
} // namespace fx
14 changes: 0 additions & 14 deletions src/sfizz/effects/impl/ResonantStringAVX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,6 @@ void ResonantStringAVX::setResonanceFrequency(__m256 frequency, __m256 bandwidth
fControl[17] = _mm256_add_ps(fControl[10], _mm256_sub_ps(_mm256_set1_ps(4.0f), fControl[11]));
}

/**
 * Advance the eight packed resonators by one sample.
 *
 * Lane-wise, the state arrays are updated from the coefficients held in
 * fControl: fRec2 is driven by the input with feedback from its two
 * previous values, while fRec0 and fRec1 are each computed from the
 * previous values of both. Afterwards the state is aged by one step.
 *
 * @param input one input value per lane
 * @return the freshly computed fRec0[0], multiplied lane-wise by fControl[0]
 */
__m256 ResonantStringAVX::process(__m256 input)
{
    // Snapshot the delayed state first; every update below reads only
    // values belonging to the previous step.
    const __m256 rec0Prev = fRec0[1];
    const __m256 rec1Prev = fRec1[1];
    const __m256 rec2Prev = fRec2[1];
    const __m256 rec2PrevPrev = fRec2[2];

    const __m256 coupling = _mm256_add_ps(
        _mm256_mul_ps(fControl[4], rec1Prev),
        _mm256_mul_ps(fControl[5], rec0Prev));
    fRec0[0] = _mm256_mul_ps(fControl[1], coupling);

    const __m256 feedback = _mm256_add_ps(
        _mm256_mul_ps(fControl[16], rec2Prev),
        _mm256_mul_ps(fControl[17], rec2PrevPrev));
    fRec2[0] = _mm256_sub_ps(input, _mm256_mul_ps(fControl[15], feedback));

    const __m256 excitation = _mm256_add_ps(
        _mm256_mul_ps(fControl[14], rec2PrevPrev),
        _mm256_add_ps(
            _mm256_mul_ps(fControl[5], rec1Prev),
            _mm256_mul_ps(fControl[13], fRec2[0])));
    fRec1[0] = _mm256_sub_ps(excitation, _mm256_mul_ps(fControl[4], rec0Prev));

    const __m256 result = _mm256_mul_ps(fControl[0], fRec0[0]);

    // Age the state: current values become the "one step ago" values.
    fRec0[1] = fRec0[0];
    fRec2[2] = rec2Prev;
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return result;
}

} // namespace sfz
} // namespace fx
#endif
28 changes: 28 additions & 0 deletions src/sfizz/effects/impl/ResonantStringAVXInline.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#pragma once
#include "ResonantStringAVX.h"

namespace sfz {
namespace fx {

/**
 * Header-inline single-sample step for eight packed resonators.
 *
 * Each lane runs the same recurrence: fRec2 takes the input minus a
 * weighted sum of its two previous values; fRec0 and fRec1 are each
 * derived from the previous values of both. The returned vector is the
 * new fRec0[0] multiplied by fControl[0].
 *
 * @param input one input value per lane
 * @return one output value per lane
 */
inline __m256 ResonantStringAVX::process(__m256 input)
{
    // State as it stood at the end of the previous call.
    const __m256 y0z1 = fRec0[1];
    const __m256 y1z1 = fRec1[1];
    const __m256 y2z1 = fRec2[1];
    const __m256 y2z2 = fRec2[2];

    // fRec0[0] = c1 * (c4 * fRec1[1] + c5 * fRec0[1])
    const __m256 mixed = _mm256_add_ps(
        _mm256_mul_ps(fControl[4], y1z1),
        _mm256_mul_ps(fControl[5], y0z1));
    fRec0[0] = _mm256_mul_ps(fControl[1], mixed);

    // fRec2[0] = input - c15 * (c16 * fRec2[1] + c17 * fRec2[2])
    const __m256 tail = _mm256_add_ps(
        _mm256_mul_ps(fControl[16], y2z1),
        _mm256_mul_ps(fControl[17], y2z2));
    fRec2[0] = _mm256_sub_ps(input, _mm256_mul_ps(fControl[15], tail));

    // fRec1[0] = (c14 * fRec2[2] + (c5 * fRec1[1] + c13 * fRec2[0])) - c4 * fRec0[1]
    const __m256 inner = _mm256_add_ps(
        _mm256_mul_ps(fControl[5], y1z1),
        _mm256_mul_ps(fControl[13], fRec2[0]));
    const __m256 positive = _mm256_add_ps(_mm256_mul_ps(fControl[14], y2z2), inner);
    fRec1[0] = _mm256_sub_ps(positive, _mm256_mul_ps(fControl[4], y0z1));

    const __m256 out = _mm256_mul_ps(fControl[0], fRec0[0]);

    // Shift the delay elements for the next call.
    fRec0[1] = fRec0[0];
    fRec2[2] = y2z1;
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return out;
}

} // namespace fx
} // namespace sfz
27 changes: 27 additions & 0 deletions src/sfizz/effects/impl/ResonantStringInline.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

// Guard against multiple inclusion: this header *defines* an inline member
// function, so pulling it into one translation unit twice would otherwise be
// a redefinition error. The sibling AVX/SSE inline headers use the same guard.
#pragma once
#include "ResonantString.h"

namespace sfz {
namespace fx {

/**
 * Advance the resonator by one sample.
 *
 * The three state arrays are updated from the coefficients held in
 * fControl: fRec2 is driven by the input with feedback from its two
 * previous values, while fRec0 and fRec1 are each computed from the
 * previous values of both. Afterwards the state is aged by one step.
 *
 * @param input sample fed into the fRec2 recursion
 * @return the freshly computed fRec0[0], scaled by fControl[0]
 */
inline float ResonantString::process(float input)
{
    fRec0[0] = (fControl[1] * ((fControl[4] * fRec1[1]) + (fControl[5] * fRec0[1])));
    float fTemp0 = input;
    fRec2[0] = (fTemp0 - (fControl[15] * ((fControl[16] * fRec2[1]) + (fControl[17] * fRec2[2]))));
    // Uses the fRec2[0] computed just above together with previous-step values.
    fRec1[0] = (((fControl[14] * fRec2[2]) + ((fControl[5] * fRec1[1]) + (fControl[13] * fRec2[0]))) - (fControl[4] * fRec0[1]));
    float output = float((fControl[0] * fRec0[0]));
    // Age the state: current values become the "one step ago" values.
    // fRec2[2] must be written before fRec2[1] is overwritten.
    fRec0[1] = fRec0[0];
    fRec2[2] = fRec2[1];
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return output;
}

} // namespace fx
} // namespace sfz
14 changes: 0 additions & 14 deletions src/sfizz/effects/impl/ResonantStringSSE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,6 @@ void ResonantStringSSE::setResonanceFrequency(__m128 frequency, __m128 bandwidth
fControl[17] = _mm_add_ps(fControl[10], _mm_sub_ps(_mm_set1_ps(4.0f), fControl[11]));
}

/**
 * Advance the four packed resonators by one sample.
 *
 * Lane-wise, the state arrays are updated from the coefficients held in
 * fControl: fRec2 is driven by the input with feedback from its two
 * previous values, while fRec0 and fRec1 are each computed from the
 * previous values of both. Afterwards the state is aged by one step.
 *
 * @param input one input value per lane
 * @return the freshly computed fRec0[0], multiplied lane-wise by fControl[0]
 */
__m128 ResonantStringSSE::process(__m128 input)
{
    // Snapshot the delayed state first; every update below reads only
    // values belonging to the previous step.
    const __m128 rec0Prev = fRec0[1];
    const __m128 rec1Prev = fRec1[1];
    const __m128 rec2Prev = fRec2[1];
    const __m128 rec2PrevPrev = fRec2[2];

    const __m128 coupling = _mm_add_ps(
        _mm_mul_ps(fControl[4], rec1Prev),
        _mm_mul_ps(fControl[5], rec0Prev));
    fRec0[0] = _mm_mul_ps(fControl[1], coupling);

    const __m128 feedback = _mm_add_ps(
        _mm_mul_ps(fControl[16], rec2Prev),
        _mm_mul_ps(fControl[17], rec2PrevPrev));
    fRec2[0] = _mm_sub_ps(input, _mm_mul_ps(fControl[15], feedback));

    const __m128 excitation = _mm_add_ps(
        _mm_mul_ps(fControl[14], rec2PrevPrev),
        _mm_add_ps(
            _mm_mul_ps(fControl[5], rec1Prev),
            _mm_mul_ps(fControl[13], fRec2[0])));
    fRec1[0] = _mm_sub_ps(excitation, _mm_mul_ps(fControl[4], rec0Prev));

    const __m128 result = _mm_mul_ps(fControl[0], fRec0[0]);

    // Age the state: current values become the "one step ago" values.
    fRec0[1] = fRec0[0];
    fRec2[2] = rec2Prev;
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return result;
}

} // namespace sfz
} // namespace fx
#endif
28 changes: 28 additions & 0 deletions src/sfizz/effects/impl/ResonantStringSSEInline.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// SPDX-License-Identifier: BSD-2-Clause

// This code is part of the sfizz library and is licensed under a BSD 2-clause
// license. You should have received a LICENSE.md file along with the code.
// If not, contact the sfizz maintainers at https://github.com/sfztools/sfizz

#pragma once
#include "ResonantStringSSE.h"

namespace sfz {
namespace fx {

/**
 * Header-inline single-sample step for four packed resonators.
 *
 * Each lane runs the same recurrence: fRec2 takes the input minus a
 * weighted sum of its two previous values; fRec0 and fRec1 are each
 * derived from the previous values of both. The returned vector is the
 * new fRec0[0] multiplied by fControl[0].
 *
 * @param input one input value per lane
 * @return one output value per lane
 */
inline __m128 ResonantStringSSE::process(__m128 input)
{
    // State as it stood at the end of the previous call.
    const __m128 y0z1 = fRec0[1];
    const __m128 y1z1 = fRec1[1];
    const __m128 y2z1 = fRec2[1];
    const __m128 y2z2 = fRec2[2];

    // fRec0[0] = c1 * (c4 * fRec1[1] + c5 * fRec0[1])
    const __m128 mixed = _mm_add_ps(
        _mm_mul_ps(fControl[4], y1z1),
        _mm_mul_ps(fControl[5], y0z1));
    fRec0[0] = _mm_mul_ps(fControl[1], mixed);

    // fRec2[0] = input - c15 * (c16 * fRec2[1] + c17 * fRec2[2])
    const __m128 tail = _mm_add_ps(
        _mm_mul_ps(fControl[16], y2z1),
        _mm_mul_ps(fControl[17], y2z2));
    fRec2[0] = _mm_sub_ps(input, _mm_mul_ps(fControl[15], tail));

    // fRec1[0] = (c14 * fRec2[2] + (c5 * fRec1[1] + c13 * fRec2[0])) - c4 * fRec0[1]
    const __m128 inner = _mm_add_ps(
        _mm_mul_ps(fControl[5], y1z1),
        _mm_mul_ps(fControl[13], fRec2[0]));
    const __m128 positive = _mm_add_ps(_mm_mul_ps(fControl[14], y2z2), inner);
    fRec1[0] = _mm_sub_ps(positive, _mm_mul_ps(fControl[4], y0z1));

    const __m128 out = _mm_mul_ps(fControl[0], fRec0[0]);

    // Shift the delay elements for the next call.
    fRec0[1] = fRec0[0];
    fRec2[2] = y2z1;
    fRec2[1] = fRec2[0];
    fRec1[1] = fRec1[0];
    return out;
}

} // namespace fx
} // namespace sfz