diff --git a/lib/rendering/geom/Procedural.cc b/lib/rendering/geom/Procedural.cc index 1ed000e..2ddda38 100644 --- a/lib/rendering/geom/Procedural.cc +++ b/lib/rendering/geom/Procedural.cc @@ -14,8 +14,6 @@ #include #include -#include - #include namespace moonray { @@ -25,7 +23,7 @@ class PrimitiveMemoryAccumulator : public PrimitiveVisitor { public: PrimitiveMemoryAccumulator( - tbb::atomic& usage, + std::atomic& usage, SharedPrimitiveSet& sharedPrimitives, bool inPrimitiveGroup = false) : mUsage(usage), mSharedPrimitives(sharedPrimitives), @@ -63,7 +61,7 @@ class PrimitiveMemoryAccumulator : public PrimitiveVisitor } private: - tbb::atomic& mUsage; + std::atomic& mUsage; SharedPrimitiveSet& mSharedPrimitives; bool mInPrimitiveGroup; }; @@ -123,7 +121,7 @@ class StatisticsAccumulator : public PrimitiveVisitor Procedural::size_type Procedural::getMemory() { - tbb::atomic usage {0u}; + std::atomic usage {0u}; SharedPrimitiveSet sharedPrimitives; PrimitiveMemoryAccumulator accumulator(usage, sharedPrimitives); forEachPrimitive(accumulator); diff --git a/lib/rendering/geom/Procedural.h b/lib/rendering/geom/Procedural.h index 54c8ec7..11706be 100644 --- a/lib/rendering/geom/Procedural.h +++ b/lib/rendering/geom/Procedural.h @@ -30,11 +30,18 @@ struct GeometryStatistics { GeometryStatistics(): mFaceCount {0}, mMeshVertexCount {0}, mCurvesCount {0}, mCVCount {0}, mInstanceCount {0} {} - tbb::atomic mFaceCount; - tbb::atomic mMeshVertexCount; - tbb::atomic mCurvesCount; - tbb::atomic mCVCount; - tbb::atomic mInstanceCount; + GeometryStatistics(const GeometryStatistics &other): + mFaceCount(other.mFaceCount.load()), + mMeshVertexCount(other.mMeshVertexCount.load()), + mCurvesCount(other.mCurvesCount.load()), + mCVCount(other.mCVCount.load()), + mInstanceCount(other.mInstanceCount.load()) {} + + std::atomic mFaceCount; + std::atomic mMeshVertexCount; + std::atomic mCurvesCount; + std::atomic mCVCount; + std::atomic mInstanceCount; }; 
//---------------------------------------------------------------------------- diff --git a/lib/rendering/mcrt_common/Bundle.h b/lib/rendering/mcrt_common/Bundle.h index b5402a5..37761a7 100644 --- a/lib/rendering/mcrt_common/Bundle.h +++ b/lib/rendering/mcrt_common/Bundle.h @@ -210,7 +210,7 @@ class LocalQueue return unsigned(entriesToFlush); } - tbb::atomic mQueueSize; // must not exceed mMaxEntries + std::atomic mQueueSize; // must not exceed mMaxEntries uint32_t mMaxEntries; EntryType * mEntries; // all data offsets are relative to this address Handler mHandler; @@ -502,7 +502,7 @@ class LocalLargeEntryQueue return unsigned(entriesToFlush); } - tbb::atomic mQueueSize; // must not exceed mMaxEntries + std::atomic mQueueSize; // must not exceed mMaxEntries uint32_t mMaxEntries; EntryType * mEntries; // all data offsets are relative to this address Handler mHandler; @@ -788,7 +788,7 @@ class CACHE_ALIGN SharedQueue return unsigned(entriesToFlush); } - tbb::atomic mQueueSize; // must not exceed mMaxEntries + std::atomic mQueueSize; // must not exceed mMaxEntries uint32_t mMaxEntries; EntryType * mEntries; // all data offsets are relative to this address Handler mHandler; diff --git a/lib/rendering/mcrt_common/ThreadLocalState.cc b/lib/rendering/mcrt_common/ThreadLocalState.cc index 941607f..1c55936 100644 --- a/lib/rendering/mcrt_common/ThreadLocalState.cc +++ b/lib/rendering/mcrt_common/ThreadLocalState.cc @@ -42,7 +42,7 @@ namespace { // Counter to hand out unique indices to TLSProxy objects. 
-tbb::atomic gNextFrameUpdateTLSIndex; +std::atomic gNextFrameUpdateTLSIndex; // These are lightweight objects which we put into a tbb::enumerable_thread_specific // container so that we can map OS thread ids to consistent top level ThreadLocalState @@ -50,7 +50,7 @@ tbb::atomic gNextFrameUpdateTLSIndex; struct FrameUpdateTLSProxy { FrameUpdateTLSProxy() : - mTLSIndex(gNextFrameUpdateTLSIndex.fetch_and_increment()) + mTLSIndex(gNextFrameUpdateTLSIndex++) { } diff --git a/lib/rendering/pbr/Types.h b/lib/rendering/pbr/Types.h index 4a1e4cf..09662cd 100644 --- a/lib/rendering/pbr/Types.h +++ b/lib/rendering/pbr/Types.h @@ -12,7 +12,6 @@ #include #include -#include #include diff --git a/lib/rendering/pbr/Types.hh b/lib/rendering/pbr/Types.hh index 0a6c453..bd7b72f 100644 --- a/lib/rendering/pbr/Types.hh +++ b/lib/rendering/pbr/Types.hh @@ -155,7 +155,7 @@ enum OcclTestType #define DEEP_DATA_MEMBERS \ - HUD_CPP_MEMBER(tbb::atomic, mRefCount, 4); \ + HUD_CPP_MEMBER(std::atomic, mRefCount, 4); \ HUD_MEMBER(uint32_t, mHitDeep); \ HUD_MEMBER(float, mSubpixelX); \ HUD_MEMBER(float, mSubpixelY); \ @@ -182,7 +182,7 @@ enum OcclTestType #define CRYPTOMATTE_DATA_MEMBERS \ - HUD_CPP_MEMBER(tbb::atomic, mRefCount, 4); \ + HUD_CPP_MEMBER(std::atomic, mRefCount, 4); \ HUD_CPP_PTR(pbr::CryptomatteBuffer*, mCryptomatteBuffer); \ HUD_MEMBER(uint32_t, mHit); \ HUD_MEMBER(uint32_t, mPrevPresence); \ @@ -211,7 +211,7 @@ enum OcclTestType // bytes in size due to them being allocated as one cache line. 
#define CRYPTOMATTE_DATA_MEMBERS_2 \ - HUD_CPP_MEMBER(tbb::atomic, mRefCount, 4); \ + HUD_CPP_MEMBER(std::atomic, mRefCount, 4); \ HUD_MEMBER(HVD_NAMESPACE(scene_rdl2::math, Vec3f), mRefP); \ HUD_MEMBER(HVD_NAMESPACE(scene_rdl2::math, Vec3f), mRefN); \ HUD_MEMBER(HVD_NAMESPACE(scene_rdl2::math, Vec2f), mUV) diff --git a/lib/rendering/pbr/core/PbrTLState.cc b/lib/rendering/pbr/core/PbrTLState.cc index 5281335..7578bd3 100644 --- a/lib/rendering/pbr/core/PbrTLState.cc +++ b/lib/rendering/pbr/core/PbrTLState.cc @@ -58,8 +58,8 @@ MNRY_STATIC_ASSERT(ALLOC_LIST_MAX_NUM_ITEMS <= ((ALLOC_LIST_INFO_BITS >> ALLOC_LIST_INFO_BIT_SHIFT) + 1)); // Per frame counter, gets reset each frame. -CACHE_ALIGN tbb::atomic gFailedRayStateAllocs; -CACHE_ALIGN tbb::atomic gFailedCL1Allocs; +CACHE_ALIGN std::atomic gFailedRayStateAllocs; +CACHE_ALIGN std::atomic gFailedCL1Allocs; // For memory profiling, see DEBUG_RECORD_PEAK_RAYSTATE_USAGE. unsigned MAYBE_UNUSED gPeakRayStateUsage = 0; @@ -416,7 +416,7 @@ TLState::poolAlloc(const char * const typeName, unsigned numEntries, ResType **entries, OverlappedAccType accumStall, - tbb::atomic &numFailedAllocs) + std::atomic &numFailedAllocs) { // 99.9999% case, allocation should succeed. 
bool success = pool.allocList(numEntries, entries); diff --git a/lib/rendering/pbr/core/PbrTLState.h b/lib/rendering/pbr/core/PbrTLState.h index 12a8a95..4eb7354 100644 --- a/lib/rendering/pbr/core/PbrTLState.h +++ b/lib/rendering/pbr/core/PbrTLState.h @@ -196,7 +196,7 @@ class CACHE_ALIGN TLState : public mcrt_common::BaseTLState unsigned numEntries, ResType **entries, OverlappedAccType accumStall, - tbb::atomic &numFailedAlloc); + std::atomic &numFailedAlloc); DISALLOW_COPY_OR_ASSIGNMENT(TLState); }; diff --git a/lib/rendering/rndr/RenderFramePasses.cc b/lib/rendering/rndr/RenderFramePasses.cc index f6f5e76..3210f53 100644 --- a/lib/rendering/rndr/RenderFramePasses.cc +++ b/lib/rendering/rndr/RenderFramePasses.cc @@ -103,8 +103,8 @@ RenderDriver::renderPasses(RenderDriver *driver, const FrameState &fs, // This counter verifies that we don't leave this function until all threads // have started working. - CACHE_ALIGN tbb::atomic numTBBThreads; - CACHE_ALIGN tbb::atomic canceled; + CACHE_ALIGN std::atomic numTBBThreads; + CACHE_ALIGN std::atomic canceled; numTBBThreads = 0; canceled = false; diff --git a/lib/rendering/rndr/RenderStatistics.cc b/lib/rendering/rndr/RenderStatistics.cc index 5bc7b27..2949b47 100644 --- a/lib/rendering/rndr/RenderStatistics.cc +++ b/lib/rendering/rndr/RenderStatistics.cc @@ -963,24 +963,24 @@ RenderStats::logGeometryUsage(const geom::GeometryStatistics& totalGeomStatistic for(std::size_t i = 0; i < geomStateInfo.size(); ++i) { geomTable.emplace_back(geomStateInfo[i].first, - geomStateInfo[i].second.mFaceCount, - geomStateInfo[i].second.mMeshVertexCount, - geomStateInfo[i].second.mCurvesCount, - geomStateInfo[i].second.mCVCount, - geomStateInfo[i].second.mInstanceCount); + geomStateInfo[i].second.mFaceCount.load(), + geomStateInfo[i].second.mMeshVertexCount.load(), + geomStateInfo[i].second.mCurvesCount.load(), + geomStateInfo[i].second.mCVCount.load(), + geomStateInfo[i].second.mInstanceCount.load()); } StatsTable<2> 
summaryTable("Geometry Statistics Summary"); summaryTable.emplace_back("Total Face Count", - totalGeomStatistics.mFaceCount); + totalGeomStatistics.mFaceCount.load()); summaryTable.emplace_back("Total Mesh Vertex Count", - totalGeomStatistics.mMeshVertexCount); + totalGeomStatistics.mMeshVertexCount.load()); summaryTable.emplace_back("Total Curves Count", - totalGeomStatistics.mCurvesCount); + totalGeomStatistics.mCurvesCount.load()); summaryTable.emplace_back("Total Curves CV Count", - totalGeomStatistics.mCVCount); + totalGeomStatistics.mCVCount.load()); summaryTable.emplace_back("Total Instance Count", - totalGeomStatistics.mInstanceCount); + totalGeomStatistics.mInstanceCount.load()); auto writeCSV = [&](std::ostream& outs, bool athenaFormat) { outs.precision(2); diff --git a/lib/rendering/rt/EmbreeAccelerator.cc b/lib/rendering/rt/EmbreeAccelerator.cc index cb348a0..84a7b5c 100644 --- a/lib/rendering/rt/EmbreeAccelerator.cc +++ b/lib/rendering/rt/EmbreeAccelerator.cc @@ -41,7 +41,7 @@ namespace rt { typedef tbb::concurrent_unordered_map, - tbb::atomic, geom::SharedPtrHash> SharedSceneMap; + std::atomic, geom::SharedPtrHash> SharedSceneMap; class BVHBuilder : public geom::PrimitiveVisitor diff --git a/lib/rendering/rt/GeometryManager.cc b/lib/rendering/rt/GeometryManager.cc index 1c61845..7752aa7 100644 --- a/lib/rendering/rt/GeometryManager.cc +++ b/lib/rendering/rt/GeometryManager.cc @@ -1361,7 +1361,7 @@ GeometryManager::getEmissiveRegions(const scene_rdl2::rdl2::Layer* layer, } // Counter to provide unique thread ids -tbb::atomic gThreadIdCounter; +std::atomic gThreadIdCounter; GeometryManager::GM_RESULT GeometryManager::tessellate(scene_rdl2::rdl2::Layer* layer, @@ -1396,7 +1396,7 @@ GeometryManager::tessellate(scene_rdl2::rdl2::Layer* layer, struct ThreadID { // When we create a ThreadID, the counter increments and so // each thread gets a unique human readable id. 
- ThreadID() : mId(gThreadIdCounter.fetch_and_increment()){} + ThreadID() : mId(gThreadIdCounter++){} unsigned mId; }; typedef tbb::enumerable_thread_specific< ThreadID > EnumerableThreadID; diff --git a/lib/rendering/rt/GeometryManager.h b/lib/rendering/rt/GeometryManager.h index bb1e8e1..2a01398 100644 --- a/lib/rendering/rt/GeometryManager.h +++ b/lib/rendering/rt/GeometryManager.h @@ -38,7 +38,7 @@ namespace rt { enum class ChangeFlag; -typedef tbb::concurrent_unordered_map> PrimitiveReferenceCountMap; +typedef tbb::concurrent_unordered_map> PrimitiveReferenceCountMap; struct GeometryManagerStats { @@ -187,7 +187,7 @@ class GeometryManager finline void compareAndSwapFlag(ChangeFlag swapFlag, ChangeFlag compareFlag) { - mChangeStatus.compare_and_swap(swapFlag, compareFlag); + mChangeStatus.compare_exchange_strong(compareFlag, swapFlag); /* std::atomic takes (expected, desired); tbb::atomic took (new_value, comparand) */ } void updateGPUAccelerator(const scene_rdl2::rdl2::Layer* layer); @@ -255,7 +255,7 @@ class GeometryManager GeometryManagerOptions mOptions; - typedef tbb::atomic ChangeFlagAtomic; + typedef std::atomic ChangeFlagAtomic; /// Tracks current change status ChangeFlagAtomic mChangeStatus; diff --git a/lib/rendering/rt/gpu/GPUAcceleratorImpl.h b/lib/rendering/rt/gpu/GPUAcceleratorImpl.h index a4449d6..9afd705 100644 --- a/lib/rendering/rt/gpu/GPUAcceleratorImpl.h +++ b/lib/rendering/rt/gpu/GPUAcceleratorImpl.h @@ -23,7 +23,7 @@ namespace rt { // Also in EmbreeAccelerator.cc typedef tbb::concurrent_unordered_map, - tbb::atomic, geom::SharedPtrHash> SharedGroupMap; + std::atomic, geom::SharedPtrHash> SharedGroupMap; class GPUAcceleratorImpl diff --git a/lib/rendering/shading/Material.cc b/lib/rendering/shading/Material.cc index f3c52b5..1a5bb54 100644 --- a/lib/rendering/shading/Material.cc +++ b/lib/rendering/shading/Material.cc @@ -15,10 +15,10 @@ MaterialPtrList Material::sQueuelessMaterials; tbb::mutex Material::sShadeQueueMutex; ShadeQueueList Material::sShadeQueues; -tbb::atomic Material::sFlushCycleIdx; +std::atomic 
Material::sFlushCycleIdx; -tbb::atomic Material::sDeferredEntryCalls; -tbb::atomic Material::sTotalDeferredEntries; +std::atomic Material::sDeferredEntryCalls; +std::atomic Material::sTotalDeferredEntries; Material::Material(const scene_rdl2::rdl2::SceneObject & owner) : RootShader(owner), diff --git a/lib/rendering/shading/Material.h b/lib/rendering/shading/Material.h index 6d9e92e..1d86aff 100644 --- a/lib/rendering/shading/Material.h +++ b/lib/rendering/shading/Material.h @@ -180,11 +180,11 @@ class Material : public RootShader // This is used by the flushNonEmptyShadeQueue function to iterate through all queues // in a cyclic fashion as opposed to starting the iteration at the beginning of the // queue list each time. - static tbb::atomic sFlushCycleIdx; + static std::atomic sFlushCycleIdx; // Shared between all Materials. - static tbb::atomic sDeferredEntryCalls; - static tbb::atomic sTotalDeferredEntries; + static std::atomic sDeferredEntryCalls; + static std::atomic sTotalDeferredEntries; }; template diff --git a/tests/lib/rendering/pbr/TestLights.cc b/tests/lib/rendering/pbr/TestLights.cc index 409f893..8314b89 100644 --- a/tests/lib/rendering/pbr/TestLights.cc +++ b/tests/lib/rendering/pbr/TestLights.cc @@ -271,7 +271,7 @@ testLightPDF(const Vec3f &p, const Vec3f &n, const LightTester *lightTester, // compute ref pdfs (only for unit test debugging) // - tbb::atomic refValidSampleCount; + std::atomic refValidSampleCount; refValidSampleCount = 0; double refPdf = doReductionOverUnitSquare(0.0, @@ -326,7 +326,7 @@ testLightPDF(const Vec3f &p, const Vec3f &n, const LightTester *lightTester, // compute test pdf // - tbb::atomic testValidSampleCount; + std::atomic testValidSampleCount; testValidSampleCount = 0; double testPdf = doReductionOverUnitSquare(0.0, @@ -394,7 +394,7 @@ testLightPDF(const Vec3f &p, const Vec3f &n, const LightTester *lightTester, // compute test pdf of ISPC light // - tbb::atomic testIspcValidSampleCount; + std::atomic 
testIspcValidSampleCount; testIspcValidSampleCount = 0; double testIspcPdf = doReductionOverUnitSquare(0.0, @@ -525,7 +525,7 @@ testLightCanIlluminate(const Vec3f &, const Vec3f &, const LightTester *lightTes // Test C++ light implementation // - tbb::atomic seed; + std::atomic seed; seed = initialSeed; tbb::parallel_for(tbb::blocked_range(0, NUM_CAN_ILLUMINATE_TESTS, @@ -651,11 +651,11 @@ testLightIntersection(const Vec3f &p, const Vec3f &n, const LightTester *lightTe // Test C++ implementation // - tbb::atomic cppIsectsEqual; + std::atomic cppIsectsEqual; cppIsectsEqual = 0; - tbb::atomic cppNoIntersection; + std::atomic cppNoIntersection; cppNoIntersection = 0; - tbb::atomic cppInvalidSamples; + std::atomic cppInvalidSamples; cppInvalidSamples = 0; tbb::parallel_for (tbb::blocked_range(0u, NUM_SAMPLES_PER_AXIS, GRAINSIZE_PER_AXIS), @@ -780,11 +780,11 @@ testLightIntersection(const Vec3f &p, const Vec3f &n, const LightTester *lightTe // Test ISPC implementation // - tbb::atomic ispcIsectsEqual; + std::atomic ispcIsectsEqual; ispcIsectsEqual = 0; - tbb::atomic ispcNoIntersection; + std::atomic ispcNoIntersection; ispcNoIntersection = 0; - tbb::atomic ispcInvalidSamples; + std::atomic ispcInvalidSamples; ispcInvalidSamples = 0; tbb::parallel_for (tbb::blocked_range(0u, NUM_SAMPLES_PER_AXIS, GRAINSIZE_PER_AXIS),