105 #include <emmintrin.h>
108 #define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
109 static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
111 __m128 result = _mm_mul_ps( vec0, vec1);
112 return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
115 #if defined (BT_ALLOW_SSE4)
119 #define USE_FMA3_INSTEAD_FMA4 1
120 #define USE_SSE4_DOT 1
122 #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f)
123 #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f))
126 #define DOT_PRODUCT(a, b) SSE4_DP(a, b)
128 #define DOT_PRODUCT(a, b) btSimdDot3(a, b)
132 #if USE_FMA3_INSTEAD_FMA4
134 #define FMADD(a, b, c) _mm_fmadd_ps(a, b, c)
136 #define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c)
139 #define FMADD(a, b, c) _mm_macc_ps(a, b, c)
141 #define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c)
145 #define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b))
147 #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
160 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
161 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
164 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
165 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
166 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
167 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
168 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
169 __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
170 deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
174 __m128 impulseMagnitude = deltaImpulse;
186 #if defined (BT_ALLOW_SSE4)
193 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
194 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
196 const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
197 const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
198 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.
m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
199 c.
m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
206 return gResolveSingleConstraintRowGeneric_sse2(body1,body2,c);
220 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
221 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
224 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
225 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
226 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
227 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
228 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
231 __m128 impulseMagnitude = deltaImpulse;
249 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
250 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
252 const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
253 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), deltaImpulse, mask);
261 return gResolveSingleConstraintRowLowerLimit_sse2(body1,body2,c);
262 #endif //BT_ALLOW_SSE4
339 deltaImpulse = _mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.
m_jacDiagABInv)));
340 deltaImpulse = _mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.
m_jacDiagABInv)));
343 resultLowerLess = _mm_cmplt_ps(
sum,lowerLimit1);
344 resultUpperLess = _mm_cmplt_ps(
sum,upperLimit1);
345 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
346 deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
347 c.
m_appliedPushImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum) );
350 __m128 impulseMagnitude = deltaImpulse;
389 #endif//BT_ALLOW_SSE4
412 return gResolveSingleConstraintRowGeneric_sse2;
416 return gResolveSingleConstraintRowLowerLimit_sse2;
421 return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
425 return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
427 #endif //BT_ALLOW_SSE4
442 const unsigned long un =
static_cast<unsigned long>(n);
447 if (un <= 0x00010000UL) {
449 if (un <= 0x00000100UL) {
451 if (un <= 0x00000010UL) {
453 if (un <= 0x00000004UL) {
455 if (un <= 0x00000002UL) {
463 return (
int) (r % un);
514 if (
btFabs(rel_vel)<velocityThreshold)
517 btScalar rest = restitution * -rel_vel;
533 loc_lateral *= friction_scaling;
543 void btSequentialImpulseConstraintSolver::setupFrictionConstraint(
btSolverConstraint& solverConstraint,
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
602 btScalar denom = relaxation/(denom0+denom1);
615 rel_vel = vel1Dotn+vel2Dotn;
619 btScalar velocityError = desiredVelocity - rel_vel;
628 penetrationImpulse = positionalError*solverConstraint.
m_jacDiagABInv;
631 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
633 solverConstraint.
m_cfm = cfmSlip;
640 btSolverConstraint&
btSequentialImpulseConstraintSolver::addFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
645 colObj0, colObj1, relaxation, infoGlobal, desiredVelocity, cfmSlip);
646 return solverConstraint;
670 solverConstraint.
m_friction = combinedTorsionalFriction;
706 rel_vel = vel1Dotn+vel2Dotn;
712 solverConstraint.
m_rhs = velocityImpulse;
713 solverConstraint.
m_cfm = cfmSlip;
727 btSolverConstraint&
btSequentialImpulseConstraintSolver::addTorsionalFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
btScalar combinedTorsionalFriction,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
btScalar desiredVelocity,
btScalar cfmSlip)
732 colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
733 return solverConstraint;
740 int solverBodyId = -1;
746 if ( solverBodyId < 0 )
767 const int INVALID_SOLVER_BODY_ID = -1;
774 if ( solverBodyId == INVALID_SOLVER_BODY_ID )
797 #else // BT_THREADSAFE
799 int solverBodyIdA = -1;
830 return solverBodyIdA;
831 #endif // BT_THREADSAFE
838 int solverBodyIdA,
int solverBodyIdB,
858 relaxation = infoGlobal.
m_sor;
896 #ifdef COMPUTE_IMPULSE_DENOM
913 #endif //COMPUTE_IMPULSE_DENOM
915 btScalar denom = relaxation/(denom0+denom1+cfm);
991 btScalar rel_vel = vel1Dotn+vel2Dotn;
994 btScalar velocityError = restitution - rel_vel;
1000 positionalError = 0;
1002 velocityError -= penetration *invTimeStep;
1005 positionalError = -penetration * erp*invTimeStep;
1015 solverConstraint.
m_rhs = penetrationImpulse+velocityImpulse;
1021 solverConstraint.
m_rhs = velocityImpulse;
1037 int solverBodyIdA,
int solverBodyIdB,
1104 int rollingFriction=1;
1128 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
1139 setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, relaxation, rel_pos1, rel_pos2);
1152 addTorsionalFrictionConstraint(cp.
m_normalWorldOnB,solverBodyIdA,solverBodyIdB,frictionIndex,cp,cp.
m_combinedSpinningFriction, rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
1162 if (axis0.
length()>0.001)
1165 if (axis1.
length()>0.001)
1197 addFrictionConstraint(cp.
m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation,infoGlobal);
1205 addFrictionConstraint(cp.
m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, infoGlobal);
1214 addFrictionConstraint(cp.
m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, infoGlobal);
1220 addFrictionConstraint(cp.
m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, infoGlobal);
1232 addFrictionConstraint(cp.
m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, infoGlobal, cp.
m_contactMotion1, cp.
m_frictionCFM);
1235 addFrictionConstraint(cp.
m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, infoGlobal, cp.
m_contactMotion2, cp.
m_frictionCFM);
1254 for (i=0;i<numManifolds;i++)
1256 manifold = manifoldPtr[i];
1277 #ifdef BT_ADDITIONAL_DEBUG
1279 for (
int i=0;i<numConstraints;i++)
1287 for (
int b=0;b<numBodies;b++)
1301 for (
int b=0;b<numBodies;b++)
1314 for (
int i=0;i<numManifolds;i++)
1316 if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
1319 for (
int b=0;b<numBodies;b++)
1322 if (manifoldPtr[i]->getBody0()==bodies[b])
1330 if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
1333 for (
int b=0;b<numBodies;b++)
1335 if (manifoldPtr[i]->getBody1()==bodies[b])
1344 #endif //BT_ADDITIONAL_DEBUG
1347 for (
int i = 0; i < numBodies; i++)
1353 #endif // BT_THREADSAFE
1364 for (
int i=0;i<numBodies;i++)
1397 for (j=0;j<numConstraints;j++)
1411 int totalNumRows = 0;
1416 for (i=0;i<numConstraints;i++)
1428 if (constraints[i]->isEnabled())
1431 if (constraints[i]->isEnabled())
1447 for (i=0;i<numConstraints;i++)
1510 info2.
cfm = ¤tConstraintRow->
m_cfm;
1574 rel_vel = vel1Dotn+vel2Dotn;
1580 solverConstraint.
m_rhs = penetrationImpulse+velocityImpulse;
1612 for (i=0;i<numNonContactPool;i++)
1616 for (i=0;i<numConstraintPool;i++)
1620 for (i=0;i<numFrictionPool;i++)
1633 btScalar leastSquaresResidual = 0.f;
1644 for (
int j=0; j<numNonContactPool; ++j) {
1654 for (
int j=0; j<numConstraintPool; ++j) {
1661 for (
int j=0; j<numFrictionPool; ++j) {
1678 leastSquaresResidual += residual*residual;
1684 for (
int j=0;j<numConstraints;j++)
1686 if (constraints[j]->isEnabled())
1702 for (
int c=0;c<numPoolConstraints;c++)
1709 leastSquaresResidual += residual*residual;
1713 bool applyFriction =
true;
1726 leastSquaresResidual += residual*residual;
1741 leastSquaresResidual += residual*residual;
1754 for (j=0;j<numPoolConstraints;j++)
1758 leastSquaresResidual += residual*residual;
1766 for (j=0;j<numFrictionPoolConstraints;j++)
1777 leastSquaresResidual += residual*residual;
1784 for (
int j=0;j<numRollingFrictionPoolConstraints;j++)
1791 btScalar rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction*totalImpulse;
1792 if (rollingFrictionMagnitude>rollingFrictionConstraint.
m_friction)
1793 rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction;
1795 rollingFrictionConstraint.
m_lowerLimit = -rollingFrictionMagnitude;
1796 rollingFrictionConstraint.
m_upperLimit = rollingFrictionMagnitude;
1799 leastSquaresResidual += residual*residual;
1805 return leastSquaresResidual;
1817 btScalar leastSquaresResidual =0.f;
1821 for (j=0;j<numPoolConstraints;j++)
1826 leastSquaresResidual += residual*residual;
1829 if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration>=(infoGlobal.
m_numIterations-1))
1831 #ifdef VERBOSE_RESIDUAL_PRINTF
1832 printf(
"residual = %f at iteration #%d\n",leastSquaresResidual,iteration);
1843 BT_PROFILE(
"solveGroupCacheFriendlyIterations");
1851 for (
int iteration = 0 ; iteration<
maxIterations ; iteration++)
1858 #ifdef VERBOSE_RESIDUAL_PRINTF
1876 for (j=0;j<numPoolConstraints;j++)
1895 for (j=0;j<numPoolConstraints;j++)