perp_calc_using_dot_prod

Percentage Accurate: 100.0% → 100.0%
Time: 5.4s
Alternatives: 4
Speedup: 1.2×

Specification

?
\[\left(\left(\left(\left(\left(-1 \leq v1x \land v1x \leq 1\right) \land \left(-1 \leq v1y \land v1y \leq 1\right)\right) \land \left(-1 \leq v1z \land v1z \leq 1\right)\right) \land \left(-1 \leq v2x \land v2x \leq 1\right)\right) \land \left(-1 \leq v2y \land v2y \leq 1\right)\right) \land \left(-1 \leq v2z \land v2z \leq 1\right)\]
\[\begin{array}{l} \\ \left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \end{array} \]
(FPCore (v1x v1y v1z v2x v2y v2z)
 :precision binary64
 (fabs (+ (+ (* v1x v2x) (* v1y v2y)) (* v1z v2z))))
double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	return fabs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
}
real(8) function code(v1x, v1y, v1z, v2x, v2y, v2z)
    real(8), intent (in) :: v1x
    real(8), intent (in) :: v1y
    real(8), intent (in) :: v1z
    real(8), intent (in) :: v2x
    real(8), intent (in) :: v2y
    real(8), intent (in) :: v2z
    code = abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)))
end function
public static double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	return Math.abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
}
def code(v1x, v1y, v1z, v2x, v2y, v2z):
	return math.fabs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)))
function code(v1x, v1y, v1z, v2x, v2y, v2z)
	return abs(Float64(Float64(Float64(v1x * v2x) + Float64(v1y * v2y)) + Float64(v1z * v2z)))
end
function tmp = code(v1x, v1y, v1z, v2x, v2y, v2z)
	tmp = abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
end
code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := N[Abs[N[(N[(N[(v1x * v2x), $MachinePrecision] + N[(v1y * v2y), $MachinePrecision]), $MachinePrecision] + N[(v1z * v2z), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}

\\
\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right|
\end{array}

Sampling outcomes in binary64 precision:

Local Percentage Accuracy vs ?

The average percentage accuracy by input value. Horizontal axis shows value of an input variable; the variable is choosen in the title. Vertical axis is accuracy; higher is better. Red represent the original program, while blue represents Herbie's suggestion. These can be toggled with buttons below the plot. The line is an average while dots represent individual samples.

Accuracy vs Speed?

Herbie found 4 alternatives:

AlternativeAccuracySpeedup
The accuracy (vertical axis) and speed (horizontal axis) of each alternatives. Up and to the right is better. The red square shows the initial program, and each blue circle shows an alternative.The line shows the best available speed-accuracy tradeoffs.

Initial Program: 100.0% accurate, 1.0× speedup?

\[\begin{array}{l} \\ \left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \end{array} \]
(FPCore (v1x v1y v1z v2x v2y v2z)
 :precision binary64
 (fabs (+ (+ (* v1x v2x) (* v1y v2y)) (* v1z v2z))))
double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	return fabs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
}
real(8) function code(v1x, v1y, v1z, v2x, v2y, v2z)
    real(8), intent (in) :: v1x
    real(8), intent (in) :: v1y
    real(8), intent (in) :: v1z
    real(8), intent (in) :: v2x
    real(8), intent (in) :: v2y
    real(8), intent (in) :: v2z
    code = abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)))
end function
public static double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	return Math.abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
}
def code(v1x, v1y, v1z, v2x, v2y, v2z):
	return math.fabs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)))
function code(v1x, v1y, v1z, v2x, v2y, v2z)
	return abs(Float64(Float64(Float64(v1x * v2x) + Float64(v1y * v2y)) + Float64(v1z * v2z)))
end
function tmp = code(v1x, v1y, v1z, v2x, v2y, v2z)
	tmp = abs((((v1x * v2x) + (v1y * v2y)) + (v1z * v2z)));
end
code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := N[Abs[N[(N[(N[(v1x * v2x), $MachinePrecision] + N[(v1y * v2y), $MachinePrecision]), $MachinePrecision] + N[(v1z * v2z), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}

\\
\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right|
\end{array}

Alternative 1: 100.0% accurate, 1.2× speedup?

\[\begin{array}{l} \\ \left|\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right)\right| \end{array} \]
(FPCore (v1x v1y v1z v2x v2y v2z)
 :precision binary64
 (fabs (fma v2x v1x (fma v2z v1z (* v2y v1y)))))
double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	return fabs(fma(v2x, v1x, fma(v2z, v1z, (v2y * v1y))));
}
function code(v1x, v1y, v1z, v2x, v2y, v2z)
	return abs(fma(v2x, v1x, fma(v2z, v1z, Float64(v2y * v1y))))
end
code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := N[Abs[N[(v2x * v1x + N[(v2z * v1z + N[(v2y * v1y), $MachinePrecision]), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
\begin{array}{l}

\\
\left|\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right)\right|
\end{array}
Derivation
  1. Initial program 100.0%

    \[\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \]
  2. Add Preprocessing
  3. Step-by-step derivation
    1. lift-+.f64N/A

      \[\leadsto \left|\color{blue}{\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z}\right| \]
    2. lift-+.f64N/A

      \[\leadsto \left|\color{blue}{\left(v1x \cdot v2x + v1y \cdot v2y\right)} + v1z \cdot v2z\right| \]
    3. associate-+l+N/A

      \[\leadsto \left|\color{blue}{v1x \cdot v2x + \left(v1y \cdot v2y + v1z \cdot v2z\right)}\right| \]
    4. lift-*.f64N/A

      \[\leadsto \left|\color{blue}{v1x \cdot v2x} + \left(v1y \cdot v2y + v1z \cdot v2z\right)\right| \]
    5. *-commutativeN/A

      \[\leadsto \left|\color{blue}{v2x \cdot v1x} + \left(v1y \cdot v2y + v1z \cdot v2z\right)\right| \]
    6. lower-fma.f64N/A

      \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2x, v1x, v1y \cdot v2y + v1z \cdot v2z\right)}\right| \]
    7. +-commutativeN/A

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \color{blue}{v1z \cdot v2z + v1y \cdot v2y}\right)\right| \]
    8. lift-*.f64N/A

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \color{blue}{v1z \cdot v2z} + v1y \cdot v2y\right)\right| \]
    9. *-commutativeN/A

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \color{blue}{v2z \cdot v1z} + v1y \cdot v2y\right)\right| \]
    10. lower-fma.f64100.0

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \color{blue}{\mathsf{fma}\left(v2z, v1z, v1y \cdot v2y\right)}\right)\right| \]
    11. lift-*.f64N/A

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, \color{blue}{v1y \cdot v2y}\right)\right)\right| \]
    12. *-commutativeN/A

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, \color{blue}{v2y \cdot v1y}\right)\right)\right| \]
    13. lower-*.f64100.0

      \[\leadsto \left|\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, \color{blue}{v2y \cdot v1y}\right)\right)\right| \]
  4. Applied rewrites100.0%

    \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2x, v1x, \mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right)}\right| \]
  5. Add Preprocessing

Alternative 2: 89.4% accurate, 0.7× speedup?

\[\begin{array}{l} \\ \begin{array}{l} \mathbf{if}\;v1y \cdot v2y \leq -2 \cdot 10^{-254} \lor \neg \left(v1y \cdot v2y \leq 2 \cdot 10^{-253}\right):\\ \;\;\;\;\left|\mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right|\\ \mathbf{else}:\\ \;\;\;\;\left|\mathsf{fma}\left(v2x, v1x, v1z \cdot v2z\right)\right|\\ \end{array} \end{array} \]
(FPCore (v1x v1y v1z v2x v2y v2z)
 :precision binary64
 (if (or (<= (* v1y v2y) -2e-254) (not (<= (* v1y v2y) 2e-253)))
   (fabs (fma v2z v1z (* v2y v1y)))
   (fabs (fma v2x v1x (* v1z v2z)))))
double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
	double tmp;
	if (((v1y * v2y) <= -2e-254) || !((v1y * v2y) <= 2e-253)) {
		tmp = fabs(fma(v2z, v1z, (v2y * v1y)));
	} else {
		tmp = fabs(fma(v2x, v1x, (v1z * v2z)));
	}
	return tmp;
}
function code(v1x, v1y, v1z, v2x, v2y, v2z)
	tmp = 0.0
	if ((Float64(v1y * v2y) <= -2e-254) || !(Float64(v1y * v2y) <= 2e-253))
		tmp = abs(fma(v2z, v1z, Float64(v2y * v1y)));
	else
		tmp = abs(fma(v2x, v1x, Float64(v1z * v2z)));
	end
	return tmp
end
code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := If[Or[LessEqual[N[(v1y * v2y), $MachinePrecision], -2e-254], N[Not[LessEqual[N[(v1y * v2y), $MachinePrecision], 2e-253]], $MachinePrecision]], N[Abs[N[(v2z * v1z + N[(v2y * v1y), $MachinePrecision]), $MachinePrecision]], $MachinePrecision], N[Abs[N[(v2x * v1x + N[(v1z * v2z), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]]
\begin{array}{l}

\\
\begin{array}{l}
\mathbf{if}\;v1y \cdot v2y \leq -2 \cdot 10^{-254} \lor \neg \left(v1y \cdot v2y \leq 2 \cdot 10^{-253}\right):\\
\;\;\;\;\left|\mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right|\\

\mathbf{else}:\\
\;\;\;\;\left|\mathsf{fma}\left(v2x, v1x, v1z \cdot v2z\right)\right|\\


\end{array}
\end{array}
Derivation
  1. Split input into 2 regimes
  2. if (*.f64 v1y v2y) < -1.9999999999999998e-254 or 2.0000000000000001e-253 < (*.f64 v1y v2y)

    1. Initial program 99.9%

      \[\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \]
    2. Add Preprocessing
    3. Taylor expanded in v1x around 0

      \[\leadsto \left|\color{blue}{v1y \cdot v2y + v1z \cdot v2z}\right| \]
    4. Step-by-step derivation
      1. +-commutativeN/A

        \[\leadsto \left|\color{blue}{v1z \cdot v2z + v1y \cdot v2y}\right| \]
      2. *-commutativeN/A

        \[\leadsto \left|\color{blue}{v2z \cdot v1z} + v1y \cdot v2y\right| \]
      3. lower-fma.f64N/A

        \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v1y \cdot v2y\right)}\right| \]
      4. *-commutativeN/A

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2y \cdot v1y}\right)\right| \]
      5. lower-*.f6483.9

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2y \cdot v1y}\right)\right| \]
    5. Applied rewrites83.9%

      \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)}\right| \]

    if -1.9999999999999998e-254 < (*.f64 v1y v2y) < 2.0000000000000001e-253

    1. Initial program 100.0%

      \[\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \]
    2. Add Preprocessing
    3. Taylor expanded in v1y around 0

      \[\leadsto \left|\color{blue}{v1x \cdot v2x + v1z \cdot v2z}\right| \]
    4. Step-by-step derivation
      1. +-commutativeN/A

        \[\leadsto \left|\color{blue}{v1z \cdot v2z + v1x \cdot v2x}\right| \]
      2. *-commutativeN/A

        \[\leadsto \left|\color{blue}{v2z \cdot v1z} + v1x \cdot v2x\right| \]
      3. lower-fma.f64N/A

        \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v1x \cdot v2x\right)}\right| \]
      4. *-commutativeN/A

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2x \cdot v1x}\right)\right| \]
      5. lower-*.f6492.3

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2x \cdot v1x}\right)\right| \]
    5. Applied rewrites92.3%

      \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v2x \cdot v1x\right)}\right| \]
    6. Step-by-step derivation
      1. Applied rewrites92.3%

        \[\leadsto \left|\mathsf{fma}\left(v2x, \color{blue}{v1x}, v1z \cdot v2z\right)\right| \]
    7. Recombined 2 regimes into one program.
    8. Final simplification89.5%

      \[\leadsto \begin{array}{l} \mathbf{if}\;v1y \cdot v2y \leq -2 \cdot 10^{-254} \lor \neg \left(v1y \cdot v2y \leq 2 \cdot 10^{-253}\right):\\ \;\;\;\;\left|\mathsf{fma}\left(v2z, v1z, v2y \cdot v1y\right)\right|\\ \mathbf{else}:\\ \;\;\;\;\left|\mathsf{fma}\left(v2x, v1x, v1z \cdot v2z\right)\right|\\ \end{array} \]
    9. Add Preprocessing

    Alternative 3: 71.8% accurate, 1.7× speedup?

    \[\begin{array}{l} \\ \left|\mathsf{fma}\left(v2x, v1x, v1z \cdot v2z\right)\right| \end{array} \]
    (FPCore (v1x v1y v1z v2x v2y v2z)
     :precision binary64
     (fabs (fma v2x v1x (* v1z v2z))))
    double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
    	return fabs(fma(v2x, v1x, (v1z * v2z)));
    }
    
    function code(v1x, v1y, v1z, v2x, v2y, v2z)
    	return abs(fma(v2x, v1x, Float64(v1z * v2z)))
    end
    
    code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := N[Abs[N[(v2x * v1x + N[(v1z * v2z), $MachinePrecision]), $MachinePrecision]], $MachinePrecision]
    
    \begin{array}{l}
    
    \\
    \left|\mathsf{fma}\left(v2x, v1x, v1z \cdot v2z\right)\right|
    \end{array}
    
    Derivation
    1. Initial program 100.0%

      \[\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \]
    2. Add Preprocessing
    3. Taylor expanded in v1y around 0

      \[\leadsto \left|\color{blue}{v1x \cdot v2x + v1z \cdot v2z}\right| \]
    4. Step-by-step derivation
      1. +-commutativeN/A

        \[\leadsto \left|\color{blue}{v1z \cdot v2z + v1x \cdot v2x}\right| \]
      2. *-commutativeN/A

        \[\leadsto \left|\color{blue}{v2z \cdot v1z} + v1x \cdot v2x\right| \]
      3. lower-fma.f64N/A

        \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v1x \cdot v2x\right)}\right| \]
      4. *-commutativeN/A

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2x \cdot v1x}\right)\right| \]
      5. lower-*.f6472.2

        \[\leadsto \left|\mathsf{fma}\left(v2z, v1z, \color{blue}{v2x \cdot v1x}\right)\right| \]
    5. Applied rewrites72.2%

      \[\leadsto \left|\color{blue}{\mathsf{fma}\left(v2z, v1z, v2x \cdot v1x\right)}\right| \]
    6. Step-by-step derivation
      1. Applied rewrites72.2%

        \[\leadsto \left|\mathsf{fma}\left(v2x, \color{blue}{v1x}, v1z \cdot v2z\right)\right| \]
      2. Add Preprocessing

      Alternative 4: 43.3% accurate, 3.0× speedup?

      \[\begin{array}{l} \\ \left|v2z \cdot v1z\right| \end{array} \]
      (FPCore (v1x v1y v1z v2x v2y v2z) :precision binary64 (fabs (* v2z v1z)))
      double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
      	return fabs((v2z * v1z));
      }
      
      real(8) function code(v1x, v1y, v1z, v2x, v2y, v2z)
          real(8), intent (in) :: v1x
          real(8), intent (in) :: v1y
          real(8), intent (in) :: v1z
          real(8), intent (in) :: v2x
          real(8), intent (in) :: v2y
          real(8), intent (in) :: v2z
          code = abs((v2z * v1z))
      end function
      
      public static double code(double v1x, double v1y, double v1z, double v2x, double v2y, double v2z) {
      	return Math.abs((v2z * v1z));
      }
      
      def code(v1x, v1y, v1z, v2x, v2y, v2z):
      	return math.fabs((v2z * v1z))
      
      function code(v1x, v1y, v1z, v2x, v2y, v2z)
      	return abs(Float64(v2z * v1z))
      end
      
      function tmp = code(v1x, v1y, v1z, v2x, v2y, v2z)
      	tmp = abs((v2z * v1z));
      end
      
      code[v1x_, v1y_, v1z_, v2x_, v2y_, v2z_] := N[Abs[N[(v2z * v1z), $MachinePrecision]], $MachinePrecision]
      
      \begin{array}{l}
      
      \\
      \left|v2z \cdot v1z\right|
      \end{array}
      
      Derivation
      1. Initial program 100.0%

        \[\left|\left(v1x \cdot v2x + v1y \cdot v2y\right) + v1z \cdot v2z\right| \]
      2. Add Preprocessing
      3. Taylor expanded in v1z around inf

        \[\leadsto \left|\color{blue}{v1z \cdot v2z}\right| \]
      4. Step-by-step derivation
        1. *-commutativeN/A

          \[\leadsto \left|\color{blue}{v2z \cdot v1z}\right| \]
        2. lower-*.f6443.0

          \[\leadsto \left|\color{blue}{v2z \cdot v1z}\right| \]
      5. Applied rewrites43.0%

        \[\leadsto \left|\color{blue}{v2z \cdot v1z}\right| \]
      6. Add Preprocessing

      Reproduce

      ?
      herbie shell --seed 1 
      (FPCore (v1x v1y v1z v2x v2y v2z)
        :name "perp_calc_using_dot_prod"
        :precision binary64
        :pre (and (and (and (and (and (and (<= -1.0 v1x) (<= v1x 1.0)) (and (<= -1.0 v1y) (<= v1y 1.0))) (and (<= -1.0 v1z) (<= v1z 1.0))) (and (<= -1.0 v2x) (<= v2x 1.0))) (and (<= -1.0 v2y) (<= v2y 1.0))) (and (<= -1.0 v2z) (<= v2z 1.0)))
        (fabs (+ (+ (* v1x v2x) (* v1y v2y)) (* v1z v2z))))