Hi:
I was going through the C implementation of the dsplib function dsp_gen_fir. I want to modify it for 40-bit accumulation instead of 32-bit.
Below is a snippet from the C code. My first question is regarding the following two instructions in the snippet:
sum_10 = _packh2(sum1 << 1, sum0 << 1);
sum_32 = _packh2(sum3 << 1, sum2 << 1);
Why the left shift?
Also, since _hill and _loll return unsigned 32-bit integers, sum0, sum1, sum2, and sum3 are generated by additions of 32-bit unsigned integers, as follows:
sum3 += _hill(r3) + _hill(r1);
sum2 += _loll(r3) + _loll(r1);
sum1 += _hill(r2) + _hill(r0);
sum0 += _loll(r2) + _loll(r0);
They should in fact be generated as sums of 32-bit signed integers.
Thanks a lot!
Cheers,
Mushtaq
/*---------------------------------------------------------------------*/
/* Outer loop: each pass produces four consecutive outputs r[j..j+3].  */
/* sum0..sum3 are the accumulators for those four outputs; they are    */
/* cleared at the start of every pass.                                 */
/* NOTE(review): the declarations of sum0..sum3, h_3210_mod and the    */
/* other temporaries are outside this fragment - confirm their types   */
/* (in particular the signedness of the accumulators).                 */
/*---------------------------------------------------------------------*/
for (j = 0; j < nr; j += 4) {
sum0 = 0;
sum1 = 0;
sum2 = 0;
sum3 = 0;
/* Optimizer hints: x is asserted to be 8-byte aligned, and the inner   */
/* loop is declared to run at least once (MUST_ITERATE minimum of 1).  */
_nassert((int)x % 8 == 0);
#pragma MUST_ITERATE(1,,1)
for (i = 0; i < nh; i += 4) {
/* Fetch four taps and three overlapping 4-sample input windows        */
/* starting at x[i+j], x[i+j+1] and x[i+j+4], each as one 8-byte load. */
h_3210 = _mem8_const(&h[i]);
x_3210 = _mem8_const(&x[i + j]);
x_4321 = _mem8_const(&x[i + j + 1]);
x_7654 = _mem8_const(&x[i + j + 4]);
/*-------------------------------------------------------------*/
/* Use modified taps during the last iteration of the loop. */
/*-------------------------------------------------------------*/
if (i >= nh - 4)
h_3210 = h_3210_mod;
/* Split the four packed taps into the upper pair (h3,h2) and the      */
/* lower pair (h1,h0).                                                 */
h_32 = _hill(h_3210);
h_10 = _loll(h_3210);
/* Each double dot product yields two 32-bit partial sums packed in a  */
/* 64-bit result; the trailing comments list the high word, then the   */
/* low word.                                                           */
r3 = _ddotpl2(x_7654, h_32); // x6h3+x5h2, x5h3+x4h2
r2 = _ddotph2(x_4321, h_32); // x4h3+x3h2, x3h3+x2h2
r1 = _ddotph2(x_4321, h_10); // x4h1+x3h0, x3h1+x2h0
r0 = _ddotpl2(x_3210, h_10); // x2h1+x1h0, x1h1+x0h0
/* Combine the matching h_32 and h_10 partial sums for each of the     */
/* four outputs and add them to the running accumulators.              */
sum3 += _hill(r3) + _hill(r1);
sum2 += _loll(r3) + _loll(r1);
sum1 += _hill(r2) + _hill(r0);
sum0 += _loll(r2) + _loll(r0);
}
/* Scale and pack: _packh2 keeps the upper 16 bits of each operand, so */
/* shifting left by 1 first makes the stored value bits 30..15 of the  */
/* accumulator, i.e. (sum >> 15) truncated to 16 bits. This looks like */
/* Q15 output scaling - confirm against the function's specification.  */
sum_10 = _packh2(sum1 << 1, sum0 << 1);
sum_32 = _packh2(sum3 << 1, sum2 << 1);
/* Write the four packed 16-bit results to r[j..j+3] in one 8-byte     */
/* store.                                                              */
_mem8(&r[j]) = _itoll(sum_32, sum_10);
}