Ignacio Castaño on Nostr: OK, I narrowed it down to: //t2.x ^= -i16(((sel.x >> 2) & u16(1))); //t2.y ^= ...
OK, I narrowed it down to:
//t2.x ^= -i16(((sel.x >> 2) & u16(1)));
//t2.y ^= -i16(((sel.y >> 2) & u16(1)));
if ((sel.x & u16(4)) != u16(0)) t2.x = ~t2.x;
if ((sel.y & u16(4)) != u16(0)) t2.y = ~t2.y;
//if (sel.x >= u16(4)) t2.x = ~t2.x;
//if (sel.y >= u16(4)) t2.y = ~t2.y;
Somehow, the variant that is not commented out is 30% faster on Mali-G57.
//t2.x ^= -i16(((sel.x >> 2) & u16(1)));
//t2.y ^= -i16(((sel.y >> 2) & u16(1)));
if ((sel.x & u16(4)) != u16(0)) t2.x = ~t2.x;
if ((sel.y & u16(4)) != u16(0)) t2.y = ~t2.y;
//if (sel.x >= u16(4)) t2.x = ~t2.x;
//if (sel.y >= u16(4)) t2.y = ~t2.y;
Somehow, the variant that is not commented out is 30% faster on Mali-G57.