@@ -556,62 +556,64 @@ class ThreeBoxApproxPass final : public Pass {
556
556
skvx::Vec<4 , uint32_t >* buffer0Cursor = fBuffer0Cursor ;
557
557
skvx::Vec<4 , uint32_t >* buffer1Cursor = fBuffer1Cursor ;
558
558
skvx::Vec<4 , uint32_t >* buffer2Cursor = fBuffer2Cursor ;
559
- v4u32 sum0 = __lsx_vld (fSum0 , 0 ); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
560
- v4u32 sum1 = __lsx_vld (fSum1 , 0 );
561
- v4u32 sum2 = __lsx_vld (fSum2 , 0 );
559
+ v4u32 sum0 = (v4u32) __lsx_vld (fSum0 , 0 ); // same as skvx::Vec<4, uint32_t>::Load(fSum0);
560
+ v4u32 sum1 = (v4u32) __lsx_vld (fSum1 , 0 );
561
+ v4u32 sum2 = (v4u32) __lsx_vld (fSum2 , 0 );
562
562
563
563
// processValue: folds one incoming vector (vLeadingEdge) into the three running
// sums, derives an output from sum2 scaled by fDivider.divisorFactor(), then for
// each of the three buffers subtracts the value at the cursor from the matching
// sum, overwrites that slot, and advances the cursor with wrap-around.
// Captures by reference, so sum0/sum1/sum2 and the three cursors are updated in
// the enclosing scope. Returns the byte-shuffled scaled result as a v16u8.
// In this diff rendering, '-' lines are the previous statements and '+' lines
// are their replacements, which add explicit casts between the typed vectors
// (v4u32 / v16u8) and __m128i around the LSX intrinsics.
auto processValue = [&](v4u32& vLeadingEdge){
564
564
// Cascade the new value: sum0 accumulates the input, sum1 accumulates sum0,
// and sum2 accumulates sum1.
sum0 += vLeadingEdge;
565
565
sum1 += sum0;
566
566
sum2 += sum1;
567
567
// Scale sum2 by the broadcast divisor factor.
// NOTE(review): __lsx_vmuh_w looks like the signed multiply-high variant while
// the lanes are declared unsigned (v4u32) — confirm against the LSX reference.
568
- v4u32 divisorFactor = __lsx_vreplgr2vr_w (fDivider .divisorFactor ());
569
- v4u32 blurred = __lsx_vmuh_w (divisorFactor, sum2);
568
+ v4u32 divisorFactor = (v4u32) __lsx_vreplgr2vr_w (fDivider .divisorFactor ());
569
+ v4u32 blurred = (v4u32) __lsx_vmuh_w ((__m128i) divisorFactor, (__m128i) sum2);
570
570
// Buffer 2: subtract the slot's current value from sum2, store sum1 into the
// slot, then advance the cursor, wrapping back to fBuffer2 at fBuffersEnd.
571
- v4u32 buffer2Value = __lsx_vld (buffer2Cursor, 0 ); // Not fBuffer0Cursor, out of bounds.
571
+ v4u32 buffer2Value = (v4u32) __lsx_vld (buffer2Cursor, 0 ); // Not fBuffer0Cursor, out of bounds.
572
572
sum2 -= buffer2Value;
573
- __lsx_vst (sum1, (void *)buffer2Cursor, 0 );
573
+ __lsx_vst ((__m128i) sum1, (void *)buffer2Cursor, 0 );
574
574
buffer2Cursor = (buffer2Cursor + 1 ) < fBuffersEnd ? buffer2Cursor + 1 : fBuffer2 ;
// Buffer 1: same pattern with sum1/sum0; the cursor wraps back to fBuffer1
// once it would reach fBuffer2.
575
- v4u32 buffer1Value = __lsx_vld (buffer1Cursor, 0 );
575
+ v4u32 buffer1Value = (v4u32) __lsx_vld (buffer1Cursor, 0 );
576
576
sum1 -= buffer1Value;
577
- __lsx_vst (sum0, (void *)buffer1Cursor, 0 );
577
+ __lsx_vst ((__m128i) sum0, (void *)buffer1Cursor, 0 );
578
578
buffer1Cursor = (buffer1Cursor + 1 ) < fBuffer2 ? buffer1Cursor + 1 : fBuffer1 ;
// Buffer 0: same pattern with sum0 and the raw input; the cursor wraps back to
// fBuffer0 once it would reach fBuffer1.
579
- v4u32 buffer0Value = __lsx_vld (buffer0Cursor, 0 );
579
+ v4u32 buffer0Value = (v4u32) __lsx_vld (buffer0Cursor, 0 );
580
580
sum0 -= buffer0Value;
581
- __lsx_vst (vLeadingEdge, (void *)buffer0Cursor, 0 );
581
+ __lsx_vst ((__m128i) vLeadingEdge, (void *)buffer0Cursor, 0 );
582
582
buffer0Cursor = (buffer0Cursor + 1 ) < fBuffer1 ? buffer0Cursor + 1 : fBuffer0 ;
583
583
// Byte-shuffle control: indices 0x0, 0x4, 0x8, 0xc pick one byte from each
// 32-bit lane of `blurred`; initializer elements not listed are zero.
// Presumably this packs the four per-lane results into the first four bytes —
// TODO confirm against the __lsx_vshuf_b operand order.
584
584
v16u8 shuf = {0x0 ,0x4 ,0x8 ,0xc ,0x0 };
585
- v16u8 ret = __lsx_vshuf_b (blurred, blurred, shuf);
585
+ v16u8 ret = (v16u8) __lsx_vshuf_b ((__m128i) blurred, (__m128i) blurred, (__m128i) shuf);
586
586
return ret;
587
587
};
588
588
589
- v4u32 zero = __lsx_vldi (0x0 );
589
+ v4u32 zero = (v4u32) __lsx_vldi (0x0 );
590
590
if (!src32 && !dst32) {
591
591
while (n --> 0 ) {
592
592
(void )processValue (zero);
593
593
}
594
594
} else if (src32 && !dst32) {
595
595
while (n --> 0 ) {
596
- v4u32 edge = __lsx_vinsgr2vr_w (zero, *src32, 0 );
597
- edge = __lsx_vilvl_b (zero, edge);
598
- edge = __lsx_vilvl_h (zero, edge);
596
+ v4u32 edge = (v4u32) __lsx_vinsgr2vr_w ((__m128i) zero, *src32, 0 );
597
+ edge = (v4u32) __lsx_vilvl_b ((__m128i) zero, (__m128i) edge);
598
+ edge = (v4u32) __lsx_vilvl_h ((__m128i) zero, (__m128i) edge);
599
599
(void )processValue (edge);
600
600
src32 += srcStride;
601
601
}
602
602
} else if (!src32 && dst32) {
603
603
while (n --> 0 ) {
604
- v4u32 ret = processValue (zero);
605
- __lsx_vstelm_w (ret, dst32, 0 , 0 ); // 3rd is offset, 4th is idx.
604
+ v16u8 ret_vec = processValue (zero);
605
+ v4u32 ret = (v4u32)ret_vec;
606
+ __lsx_vstelm_w ((__m128i)ret, dst32, 0 , 0 ); // 3rd is offset, 4th is idx.
606
607
dst32 += dstStride;
607
608
}
608
609
} else if (src32 && dst32) {
609
610
while (n --> 0 ) {
610
- v4u32 edge = __lsx_vinsgr2vr_w (zero, *src32, 0 );
611
- edge = __lsx_vilvl_b (zero, edge);
612
- edge = __lsx_vilvl_h (zero, edge);
613
- v4u32 ret = processValue (edge);
614
- __lsx_vstelm_w (ret, dst32, 0 , 0 );
611
+ v4u32 edge = (v4u32)__lsx_vinsgr2vr_w ((__m128i)zero, *src32, 0 );
612
+ edge = (v4u32)__lsx_vilvl_b ((__m128i)zero, (__m128i)edge);
613
+ edge = (v4u32)__lsx_vilvl_h ((__m128i)zero, (__m128i)edge);
614
+ v16u8 ret_vec = processValue (edge);
615
+ v4u32 ret = (v4u32)ret_vec;
616
+ __lsx_vstelm_w ((__m128i)ret, dst32, 0 , 0 );
615
617
src32 += srcStride;
616
618
dst32 += dstStride;
617
619
}
@@ -622,9 +624,9 @@ class ThreeBoxApproxPass final : public Pass {
622
624
fBuffer1Cursor = buffer1Cursor;
623
625
fBuffer2Cursor = buffer2Cursor;
624
626
625
- __lsx_vst (sum0, fSum0 , 0 );
626
- __lsx_vst (sum1, fSum1 , 0 );
627
- __lsx_vst (sum2, fSum2 , 0 );
627
+ __lsx_vst ((__m128i) sum0, fSum0 , 0 );
628
+ __lsx_vst ((__m128i) sum1, fSum1 , 0 );
629
+ __lsx_vst ((__m128i) sum2, fSum2 , 0 );
628
630
#else
629
631
skvx::Vec<4 , uint32_t >* buffer0Cursor = fBuffer0Cursor ;
630
632
skvx::Vec<4 , uint32_t >* buffer1Cursor = fBuffer1Cursor ;
0 commit comments