[Bug 2101084] Re: GCC produces wrong code for arm64+sve in some cases

gerald.yang 2101084 at bugs.launchpad.net
Wed Dec 17 05:40:33 UTC 2025


** Description changed:

  [Impact]
  This issue affects SVE vectorization on arm64 platforms, specifically in cases where bitwise-not operations are applied during optimization.
  
  [Fix]
  This issue has been resolved by an upstream patch.
  
  commit 78380fd7f743e23dfdf013d68a2f0347e1511550
  Author: Richard Sandiford <richard.sandiford at arm.com>
  Date: Tue Mar 4 10:44:35 2025 +0000
  
-     Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]
+     Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]
  
-     There was an embarrassing typo in the folding of BIT_NOT_EXPR for
-     POLY_INT_CSTs: it used - rather than ~ on the poly_int.  Not sure
-     how that happened, but it might have been due to the way that
-     ~x is implemented as -1 - x internally.
+     There was an embarrassing typo in the folding of BIT_NOT_EXPR for
+     POLY_INT_CSTs: it used - rather than ~ on the poly_int.  Not sure
+     how that happened, but it might have been due to the way that
+     ~x is implemented as -1 - x internally.
  
-     gcc/
-             PR tree-optimization/118976
-             * fold-const.cc (const_unop): Use ~ rather than - for BIT_NOT_EXPR.
-             * config/aarch64/aarch64.cc (aarch64_test_sve_folding): New function.
-             (aarch64_run_selftests): Run it.
+     gcc/
+             PR tree-optimization/118976
+             * fold-const.cc (const_unop): Use ~ rather than - for BIT_NOT_EXPR.
+             * config/aarch64/aarch64.cc (aarch64_test_sve_folding): New function.
+             (aarch64_run_selftests): Run it.
  
  [Test Plan]
  1. Launch an instance using the latest generation of Graviton processors (Graviton4).
- 2. Compile the code provided in the following link using the command `gcc -O3 -march=armv8.1-a+sve`
- https://godbolt.org/z/c99bMjene
+ 2. Compile the following code using the command `gcc -O3 -march=armv8.1-a+sve`:
+ 
+ #include <stdint.h>
+ #include <stdio.h>
+ 
+ 
+ #ifndef NCOUNTS
+ #define NCOUNTS 2
+ #endif
+ typedef struct {
+    uint32_t state[5];
+    uint32_t count[NCOUNTS];
+    unsigned char buffer[64];
+ } SHA1_CTX;
+ 
+ 
+ void finalcount_av(SHA1_CTX *restrict ctx, unsigned char *restrict finalcount) {
+    // ctx->count is:  uint32_t count[2];
+    int count_idx;
+    for (int i = 0; i < 4*NCOUNTS; i++) {
+        count_idx = (4*NCOUNTS - i - 1)/4; // generic but equivalent for NCOUNTS==2.
+        finalcount[i] = (unsigned char)((ctx->count[count_idx] >> ((3-(i & 3)) * 8) ) & 255);
+    }
+ }
+ 
+ 
+ void finalcount_bv(SHA1_CTX *restrict ctx, unsigned char *restrict finalcount) {
+    for (int i=0; i < 4*NCOUNTS; i += 4) {
+        int ci = (4*NCOUNTS - i - 1)/4;
+        finalcount[i+0] = (unsigned char)((ctx->count[ci] >> (3 * 8) ) & 255);
+        finalcount[i+1] = (unsigned char)((ctx->count[ci] >> (2 * 8) ) & 255);
+        finalcount[i+2] = (unsigned char)((ctx->count[ci] >> (1 * 8) ) & 255);
+        finalcount[i+3] = (unsigned char)((ctx->count[ci] >> (0 * 8) ) & 255);
+    }
+ }
+ 
+ 
+ int main() {
+    unsigned char fa[NCOUNTS*4];
+    unsigned char fb[NCOUNTS*4];
+    uint32_t *for_print;
+    int i;
+   
+    SHA1_CTX ctx;
+    ctx.count[0] = 0xaaaaaa00;
+    ctx.count[1] = 0xbbbbbb00;
+    if (NCOUNTS >2 ) ctx.count[2] = 0xcccccc00;
+    if (NCOUNTS >3 ) ctx.count[3] = 0xdddddd00;
+    finalcount_av(&ctx, fa);
+    finalcount_bv(&ctx, fb);
+ 
+ 
+    int ok = 1;
+    for (i=0; i<NCOUNTS*4; i++) {
+        ok &= fa[i] == fb[i];
+    }
+    if (!ok) {
+        for_print = (uint32_t*)fb;
+        printf("ERROR: expected ");
+        for (i=0; i<NCOUNTS; i++) {
+            printf("0x%08x ",for_print[i]);
+        }
+        for_print = (uint32_t*)fa;
+        printf("but got ");
+        for (i=0; i<NCOUNTS; i++) {
+            printf("0x%08x ",for_print[i]);
+        }
+        printf("\n");
+        return 1;
+    } else {
+        for_print = (uint32_t*)fa;
+        printf("PASS: got ");
+        for (i=0; i<NCOUNTS; i++) {
+            printf("0x%08x ",for_print[i]);
+        }
+        printf("as expected\n");
+        return 0;
+    }
+ }
+ 
  3. Verify that the execution output does not contain the string "ERROR".
  
  [Where problems could occur]
  The issue is caused by a typo. If any regressions occur, they are expected to impact only specific partial instructions under certain scenarios, rather than disrupting the overall functionality.

-- 
You received this bug notification because you are a member of Ubuntu
Foundations Bugs, which is subscribed to gcc-10 in Ubuntu.
https://bugs.launchpad.net/bugs/2101084

Title:
  GCC produces wrong code for arm64+sve in some cases

Status in gcc:
  Fix Released
Status in Ubuntu Pro:
  In Progress
Status in Ubuntu Pro 20.04 series:
  In Progress
Status in gcc-10 package in Ubuntu:
  New
Status in gcc-11 package in Ubuntu:
  New
Status in gcc-13 package in Ubuntu:
  New
Status in gcc-14 package in Ubuntu:
  New
Status in gcc-8 package in Ubuntu:
  Won't Fix
Status in gcc-9 package in Ubuntu:
  New
Status in gcc-10 source package in Focal:
  Won't Fix
Status in gcc-8 source package in Focal:
  Won't Fix
Status in gcc-9 source package in Focal:
  Won't Fix
Status in gcc-10 source package in Jammy:
  New
Status in gcc-11 source package in Jammy:
  In Progress
Status in gcc-9 source package in Jammy:
  New
Status in gcc-10 source package in Noble:
  New
Status in gcc-11 source package in Noble:
  New
Status in gcc-13 source package in Noble:
  In Progress
Status in gcc-14 source package in Noble:
  New
Status in gcc-9 source package in Noble:
  New
Status in gcc-11 source package in Oracular:
  Won't Fix
Status in gcc-13 source package in Oracular:
  Won't Fix
Status in gcc-14 source package in Oracular:
  Won't Fix
Status in gcc-11 source package in Plucky:
  New
Status in gcc-13 source package in Plucky:
  Won't Fix
Status in gcc-14 source package in Plucky:
  Won't Fix
Status in gcc-11 source package in Questing:
  New
Status in gcc-13 source package in Questing:
  Won't Fix
Status in gcc-14 source package in Questing:
  Won't Fix

Bug description:
  [Impact]
  This issue affects SVE vectorization on arm64 platforms, specifically in cases where bitwise-not operations are applied during optimization.

  [Fix]
  This issue has been resolved by an upstream patch.

  commit 78380fd7f743e23dfdf013d68a2f0347e1511550
  Author: Richard Sandiford <richard.sandiford at arm.com>
  Date: Tue Mar 4 10:44:35 2025 +0000

      Fix folding of BIT_NOT_EXPR for POLY_INT_CST [PR118976]

      There was an embarrassing typo in the folding of BIT_NOT_EXPR for
      POLY_INT_CSTs: it used - rather than ~ on the poly_int.  Not sure
      how that happened, but it might have been due to the way that
      ~x is implemented as -1 - x internally.

      gcc/
              PR tree-optimization/118976
              * fold-const.cc (const_unop): Use ~ rather than - for BIT_NOT_EXPR.
              * config/aarch64/aarch64.cc (aarch64_test_sve_folding): New function.
              (aarch64_run_selftests): Run it.

  [Test Plan]
  1. Launch an instance using the latest generation of Graviton processors (Graviton4).
  2. Compile the following code using the command `gcc -O3 -march=armv8.1-a+sve`:

  #include <stdint.h>
  #include <stdio.h>

  
  // Number of 32-bit words in SHA1_CTX.count. Defaults to 2 and can be
  // overridden at compile time (e.g. -DNCOUNTS=3).
  #ifndef NCOUNTS
  #define NCOUNTS 2
  #endif
  // Context structure used by the reproducer. Only the `count` member is
  // accessed by the code in this listing; `state` and `buffer` are never
  // read or written here.
  typedef struct {
     uint32_t state[5];
     uint32_t count[NCOUNTS];
     unsigned char buffer[64];
  } SHA1_CTX;

  
  // Serialises ctx->count into finalcount one byte per loop iteration.
  // Writes 4*NCOUNTS bytes: words are taken from the highest index down to
  // index 0, and each word is emitted most-significant byte first.
  // main() reports this function's output as the "got" value, i.e. this is
  // the variant under test.
  // NOTE(review): presumably this byte-at-a-time loop is the form that gets
  // auto-vectorized and hits the compiler bug -- keep it exactly as written.
  void finalcount_av(SHA1_CTX *restrict ctx, unsigned char *restrict finalcount) {
     // ctx->count is declared as: uint32_t count[NCOUNTS]; (NCOUNTS defaults to 2)
     int count_idx;
     for (int i = 0; i < 4*NCOUNTS; i++) {
         // Bytes 0..3 come from the last word, bytes 4..7 from the one before, etc.
         count_idx = (4*NCOUNTS - i - 1)/4; // generic but equivalent for NCOUNTS==2.
         // (3-(i & 3)) * 8 selects shifts of 24, 16, 8, 0 in turn, so the
         // high byte of each word is stored first.
         finalcount[i] = (unsigned char)((ctx->count[count_idx] >> ((3-(i & 3)) * 8) ) & 255);
     }
  }

  
  // Reference implementation of the same serialisation as finalcount_av,
  // written with one loop iteration per 32-bit word and the four byte
  // extractions spelled out with constant shifts.
  // Writes 4*NCOUNTS bytes to finalcount; main() reports this function's
  // output as the "expected" value.
  void finalcount_bv(SHA1_CTX *restrict ctx, unsigned char *restrict finalcount) {
     for (int i=0; i < 4*NCOUNTS; i += 4) {
         // Same word-index formula as finalcount_av: highest-index word first.
         int ci = (4*NCOUNTS - i - 1)/4;
         // Most-significant byte first within each word.
         finalcount[i+0] = (unsigned char)((ctx->count[ci] >> (3 * 8) ) & 255);
         finalcount[i+1] = (unsigned char)((ctx->count[ci] >> (2 * 8) ) & 255);
         finalcount[i+2] = (unsigned char)((ctx->count[ci] >> (1 * 8) ) & 255);
         finalcount[i+3] = (unsigned char)((ctx->count[ci] >> (0 * 8) ) & 255);
     }
  }

  
  // Test driver: fills ctx.count with known values, runs both serialisers
  // and compares their outputs byte by byte.
  // Prints a line starting with "PASS" and returns 0 when the outputs match;
  // prints a line starting with "ERROR" and returns 1 when they differ.
  int main() {
     unsigned char fa[NCOUNTS*4];   // output of finalcount_av (value under test)
     unsigned char fb[NCOUNTS*4];   // output of finalcount_bv (reference value)
     uint32_t *for_print;
     int i;
    
     SHA1_CTX ctx;
     // Only ctx.count is initialised; state and buffer are left untouched
     // because nothing in this program reads them.
     ctx.count[0] = 0xaaaaaa00;
     ctx.count[1] = 0xbbbbbb00;
     // The extra words exist only when NCOUNTS is overridden to 3 or 4.
     // NOTE(review): for NCOUNTS > 4 the words from index 4 upward are not
     // initialised here.
     if (NCOUNTS >2 ) ctx.count[2] = 0xcccccc00;
     if (NCOUNTS >3 ) ctx.count[3] = 0xdddddd00;
     finalcount_av(&ctx, fa);
     finalcount_bv(&ctx, fb);

  
     // ok stays 1 only if every byte of fa equals the corresponding byte of fb.
     int ok = 1;
     for (i=0; i<NCOUNTS*4; i++) {
         ok &= fa[i] == fb[i];
     }
     // NOTE(review): the printing below reads the unsigned char arrays through
     // a uint32_t pointer. That is not permitted by the C aliasing rules and
     // assumes the arrays are suitably aligned; it is used for display only
     // and does not take part in the comparison above.
     if (!ok) {
         for_print = (uint32_t*)fb;
         printf("ERROR: expected ");
         for (i=0; i<NCOUNTS; i++) {
             printf("0x%08x ",for_print[i]);
         }
         for_print = (uint32_t*)fa;
         printf("but got ");
         for (i=0; i<NCOUNTS; i++) {
             printf("0x%08x ",for_print[i]);
         }
         printf("\n");
         return 1;
     } else {
         for_print = (uint32_t*)fa;
         printf("PASS: got ");
         for (i=0; i<NCOUNTS; i++) {
             printf("0x%08x ",for_print[i]);
         }
         printf("as expected\n");
         return 0;
     }
  }

  3. Run the resulting binary and verify that its output does not
  contain the string "ERROR".

  [Where problems could occur]
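  The issue is caused by a typo: the folding of BIT_NOT_EXPR for POLY_INT_CST values used - instead of ~. If any regressions occur, they are expected to be limited to code that goes through this specific constant-folding path (for example, SVE-vectorized code on arm64), rather than disrupting the compiler's overall functionality.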

To manage notifications about this bug go to:
https://bugs.launchpad.net/gcc/+bug/2101084/+subscriptions




More information about the foundations-bugs mailing list