SmallSteps:
!! Loop.unroll [cFor ~body:[cPlusEq ~lhs:[cVar "s"] ()] "k"];

(* !! postprocessing (); *)
)
[46] !! Loop.unroll [cFor ~body:[cPlusEq ~lhs:[cVar "s"] ()] "k"]; (* !! postprocessing (); *) )

    
tmp_before.cpp → tmp_after.cpp RENAMED
@@ -107,39 +107,88 @@ void mm1024(float* C, float* A, float* B) {
107
  "Group(range(0, 4, 1), fun k -> Group(range(0, 32, 1), fun j -> "
108
  "&pB[bj][bk][k][j] ~> Cell))");
109
  __sreads("A ~> Matrix2(1024, 1024)");
110
  __sreads("B ~> Matrix2(1024, 1024)");
111
  __xmodifies("Group(range(0, 32, 1), fun j -> &sum[i][j] ~> Cell)");
112
  float* const s = new float(32)();
113
  const __ghost_fn __ghost_pair_6 =
114
  __ghost_begin(mindex2_contiguous_ro, "M := sum");
115
  MMEMCPY(s, 0, sum, i * 32 + 0, 32, sizeof(float));
116
  __ghost_end(__ghost_pair_6);
 
 
 
 
 
 
117
- for (int k = 0; k < 4; k++) {
118
- __smodifies("Group(range(0, 32, 1), fun j -> &s[j] ~> Cell)");
119
  __sreads("A ~> Matrix2(1024, 1024)");
120
  __sreads("B ~> Matrix2(1024, 1024)");
121
- __xreads(
122
- "Group(range(0, 32, 1), fun j -> &pB[bj][bk][k][j] ~> Cell)");
 
 
 
 
 
 
 
 
 
 
 
123
  #pragma omp simd
124
  for (int j = 0; j < 32; j++) {
125
  __sreads("A ~> Matrix2(1024, 1024)");
126
  __sreads("B ~> Matrix2(1024, 1024)");
127
  __xmodifies("&s[j] ~> Cell");
128
- __xreads("&pB[bj][bk][k][j] ~> Cell");
129
- const __ghost_fn __ghost_pair_2 =
130
- __ghost_begin(matrix2_ro_focus,
131
- "M := A, i := bi * 32 + i, j := bk * 4 + k");
132
- s[j] += A[bi * 32 + i][bk * 4 + k] * pB[bj][bk][k][j];
133
  __ghost_end(__ghost_pair_2);
134
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  }
 
136
  __ghost(mindex2_contiguous_uninit, "M := sum");
137
  MMEMCPY(sum, i * 32 + 0, s, 0, 32, sizeof(float));
138
  __ghost(mindex2_contiguous_rev, "M := sum");
139
  }
140
  }
141
  for (int i = 0; i < 32; i++) {
142
  __consumes(
143
  "_Uninit(Group(range(0, 32, 1), fun j -> &C[bi * 32 + i][bj * 32 + "
144
  "j] ~> Cell))");
145
  __produces(
107
  "Group(range(0, 4, 1), fun k -> Group(range(0, 32, 1), fun j -> "
108
  "&pB[bj][bk][k][j] ~> Cell))");
109
  __sreads("A ~> Matrix2(1024, 1024)");
110
  __sreads("B ~> Matrix2(1024, 1024)");
111
  __xmodifies("Group(range(0, 32, 1), fun j -> &sum[i][j] ~> Cell)");
112
  float* const s = new float(32)();
113
  const __ghost_fn __ghost_pair_6 =
114
  __ghost_begin(mindex2_contiguous_ro, "M := sum");
115
  MMEMCPY(s, 0, sum, i * 32 + 0, 32, sizeof(float));
116
  __ghost_end(__ghost_pair_6);
117
+ const __ghost_fn __ghost_pair_7 = __ghost_begin(
118
+ group_ro_focus,
119
+ "i := 0, items := fun k -> Group(range(0, 32, 1), fun j -> "
120
+ "&pB[bj][bk][k][j] ~> Cell), bound_check_start := checked, "
121
+ "bound_check_stop := checked, bound_check_step := checked");
122
+ #pragma omp simd
123
+ for (int j = 0; j < 32; j++) {
 
124
  __sreads("A ~> Matrix2(1024, 1024)");
125
  __sreads("B ~> Matrix2(1024, 1024)");
126
+ __xmodifies("&s[j] ~> Cell");
127
+ __xreads("&pB[bj][bk][0][j] ~> Cell");
128
+ const __ghost_fn __ghost_pair_2 = __ghost_begin(
129
+ matrix2_ro_focus, "M := A, i := 32 * bi + i, j := 4 * bk");
130
+ s[j] += A[32 * bi + i][4 * bk] * pB[bj][bk][0][j];
131
+ __ghost_end(__ghost_pair_2);
132
+ }
133
+ __ghost_end(__ghost_pair_7);
134
+ const __ghost_fn __ghost_pair_712 = __ghost_begin(
135
+ group_ro_focus,
136
+ "i := 1, items := fun k -> Group(range(0, 32, 1), fun j -> "
137
+ "&pB[bj][bk][k][j] ~> Cell), bound_check_start := checked, "
138
+ "bound_check_stop := checked, bound_check_step := checked");
139
  #pragma omp simd
140
  for (int j = 0; j < 32; j++) {
141
  __sreads("A ~> Matrix2(1024, 1024)");
142
  __sreads("B ~> Matrix2(1024, 1024)");
143
  __xmodifies("&s[j] ~> Cell");
144
+ __xreads("&pB[bj][bk][1][j] ~> Cell");
145
+ const __ghost_fn __ghost_pair_2 = __ghost_begin(
 
146
+ matrix2_ro_focus, "M := A, i := 32 * bi + i, j := 4 * bk + 1");
147
+ s[j] += A[32 * bi + i][4 * bk + 1] * pB[bj][bk][1][j];
148
  __ghost_end(__ghost_pair_2);
149
  }
150
+ __ghost_end(__ghost_pair_712);
151
+ const __ghost_fn __ghost_pair_713 = __ghost_begin(
152
+ group_ro_focus,
153
+ "i := 2, items := fun k -> Group(range(0, 32, 1), fun j -> "
154
+ "&pB[bj][bk][k][j] ~> Cell), bound_check_start := checked, "
155
+ "bound_check_stop := checked, bound_check_step := checked");
156
+ #pragma omp simd
157
+ for (int j = 0; j < 32; j++) {
158
+ __sreads("A ~> Matrix2(1024, 1024)");
159
+ __sreads("B ~> Matrix2(1024, 1024)");
160
+ __xmodifies("&s[j] ~> Cell");
161
+ __xreads("&pB[bj][bk][2][j] ~> Cell");
162
+ const __ghost_fn __ghost_pair_2 = __ghost_begin(
163
+ matrix2_ro_focus, "M := A, i := 32 * bi + i, j := 4 * bk + 2");
164
+ s[j] += A[32 * bi + i][4 * bk + 2] * pB[bj][bk][2][j];
165
+ __ghost_end(__ghost_pair_2);
166
+ }
167
+ __ghost_end(__ghost_pair_713);
168
+ const __ghost_fn __ghost_pair_714 = __ghost_begin(
169
+ group_ro_focus,
170
+ "i := 3, items := fun k -> Group(range(0, 32, 1), fun j -> "
171
+ "&pB[bj][bk][k][j] ~> Cell), bound_check_start := checked, "
172
+ "bound_check_stop := checked, bound_check_step := checked");
173
+ #pragma omp simd
174
+ for (int j = 0; j < 32; j++) {
175
+ __sreads("A ~> Matrix2(1024, 1024)");
176
+ __sreads("B ~> Matrix2(1024, 1024)");
177
+ __xmodifies("&s[j] ~> Cell");
178
+ __xreads("&pB[bj][bk][3][j] ~> Cell");
179
+ const __ghost_fn __ghost_pair_2 = __ghost_begin(
180
+ matrix2_ro_focus, "M := A, i := 32 * bi + i, j := 4 * bk + 3");
181
+ s[j] += A[32 * bi + i][4 * bk + 3] * pB[bj][bk][3][j];
182
+ __ghost_end(__ghost_pair_2);
183
  }
184
+ __ghost_end(__ghost_pair_714);
185
  __ghost(mindex2_contiguous_uninit, "M := sum");
186
  MMEMCPY(sum, i * 32 + 0, s, 0, 32, sizeof(float));
187
  __ghost(mindex2_contiguous_rev, "M := sum");
188
  }
189
  }
190
  for (int i = 0; i < 32; i++) {
191
  __consumes(
192
  "_Uninit(Group(range(0, 32, 1), fun j -> &C[bi * 32 + i][bj * 32 + "
193
  "j] ~> Cell))");
194
  __produces(